1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
59 /* Return index of given mode in mult and division cost tables. */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
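/* Illustrative sketch (not part of the original sources): the per-mode cost
   arrays below have five entries -- QI, HI, SI, DI and "other" -- so the
   SImode multiply cost for a given cost table would be looked up roughly as
   follows.  The field name mult_init is assumed from struct processor_costs
   in i386.h.  */
#if 0
static int
example_simode_mult_cost (const struct processor_costs *c)
{
  return c->mult_init[MODE_INDEX (SImode)];	/* index 2 selects SImode */
}
#endif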
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
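/* Worked example (illustrative): with COSTS_N_INSNS (N) == (N) * 4 an
   average instruction costs 4 units, so COSTS_N_BYTES (2) == 4 makes a
   2-byte addition in the size table weigh the same as one instruction in
   the cycle-based tables, keeping the two scales roughly comparable.  */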
struct processor_costs size_cost = {	/* costs for tuning for size */
73 COSTS_N_BYTES (2), /* cost of an add instruction */
74 COSTS_N_BYTES (3), /* cost of a lea instruction */
75 COSTS_N_BYTES (2), /* variable shift costs */
76 COSTS_N_BYTES (3), /* constant shift costs */
77 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
78 COSTS_N_BYTES (3), /* HI */
79 COSTS_N_BYTES (3), /* SI */
80 COSTS_N_BYTES (3), /* DI */
81 COSTS_N_BYTES (5)}, /* other */
82 0, /* cost of multiply per each bit set */
83 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
84 COSTS_N_BYTES (3), /* HI */
85 COSTS_N_BYTES (3), /* SI */
86 COSTS_N_BYTES (3), /* DI */
87 COSTS_N_BYTES (5)}, /* other */
88 COSTS_N_BYTES (3), /* cost of movsx */
89 COSTS_N_BYTES (3), /* cost of movzx */
92 2, /* cost for loading QImode using movzbl */
93 {2, 2, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 2, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {2, 2, 2}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {2, 2, 2}, /* cost of storing fp registers
101 in SFmode, DFmode and XFmode */
102 3, /* cost of moving MMX register */
103 {3, 3}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {3, 3}, /* cost of storing MMX registers
106 in SImode and DImode */
107 3, /* cost of moving SSE register */
108 {3, 3, 3}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {3, 3, 3}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
113 0, /* size of prefetch block */
114 0, /* number of parallel prefetches */
116 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
117 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
118 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
119 COSTS_N_BYTES (2), /* cost of FABS instruction. */
120 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
121 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 /* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
127 COSTS_N_INSNS (1), /* cost of an add instruction */
128 COSTS_N_INSNS (1), /* cost of a lea instruction */
129 COSTS_N_INSNS (3), /* variable shift costs */
130 COSTS_N_INSNS (2), /* constant shift costs */
131 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
132 COSTS_N_INSNS (6), /* HI */
133 COSTS_N_INSNS (6), /* SI */
134 COSTS_N_INSNS (6), /* DI */
135 COSTS_N_INSNS (6)}, /* other */
136 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
137 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
138 COSTS_N_INSNS (23), /* HI */
139 COSTS_N_INSNS (23), /* SI */
140 COSTS_N_INSNS (23), /* DI */
141 COSTS_N_INSNS (23)}, /* other */
142 COSTS_N_INSNS (3), /* cost of movsx */
143 COSTS_N_INSNS (2), /* cost of movzx */
144 15, /* "large" insn */
146 4, /* cost for loading QImode using movzbl */
147 {2, 4, 2}, /* cost of loading integer registers
148 in QImode, HImode and SImode.
149 Relative to reg-reg move (2). */
150 {2, 4, 2}, /* cost of storing integer registers */
151 2, /* cost of reg,reg fld/fst */
152 {8, 8, 8}, /* cost of loading fp registers
153 in SFmode, DFmode and XFmode */
154 {8, 8, 8}, /* cost of storing fp registers
155 in SFmode, DFmode and XFmode */
156 2, /* cost of moving MMX register */
157 {4, 8}, /* cost of loading MMX registers
158 in SImode and DImode */
159 {4, 8}, /* cost of storing MMX registers
160 in SImode and DImode */
161 2, /* cost of moving SSE register */
162 {4, 8, 16}, /* cost of loading SSE registers
163 in SImode, DImode and TImode */
164 {4, 8, 16}, /* cost of storing SSE registers
165 in SImode, DImode and TImode */
166 3, /* MMX or SSE register to integer */
167 0, /* size of prefetch block */
168 0, /* number of parallel prefetches */
170 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
171 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
172 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
173 COSTS_N_INSNS (22), /* cost of FABS instruction. */
174 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
175 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
struct processor_costs i486_cost = {	/* 486 specific costs */
180 COSTS_N_INSNS (1), /* cost of an add instruction */
181 COSTS_N_INSNS (1), /* cost of a lea instruction */
182 COSTS_N_INSNS (3), /* variable shift costs */
183 COSTS_N_INSNS (2), /* constant shift costs */
184 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
185 COSTS_N_INSNS (12), /* HI */
186 COSTS_N_INSNS (12), /* SI */
187 COSTS_N_INSNS (12), /* DI */
188 COSTS_N_INSNS (12)}, /* other */
189 1, /* cost of multiply per each bit set */
190 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
191 COSTS_N_INSNS (40), /* HI */
192 COSTS_N_INSNS (40), /* SI */
193 COSTS_N_INSNS (40), /* DI */
194 COSTS_N_INSNS (40)}, /* other */
195 COSTS_N_INSNS (3), /* cost of movsx */
196 COSTS_N_INSNS (2), /* cost of movzx */
197 15, /* "large" insn */
199 4, /* cost for loading QImode using movzbl */
200 {2, 4, 2}, /* cost of loading integer registers
201 in QImode, HImode and SImode.
202 Relative to reg-reg move (2). */
203 {2, 4, 2}, /* cost of storing integer registers */
204 2, /* cost of reg,reg fld/fst */
205 {8, 8, 8}, /* cost of loading fp registers
206 in SFmode, DFmode and XFmode */
207 {8, 8, 8}, /* cost of storing fp registers
208 in SFmode, DFmode and XFmode */
209 2, /* cost of moving MMX register */
210 {4, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {4, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
223 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
224 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
225 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
226 COSTS_N_INSNS (3), /* cost of FABS instruction. */
227 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
228 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
struct processor_costs pentium_cost = {
233 COSTS_N_INSNS (1), /* cost of an add instruction */
234 COSTS_N_INSNS (1), /* cost of a lea instruction */
235 COSTS_N_INSNS (4), /* variable shift costs */
236 COSTS_N_INSNS (1), /* constant shift costs */
237 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
238 COSTS_N_INSNS (11), /* HI */
239 COSTS_N_INSNS (11), /* SI */
240 COSTS_N_INSNS (11), /* DI */
241 COSTS_N_INSNS (11)}, /* other */
242 0, /* cost of multiply per each bit set */
243 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
244 COSTS_N_INSNS (25), /* HI */
245 COSTS_N_INSNS (25), /* SI */
246 COSTS_N_INSNS (25), /* DI */
247 COSTS_N_INSNS (25)}, /* other */
248 COSTS_N_INSNS (3), /* cost of movsx */
249 COSTS_N_INSNS (2), /* cost of movzx */
250 8, /* "large" insn */
252 6, /* cost for loading QImode using movzbl */
253 {2, 4, 2}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 4, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers
261 in SFmode, DFmode and XFmode */
262 8, /* cost of moving MMX register */
263 {8, 8}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {8, 8}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {4, 8, 16}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {4, 8, 16}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
276 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (1), /* cost of FABS instruction. */
280 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
struct processor_costs pentiumpro_cost = {
286 COSTS_N_INSNS (1), /* cost of an add instruction */
287 COSTS_N_INSNS (1), /* cost of a lea instruction */
288 COSTS_N_INSNS (1), /* variable shift costs */
289 COSTS_N_INSNS (1), /* constant shift costs */
290 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
291 COSTS_N_INSNS (4), /* HI */
292 COSTS_N_INSNS (4), /* SI */
293 COSTS_N_INSNS (4), /* DI */
294 COSTS_N_INSNS (4)}, /* other */
295 0, /* cost of multiply per each bit set */
296 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
297 COSTS_N_INSNS (17), /* HI */
298 COSTS_N_INSNS (17), /* SI */
299 COSTS_N_INSNS (17), /* DI */
300 COSTS_N_INSNS (17)}, /* other */
301 COSTS_N_INSNS (1), /* cost of movsx */
302 COSTS_N_INSNS (1), /* cost of movzx */
303 8, /* "large" insn */
305 2, /* cost for loading QImode using movzbl */
306 {4, 4, 4}, /* cost of loading integer registers
307 in QImode, HImode and SImode.
308 Relative to reg-reg move (2). */
309 {2, 2, 2}, /* cost of storing integer registers */
310 2, /* cost of reg,reg fld/fst */
311 {2, 2, 6}, /* cost of loading fp registers
312 in SFmode, DFmode and XFmode */
313 {4, 4, 6}, /* cost of storing fp registers
314 in SFmode, DFmode and XFmode */
315 2, /* cost of moving MMX register */
316 {2, 2}, /* cost of loading MMX registers
317 in SImode and DImode */
318 {2, 2}, /* cost of storing MMX registers
319 in SImode and DImode */
320 2, /* cost of moving SSE register */
321 {2, 2, 8}, /* cost of loading SSE registers
322 in SImode, DImode and TImode */
323 {2, 2, 8}, /* cost of storing SSE registers
324 in SImode, DImode and TImode */
325 3, /* MMX or SSE register to integer */
326 32, /* size of prefetch block */
327 6, /* number of parallel prefetches */
329 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
330 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
331 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
332 COSTS_N_INSNS (2), /* cost of FABS instruction. */
333 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
334 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
struct processor_costs geode_cost = {
339 COSTS_N_INSNS (1), /* cost of an add instruction */
340 COSTS_N_INSNS (1), /* cost of a lea instruction */
341 COSTS_N_INSNS (2), /* variable shift costs */
342 COSTS_N_INSNS (1), /* constant shift costs */
343 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
344 COSTS_N_INSNS (4), /* HI */
345 COSTS_N_INSNS (7), /* SI */
346 COSTS_N_INSNS (7), /* DI */
347 COSTS_N_INSNS (7)}, /* other */
348 0, /* cost of multiply per each bit set */
349 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
350 COSTS_N_INSNS (23), /* HI */
351 COSTS_N_INSNS (39), /* SI */
352 COSTS_N_INSNS (39), /* DI */
353 COSTS_N_INSNS (39)}, /* other */
354 COSTS_N_INSNS (1), /* cost of movsx */
355 COSTS_N_INSNS (1), /* cost of movzx */
356 8, /* "large" insn */
358 1, /* cost for loading QImode using movzbl */
359 {1, 1, 1}, /* cost of loading integer registers
360 in QImode, HImode and SImode.
361 Relative to reg-reg move (2). */
362 {1, 1, 1}, /* cost of storing integer registers */
363 1, /* cost of reg,reg fld/fst */
364 {1, 1, 1}, /* cost of loading fp registers
365 in SFmode, DFmode and XFmode */
366 {4, 6, 6}, /* cost of storing fp registers
367 in SFmode, DFmode and XFmode */
369 1, /* cost of moving MMX register */
370 {1, 1}, /* cost of loading MMX registers
371 in SImode and DImode */
372 {1, 1}, /* cost of storing MMX registers
373 in SImode and DImode */
374 1, /* cost of moving SSE register */
375 {1, 1, 1}, /* cost of loading SSE registers
376 in SImode, DImode and TImode */
377 {1, 1, 1}, /* cost of storing SSE registers
378 in SImode, DImode and TImode */
379 1, /* MMX or SSE register to integer */
380 32, /* size of prefetch block */
381 1, /* number of parallel prefetches */
383 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
384 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
385 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
386 COSTS_N_INSNS (1), /* cost of FABS instruction. */
387 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
388 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
struct processor_costs k6_cost = {
393 COSTS_N_INSNS (1), /* cost of an add instruction */
394 COSTS_N_INSNS (2), /* cost of a lea instruction */
395 COSTS_N_INSNS (1), /* variable shift costs */
396 COSTS_N_INSNS (1), /* constant shift costs */
397 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
398 COSTS_N_INSNS (3), /* HI */
399 COSTS_N_INSNS (3), /* SI */
400 COSTS_N_INSNS (3), /* DI */
401 COSTS_N_INSNS (3)}, /* other */
402 0, /* cost of multiply per each bit set */
403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
404 COSTS_N_INSNS (18), /* HI */
405 COSTS_N_INSNS (18), /* SI */
406 COSTS_N_INSNS (18), /* DI */
407 COSTS_N_INSNS (18)}, /* other */
408 COSTS_N_INSNS (2), /* cost of movsx */
409 COSTS_N_INSNS (2), /* cost of movzx */
410 8, /* "large" insn */
412 3, /* cost for loading QImode using movzbl */
413 {4, 5, 4}, /* cost of loading integer registers
414 in QImode, HImode and SImode.
415 Relative to reg-reg move (2). */
416 {2, 3, 2}, /* cost of storing integer registers */
417 4, /* cost of reg,reg fld/fst */
418 {6, 6, 6}, /* cost of loading fp registers
419 in SFmode, DFmode and XFmode */
420 {4, 4, 4}, /* cost of storing fp registers
421 in SFmode, DFmode and XFmode */
422 2, /* cost of moving MMX register */
423 {2, 2}, /* cost of loading MMX registers
424 in SImode and DImode */
425 {2, 2}, /* cost of storing MMX registers
426 in SImode and DImode */
427 2, /* cost of moving SSE register */
428 {2, 2, 8}, /* cost of loading SSE registers
429 in SImode, DImode and TImode */
430 {2, 2, 8}, /* cost of storing SSE registers
431 in SImode, DImode and TImode */
432 6, /* MMX or SSE register to integer */
433 32, /* size of prefetch block */
434 1, /* number of parallel prefetches */
436 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
437 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
438 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
439 COSTS_N_INSNS (2), /* cost of FABS instruction. */
440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
441 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
struct processor_costs athlon_cost = {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (2), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (5), /* HI */
452 COSTS_N_INSNS (5), /* SI */
453 COSTS_N_INSNS (5), /* DI */
454 COSTS_N_INSNS (5)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (26), /* HI */
458 COSTS_N_INSNS (42), /* SI */
459 COSTS_N_INSNS (74), /* DI */
460 COSTS_N_INSNS (74)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
465 4, /* cost for loading QImode using movzbl */
466 {3, 4, 3}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {3, 4, 3}, /* cost of storing integer registers */
470 4, /* cost of reg,reg fld/fst */
471 {4, 4, 12}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {6, 6, 8}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {4, 4}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {4, 4}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {4, 4, 6}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {4, 4, 5}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 5, /* MMX or SSE register to integer */
486 64, /* size of prefetch block */
487 6, /* number of parallel prefetches */
489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
491 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
492 COSTS_N_INSNS (2), /* cost of FABS instruction. */
493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
struct processor_costs k8_cost = {
499 COSTS_N_INSNS (1), /* cost of an add instruction */
500 COSTS_N_INSNS (2), /* cost of a lea instruction */
501 COSTS_N_INSNS (1), /* variable shift costs */
502 COSTS_N_INSNS (1), /* constant shift costs */
503 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
504 COSTS_N_INSNS (4), /* HI */
505 COSTS_N_INSNS (3), /* SI */
506 COSTS_N_INSNS (4), /* DI */
507 COSTS_N_INSNS (5)}, /* other */
508 0, /* cost of multiply per each bit set */
509 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
510 COSTS_N_INSNS (26), /* HI */
511 COSTS_N_INSNS (42), /* SI */
512 COSTS_N_INSNS (74), /* DI */
513 COSTS_N_INSNS (74)}, /* other */
514 COSTS_N_INSNS (1), /* cost of movsx */
515 COSTS_N_INSNS (1), /* cost of movzx */
516 8, /* "large" insn */
518 4, /* cost for loading QImode using movzbl */
519 {3, 4, 3}, /* cost of loading integer registers
520 in QImode, HImode and SImode.
521 Relative to reg-reg move (2). */
522 {3, 4, 3}, /* cost of storing integer registers */
523 4, /* cost of reg,reg fld/fst */
524 {4, 4, 12}, /* cost of loading fp registers
525 in SFmode, DFmode and XFmode */
526 {6, 6, 8}, /* cost of storing fp registers
527 in SFmode, DFmode and XFmode */
528 2, /* cost of moving MMX register */
529 {3, 3}, /* cost of loading MMX registers
530 in SImode and DImode */
531 {4, 4}, /* cost of storing MMX registers
532 in SImode and DImode */
533 2, /* cost of moving SSE register */
534 {4, 3, 6}, /* cost of loading SSE registers
535 in SImode, DImode and TImode */
536 {4, 4, 5}, /* cost of storing SSE registers
537 in SImode, DImode and TImode */
538 5, /* MMX or SSE register to integer */
539 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
545 100, /* number of parallel prefetches */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
struct processor_costs pentium4_cost = {
557 COSTS_N_INSNS (1), /* cost of an add instruction */
558 COSTS_N_INSNS (3), /* cost of a lea instruction */
559 COSTS_N_INSNS (4), /* variable shift costs */
560 COSTS_N_INSNS (4), /* constant shift costs */
561 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
562 COSTS_N_INSNS (15), /* HI */
563 COSTS_N_INSNS (15), /* SI */
564 COSTS_N_INSNS (15), /* DI */
565 COSTS_N_INSNS (15)}, /* other */
566 0, /* cost of multiply per each bit set */
567 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
568 COSTS_N_INSNS (56), /* HI */
569 COSTS_N_INSNS (56), /* SI */
570 COSTS_N_INSNS (56), /* DI */
571 COSTS_N_INSNS (56)}, /* other */
572 COSTS_N_INSNS (1), /* cost of movsx */
573 COSTS_N_INSNS (1), /* cost of movzx */
574 16, /* "large" insn */
576 2, /* cost for loading QImode using movzbl */
577 {4, 5, 4}, /* cost of loading integer registers
578 in QImode, HImode and SImode.
579 Relative to reg-reg move (2). */
580 {2, 3, 2}, /* cost of storing integer registers */
581 2, /* cost of reg,reg fld/fst */
582 {2, 2, 6}, /* cost of loading fp registers
583 in SFmode, DFmode and XFmode */
584 {4, 4, 6}, /* cost of storing fp registers
585 in SFmode, DFmode and XFmode */
586 2, /* cost of moving MMX register */
587 {2, 2}, /* cost of loading MMX registers
588 in SImode and DImode */
589 {2, 2}, /* cost of storing MMX registers
590 in SImode and DImode */
591 12, /* cost of moving SSE register */
592 {12, 12, 12}, /* cost of loading SSE registers
593 in SImode, DImode and TImode */
594 {2, 2, 8}, /* cost of storing SSE registers
595 in SImode, DImode and TImode */
596 10, /* MMX or SSE register to integer */
597 64, /* size of prefetch block */
598 6, /* number of parallel prefetches */
600 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
601 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
602 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
603 COSTS_N_INSNS (2), /* cost of FABS instruction. */
604 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
605 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
struct processor_costs nocona_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (1), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (10), /* HI */
616 COSTS_N_INSNS (10), /* SI */
617 COSTS_N_INSNS (10), /* DI */
618 COSTS_N_INSNS (10)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (66), /* HI */
622 COSTS_N_INSNS (66), /* SI */
623 COSTS_N_INSNS (66), /* DI */
624 COSTS_N_INSNS (66)}, /* other */
625 COSTS_N_INSNS (1), /* cost of movsx */
626 COSTS_N_INSNS (1), /* cost of movzx */
627 16, /* "large" insn */
629 4, /* cost for loading QImode using movzbl */
630 {4, 4, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {4, 4, 4}, /* cost of storing integer registers */
634 3, /* cost of reg,reg fld/fst */
635 {12, 12, 12}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 6, /* cost of moving MMX register */
640 {12, 12}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {12, 12}, /* cost of storing MMX registers
643 in SImode and DImode */
644 6, /* cost of moving SSE register */
645 {12, 12, 12}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {12, 12, 12}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 8, /* MMX or SSE register to integer */
650 128, /* size of prefetch block */
651 8, /* number of parallel prefetches */
653 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (3), /* cost of FABS instruction. */
657 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
661 /* Generic64 should produce code tuned for Nocona and K8. */
struct processor_costs generic64_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
  /* On all chips taken into consideration, lea takes 2 cycles or more.
     With a higher lea cost, however, our current implementation of
     synth_mult uses unnecessary temporary registers, causing regressions
     on several SPECfp benchmarks.  */
669 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
670 COSTS_N_INSNS (1), /* variable shift costs */
671 COSTS_N_INSNS (1), /* constant shift costs */
672 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
673 COSTS_N_INSNS (4), /* HI */
674 COSTS_N_INSNS (3), /* SI */
675 COSTS_N_INSNS (4), /* DI */
676 COSTS_N_INSNS (2)}, /* other */
677 0, /* cost of multiply per each bit set */
678 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
679 COSTS_N_INSNS (26), /* HI */
680 COSTS_N_INSNS (42), /* SI */
681 COSTS_N_INSNS (74), /* DI */
682 COSTS_N_INSNS (74)}, /* other */
683 COSTS_N_INSNS (1), /* cost of movsx */
684 COSTS_N_INSNS (1), /* cost of movzx */
685 8, /* "large" insn */
687 4, /* cost for loading QImode using movzbl */
688 {4, 4, 4}, /* cost of loading integer registers
689 in QImode, HImode and SImode.
690 Relative to reg-reg move (2). */
691 {4, 4, 4}, /* cost of storing integer registers */
692 4, /* cost of reg,reg fld/fst */
693 {12, 12, 12}, /* cost of loading fp registers
694 in SFmode, DFmode and XFmode */
695 {6, 6, 8}, /* cost of storing fp registers
696 in SFmode, DFmode and XFmode */
697 2, /* cost of moving MMX register */
698 {8, 8}, /* cost of loading MMX registers
699 in SImode and DImode */
700 {8, 8}, /* cost of storing MMX registers
701 in SImode and DImode */
702 2, /* cost of moving SSE register */
703 {8, 8, 8}, /* cost of loading SSE registers
704 in SImode, DImode and TImode */
705 {8, 8, 8}, /* cost of storing SSE registers
706 in SImode, DImode and TImode */
707 5, /* MMX or SSE register to integer */
708 64, /* size of prefetch block */
709 6, /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
713 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
714 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
715 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
716 COSTS_N_INSNS (8), /* cost of FABS instruction. */
717 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
718 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
721 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
struct processor_costs generic32_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
726 COSTS_N_INSNS (1), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (4), /* HI */
730 COSTS_N_INSNS (3), /* SI */
731 COSTS_N_INSNS (4), /* DI */
732 COSTS_N_INSNS (2)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (26), /* HI */
736 COSTS_N_INSNS (42), /* SI */
737 COSTS_N_INSNS (74), /* DI */
738 COSTS_N_INSNS (74)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
743 4, /* cost for loading QImode using movzbl */
744 {4, 4, 4}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {4, 4, 4}, /* cost of storing integer registers */
748 4, /* cost of reg,reg fld/fst */
749 {12, 12, 12}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
751 {6, 6, 8}, /* cost of storing fp registers
752 in SFmode, DFmode and XFmode */
753 2, /* cost of moving MMX register */
754 {8, 8}, /* cost of loading MMX registers
755 in SImode and DImode */
756 {8, 8}, /* cost of storing MMX registers
757 in SImode and DImode */
758 2, /* cost of moving SSE register */
759 {8, 8, 8}, /* cost of loading SSE registers
760 in SImode, DImode and TImode */
761 {8, 8, 8}, /* cost of storing SSE registers
762 in SImode, DImode and TImode */
763 5, /* MMX or SSE register to integer */
764 64, /* size of prefetch block */
765 6, /* number of parallel prefetches */
767 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
768 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
769 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
770 COSTS_N_INSNS (8), /* cost of FABS instruction. */
771 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
772 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
const struct processor_costs *ix86_cost = &pentium_cost;
777 /* Processor feature/optimization bitmasks. */
778 #define m_386 (1<<PROCESSOR_I386)
779 #define m_486 (1<<PROCESSOR_I486)
780 #define m_PENT (1<<PROCESSOR_PENTIUM)
781 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
782 #define m_GEODE (1<<PROCESSOR_GEODE)
783 #define m_K6_GEODE (m_K6 | m_GEODE)
784 #define m_K6 (1<<PROCESSOR_K6)
785 #define m_ATHLON (1<<PROCESSOR_ATHLON)
786 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
787 #define m_K8 (1<<PROCESSOR_K8)
788 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
789 #define m_NOCONA (1<<PROCESSOR_NOCONA)
790 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
791 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
792 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
794 /* Generic instruction choice should be common subset of supported CPUs
795 (PPro/PENT4/NOCONA/Athlon/K8). */
/* The leave instruction does not affect Nocona SPEC2000 results negatively,
   so enabling it for Generic64 seems like a good code-size tradeoff.  We
   can't enable it for 32-bit generic because it does not work well with
   PPro-based chips.  */
800 const int x86_use_leave
= m_386
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC64
;
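/* Illustrative sketch (assumptions, not a quote from this file): feature
   masks such as x86_use_leave are consumed by testing the bit of the CPU
   selected for tuning, roughly as the TARGET_* accessor macros in i386.h
   do.  The variable ix86_tune is declared later in this file.  */
#if 0
static int
example_use_leave_p (void)
{
  return (x86_use_leave & (1 << ix86_tune)) != 0;
}
#endif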
801 const int x86_push_memory
= m_386
| m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
802 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
803 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */;
804 const int x86_double_with_add
= ~m_386
;
805 const int x86_use_bit_test
= m_386
;
806 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
| m_GENERIC
;
807 const int x86_cmove
= m_PPRO
| m_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
808 const int x86_3dnow_a
= m_ATHLON_K8
;
809 const int x86_deep_branch
= m_PPRO
| m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
/* Branch hints were put in P4 based on simulation results.  But
   after P4 was made, no performance benefit was observed with
   branch hints.  They also increase code size.  As a result,
   icc never generates branch hints.  */
814 const int x86_branch_hints
= 0;
815 const int x86_use_sahf
= m_PPRO
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_GENERIC32
; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on the Generic32
   compilation setting as well.  However, in the current implementation
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro-based chips and is in conflict
   with the partial register dependencies used by Athlon/P4-based chips, it is
   better to leave it off for generic32 for now.  */
824 const int x86_partial_reg_stall
= m_PPRO
;
825 const int x86_partial_flag_reg_stall
= m_GENERIC
;
826 const int x86_use_himode_fiop
= m_386
| m_486
| m_K6_GEODE
;
827 const int x86_use_simode_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
| m_GENERIC
);
828 const int x86_use_mov0
= m_K6
;
829 const int x86_use_cltd
= ~(m_PENT
| m_K6
| m_GENERIC
);
830 const int x86_read_modify_write
= ~m_PENT
;
831 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
832 const int x86_split_long_moves
= m_PPRO
;
833 const int x86_promote_QImode
= m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
| m_GENERIC
; /* m_PENT4 ? */
834 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
835 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
836 const int x86_qimode_math
= ~(0);
837 const int x86_promote_qi_regs
= 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   x86_partial_reg_stall, this option might be considered for Generic32
   if our scheme for avoiding partial stalls were more effective.  */
841 const int x86_himode_math
= ~(m_PPRO
);
842 const int x86_promote_hi_regs
= m_PPRO
;
843 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
| m_GENERIC
;
844 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
| m_NOCONA
| m_GENERIC
;
845 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_GENERIC
;
846 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6_GEODE
| m_386
| m_486
| m_PENT4
| m_NOCONA
| m_GENERIC
;
847 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_GENERIC
| m_GEODE
);
848 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
849 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
850 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_GENERIC
;
851 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
| m_GENERIC
;
852 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
| m_GENERIC
;
853 const int x86_shift1
= ~m_486
;
854 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
/* In the Generic model we have a conflict here between PPro/Pentium4-based
   chips that treat 128-bit SSE registers as single units and K8-based chips
   that split SSE registers into two 64-bit halves.
   x86_sse_partial_reg_dependency promotes all store destinations to 128 bits
   to allow register renaming on 128-bit SSE units, but usually results in one
   extra micro-op on 64-bit SSE units.  Experimental results show that
   disabling this option on P4 brings over 20% SPECfp regression, while
   enabling it on K8 brings a roughly 2.4% regression that can be partly
   masked by careful scheduling of moves.  */
864 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
| m_GENERIC
;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in proper format, leaving the upper part
   undefined.  */
869 const int x86_sse_split_regs
= m_ATHLON_K8
;
870 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
871 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
872 const int x86_use_ffreep
= m_ATHLON_K8
;
873 const int x86_rep_movl_optimal
= m_386
| m_PENT
| m_PPRO
| m_K6_GEODE
;
874 const int x86_use_incdec
= ~(m_PENT4
| m_NOCONA
| m_GENERIC
);
/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
880 const int x86_ext_80387_constants
= m_K6_GEODE
| m_ATHLON
| m_PENT4
| m_NOCONA
| m_PPRO
| m_GENERIC32
;
881 /* Some CPU cores are not able to predict more than 4 branch instructions in
882 the 16 byte window. */
883 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
884 const int x86_schedule
= m_PPRO
| m_ATHLON_K8
| m_K6_GEODE
| m_PENT
| m_GENERIC
;
885 const int x86_use_bt
= m_ATHLON_K8
;
886 /* Compare and exchange was added for 80486. */
887 const int x86_cmpxchg
= ~m_386
;
888 /* Compare and exchange 8 bytes was added for pentium. */
889 const int x86_cmpxchg8b
= ~(m_386
| m_486
);
890 /* Compare and exchange 16 bytes was added for nocona. */
891 const int x86_cmpxchg16b
= m_NOCONA
;
892 /* Exchange and add was added for 80486. */
893 const int x86_xadd
= ~m_386
;
894 /* Byteswap was added for 80486. */
895 const int x86_bswap
= ~m_386
;
896 const int x86_pad_returns
= m_ATHLON_K8
| m_GENERIC
;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
903 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
908 /* Array of the smallest class containing reg number REGNO, indexed by
909 REGNO. Used by REGNO_REG_CLASS in i386.h. */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  AREG, DREG, CREG, BREG,
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
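/* Illustrative sketch (macro body assumed from i386.h, where
   REGNO_REG_CLASS simply indexes this table):

     #define REGNO_REG_CLASS(R)  (regclass_map[(R)])

   so e.g. REGNO_REG_CLASS (0) yields AREG for %eax and
   REGNO_REG_CLASS (1) yields DREG for %edx.  */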
934 /* The "default" register map used in 32bit mode. */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1, -1,		/* arg, flags, fpsr, fpcr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};
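/* For example (illustrative): the first integer argument lands in
   x86_64_int_parameter_registers[0], i.e. hard register 5 (%rdi), and the
   second in register 4 (%rsi), matching the x86-64 psABI calling
   convention.  */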
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
958 /* The "default" register map used in 64bit mode. */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1, -1,		/* arg, flags, fpsr, fpcr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
970 /* Define the register numbers to be used in Dwarf debugging information.
971 The SVR4 reference port C compiler uses the following register numbers
972 in its Dwarf output code:
973 0 for %eax (gcc regno = 0)
974 1 for %ecx (gcc regno = 2)
975 2 for %edx (gcc regno = 1)
976 3 for %ebx (gcc regno = 3)
977 4 for %esp (gcc regno = 7)
978 5 for %ebp (gcc regno = 6)
979 6 for %esi (gcc regno = 4)
980 7 for %edi (gcc regno = 5)
981 The following three DWARF register numbers are never generated by
982 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
983 believes these numbers have these meanings.
984 8 for %eip (no gcc equivalent)
985 9 for %eflags (gcc regno = 17)
986 10 for %trapno (no gcc equivalent)
987 It is not at all clear how we should number the FP stack registers
988 for the x86 architecture. If the version of SDB on x86/svr4 were
989 a bit less brain dead with respect to floating-point then we would
990 have a precedent to follow with respect to DWARF register numbers
991 for x86 FP registers, but the SDB on x86/svr4 is so completely
992 broken with respect to FP registers that it is hardly worth thinking
993 of it as something to strive for compatibility with.
994 The version of x86/svr4 SDB I have at the moment does (partially)
995 seem to believe that DWARF register number 11 is associated with
996 the x86 register %st(0), but that's about all. Higher DWARF
997 register numbers don't seem to be associated with anything in
998 particular, and even for DWARF regno 11, SDB only seems to under-
999 stand that it should say that a variable lives in %st(0) (when
1000 asked via an `=' command) if we said it was in DWARF regno 11,
1001 but SDB still prints garbage when asked for the value of the
1002 variable in question (via a `/' command).
1003 (Also note that the labels SDB prints for various FP stack regs
1004 when doing an `x' command are all wrong.)
1005 Note that these problems generally don't affect the native SVR4
1006 C compiler because it doesn't allow the use of -O with -g and
1007 because when it is *not* optimizing, it allocates a memory
1008 location for each floating-point variable, and the memory
1009 location is what gets described in the DWARF AT_location
1010 attribute for the variable in question.
1011 Regardless of the severe mental illness of the x86/svr4 SDB, we
1012 do something sensible here and we use the following DWARF
register numbers.  Note that these are all stack-top-relative
numbers:
1015 11 for %st(0) (gcc regno = 8)
1016 12 for %st(1) (gcc regno = 9)
1017 13 for %st(2) (gcc regno = 10)
1018 14 for %st(3) (gcc regno = 11)
1019 15 for %st(4) (gcc regno = 12)
1020 16 for %st(5) (gcc regno = 13)
1021 17 for %st(6) (gcc regno = 14)
1022 18 for %st(7) (gcc regno = 15)
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1, -1,		/* arg, flags, fpsr, fpcr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
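/* For example (illustrative): %ecx is gcc regno 2, and
   svr4_dbx_register_map[2] == 1, matching the "1 for %ecx (gcc regno = 2)"
   entry in the numbering described above; debug output simply indexes
   these arrays by the gcc register number.  */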
1035 /* Test and compare insns in i386.md store the information needed to
1036 generate branch and scc insns here. */
rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
1042 /* Size of the register save area. */
1043 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
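/* Worked example (illustrative, assuming the usual 64-bit values
   REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8):
   6 * 8 + 8 * 16 = 176 bytes, the size of the psABI register save area
   spilled for va_start.  */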
1045 /* Define the structure for the machine field in struct function. */
struct stack_local_entry GTY(())
{
  unsigned short mode;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [va_arg registers]   \
			 > to_allocate
			/		      <- FRAME_POINTER
  */
struct ix86_frame
{
  HOST_WIDE_INT frame;
  int outgoing_arguments_size;
  HOST_WIDE_INT to_allocate;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
1095 /* Code model option. */
enum cmodel ix86_cmodel;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
1135 static bool ix86_handle_option (size_t, const char *, int);
1136 static void output_pic_addr_const (FILE *, rtx
, int);
1137 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1139 static const char *get_some_local_dynamic_name (void);
1140 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1141 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1142 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1144 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1145 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1147 static rtx
get_thread_pointer (int);
1148 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1149 static void get_pc_thunk_name (char [32], unsigned int);
1150 static rtx
gen_push (rtx
);
1151 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1152 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1153 static struct machine_function
* ix86_init_machine_status (void);
1154 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1155 static int ix86_nsaved_regs (void);
1156 static void ix86_emit_save_regs (void);
1157 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1158 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1159 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1160 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1161 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1162 static rtx
ix86_expand_aligntest (rtx
, int);
1163 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1164 static int ix86_issue_rate (void);
1165 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1166 static int ia32_multipass_dfa_lookahead (void);
1167 static void ix86_init_mmx_sse_builtins (void);
1168 static rtx
x86_this_parameter (tree
);
1169 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1170 HOST_WIDE_INT
, tree
);
1171 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1172 static void x86_file_start (void);
1173 static void ix86_reorg (void);
1174 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1175 static tree
ix86_build_builtin_va_list (void);
1176 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1178 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1179 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1180 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1182 static int ix86_address_cost (rtx
);
1183 static bool ix86_cannot_force_const_mem (rtx
);
1184 static rtx
ix86_delegitimize_address (rtx
);
1186 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1188 struct builtin_description
;
1189 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1191 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1193 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1194 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1195 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1196 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1197 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1198 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1199 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1200 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1201 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1202 static int ix86_fp_comparison_cost (enum rtx_code code
);
1203 static unsigned int ix86_select_alt_pic_regnum (void);
1204 static int ix86_save_reg (unsigned int, int);
1205 static void ix86_compute_frame_layout (struct ix86_frame
*);
1206 static int ix86_comp_type_attributes (tree
, tree
);
1207 static int ix86_function_regparm (tree
, tree
);
1208 const struct attribute_spec ix86_attribute_table
[];
1209 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1210 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1211 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1212 static bool contains_128bit_aligned_vector_p (tree
);
1213 static rtx
ix86_struct_value_rtx (tree
, int);
1214 static bool ix86_ms_bitfield_layout_p (tree
);
1215 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1216 static int extended_reg_mentioned_1 (rtx
*, void *);
1217 static bool ix86_rtx_costs (rtx
, int, int, int *);
1218 static int min_insn_size (rtx
);
1219 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1220 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1221 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1223 static void ix86_init_builtins (void);
1224 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1225 static const char *ix86_mangle_fundamental_type (tree
);
1226 static tree
ix86_stack_protect_fail (void);
1227 static rtx
ix86_internal_arg_pointer (void);
1228 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1230 /* This function is only used on Solaris. */
1231 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class, except
   that gcc will use an SF or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
1242 enum x86_64_reg_class
1245 X86_64_INTEGER_CLASS
,
1246 X86_64_INTEGERSI_CLASS
,
1253 X86_64_COMPLEX_X87_CLASS
,
1256 static const char * const x86_64_reg_class_name
[] = {
1257 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1258 "sseup", "x87", "x87up", "cplx87", "no"
1261 #define MAX_CLASSES 4
1263 /* Table of constants used by fldpi, fldln2, etc.... */
1264 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1265 static bool ext_80387_constants_init
= 0;
1266 static void init_ext_80387_constants (void);
1267 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1268 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1269 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1270 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1271 unsigned HOST_WIDE_INT align
)
1274 /* Initialize the GCC target structure. */
1275 #undef TARGET_ATTRIBUTE_TABLE
1276 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1277 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1278 # undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
1282 #undef TARGET_COMP_TYPE_ATTRIBUTES
1283 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1285 #undef TARGET_INIT_BUILTINS
1286 #define TARGET_INIT_BUILTINS ix86_init_builtins
1287 #undef TARGET_EXPAND_BUILTIN
1288 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1290 #undef TARGET_ASM_FUNCTION_EPILOGUE
1291 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1293 #undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
1300 #undef TARGET_ASM_OPEN_PAREN
1301 #define TARGET_ASM_OPEN_PAREN ""
1302 #undef TARGET_ASM_CLOSE_PAREN
1303 #define TARGET_ASM_CLOSE_PAREN ""
1305 #undef TARGET_ASM_ALIGNED_HI_OP
1306 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1307 #undef TARGET_ASM_ALIGNED_SI_OP
1308 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1310 #undef TARGET_ASM_ALIGNED_DI_OP
1311 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1314 #undef TARGET_ASM_UNALIGNED_HI_OP
1315 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1316 #undef TARGET_ASM_UNALIGNED_SI_OP
1317 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1318 #undef TARGET_ASM_UNALIGNED_DI_OP
1319 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1321 #undef TARGET_SCHED_ADJUST_COST
1322 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1323 #undef TARGET_SCHED_ISSUE_RATE
1324 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1325 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1326 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1327 ia32_multipass_dfa_lookahead
1329 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1330 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1333 #undef TARGET_HAVE_TLS
1334 #define TARGET_HAVE_TLS true
1336 #undef TARGET_CANNOT_FORCE_CONST_MEM
1337 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1338 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1339 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1341 #undef TARGET_DELEGITIMIZE_ADDRESS
1342 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1344 #undef TARGET_MS_BITFIELD_LAYOUT_P
1345 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1348 #undef TARGET_BINDS_LOCAL_P
1349 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1352 #undef TARGET_ASM_OUTPUT_MI_THUNK
1353 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1354 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1355 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1357 #undef TARGET_ASM_FILE_START
1358 #define TARGET_ASM_FILE_START x86_file_start
1360 #undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_64BIT_DEFAULT		\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1367 #undef TARGET_HANDLE_OPTION
1368 #define TARGET_HANDLE_OPTION ix86_handle_option
1370 #undef TARGET_RTX_COSTS
1371 #define TARGET_RTX_COSTS ix86_rtx_costs
1372 #undef TARGET_ADDRESS_COST
1373 #define TARGET_ADDRESS_COST ix86_address_cost
1375 #undef TARGET_FIXED_CONDITION_CODE_REGS
1376 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1377 #undef TARGET_CC_MODES_COMPATIBLE
1378 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1380 #undef TARGET_MACHINE_DEPENDENT_REORG
1381 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1383 #undef TARGET_BUILD_BUILTIN_VA_LIST
1384 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1386 #undef TARGET_MD_ASM_CLOBBERS
1387 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1389 #undef TARGET_PROMOTE_PROTOTYPES
1390 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1391 #undef TARGET_STRUCT_VALUE_RTX
1392 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1393 #undef TARGET_SETUP_INCOMING_VARARGS
1394 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1395 #undef TARGET_MUST_PASS_IN_STACK
1396 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1397 #undef TARGET_PASS_BY_REFERENCE
1398 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1399 #undef TARGET_INTERNAL_ARG_POINTER
1400 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1401 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1402 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1404 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1405 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1407 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1408 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1410 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1411 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1414 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1415 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1418 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1419 #undef TARGET_INSERT_ATTRIBUTES
1420 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1423 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1424 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1426 #undef TARGET_STACK_PROTECT_FAIL
1427 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1429 #undef TARGET_FUNCTION_VALUE
1430 #define TARGET_FUNCTION_VALUE ix86_function_value
struct gcc_target targetm = TARGET_INITIALIZER;
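
/* Note (illustrative, summarizing the macros above): these #defines only
   override entries in TARGET_INITIALIZER; any hook left undefined keeps its
   default from target-def.h.  For example, after this initializer the
   generic scheduler simply calls targetm.sched.adjust_cost, which now points
   at ix86_adjust_cost, without knowing anything about the i386 backend.  */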
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Implement TARGET_HANDLE_OPTION.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
      target_flags &= ~MASK_3DNOW_A;
      target_flags_explicit |= MASK_3DNOW_A;

      target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
      target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;

      target_flags &= ~(MASK_SSE2 | MASK_SSE3);
      target_flags_explicit |= MASK_SSE2 | MASK_SSE3;

      target_flags &= ~MASK_SSE3;
      target_flags_explicit |= MASK_SSE3;
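
      /* For illustration: disabling a base ISA here also clears its dependent
	 ISAs and records them in target_flags_explicit, so a later -march
	 default cannot silently re-enable them.  E.g. with a hypothetical
	 command line "-msse2 -mno-sse", MASK_SSE2 and MASK_SSE3 end up
	 cleared as well.  */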
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been processed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1495 override_options (void)
1498 int ix86_tune_defaulted
= 0;
1500 /* Comes from final.c -- no real reason to change it. */
1501 #define MAX_CODE_ALIGN 16
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
  const processor_target_table[PROCESSOR_max] =
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
      {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	PTA_PREFETCH_SSE = 16,
  const processor_alias_table[] =
1550 {"i386", PROCESSOR_I386
, 0},
1551 {"i486", PROCESSOR_I486
, 0},
1552 {"i586", PROCESSOR_PENTIUM
, 0},
1553 {"pentium", PROCESSOR_PENTIUM
, 0},
1554 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1555 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1556 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1557 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1558 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1559 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1560 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1561 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1562 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1563 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1564 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1565 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1566 | PTA_MMX
| PTA_PREFETCH_SSE
},
1567 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1568 | PTA_MMX
| PTA_PREFETCH_SSE
},
1569 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1570 | PTA_MMX
| PTA_PREFETCH_SSE
},
1571 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1572 | PTA_MMX
| PTA_PREFETCH_SSE
},
1573 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1575 {"k6", PROCESSOR_K6
, PTA_MMX
},
1576 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1577 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1578 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1580 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1581 | PTA_3DNOW
| PTA_3DNOW_A
},
1582 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1583 | PTA_3DNOW_A
| PTA_SSE
},
1584 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1585 | PTA_3DNOW_A
| PTA_SSE
},
1586 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1587 | PTA_3DNOW_A
| PTA_SSE
},
1588 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1589 | PTA_SSE
| PTA_SSE2
},
1590 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1591 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1592 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1593 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1594 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1595 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1596 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1597 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1598 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1599 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
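
      /* Example of how this table is used below (illustrative): for a
	 command line such as "-march=k8", the loop over this table sets
	 ix86_arch to PROCESSOR_K8 and turns on MASK_MMX, MASK_3DNOW,
	 MASK_3DNOW_A, MASK_SSE and MASK_SSE2, except for any of those the
	 user disabled explicitly (tracked in target_flags_explicit).  */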
  int const pta_size = ARRAY_SIZE (processor_alias_table);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
1612 /* -fPIC is the default for x86_64. */
1613 if (TARGET_MACHO
&& TARGET_64BIT
)
1616 /* Set the default values for switches whose default depends on TARGET_64BIT
1617 in case they weren't overwritten by command line options. */
1620 /* Mach-O doesn't support omitting the frame pointer for now. */
1621 if (flag_omit_frame_pointer
== 2)
1622 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1623 if (flag_asynchronous_unwind_tables
== 2)
1624 flag_asynchronous_unwind_tables
= 1;
1625 if (flag_pcc_struct_return
== 2)
1626 flag_pcc_struct_return
= 0;
1630 if (flag_omit_frame_pointer
== 2)
1631 flag_omit_frame_pointer
= 0;
1632 if (flag_asynchronous_unwind_tables
== 2)
1633 flag_asynchronous_unwind_tables
= 0;
1634 if (flag_pcc_struct_return
== 2)
1635 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1638 /* Need to check -mtune=generic first. */
1639 if (ix86_tune_string
)
1641 if (!strcmp (ix86_tune_string
, "generic")
1642 || !strcmp (ix86_tune_string
, "i686")
1643 /* As special support for cross compilers we read -mtune=native
1644 as -mtune=generic. With native compilers we won't see the
1645 -mtune=native, as it was changed by the driver. */
1646 || !strcmp (ix86_tune_string
, "native"))
1649 ix86_tune_string
= "generic64";
1651 ix86_tune_string
= "generic32";
1653 else if (!strncmp (ix86_tune_string
, "generic", 7))
1654 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1658 if (ix86_arch_string
)
1659 ix86_tune_string
= ix86_arch_string
;
1660 if (!ix86_tune_string
)
1662 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1663 ix86_tune_defaulted
= 1;
1666 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1667 need to use a sensible tune option. */
1668 if (!strcmp (ix86_tune_string
, "generic")
1669 || !strcmp (ix86_tune_string
, "x86-64")
1670 || !strcmp (ix86_tune_string
, "i686"))
1673 ix86_tune_string
= "generic64";
1675 ix86_tune_string
= "generic32";
1678 if (!strcmp (ix86_tune_string
, "x86-64"))
1679 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1680 "-mtune=generic instead as appropriate.");
1682 if (!ix86_arch_string
)
1683 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1684 if (!strcmp (ix86_arch_string
, "generic"))
1685 error ("generic CPU can be used only for -mtune= switch");
1686 if (!strncmp (ix86_arch_string
, "generic", 7))
1687 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1689 if (ix86_cmodel_string
!= 0)
1691 if (!strcmp (ix86_cmodel_string
, "small"))
1692 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1693 else if (!strcmp (ix86_cmodel_string
, "medium"))
1694 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
1696 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1697 else if (!strcmp (ix86_cmodel_string
, "32"))
1698 ix86_cmodel
= CM_32
;
1699 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1700 ix86_cmodel
= CM_KERNEL
;
1701 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1702 ix86_cmodel
= CM_LARGE
;
1704 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1708 ix86_cmodel
= CM_32
;
1710 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1712 if (ix86_asm_string
!= 0)
1715 && !strcmp (ix86_asm_string
, "intel"))
1716 ix86_asm_dialect
= ASM_INTEL
;
1717 else if (!strcmp (ix86_asm_string
, "att"))
1718 ix86_asm_dialect
= ASM_ATT
;
1720 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1722 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1723 error ("code model %qs not supported in the %s bit mode",
1724 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1725 if (ix86_cmodel
== CM_LARGE
)
1726 sorry ("code model %<large%> not supported yet");
1727 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1728 sorry ("%i-bit mode not compiled in",
1729 (target_flags
& MASK_64BIT
) ? 64 : 32);
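
  /* To illustrate the interaction above (assumed examples): "-m64
     -mcmodel=medium -fpic" selects CM_MEDIUM_PIC, plain "-m64" defaults to
     CM_SMALL (or CM_SMALL_PIC with -fpic), and any -mcmodel other than "32"
     is rejected when compiling for the 32-bit ABI.  */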
1731 for (i
= 0; i
< pta_size
; i
++)
1732 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1734 ix86_arch
= processor_alias_table
[i
].processor
;
1735 /* Default cpu tuning to the architecture. */
1736 ix86_tune
= ix86_arch
;
1737 if (processor_alias_table
[i
].flags
& PTA_MMX
1738 && !(target_flags_explicit
& MASK_MMX
))
1739 target_flags
|= MASK_MMX
;
1740 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1741 && !(target_flags_explicit
& MASK_3DNOW
))
1742 target_flags
|= MASK_3DNOW
;
1743 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1744 && !(target_flags_explicit
& MASK_3DNOW_A
))
1745 target_flags
|= MASK_3DNOW_A
;
1746 if (processor_alias_table
[i
].flags
& PTA_SSE
1747 && !(target_flags_explicit
& MASK_SSE
))
1748 target_flags
|= MASK_SSE
;
1749 if (processor_alias_table
[i
].flags
& PTA_SSE2
1750 && !(target_flags_explicit
& MASK_SSE2
))
1751 target_flags
|= MASK_SSE2
;
1752 if (processor_alias_table
[i
].flags
& PTA_SSE3
1753 && !(target_flags_explicit
& MASK_SSE3
))
1754 target_flags
|= MASK_SSE3
;
1755 if (processor_alias_table
[i
].flags
& PTA_SSSE3
1756 && !(target_flags_explicit
& MASK_SSSE3
))
1757 target_flags
|= MASK_SSSE3
;
1758 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1759 x86_prefetch_sse
= true;
1760 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1761 error ("CPU you selected does not support x86-64 "
1767 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1769 for (i
= 0; i
< pta_size
; i
++)
1770 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1772 ix86_tune
= processor_alias_table
[i
].processor
;
1773 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1775 if (ix86_tune_defaulted
)
1777 ix86_tune_string
= "x86-64";
1778 for (i
= 0; i
< pta_size
; i
++)
1779 if (! strcmp (ix86_tune_string
,
1780 processor_alias_table
[i
].name
))
1782 ix86_tune
= processor_alias_table
[i
].processor
;
1785 error ("CPU you selected does not support x86-64 "
1788 /* Intel CPUs have always interpreted SSE prefetch instructions as
1789 NOPs; so, we can enable SSE prefetch instructions even when
1790 -mtune (rather than -march) points us to a processor that has them.
1791 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1792 higher processors. */
1793 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1794 x86_prefetch_sse
= true;
1798 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1801 ix86_cost
= &size_cost
;
1803 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1804 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1805 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1807 /* Arrange to set up i386_stack_locals for all functions. */
1808 init_machine_status
= ix86_init_machine_status
;
1810 /* Validate -mregparm= value. */
1811 if (ix86_regparm_string
)
1813 i
= atoi (ix86_regparm_string
);
1814 if (i
< 0 || i
> REGPARM_MAX
)
1815 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1821 ix86_regparm
= REGPARM_MAX
;
1823 /* If the user has provided any of the -malign-* options,
1824 warn and use that value only if -falign-* is not set.
1825 Remove this code in GCC 3.2 or later. */
1826 if (ix86_align_loops_string
)
1828 warning (0, "-malign-loops is obsolete, use -falign-loops");
1829 if (align_loops
== 0)
1831 i
= atoi (ix86_align_loops_string
);
1832 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1833 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1835 align_loops
= 1 << i
;
1839 if (ix86_align_jumps_string
)
1841 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1842 if (align_jumps
== 0)
1844 i
= atoi (ix86_align_jumps_string
);
1845 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1846 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1848 align_jumps
= 1 << i
;
1852 if (ix86_align_funcs_string
)
1854 warning (0, "-malign-functions is obsolete, use -falign-functions");
1855 if (align_functions
== 0)
1857 i
= atoi (ix86_align_funcs_string
);
1858 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1859 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1861 align_functions
= 1 << i
;
1865 /* Default align_* from the processor table. */
1866 if (align_loops
== 0)
1868 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1869 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1871 if (align_jumps
== 0)
1873 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1874 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1876 if (align_functions
== 0)
1878 align_functions
= processor_target_table
[ix86_tune
].align_func
;
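
  /* As an example of the defaulting above: when tuning for k8 and no
     -falign-* options were given, processor_target_table supplies
     align_loops = align_jumps = align_functions = 16 with a max skip of 7,
     while tuning for pentium4 or nocona leaves all of them 0 (no extra
     alignment).  */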
1881 /* Validate -mbranch-cost= value, or provide default. */
1882 ix86_branch_cost
= ix86_cost
->branch_cost
;
1883 if (ix86_branch_cost_string
)
1885 i
= atoi (ix86_branch_cost_string
);
1887 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1889 ix86_branch_cost
= i
;
1891 if (ix86_section_threshold_string
)
1893 i
= atoi (ix86_section_threshold_string
);
1895 error ("-mlarge-data-threshold=%d is negative", i
);
1897 ix86_section_threshold
= i
;
1900 if (ix86_tls_dialect_string
)
1902 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1903 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1904 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
1905 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
1906 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1907 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1909 error ("bad value (%s) for -mtls-dialect= switch",
1910 ix86_tls_dialect_string
);
1913 /* Keep nonleaf frame pointers. */
1914 if (flag_omit_frame_pointer
)
1915 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
1916 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1917 flag_omit_frame_pointer
= 1;
1919 /* If we're doing fast math, we don't care about comparison order
1920 wrt NaNs. This lets us use a shorter comparison sequence. */
1921 if (flag_finite_math_only
)
1922 target_flags
&= ~MASK_IEEE_FP
;
1924 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1925 since the insns won't need emulation. */
1926 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1927 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1929 /* Likewise, if the target doesn't have a 387, or we've specified
1930 software floating point, don't use 387 inline intrinsics. */
1932 target_flags
|= MASK_NO_FANCY_MATH_387
;
1934 /* Turn on SSE3 builtins for -mssse3. */
1936 target_flags
|= MASK_SSE3
;
1938 /* Turn on SSE2 builtins for -msse3. */
1940 target_flags
|= MASK_SSE2
;
1942 /* Turn on SSE builtins for -msse2. */
1944 target_flags
|= MASK_SSE
;
1946 /* Turn on MMX builtins for -msse. */
1949 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
1950 x86_prefetch_sse
= true;
1953 /* Turn on MMX builtins for 3Dnow. */
1955 target_flags
|= MASK_MMX
;
1959 if (TARGET_ALIGN_DOUBLE
)
1960 error ("-malign-double makes no sense in the 64bit mode");
1962 error ("-mrtd calling convention not supported in the 64bit mode");
1964 /* Enable by default the SSE and MMX builtins. Do allow the user to
1965 explicitly disable any of these. In particular, disabling SSE and
1966 MMX for kernel code is extremely useful. */
1968 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
1969 & ~target_flags_explicit
);
  /* The i386 ABI does not specify a red zone.  It still makes sense to use
     one when the programmer takes care to keep the stack from being
     destroyed.  */
1975 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1976 target_flags
|= MASK_NO_RED_ZONE
;
1979 /* Validate -mpreferred-stack-boundary= value, or provide default.
1980 The default of 128 bits is for Pentium III's SSE __m128. We can't
1981 change it because of optimize_size. Otherwise, we can't mix object
1982 files compiled with -Os and -On. */
1983 ix86_preferred_stack_boundary
= 128;
1984 if (ix86_preferred_stack_boundary_string
)
1986 i
= atoi (ix86_preferred_stack_boundary_string
);
1987 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1988 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1989 TARGET_64BIT
? 4 : 2);
1991 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
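
  /* The value accepted here is a power of two; e.g. an assumed
     "-mpreferred-stack-boundary=4" yields (1 << 4) * BITS_PER_UNIT = 128
     bits, i.e. a 16-byte aligned stack, while 2 (32-bit only) gives the
     minimal 4-byte alignment.  */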
1994 /* Accept -msseregparm only if at least SSE support is enabled. */
1995 if (TARGET_SSEREGPARM
1997 error ("-msseregparm used without SSE enabled");
1999 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2001 if (ix86_fpmath_string
!= 0)
2003 if (! strcmp (ix86_fpmath_string
, "387"))
2004 ix86_fpmath
= FPMATH_387
;
2005 else if (! strcmp (ix86_fpmath_string
, "sse"))
2009 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2010 ix86_fpmath
= FPMATH_387
;
2013 ix86_fpmath
= FPMATH_SSE
;
2015 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2016 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2020 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2021 ix86_fpmath
= FPMATH_387
;
2023 else if (!TARGET_80387
)
2025 warning (0, "387 instruction set disabled, using SSE arithmetics");
2026 ix86_fpmath
= FPMATH_SSE
;
2029 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2032 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
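
  /* Summary of the -mfpmath handling above: "387" and "sse" select a single
     unit (falling back with a warning when the requested unit is disabled),
     while "387,sse" or "sse,387" set both FPMATH_SSE and FPMATH_387 so the
     register allocator may use either.  */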
2035 /* If the i387 is disabled, then do not return values in it. */
2037 target_flags
&= ~MASK_FLOAT_RETURNS
;
2039 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2040 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2042 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
2048 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2049 || flag_exceptions
|| flag_non_call_exceptions
)
2050 && flag_omit_frame_pointer
2051 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2053 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2054 warning (0, "unwind tables currently require either a frame pointer "
2055 "or -maccumulate-outgoing-args for correctness");
2056 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2059 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2062 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2063 p
= strchr (internal_label_prefix
, 'X');
2064 internal_label_prefix_len
= p
- internal_label_prefix
;
  /* When the scheduling description is not available, disable the scheduler
     passes so they won't slow down compilation and won't make x87 code
     slower.  */
2070 if (!TARGET_SCHEDULE
)
2071 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2073 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2074 set_param_value ("simultaneous-prefetches",
2075 ix86_cost
->simultaneous_prefetches
);
2076 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2077 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2080 /* switch to the appropriate section for output of DECL.
2081 DECL is either a `VAR_DECL' node or a constant of some sort.
2082 RELOC indicates whether forming the initial value of DECL requires
2083 link-time relocations. */
2086 x86_64_elf_select_section (tree decl
, int reloc
,
2087 unsigned HOST_WIDE_INT align
)
2089 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2090 && ix86_in_large_data_p (decl
))
2092 const char *sname
= NULL
;
2093 unsigned int flags
= SECTION_WRITE
;
2094 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2099 case SECCAT_DATA_REL
:
2100 sname
= ".ldata.rel";
2102 case SECCAT_DATA_REL_LOCAL
:
2103 sname
= ".ldata.rel.local";
2105 case SECCAT_DATA_REL_RO
:
2106 sname
= ".ldata.rel.ro";
2108 case SECCAT_DATA_REL_RO_LOCAL
:
2109 sname
= ".ldata.rel.ro.local";
2113 flags
|= SECTION_BSS
;
2116 case SECCAT_RODATA_MERGE_STR
:
2117 case SECCAT_RODATA_MERGE_STR_INIT
:
2118 case SECCAT_RODATA_MERGE_CONST
:
2122 case SECCAT_SRODATA
:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
2135 /* We might get called with string constants, but get_named_section
2136 doesn't like them as they are not DECLs. Also, we need to set
2137 flags in that case. */
2139 return get_section (sname
, flags
, NULL
);
2140 return get_named_section (decl
, sname
, reloc
);
2143 return default_elf_select_section (decl
, reloc
, align
);
2146 /* Build up a unique section name, expressed as a
2147 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2148 RELOC indicates whether the initial value of EXP requires
2149 link-time relocations. */
2152 x86_64_elf_unique_section (tree decl
, int reloc
)
2154 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2155 && ix86_in_large_data_p (decl
))
2157 const char *prefix
= NULL
;
2158 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2159 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2161 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2164 case SECCAT_DATA_REL
:
2165 case SECCAT_DATA_REL_LOCAL
:
2166 case SECCAT_DATA_REL_RO
:
2167 case SECCAT_DATA_REL_RO_LOCAL
:
2168 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2171 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2174 case SECCAT_RODATA_MERGE_STR
:
2175 case SECCAT_RODATA_MERGE_STR_INIT
:
2176 case SECCAT_RODATA_MERGE_CONST
:
2177 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2179 case SECCAT_SRODATA
:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
2195 plen
= strlen (prefix
);
2197 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2198 name
= targetm
.strip_name_encoding (name
);
2199 nlen
= strlen (name
);
2201 string
= alloca (nlen
+ plen
+ 1);
2202 memcpy (string
, prefix
, plen
);
2203 memcpy (string
+ plen
, name
, nlen
+ 1);
2205 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2209 default_unique_section (decl
, reloc
);
2212 #ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
2219 x86_elf_aligned_common (FILE *file
,
2220 const char *name
, unsigned HOST_WIDE_INT size
,
2223 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2224 && size
> (unsigned int)ix86_section_threshold
)
2225 fprintf (file
, ".largecomm\t");
2227 fprintf (file
, "%s", COMMON_ASM_OP
);
2228 assemble_name (file
, name
);
2229 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2230 size
, align
/ BITS_PER_UNIT
);
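
/* For instance, with an assumed "-mcmodel=medium -mlarge-data-threshold=65536",
   a common symbol larger than 64k is emitted with ".largecomm" so it can be
   placed outside the small data area, while smaller commons still use the
   regular COMMON_ASM_OP.  */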
2233 /* Utility function for targets to use in implementing
2234 ASM_OUTPUT_ALIGNED_BSS. */
2237 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2238 const char *name
, unsigned HOST_WIDE_INT size
,
2241 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2242 && size
> (unsigned int)ix86_section_threshold
)
2243 switch_to_section (get_named_section (decl
, ".lbss", 0));
2245 switch_to_section (bss_section
);
2246 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2247 #ifdef ASM_DECLARE_OBJECT_NAME
2248 last_assemble_variable_decl
= decl
;
2249 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2251 /* Standard thing is just output label for the object. */
2252 ASM_OUTPUT_LABEL (file
, name
);
2253 #endif /* ASM_DECLARE_OBJECT_NAME */
2254 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2259 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2261 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2262 make the problem with not enough registers even worse. */
2263 #ifdef INSN_SCHEDULING
2265 flag_schedule_insns
= 0;
2269 /* The Darwin libraries never set errno, so we might as well
2270 avoid calling them when that's the only reason we would. */
2271 flag_errno_math
= 0;
  /* The default values of these switches depend on TARGET_64BIT, which is
     not known at this point.  Mark these values with 2 and let the user
     override them.  If there is no command line option specifying them,
     we will set the defaults in override_options.  */
2278 flag_omit_frame_pointer
= 2;
2279 flag_pcc_struct_return
= 2;
2280 flag_asynchronous_unwind_tables
= 2;
2281 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2282 SUBTARGET_OPTIMIZATION_OPTIONS
;
2286 /* Table of valid machine attributes. */
2287 const struct attribute_spec ix86_attribute_table
[] =
2289 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2290 /* Stdcall attribute says callee is responsible for popping arguments
2291 if they are not variable. */
2292 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2293 /* Fastcall attribute says callee is responsible for popping arguments
2294 if they are not variable. */
2295 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2296 /* Cdecl attribute says the callee is a normal C declaration */
2297 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2298 /* Regparm attribute specifies how many integer arguments are to be
2299 passed in registers. */
2300 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2301 /* Sseregparm attribute says we are using x86_64 calling conventions
2302 for FP arguments. */
2303 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2304 /* force_align_arg_pointer says this function realigns the stack at entry. */
2305 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2306 false, true, true, ix86_handle_cconv_attribute
},
2307 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2308 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2309 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2310 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2312 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2313 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2314 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2315 SUBTARGET_ATTRIBUTE_TABLE
,
2317 { NULL
, 0, 0, false, false, false, NULL
}
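
/* A hypothetical use of the calling-convention attributes handled by
   ix86_handle_cconv_attribute below:

     int __attribute__ ((fastcall)) f (int a, int b, int c);
     int __attribute__ ((regparm (2))) g (int a, int b);

   f receives its first two integer arguments in %ecx/%edx, g in %eax/%edx;
   combining fastcall with regparm on one declaration is rejected with an
   error.  */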
2320 /* Decide whether we can make a sibling call to a function. DECL is the
2321 declaration of the function being targeted by the call and EXP is the
2322 CALL_EXPR representing the call. */
2325 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2330 /* If we are generating position-independent code, we cannot sibcall
2331 optimize any indirect call, or a direct call to a global function,
2332 as the PLT requires %ebx be live. */
2333 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2340 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2341 if (POINTER_TYPE_P (func
))
2342 func
= TREE_TYPE (func
);
2345 /* Check that the return value locations are the same. Like
2346 if we are returning floats on the 80387 register stack, we cannot
2347 make a sibcall from a function that doesn't return a float to a
2348 function that does or, conversely, from a function that does return
2349 a float to a function that doesn't; the necessary stack adjustment
2350 would not be executed. This is also the place we notice
2351 differences in the return value ABI. Note that it is ok for one
2352 of the functions to have void return type as long as the return
2353 value of the other is passed in a register. */
2354 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2355 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2357 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2359 if (!rtx_equal_p (a
, b
))
2362 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2364 else if (!rtx_equal_p (a
, b
))
2367 /* If this call is indirect, we'll need to be able to use a call-clobbered
2368 register for the address of the target function. Make sure that all
2369 such registers are not used for passing parameters. */
2370 if (!decl
&& !TARGET_64BIT
)
2374 /* We're looking at the CALL_EXPR, we need the type of the function. */
2375 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2376 type
= TREE_TYPE (type
); /* pointer type */
2377 type
= TREE_TYPE (type
); /* function type */
2379 if (ix86_function_regparm (type
, NULL
) >= 3)
2381 /* ??? Need to count the actual number of registers to be used,
2382 not the possible number of registers. Fix later. */
2387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2388 /* Dllimport'd functions are also called indirectly. */
2389 if (decl
&& DECL_DLLIMPORT_P (decl
)
2390 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
  /* If we force-aligned the stack, then sibcalling would unalign the
     stack, which may break the called function.  */
2396 if (cfun
->machine
->force_align_arg_pointer
)
2399 /* Otherwise okay. That also includes certain types of indirect calls. */
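
/* Concrete example of the regparm restriction above (illustrative only):
   with -mregparm=3, an indirect call such as (*fp) (a, b, c) passes its
   arguments in %eax, %edx and %ecx, leaving no call-clobbered register free
   to hold fp itself, so the sibcall optimization is refused.  */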
2403 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2404 calling convention attributes;
2405 arguments as in struct attribute_spec.handler. */
2408 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2410 int flags ATTRIBUTE_UNUSED
,
2413 if (TREE_CODE (*node
) != FUNCTION_TYPE
2414 && TREE_CODE (*node
) != METHOD_TYPE
2415 && TREE_CODE (*node
) != FIELD_DECL
2416 && TREE_CODE (*node
) != TYPE_DECL
)
2418 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2419 IDENTIFIER_POINTER (name
));
2420 *no_add_attrs
= true;
2424 /* Can combine regparm with all attributes but fastcall. */
2425 if (is_attribute_p ("regparm", name
))
2429 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2431 error ("fastcall and regparm attributes are not compatible");
2434 cst
= TREE_VALUE (args
);
2435 if (TREE_CODE (cst
) != INTEGER_CST
)
2437 warning (OPT_Wattributes
,
2438 "%qs attribute requires an integer constant argument",
2439 IDENTIFIER_POINTER (name
));
2440 *no_add_attrs
= true;
2442 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2444 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2445 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2446 *no_add_attrs
= true;
2450 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2451 TYPE_ATTRIBUTES (*node
))
2452 && compare_tree_int (cst
, REGPARM_MAX
-1))
2454 error ("%s functions limited to %d register parameters",
2455 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2463 warning (OPT_Wattributes
, "%qs attribute ignored",
2464 IDENTIFIER_POINTER (name
));
2465 *no_add_attrs
= true;
2469 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2470 if (is_attribute_p ("fastcall", name
))
2472 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2474 error ("fastcall and cdecl attributes are not compatible");
2476 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2478 error ("fastcall and stdcall attributes are not compatible");
2480 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2482 error ("fastcall and regparm attributes are not compatible");
2486 /* Can combine stdcall with fastcall (redundant), regparm and
2488 else if (is_attribute_p ("stdcall", name
))
2490 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2492 error ("stdcall and cdecl attributes are not compatible");
2494 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2496 error ("stdcall and fastcall attributes are not compatible");
2500 /* Can combine cdecl with regparm and sseregparm. */
2501 else if (is_attribute_p ("cdecl", name
))
2503 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2505 error ("stdcall and cdecl attributes are not compatible");
2507 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2509 error ("fastcall and cdecl attributes are not compatible");
2513 /* Can combine sseregparm with all attributes. */
2518 /* Return 0 if the attributes for two types are incompatible, 1 if they
2519 are compatible, and 2 if they are nearly compatible (which causes a
2520 warning to be generated). */
2523 ix86_comp_type_attributes (tree type1
, tree type2
)
2525 /* Check for mismatch of non-default calling convention. */
2526 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2528 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2531 /* Check for mismatched fastcall/regparm types. */
2532 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2533 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2534 || (ix86_function_regparm (type1
, NULL
)
2535 != ix86_function_regparm (type2
, NULL
)))
2538 /* Check for mismatched sseregparm types. */
2539 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2540 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2543 /* Check for mismatched return types (cdecl vs stdcall). */
2544 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2545 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2551 /* Return the regparm value for a function with the indicated TYPE and DECL.
2552 DECL may be NULL when calling function indirectly
2553 or considering a libcall. */
2556 ix86_function_regparm (tree type
, tree decl
)
2559 int regparm
= ix86_regparm
;
2560 bool user_convention
= false;
2564 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2567 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2568 user_convention
= true;
2571 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2574 user_convention
= true;
2577 /* Use register calling convention for local functions when possible. */
2578 if (!TARGET_64BIT
&& !user_convention
&& decl
2579 && flag_unit_at_a_time
&& !profile_flag
)
2581 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2584 int local_regparm
, globals
= 0, regno
;
	  /* Make sure no regparm register is taken by a global register
	     variable.  */
2588 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2589 if (global_regs
[local_regparm
])
	    /* We can't use regparm(3) for nested functions as these use
	       the static chain pointer in the third argument.  */
2593 if (local_regparm
== 3
2594 && decl_function_context (decl
)
2595 && !DECL_NO_STATIC_CHAIN (decl
))
	    /* If the function realigns its stack pointer, the prologue will
	       clobber %ecx.  If we've already generated code for the callee,
	       the callee's DECL_STRUCT_FUNCTION is gone, so we fall back to
	       scanning the attributes for the self-realigning property.  */
2603 if ((DECL_STRUCT_FUNCTION (decl
)
2604 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2605 || (!DECL_STRUCT_FUNCTION (decl
)
2606 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2607 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
	  /* Each global register variable increases register pressure, so
	     the more global reg vars there are, the less profitable the
	     regparm optimization becomes, unless explicitly requested by
	     the user.  */
2612 for (regno
= 0; regno
< 6; regno
++)
2613 if (global_regs
[regno
])
2616 = globals
< local_regparm
? local_regparm
- globals
: 0;
2618 if (local_regparm
> regparm
)
2619 regparm
= local_regparm
;
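
/* Illustration of the local-function case above (assumed example): a
   file-static function that is local in the cgraph sense, compiled with
   -funit-at-a-time and without profiling, is promoted to an implicit
   regparm(3) unless a global register variable or the stack-realignment
   prologue (which needs %ecx) takes one of %eax/%edx/%ecx away.  */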
2626 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2627 in SSE registers for a function with the indicated TYPE and DECL.
2628 DECL may be NULL when calling function indirectly
2629 or considering a libcall. Otherwise return 0. */
2632 ix86_function_sseregparm (tree type
, tree decl
)
2634 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2635 by the sseregparm attribute. */
2636 if (TARGET_SSEREGPARM
2638 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2643 error ("Calling %qD with attribute sseregparm without "
2644 "SSE/SSE2 enabled", decl
);
2646 error ("Calling %qT with attribute sseregparm without "
2647 "SSE/SSE2 enabled", type
);
2654 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2655 in SSE registers even for 32-bit mode and not just 3, but up to
2656 8 SSE arguments in registers. */
2657 if (!TARGET_64BIT
&& decl
2658 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2660 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2662 return TARGET_SSE2
? 2 : 1;
2668 /* Return true if EAX is live at the start of the function. Used by
2669 ix86_expand_prologue to determine if we need special help before
2670 calling allocate_stack_worker. */
2673 ix86_eax_live_at_start_p (void)
2675 /* Cheat. Don't bother working forward from ix86_function_regparm
2676 to the function type to whether an actual argument is located in
2677 eax. Instead just look at cfg info, which is still close enough
2678 to correct at this point. This gives false positives for broken
2679 functions that might use uninitialized data that happens to be
2680 allocated in eax, but who cares? */
2681 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
2684 /* Value is the number of bytes of arguments automatically
2685 popped when returning from a subroutine call.
2686 FUNDECL is the declaration node of the function (as a tree),
2687 FUNTYPE is the data type of the function (as a tree),
2688 or for a library call it is an identifier node for the subroutine name.
2689 SIZE is the number of bytes of arguments passed on the stack.
2691 On the 80386, the RTD insn may be used to pop them if the number
2692 of args is fixed, but if the number is variable then the caller
2693 must pop them all. RTD can't be used for library calls now
2694 because the library is compiled with the Unix compiler.
2695 Use of RTD is a selectable option, since it is incompatible with
2696 standard Unix calling sequences. If the option is not selected,
2697 the caller must always pop the args.
2699 The attribute stdcall is equivalent to RTD on a per module basis. */
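
/* A worked example of the rule above (illustrative): a function declared
   __attribute__ ((stdcall)) taking two ints and one long (12 bytes of stack
   arguments, not variadic) returns with "ret $12", so ix86_return_pops_args
   yields 12; the same prototype with cdecl (or a variadic argument list)
   yields 0 and the caller pops the arguments.  */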
2702 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
2704 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
2706 /* Cdecl functions override -mrtd, and never pop the stack. */
2707 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
2709 /* Stdcall and fastcall functions will pop the stack if not
2711 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
2712 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
2716 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
2717 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
2718 == void_type_node
)))
2722 /* Lose any fake structure return argument if it is passed on the stack. */
2723 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
2725 && !KEEP_AGGREGATE_RETURN_POINTER
)
2727 int nregs
= ix86_function_regparm (funtype
, fundecl
);
2730 return GET_MODE_SIZE (Pmode
);
2736 /* Argument support functions. */
2738 /* Return true when register may be used to pass function parameters. */
2740 ix86_function_arg_regno_p (int regno
)
2744 return (regno
< REGPARM_MAX
2745 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
2746 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
2747 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
2748 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
2750 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
2751 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
2753 /* RAX is used as hidden argument to va_arg functions. */
2756 for (i
= 0; i
< REGPARM_MAX
; i
++)
2757 if (regno
== x86_64_int_parameter_registers
[i
])
2762 /* Return if we do not know how to pass TYPE solely in registers. */
2765 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
2767 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
2770 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2771 The layout_type routine is crafty and tries to trick us into passing
2772 currently unsupported vector types on the stack by using TImode. */
2773 return (!TARGET_64BIT
&& mode
== TImode
2774 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
2777 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2778 for a call to a function whose data type is FNTYPE.
2779 For a library call, FNTYPE is 0. */
2782 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
2783 tree fntype
, /* tree ptr for function decl */
2784 rtx libname
, /* SYMBOL_REF of library name or 0 */
2787 static CUMULATIVE_ARGS zero_cum
;
2788 tree param
, next_param
;
2790 if (TARGET_DEBUG_ARG
)
2792 fprintf (stderr
, "\ninit_cumulative_args (");
2794 fprintf (stderr
, "fntype code = %s, ret code = %s",
2795 tree_code_name
[(int) TREE_CODE (fntype
)],
2796 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
2798 fprintf (stderr
, "no fntype");
2801 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
2806 /* Set up the number of registers to use for passing arguments. */
2807 cum
->nregs
= ix86_regparm
;
2809 cum
->sse_nregs
= SSE_REGPARM_MAX
;
2811 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
2812 cum
->warn_sse
= true;
2813 cum
->warn_mmx
= true;
2814 cum
->maybe_vaarg
= false;
2816 /* Use ecx and edx registers if function has fastcall attribute,
2817 else look for regparm information. */
2818 if (fntype
&& !TARGET_64BIT
)
2820 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
2826 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
2829 /* Set up the number of SSE registers used for passing SFmode
2830 and DFmode arguments. Warn for mismatching ABI. */
2831 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers in 32-bit mode.  */
2838 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
2840 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
2841 param
!= 0; param
= next_param
)
2843 next_param
= TREE_CHAIN (param
);
2844 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
2854 cum
->float_in_sse
= 0;
2856 cum
->maybe_vaarg
= true;
2860 if ((!fntype
&& !libname
)
2861 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2862 cum
->maybe_vaarg
= true;
2864 if (TARGET_DEBUG_ARG
)
2865 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2870 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2871 But in the case of vector types, it is some vector mode.
2873 When we have only some of our vector isa extensions enabled, then there
2874 are some modes for which vector_mode_supported_p is false. For these
2875 modes, the generic vector support in gcc will choose some non-vector mode
2876 in order to implement the type. By computing the natural mode, we'll
2877 select the proper ABI location for the operand and not depend on whatever
2878 the middle-end decides to do with these vector types. */
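
/* For instance (assumed example): a "float __attribute__ ((vector_size (16)))"
   argument compiled with -mno-sse has a non-vector TYPE_MODE chosen by the
   middle end, but type_natural_mode still computes V4SFmode from the element
   type and the number of units, so the x86-64 argument classifier below sees
   the mode the ABI cares about.  */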
2880 static enum machine_mode
2881 type_natural_mode (tree type
)
2883 enum machine_mode mode
= TYPE_MODE (type
);
2885 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
2887 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2888 if ((size
== 8 || size
== 16)
2889 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2890 && TYPE_VECTOR_SUBPARTS (type
) > 1)
2892 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
2894 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
2895 mode
= MIN_MODE_VECTOR_FLOAT
;
2897 mode
= MIN_MODE_VECTOR_INT
;
2899 /* Get the mode which has this inner mode and number of units. */
2900 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
2901 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
2902 && GET_MODE_INNER (mode
) == innermode
)
2912 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2913 this may not agree with the mode that the type system has chosen for the
2914 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2915 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2918 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
2923 if (orig_mode
!= BLKmode
)
2924 tmp
= gen_rtx_REG (orig_mode
, regno
);
2927 tmp
= gen_rtx_REG (mode
, regno
);
2928 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
2929 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8-byte chunk of the incoming
   argument by register class and assign registers accordingly.  */
2939 /* Return the union class of CLASS1 and CLASS2.
2940 See the x86-64 PS ABI for details. */
2942 static enum x86_64_reg_class
2943 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2945 /* Rule #1: If both classes are equal, this is the resulting class. */
2946 if (class1
== class2
)
2949 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2951 if (class1
== X86_64_NO_CLASS
)
2953 if (class2
== X86_64_NO_CLASS
)
2956 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2957 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2958 return X86_64_MEMORY_CLASS
;
2960 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2961 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2962 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2963 return X86_64_INTEGERSI_CLASS
;
2964 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2965 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2966 return X86_64_INTEGER_CLASS
;
2968 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2970 if (class1
== X86_64_X87_CLASS
2971 || class1
== X86_64_X87UP_CLASS
2972 || class1
== X86_64_COMPLEX_X87_CLASS
2973 || class2
== X86_64_X87_CLASS
2974 || class2
== X86_64_X87UP_CLASS
2975 || class2
== X86_64_COMPLEX_X87_CLASS
)
2976 return X86_64_MEMORY_CLASS
;
2978 /* Rule #6: Otherwise class SSE is used. */
2979 return X86_64_SSE_CLASS
;
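
/* Example of the merge rules above: combining X86_64_INTEGERSI_CLASS with
   X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS (rule #4), while any
   class merged with X86_64_MEMORY_CLASS stays MEMORY (rule #3).  */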
2982 /* Classify the argument of type TYPE and mode MODE.
2983 CLASSES will be filled by the register class used to pass each word
2984 of the operand. The number of words is returned. In case the parameter
2985 should be passed in memory, 0 is returned. As a special case for zero
2986 sized containers, classes[0] will be NO_CLASS and 1 is returned.
   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits, modulo 256, to avoid overflow cases.
2991 See the x86-64 PS ABI for details.
2995 classify_argument (enum machine_mode mode
, tree type
,
2996 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2998 HOST_WIDE_INT bytes
=
2999 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3000 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3002 /* Variable sized entities are always passed/returned in memory. */
3006 if (mode
!= VOIDmode
3007 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3010 if (type
&& AGGREGATE_TYPE_P (type
))
3014 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3016 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3020 for (i
= 0; i
< words
; i
++)
3021 classes
[i
] = X86_64_NO_CLASS
;
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as a special case.  */
3027 classes
[0] = X86_64_NO_CLASS
;
3031 /* Classify each field of record and merge classes. */
3032 switch (TREE_CODE (type
))
3035 /* And now merge the fields of structure. */
3036 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3038 if (TREE_CODE (field
) == FIELD_DECL
)
3042 if (TREE_TYPE (field
) == error_mark_node
)
3045 /* Bitfields are always classified as integer. Handle them
3046 early, since later code would consider them to be
3047 misaligned integers. */
3048 if (DECL_BIT_FIELD (field
))
3050 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3051 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3052 + tree_low_cst (DECL_SIZE (field
), 0)
3055 merge_classes (X86_64_INTEGER_CLASS
,
3060 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3061 TREE_TYPE (field
), subclasses
,
3062 (int_bit_position (field
)
3063 + bit_offset
) % 256);
3066 for (i
= 0; i
< num
; i
++)
3069 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3071 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3079 /* Arrays are handled as small records. */
3082 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3083 TREE_TYPE (type
), subclasses
, bit_offset
);
3087 /* The partial classes are now full classes. */
3088 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3089 subclasses
[0] = X86_64_SSE_CLASS
;
3090 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3091 subclasses
[0] = X86_64_INTEGER_CLASS
;
3093 for (i
= 0; i
< words
; i
++)
3094 classes
[i
] = subclasses
[i
% num
];
3099 case QUAL_UNION_TYPE
:
3100 /* Unions are similar to RECORD_TYPE but offset is always 0.
3102 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3104 if (TREE_CODE (field
) == FIELD_DECL
)
3108 if (TREE_TYPE (field
) == error_mark_node
)
3111 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3112 TREE_TYPE (field
), subclasses
,
3116 for (i
= 0; i
< num
; i
++)
3117 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3126 /* Final merger cleanup. */
3127 for (i
= 0; i
< words
; i
++)
3129 /* If one class is MEMORY, everything should be passed in
3131 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3134 /* The X86_64_SSEUP_CLASS should be always preceded by
3135 X86_64_SSE_CLASS. */
3136 if (classes
[i
] == X86_64_SSEUP_CLASS
3137 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3138 classes
[i
] = X86_64_SSE_CLASS
;
3140 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3141 if (classes
[i
] == X86_64_X87UP_CLASS
3142 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3143 classes
[i
] = X86_64_SSE_CLASS
;
  /* Compute the alignment needed.  We align all types to natural boundaries,
     with the exception of XFmode, which is aligned to 64 bits.  */
3150 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3152 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3155 mode_alignment
= 128;
3156 else if (mode
== XCmode
)
3157 mode_alignment
= 256;
3158 if (COMPLEX_MODE_P (mode
))
3159 mode_alignment
/= 2;
3160 /* Misaligned fields are always returned in memory. */
3161 if (bit_offset
% mode_alignment
)
3165 /* for V1xx modes, just use the base mode */
3166 if (VECTOR_MODE_P (mode
)
3167 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3168 mode
= GET_MODE_INNER (mode
);
3170 /* Classification of atomic types. */
3175 classes
[0] = X86_64_SSE_CLASS
;
3178 classes
[0] = X86_64_SSE_CLASS
;
3179 classes
[1] = X86_64_SSEUP_CLASS
;
3188 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3189 classes
[0] = X86_64_INTEGERSI_CLASS
;
3191 classes
[0] = X86_64_INTEGER_CLASS
;
3195 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3200 if (!(bit_offset
% 64))
3201 classes
[0] = X86_64_SSESF_CLASS
;
3203 classes
[0] = X86_64_SSE_CLASS
;
3206 classes
[0] = X86_64_SSEDF_CLASS
;
3209 classes
[0] = X86_64_X87_CLASS
;
3210 classes
[1] = X86_64_X87UP_CLASS
;
3213 classes
[0] = X86_64_SSE_CLASS
;
3214 classes
[1] = X86_64_SSEUP_CLASS
;
3217 classes
[0] = X86_64_SSE_CLASS
;
3220 classes
[0] = X86_64_SSEDF_CLASS
;
3221 classes
[1] = X86_64_SSEDF_CLASS
;
3224 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
      /* These modes are larger than 16 bytes.  */
3235 classes
[0] = X86_64_SSE_CLASS
;
3236 classes
[1] = X86_64_SSEUP_CLASS
;
3242 classes
[0] = X86_64_SSE_CLASS
;
3248 gcc_assert (VECTOR_MODE_P (mode
));
3253 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3255 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3256 classes
[0] = X86_64_INTEGERSI_CLASS
;
3258 classes
[0] = X86_64_INTEGER_CLASS
;
3259 classes
[1] = X86_64_INTEGER_CLASS
;
3260 return 1 + (bytes
> 8);
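
/* An illustration of the classification above (assumed example): the C type

     struct s { double d; int i; int j; };

   is 16 bytes, and classify_argument yields { X86_64_SSEDF_CLASS,
   X86_64_INTEGER_CLASS }, so the struct travels in one SSE register and one
   general register.  A struct larger than 16 bytes is classified as memory
   and passed on the stack instead.  */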
/* Examine the argument and return the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */
3267 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3268 int *int_nregs
, int *sse_nregs
)
3270 enum x86_64_reg_class
class[MAX_CLASSES
];
3271 int n
= classify_argument (mode
, type
, class, 0);
3277 for (n
--; n
>= 0; n
--)
3280 case X86_64_INTEGER_CLASS
:
3281 case X86_64_INTEGERSI_CLASS
:
3284 case X86_64_SSE_CLASS
:
3285 case X86_64_SSESF_CLASS
:
3286 case X86_64_SSEDF_CLASS
:
3289 case X86_64_NO_CLASS
:
3290 case X86_64_SSEUP_CLASS
:
3292 case X86_64_X87_CLASS
:
3293 case X86_64_X87UP_CLASS
:
3297 case X86_64_COMPLEX_X87_CLASS
:
3298 return in_return
? 2 : 0;
3299 case X86_64_MEMORY_CLASS
:
3305 /* Construct container for the argument used by GCC interface. See
3306 FUNCTION_ARG for the detailed description. */
3309 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3310 tree type
, int in_return
, int nintregs
, int nsseregs
,
3311 const int *intreg
, int sse_regno
)
3313 /* The following variables hold the static issued_error state. */
3314 static bool issued_sse_arg_error
;
3315 static bool issued_sse_ret_error
;
3316 static bool issued_x87_ret_error
;
3318 enum machine_mode tmpmode
;
3320 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3321 enum x86_64_reg_class
class[MAX_CLASSES
];
3325 int needed_sseregs
, needed_intregs
;
3326 rtx exp
[MAX_CLASSES
];
3329 n
= classify_argument (mode
, type
, class, 0);
3330 if (TARGET_DEBUG_ARG
)
3333 fprintf (stderr
, "Memory class\n");
3336 fprintf (stderr
, "Classes:");
3337 for (i
= 0; i
< n
; i
++)
3339 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3341 fprintf (stderr
, "\n");
3346 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3349 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3352 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3353 some less clueful developer tries to use floating-point anyway. */
3354 if (needed_sseregs
&& !TARGET_SSE
)
3358 if (!issued_sse_ret_error
)
3360 error ("SSE register return with SSE disabled");
3361 issued_sse_ret_error
= true;
3364 else if (!issued_sse_arg_error
)
3366 error ("SSE register argument with SSE disabled");
3367 issued_sse_arg_error
= true;
3372 /* Likewise, error if the ABI requires us to return values in the
3373 x87 registers and the user specified -mno-80387. */
3374 if (!TARGET_80387
&& in_return
)
3375 for (i
= 0; i
< n
; i
++)
3376 if (class[i
] == X86_64_X87_CLASS
3377 || class[i
] == X86_64_X87UP_CLASS
3378 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3380 if (!issued_x87_ret_error
)
3382 error ("x87 register return with x87 disabled");
3383 issued_x87_ret_error
= true;
  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS
      && class[1] == X86_64_SSEUP_CLASS)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
3421 /* Otherwise figure out the entries of the PARALLEL. */
3422 for (i
= 0; i
< n
; i
++)
3426 case X86_64_NO_CLASS
:
3428 case X86_64_INTEGER_CLASS
:
3429 case X86_64_INTEGERSI_CLASS
:
3430 /* Merge TImodes on aligned occasions here too. */
3431 if (i
* 8 + 8 > bytes
)
3432 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3433 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3437 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3438 if (tmpmode
== BLKmode
)
3440 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3441 gen_rtx_REG (tmpmode
, *intreg
),
3445 case X86_64_SSESF_CLASS
:
3446 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3447 gen_rtx_REG (SFmode
,
3448 SSE_REGNO (sse_regno
)),
3452 case X86_64_SSEDF_CLASS
:
3453 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3454 gen_rtx_REG (DFmode
,
3455 SSE_REGNO (sse_regno
)),
3459 case X86_64_SSE_CLASS
:
3460 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3464 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3465 gen_rtx_REG (tmpmode
,
3466 SSE_REGNO (sse_regno
)),
3468 if (tmpmode
== TImode
)
3477 /* Empty aligned struct, union or class. */
3481 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3482 for (i
= 0; i
< nexps
; i
++)
3483 XVECEXP (ret
, 0, i
) = exp
[i
];
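/* Illustrative sketch (an assumption about typical output, not part of the
   original sources): for the struct { double d; long l; } example above,
   construct_container builds a PARALLEL whose EXPR_LIST entries place the
   DFmode piece in an SSE register at byte offset 0 and the DImode piece in
   an integer register at byte offset 8, roughly:

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI rdi)  (const_int 8))])  */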
3487 /* Update the data in CUM to advance over an argument
3488 of mode MODE and data type TYPE.
3489 (TYPE is null for libcalls where that information may not be available.) */
3492 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3493 tree type
, int named
)
3496 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3497 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3500 mode
= type_natural_mode (type
);
3502 if (TARGET_DEBUG_ARG
)
3503 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3504 "mode=%s, named=%d)\n\n",
3505 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3506 GET_MODE_NAME (mode
), named
);
3510 int int_nregs
, sse_nregs
;
3511 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3512 cum
->words
+= words
;
3513 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3515 cum
->nregs
-= int_nregs
;
3516 cum
->sse_nregs
-= sse_nregs
;
3517 cum
->regno
+= int_nregs
;
3518 cum
->sse_regno
+= sse_nregs
;
3521 cum
->words
+= words
;
3539 cum
->words
+= words
;
3540 cum
->nregs
-= words
;
3541 cum
->regno
+= words
;
3543 if (cum
->nregs
<= 0)
3551 if (cum
->float_in_sse
< 2)
3554 if (cum
->float_in_sse
< 1)
3565 if (!type
|| !AGGREGATE_TYPE_P (type
))
3567 cum
->sse_words
+= words
;
3568 cum
->sse_nregs
-= 1;
3569 cum
->sse_regno
+= 1;
3570 if (cum
->sse_nregs
<= 0)
3582 if (!type
|| !AGGREGATE_TYPE_P (type
))
3584 cum
->mmx_words
+= words
;
3585 cum
->mmx_nregs
-= 1;
3586 cum
->mmx_regno
+= 1;
3587 if (cum
->mmx_nregs
<= 0)
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */
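/* In concrete terms (a summary added here for orientation, not part of the
   original comment): on x86-64 the first six named integer/pointer
   arguments are passed in %rdi, %rsi, %rdx, %rcx, %r8 and %r9, and the
   first eight SSE arguments in %xmm0-%xmm7; anything that does not fit
   goes on the stack.  On ia32 the default ABI pushes everything, unless
   regparm/fastcall/sseregparm attributes allocate registers as handled
   below.  */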
3612 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3613 tree type
, int named
)
3615 enum machine_mode mode
= orig_mode
;
3618 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3619 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3620 static bool warnedsse
, warnedmmx
;
3622 /* To simplify the code below, represent vector types with a vector mode
3623 even if MMX/SSE are not active. */
3624 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3625 mode
= type_natural_mode (type
);
3627 /* Handle a hidden AL argument containing number of registers for varargs
3628 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3630 if (mode
== VOIDmode
)
3633 return GEN_INT (cum
->maybe_vaarg
3634 ? (cum
->sse_nregs
< 0
3642 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3644 &x86_64_int_parameter_registers
[cum
->regno
],
3649 /* For now, pass fp/complex values on the stack. */
3661 if (words
<= cum
->nregs
)
3663 int regno
= cum
->regno
;
3665 /* Fastcall allocates the first two DWORD (SImode) or
3666 smaller arguments to ECX and EDX. */
3669 if (mode
== BLKmode
|| mode
== DImode
)
3672 /* ECX not EAX is the first allocated register. */
3676 ret
= gen_rtx_REG (mode
, regno
);
3680 if (cum
->float_in_sse
< 2)
3683 if (cum
->float_in_sse
< 1)
3693 if (!type
|| !AGGREGATE_TYPE_P (type
))
3695 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
3698 warning (0, "SSE vector argument without SSE enabled "
3702 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3703 cum
->sse_regno
+ FIRST_SSE_REG
);
3710 if (!type
|| !AGGREGATE_TYPE_P (type
))
3712 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3715 warning (0, "MMX vector argument without MMX enabled "
3719 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3720 cum
->mmx_regno
+ FIRST_MMX_REG
);
3725 if (TARGET_DEBUG_ARG
)
3728 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3729 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
3732 print_simple_rtl (stderr
, ret
);
3734 fprintf (stderr
, ", stack");
3736 fprintf (stderr
, " )\n");
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return true;
    }
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  */

static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some language passes arrays by value.  */
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	  break;
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;

  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;

  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!TARGET_SSE)
	align = PARM_BOUNDARY;
      else if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > 128)
    align = 128;

  return align;
}
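/* For example (illustrative only): on ia32 a __m128 argument reports a
   128-bit boundary here, while a plain int or a struct without 128-bit
   vector members reports PARM_BOUNDARY (32 bits), matching the 4-byte
   argument alignment described above.  */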
/* Return true if N is a possible register number of function value.  */

int
ix86_function_value_regno_p (int regno)
{
  if (regno == 0
      || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
      || (regno == FIRST_SSE_REG && TARGET_SSE))
    return true;

  if (!TARGET_64BIT
      && (regno == FIRST_MMX_REG && TARGET_MMX))
    return true;

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
ix86_function_value (tree valtype, tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
				     1, REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
	 need to keep the rest of the compiler happy by returning a
	 meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    {
      tree fn = NULL_TREE, fntype;
      if (fntype_or_decl && DECL_P (fntype_or_decl))
	fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
			  ix86_value_regno (natmode, fn, fntype));
    }
}
/* Return true iff type is returned in memory.  */

int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs,
			      &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist.  */
      if (size == 8)
	return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return (TARGET_SSE ? 0 : 1);
    }
3946 /* When returning SSE vector types, we have a choice of either
3947 (1) being abi incompatible with a -march switch, or
3948 (2) generating an error.
3949 Given no good solution, I think the safest thing is one warning.
3950 The user won't be able to use -Werror, but....
3952 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3953 called in response to actually generating a caller or callee that
3954 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3955 via aggregate_value_p for general type probing from tree-ssa. */
3958 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
3960 static bool warnedsse
, warnedmmx
;
3964 /* Look at the return type of the function, not the function type. */
3965 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
3967 if (!TARGET_SSE
&& !warnedsse
)
3970 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3973 warning (0, "SSE vector return without SSE enabled "
3978 if (!TARGET_MMX
&& !warnedmmx
)
3980 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
3983 warning (0, "MMX vector return without MMX enabled "
3992 /* Define how to find the value returned by a library function
3993 assuming the value has mode MODE. */
3995 ix86_libcall_value (enum machine_mode mode
)
4009 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4012 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4016 return gen_rtx_REG (mode
, 0);
4020 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
{
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    return TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return TARGET_SSE ? FIRST_SSE_REG : 0;

  /* Decimal floating point values can go in %eax, unlike other float
     modes.  */
  if (DECIMAL_FLOAT_MODE_P (mode))
    return 0;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}
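/* Example (illustrative): a function carrying the sseregparm attribute, or
   a local function compiled with SSE math, returns a float in %xmm0,
   whereas the default ia32 ABI returns both float and double in %st(0).  */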
/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
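/* The record built above corresponds to the type the x86-64 psABI spells
   out for va_list; shown here only for illustration:

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];  */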
4108 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4111 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4112 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4115 CUMULATIVE_ARGS next_cum
;
4116 rtx save_area
= NULL_RTX
, mem
;
4129 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4132 /* Indicate to allocate space on the stack for varargs save area. */
4133 ix86_save_varrargs_registers
= 1;
4135 cfun
->stack_alignment_needed
= 128;
4137 fntype
= TREE_TYPE (current_function_decl
);
4138 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4139 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4140 != void_type_node
));
4142 /* For varargs, we do not want to skip the dummy va_dcl argument.
4143 For stdargs, we do want to skip the last named argument. */
4146 function_arg_advance (&next_cum
, mode
, type
, 1);
4149 save_area
= frame_pointer_rtx
;
4151 set
= get_varargs_alias_set ();
4153 for (i
= next_cum
.regno
;
4155 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4158 mem
= gen_rtx_MEM (Pmode
,
4159 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4160 MEM_NOTRAP_P (mem
) = 1;
4161 set_mem_alias_set (mem
, set
);
4162 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4163 x86_64_int_parameter_registers
[i
]));
4166 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4168 /* Now emit code to save SSE registers. The AX parameter contains number
4169 of SSE parameter registers used to call this function. We use
4170 sse_prologue_save insn template that produces computed jump across
4171 SSE saves. We need some preparation work to get this working. */
4173 label
= gen_label_rtx ();
4174 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4176 /* Compute address to jump to :
4177 label - 5*eax + nnamed_sse_arguments*5 */
4178 tmp_reg
= gen_reg_rtx (Pmode
);
4179 nsse_reg
= gen_reg_rtx (Pmode
);
4180 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4181 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4182 gen_rtx_MULT (Pmode
, nsse_reg
,
4184 if (next_cum
.sse_regno
)
4187 gen_rtx_CONST (DImode
,
4188 gen_rtx_PLUS (DImode
,
4190 GEN_INT (next_cum
.sse_regno
* 4))));
4192 emit_move_insn (nsse_reg
, label_ref
);
4193 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4195 /* Compute address of memory block we save into. We always use pointer
4196 pointing 127 bytes after first byte to store - this is needed to keep
4197 instruction size limited by 4 bytes. */
4198 tmp_reg
= gen_reg_rtx (Pmode
);
4199 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4200 plus_constant (save_area
,
4201 8 * REGPARM_MAX
+ 127)));
4202 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4203 MEM_NOTRAP_P (mem
) = 1;
4204 set_mem_alias_set (mem
, set
);
4205 set_mem_align (mem
, BITS_PER_WORD
);
4207 /* And finally do the dirty job! */
4208 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4209 GEN_INT (next_cum
.sse_regno
), label
));
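/* Layout of the register save area set up above (derived from the offsets
   used here and in ix86_va_start below; the exact figures are illustrative,
   assuming REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8):

       reg_save_area +   0 ... +  47   six integer argument registers
       reg_save_area +  48 ... + 175   eight SSE argument registers,
                                       16 bytes each

   so gp_offset counts in steps of 8 from 0, and fp_offset in steps of 16
   starting at 8 * REGPARM_MAX.  */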
4214 /* Implement va_start. */
4217 ix86_va_start (tree valist
, rtx nextarg
)
4219 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4220 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4221 tree gpr
, fpr
, ovf
, sav
, t
;
4224 /* Only 64bit target needs something special. */
4227 std_expand_builtin_va_start (valist
, nextarg
);
4231 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4232 f_fpr
= TREE_CHAIN (f_gpr
);
4233 f_ovf
= TREE_CHAIN (f_fpr
);
4234 f_sav
= TREE_CHAIN (f_ovf
);
4236 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4237 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4238 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4239 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4240 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4242 /* Count number of gp and fp argument registers used. */
4243 words
= current_function_args_info
.words
;
4244 n_gpr
= current_function_args_info
.regno
;
4245 n_fpr
= current_function_args_info
.sse_regno
;
4247 if (TARGET_DEBUG_ARG
)
4248 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4249 (int) words
, (int) n_gpr
, (int) n_fpr
);
4251 if (cfun
->va_list_gpr_size
)
4253 type
= TREE_TYPE (gpr
);
4254 t
= build2 (MODIFY_EXPR
, type
, gpr
,
4255 build_int_cst (type
, n_gpr
* 8));
4256 TREE_SIDE_EFFECTS (t
) = 1;
4257 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4260 if (cfun
->va_list_fpr_size
)
4262 type
= TREE_TYPE (fpr
);
4263 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4264 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4265 TREE_SIDE_EFFECTS (t
) = 1;
4266 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4269 /* Find the overflow area. */
4270 type
= TREE_TYPE (ovf
);
4271 t
= make_tree (type
, virtual_incoming_args_rtx
);
4273 t
= build2 (PLUS_EXPR
, type
, t
,
4274 build_int_cst (type
, words
* UNITS_PER_WORD
));
4275 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4276 TREE_SIDE_EFFECTS (t
) = 1;
4277 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4279 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4281 /* Find the register save area.
4282 Prologue of the function save it right above stack frame. */
4283 type
= TREE_TYPE (sav
);
4284 t
= make_tree (type
, frame_pointer_rtx
);
4285 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4286 TREE_SIDE_EFFECTS (t
) = 1;
4287 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
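/* Example of the effect of the code above (illustrative): for
   void f (int a, double b, ...) on x86-64, one integer and one SSE register
   are consumed by the named arguments, so va_start stores gp_offset = 8 and
   fp_offset = 8 * REGPARM_MAX + 16, points overflow_arg_area at the first
   stack-passed argument, and points reg_save_area at the block saved by
   ix86_setup_incoming_varargs.  */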
4291 /* Implement va_arg. */
4294 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4296 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4297 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4298 tree gpr
, fpr
, ovf
, sav
, t
;
4300 tree lab_false
, lab_over
= NULL_TREE
;
4305 enum machine_mode nat_mode
;
4307 /* Only 64bit target needs something special. */
4309 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4311 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4312 f_fpr
= TREE_CHAIN (f_gpr
);
4313 f_ovf
= TREE_CHAIN (f_fpr
);
4314 f_sav
= TREE_CHAIN (f_ovf
);
4316 valist
= build_va_arg_indirect_ref (valist
);
4317 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4318 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4319 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4320 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4322 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4324 type
= build_pointer_type (type
);
4325 size
= int_size_in_bytes (type
);
4326 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4328 nat_mode
= type_natural_mode (type
);
4329 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4330 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4332 /* Pull the value out of the saved registers. */
4334 addr
= create_tmp_var (ptr_type_node
, "addr");
4335 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4339 int needed_intregs
, needed_sseregs
;
4341 tree int_addr
, sse_addr
;
4343 lab_false
= create_artificial_label ();
4344 lab_over
= create_artificial_label ();
4346 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4348 need_temp
= (!REG_P (container
)
4349 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4350 || TYPE_ALIGN (type
) > 128));
4352 /* In case we are passing structure, verify that it is consecutive block
4353 on the register save area. If not we need to do moves. */
4354 if (!need_temp
&& !REG_P (container
))
4356 /* Verify that all registers are strictly consecutive */
4357 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4361 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4363 rtx slot
= XVECEXP (container
, 0, i
);
4364 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4365 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4373 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4375 rtx slot
= XVECEXP (container
, 0, i
);
4376 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4377 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4389 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4390 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4391 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4392 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4395 /* First ensure that we fit completely in registers. */
4398 t
= build_int_cst (TREE_TYPE (gpr
),
4399 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4400 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4401 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4402 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4403 gimplify_and_add (t
, pre_p
);
4407 t
= build_int_cst (TREE_TYPE (fpr
),
4408 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4410 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4411 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4412 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4413 gimplify_and_add (t
, pre_p
);
4416 /* Compute index to start of area used for integer regs. */
4419 /* int_addr = gpr + sav; */
4420 t
= fold_convert (ptr_type_node
, gpr
);
4421 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4422 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
4423 gimplify_and_add (t
, pre_p
);
4427 /* sse_addr = fpr + sav; */
4428 t
= fold_convert (ptr_type_node
, fpr
);
4429 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4430 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
4431 gimplify_and_add (t
, pre_p
);
4436 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4439 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4440 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4441 gimplify_and_add (t
, pre_p
);
4443 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4445 rtx slot
= XVECEXP (container
, 0, i
);
4446 rtx reg
= XEXP (slot
, 0);
4447 enum machine_mode mode
= GET_MODE (reg
);
4448 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4449 tree addr_type
= build_pointer_type (piece_type
);
4452 tree dest_addr
, dest
;
4454 if (SSE_REGNO_P (REGNO (reg
)))
4456 src_addr
= sse_addr
;
4457 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4461 src_addr
= int_addr
;
4462 src_offset
= REGNO (reg
) * 8;
4464 src_addr
= fold_convert (addr_type
, src_addr
);
4465 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4466 size_int (src_offset
)));
4467 src
= build_va_arg_indirect_ref (src_addr
);
4469 dest_addr
= fold_convert (addr_type
, addr
);
4470 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4471 size_int (INTVAL (XEXP (slot
, 1)))));
4472 dest
= build_va_arg_indirect_ref (dest_addr
);
4474 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
4475 gimplify_and_add (t
, pre_p
);
4481 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4482 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4483 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
4484 gimplify_and_add (t
, pre_p
);
4488 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4489 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4490 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
4491 gimplify_and_add (t
, pre_p
);
4494 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4495 gimplify_and_add (t
, pre_p
);
4497 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4498 append_to_statement_list (t
, pre_p
);
4501 /* ... otherwise out of the overflow area. */
4503 /* Care for on-stack alignment if needed. */
4504 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4505 || integer_zerop (TYPE_SIZE (type
)))
4509 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4510 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4511 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4512 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4513 build_int_cst (TREE_TYPE (t
), -align
));
4515 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4517 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4518 gimplify_and_add (t2
, pre_p
);
4520 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4521 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4522 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
4523 gimplify_and_add (t
, pre_p
);
4527 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4528 append_to_statement_list (t
, pre_p
);
4531 ptrtype
= build_pointer_type (type
);
4532 addr
= fold_convert (ptrtype
, addr
);
4535 addr
= build_va_arg_indirect_ref (addr
);
4536 return build_va_arg_indirect_ref (addr
);
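/* Roughly, the trees emitted above for va_arg (ap, int) amount to the
   following (illustrative pseudo-C, not literal GIMPLE):

       if (ap->gp_offset >= 6 * 8)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     done:
       result = *(int *) addr;  */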
/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (GET_CODE (mem) == MEM);
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
4627 /* Return the opcode of the special instruction to be used to load
4631 standard_80387_constant_opcode (rtx x
)
4633 switch (standard_80387_constant_p (x
))
4657 /* Return the CONST_DOUBLE representing the 80387 constant that is
4658 loaded by the specified special instruction. The argument IDX
4659 matches the return value from standard_80387_constant_p. */
4662 standard_80387_constant_rtx (int idx
)
4666 if (! ext_80387_constants_init
)
4667 init_ext_80387_constants ();
4683 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4687 /* Return 1 if mode is a valid mode for sse. */
4689 standard_sse_mode_p (enum machine_mode mode
)
4706 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4709 standard_sse_constant_p (rtx x
)
4711 enum machine_mode mode
= GET_MODE (x
);
4713 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
4715 if (vector_all_ones_operand (x
, mode
)
4716 && standard_sse_mode_p (mode
))
4717 return TARGET_SSE2
? 2 : -1;
4722 /* Return the opcode of the special instruction to be used to load
4726 standard_sse_constant_opcode (rtx insn
, rtx x
)
4728 switch (standard_sse_constant_p (x
))
4731 if (get_attr_mode (insn
) == MODE_V4SF
)
4732 return "xorps\t%0, %0";
4733 else if (get_attr_mode (insn
) == MODE_V2DF
)
4734 return "xorpd\t%0, %0";
4736 return "pxor\t%0, %0";
4738 return "pcmpeqd\t%0, %0";
4743 /* Returns 1 if OP contains a symbol reference */
4746 symbolic_reference_mentioned_p (rtx op
)
4751 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4754 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4755 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4761 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4762 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4766 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k of pop, since that's all we can do
     with one `ret' instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
4836 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4837 # define USE_HIDDEN_LINKONCE 1
4839 # define USE_HIDDEN_LINKONCE 0
4842 static int pic_labels_used
;
4844 /* Fills in the label name that should be used for a pc thunk for
4845 the given register. */
4848 get_pc_thunk_name (char name
[32], unsigned int regno
)
4850 gcc_assert (!TARGET_64BIT
);
4852 if (USE_HIDDEN_LINKONCE
)
4853 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4855 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4859 /* This function generates code for -fpic that loads %ebx with
4860 the return address of the caller and then returns. */
4863 ix86_file_end (void)
4868 for (regno
= 0; regno
< 8; ++regno
)
4872 if (! ((pic_labels_used
>> regno
) & 1))
4875 get_pc_thunk_name (name
, regno
);
4880 switch_to_section (darwin_sections
[text_coal_section
]);
4881 fputs ("\t.weak_definition\t", asm_out_file
);
4882 assemble_name (asm_out_file
, name
);
4883 fputs ("\n\t.private_extern\t", asm_out_file
);
4884 assemble_name (asm_out_file
, name
);
4885 fputs ("\n", asm_out_file
);
4886 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4890 if (USE_HIDDEN_LINKONCE
)
4894 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4896 TREE_PUBLIC (decl
) = 1;
4897 TREE_STATIC (decl
) = 1;
4898 DECL_ONE_ONLY (decl
) = 1;
4900 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4901 switch_to_section (get_named_section (decl
, NULL
, 0));
4903 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4904 fputs ("\t.hidden\t", asm_out_file
);
4905 assemble_name (asm_out_file
, name
);
4906 fputc ('\n', asm_out_file
);
4907 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4911 switch_to_section (text_section
);
4912 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4915 xops
[0] = gen_rtx_REG (SImode
, regno
);
4916 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4917 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4918 output_asm_insn ("ret", xops
);
4921 if (NEED_INDICATE_EXEC_STACK
)
4922 file_end_indicate_exec_stack ();
4925 /* Emit code for the SET_GOT patterns. */
4928 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
4933 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4935 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4937 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
4940 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4942 output_asm_insn ("call\t%a2", xops
);
4945 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4946 is what will be referenced by the Mach-O PIC subsystem. */
4948 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4951 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4952 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4955 output_asm_insn ("pop{l}\t%0", xops
);
4960 get_pc_thunk_name (name
, REGNO (dest
));
4961 pic_labels_used
|= 1 << REGNO (dest
);
4963 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4964 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4965 output_asm_insn ("call\t%X2", xops
);
4966 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4967 is what will be referenced by the Mach-O PIC subsystem. */
4970 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4972 targetm
.asm_out
.internal_label (asm_out_file
, "L",
4973 CODE_LABEL_NUMBER (label
));
4980 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4981 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4983 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
/* Generate a "push" pattern for input ARG.  */
4993 return gen_rtx_SET (VOIDmode
,
4995 gen_rtx_PRE_DEC (Pmode
,
4996 stack_pointer_rtx
)),
5000 /* Return >= 0 if there is an unused call-clobbered register available
5001 for the entire function. */
5004 ix86_select_alt_pic_regnum (void)
5006 if (current_function_is_leaf
&& !current_function_profile
5007 && !ix86_current_function_calls_tls_descriptor
)
5010 for (i
= 2; i
>= 0; --i
)
5011 if (!regs_ever_live
[i
])
5015 return INVALID_REGNUM
;
/* Return 1 if we need to save REGNO.  */

static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  if (cfun->machine->force_align_arg_pointer
      && regno == REGNO (cfun->machine->force_align_arg_pointer))
    return 1;

  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the
   other its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
5097 /* Fill structure ix86_frame about frame of currently computed function. */
5100 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5102 HOST_WIDE_INT total_size
;
5103 unsigned int stack_alignment_needed
;
5104 HOST_WIDE_INT offset
;
5105 unsigned int preferred_alignment
;
5106 HOST_WIDE_INT size
= get_frame_size ();
5108 frame
->nregs
= ix86_nsaved_regs ();
5111 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5112 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5114 /* During reload iteration the amount of registers saved can change.
5115 Recompute the value as needed. Do not recompute when amount of registers
5116 didn't change as reload does multiple calls to the function and does not
5117 expect the decision to change within single iteration. */
5119 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5121 int count
= frame
->nregs
;
5123 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5124 /* The fast prologue uses move instead of push to save registers. This
5125 is significantly longer, but also executes faster as modern hardware
5126 can execute the moves in parallel, but can't do that for push/pop.
5128 Be careful about choosing what prologue to emit: When function takes
5129 many instructions to execute we may use slow version as well as in
5130 case function is known to be outside hot spot (this is known with
5131 feedback only). Weight the size of function by number of registers
5132 to save as it is cheap to use one or two push instructions but very
5133 slow to use many of them. */
5135 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5136 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5137 || (flag_branch_probabilities
5138 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5139 cfun
->machine
->use_fast_prologue_epilogue
= false;
5141 cfun
->machine
->use_fast_prologue_epilogue
5142 = !expensive_function_p (count
);
5144 if (TARGET_PROLOGUE_USING_MOVE
5145 && cfun
->machine
->use_fast_prologue_epilogue
)
5146 frame
->save_regs_using_mov
= true;
5148 frame
->save_regs_using_mov
= false;
5151 /* Skip return address and saved base pointer. */
5152 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5154 frame
->hard_frame_pointer_offset
= offset
;
5156 /* Do some sanity checking of stack_alignment_needed and
5157 preferred_alignment, since i386 port is the only using those features
5158 that may break easily. */
5160 gcc_assert (!size
|| stack_alignment_needed
);
5161 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5162 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5163 gcc_assert (stack_alignment_needed
5164 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5166 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5167 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5169 /* Register save area */
5170 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5173 if (ix86_save_varrargs_registers
)
5175 offset
+= X86_64_VARARGS_SIZE
;
5176 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5179 frame
->va_arg_size
= 0;
5181 /* Align start of frame for local function. */
5182 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5183 & -stack_alignment_needed
) - offset
;
5185 offset
+= frame
->padding1
;
5187 /* Frame pointer points here. */
5188 frame
->frame_pointer_offset
= offset
;
5192 /* Add outgoing arguments area. Can be skipped if we eliminated
5193 all the function calls as dead code.
5194 Skipping is however impossible when function calls alloca. Alloca
5195 expander assumes that last current_function_outgoing_args_size
5196 of stack frame are unused. */
5197 if (ACCUMULATE_OUTGOING_ARGS
5198 && (!current_function_is_leaf
|| current_function_calls_alloca
5199 || ix86_current_function_calls_tls_descriptor
))
5201 offset
+= current_function_outgoing_args_size
;
5202 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5205 frame
->outgoing_arguments_size
= 0;
5207 /* Align stack boundary. Only needed if we're calling another function
5209 if (!current_function_is_leaf
|| current_function_calls_alloca
5210 || ix86_current_function_calls_tls_descriptor
)
5211 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5212 & -preferred_alignment
) - offset
;
5214 frame
->padding2
= 0;
5216 offset
+= frame
->padding2
;
5218 /* We've reached end of stack frame. */
5219 frame
->stack_pointer_offset
= offset
;
5221 /* Size prologue needs to allocate. */
5222 frame
->to_allocate
=
5223 (size
+ frame
->padding1
+ frame
->padding2
5224 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5226 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5227 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5228 frame
->save_regs_using_mov
= false;
5230 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5231 && current_function_is_leaf
5232 && !ix86_current_function_calls_tls_descriptor
)
5234 frame
->red_zone_size
= frame
->to_allocate
;
5235 if (frame
->save_regs_using_mov
)
5236 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5237 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5238 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5241 frame
->red_zone_size
= 0;
5242 frame
->to_allocate
-= frame
->red_zone_size
;
5243 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5245 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5246 fprintf (stderr
, "size: %i\n", size
);
5247 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5248 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5249 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5250 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5251 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5252 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5253 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5254 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5255 frame
->hard_frame_pointer_offset
);
5256 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  The first register
   is stored at POINTER + OFFSET.  */

static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  unsigned int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}
5295 /* Expand prologue or epilogue stack adjustment.
5296 The pattern exist to put a dependency on all ebp-based memory accesses.
5297 STYLE should be negative if instructions should be marked as frame related,
5298 zero if %r11 register is live and cannot be freely used and positive
5302 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5307 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5308 else if (x86_64_immediate_operand (offset
, DImode
))
5309 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5313 /* r11 is used by indirect sibcall return as well, set before the
5314 epilogue and used after the epilogue. ATM indirect sibcall
5315 shouldn't be used together with huge frame sizes in one
5316 function because of the frame_size check in sibcall.c. */
5318 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5319 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5321 RTX_FRAME_RELATED_P (insn
) = 1;
5322 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5326 RTX_FRAME_RELATED_P (insn
) = 1;
5329 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5332 ix86_internal_arg_pointer (void)
5334 bool has_force_align_arg_pointer
=
5335 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5336 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5337 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5338 && DECL_NAME (current_function_decl
)
5339 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5340 && DECL_FILE_SCOPE_P (current_function_decl
))
5341 || ix86_force_align_arg_pointer
5342 || has_force_align_arg_pointer
)
5344 /* Nested functions can't realign the stack due to a register
5346 if (DECL_CONTEXT (current_function_decl
)
5347 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5349 if (ix86_force_align_arg_pointer
)
5350 warning (0, "-mstackrealign ignored for nested functions");
5351 if (has_force_align_arg_pointer
)
5352 error ("%s not supported for nested functions",
5353 ix86_force_align_arg_pointer_string
);
5354 return virtual_incoming_args_rtx
;
5356 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5357 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5360 return virtual_incoming_args_rtx
;
5363 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5364 This is called from dwarf2out.c to emit call frame instructions
5365 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5367 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5369 rtx unspec
= SET_SRC (pattern
);
5370 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5374 case UNSPEC_REG_SAVE
:
5375 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5376 SET_DEST (pattern
));
5378 case UNSPEC_DEF_CFA
:
5379 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5380 INTVAL (XVECEXP (unspec
, 0, 0)));
5387 /* Expand the prologue into a bunch of separate insns. */
5390 ix86_expand_prologue (void)
5394 struct ix86_frame frame
;
5395 HOST_WIDE_INT allocate
;
5397 ix86_compute_frame_layout (&frame
);
5399 if (cfun
->machine
->force_align_arg_pointer
)
5403 /* Grab the argument pointer. */
5404 x
= plus_constant (stack_pointer_rtx
, 4);
5405 y
= cfun
->machine
->force_align_arg_pointer
;
5406 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5407 RTX_FRAME_RELATED_P (insn
) = 1;
5409 /* The unwind info consists of two parts: install the fafp as the cfa,
5410 and record the fafp as the "save register" of the stack pointer.
5411 The later is there in order that the unwinder can see where it
5412 should restore the stack pointer across the and insn. */
5413 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5414 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5415 RTX_FRAME_RELATED_P (x
) = 1;
5416 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5418 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5419 RTX_FRAME_RELATED_P (y
) = 1;
5420 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5421 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5422 REG_NOTES (insn
) = x
;
5424 /* Align the stack. */
5425 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5428 /* And here we cheat like madmen with the unwind info. We force the
5429 cfa register back to sp+4, which is exactly what it was at the
5430 start of the function. Re-pushing the return address results in
5431 the return at the same spot relative to the cfa, and thus is
5432 correct wrt the unwind info. */
5433 x
= cfun
->machine
->force_align_arg_pointer
;
5434 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5435 insn
= emit_insn (gen_push (x
));
5436 RTX_FRAME_RELATED_P (insn
) = 1;
5439 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5440 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5441 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5442 REG_NOTES (insn
) = x
;
5445 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5446 slower on all targets. Also sdb doesn't like it. */
5448 if (frame_pointer_needed
)
5450 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5451 RTX_FRAME_RELATED_P (insn
) = 1;
5453 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5454 RTX_FRAME_RELATED_P (insn
) = 1;
5457 allocate
= frame
.to_allocate
;
5459 if (!frame
.save_regs_using_mov
)
5460 ix86_emit_save_regs ();
5462 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5464 /* When using red zone we may start register saving before allocating
5465 the stack frame saving one cycle of the prologue. */
5466 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5467 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5468 : stack_pointer_rtx
,
5469 -frame
.nregs
* UNITS_PER_WORD
);
5473 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5474 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5475 GEN_INT (-allocate
), -1);
5478 /* Only valid for Win32. */
5479 rtx eax
= gen_rtx_REG (SImode
, 0);
5480 bool eax_live
= ix86_eax_live_at_start_p ();
5483 gcc_assert (!TARGET_64BIT
);
5487 emit_insn (gen_push (eax
));
5491 emit_move_insn (eax
, GEN_INT (allocate
));
5493 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5494 RTX_FRAME_RELATED_P (insn
) = 1;
5495 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5496 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5497 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5498 t
, REG_NOTES (insn
));
5502 if (frame_pointer_needed
)
5503 t
= plus_constant (hard_frame_pointer_rtx
,
5506 - frame
.nregs
* UNITS_PER_WORD
);
5508 t
= plus_constant (stack_pointer_rtx
, allocate
);
5509 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5513 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5515 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5516 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5518 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5519 -frame
.nregs
* UNITS_PER_WORD
);
5522 pic_reg_used
= false;
5523 if (pic_offset_table_rtx
5524 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5525 || current_function_profile
))
5527 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5529 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5530 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5532 pic_reg_used
= true;
5538 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5540 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5542 /* Even with accurate pre-reload life analysis, we can wind up
5543 deleting all references to the pic register after reload.
5544 Consider if cross-jumping unifies two sides of a branch
5545 controlled by a comparison vs the only read from a global.
5546 In which case, allow the set_got to be deleted, though we're
5547 too late to do anything about the ebx save in the prologue. */
5548 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5551 /* Prevent function calls from be scheduled before the call to mcount.
5552 In the pic_reg_used case, make sure that the got load isn't deleted. */
5553 if (current_function_profile
)
5554 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5557 /* Emit code to restore saved registers using MOV insns. First register
5558 is restored from POINTER + OFFSET. */
5560 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5561 int maybe_eh_return
)
5564 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5566 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5567 if (ix86_save_reg (regno
, maybe_eh_return
))
5569 /* Ensure that adjust_address won't be forced to produce pointer
5570 out of range allowed by x86-64 instruction set. */
5571 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5575 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5576 emit_move_insn (r11
, GEN_INT (offset
));
5577 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5578 base_address
= gen_rtx_MEM (Pmode
, r11
);
5581 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5582 adjust_address (base_address
, Pmode
, offset
));
5583 offset
+= UNITS_PER_WORD
;
/* Restore function stack, frame, and registers.  */

ix86_expand_epilogue (int style)

  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)

  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using add/lea instructions,
     while this code results in a LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && cfun->machine->use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && cfun->machine->use_fast_prologue_epilogue
          && frame.nregs == 1)
      || current_function_calls_eh_return)

      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  Only exception is esp pointing directly
         to the end of block of saved registers, where we may simplify
         addressing mode.  */
      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */

          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

          if (frame_pointer_needed)

              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,

              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));

      else if (!frame_pointer_needed)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate
                                            + frame.nregs * UNITS_PER_WORD),

      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
               || !cfun->machine->use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());

          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,

            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));

      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */

          gcc_assert (frame_pointer_needed);
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     GEN_INT (offset), style);

      else if (frame.to_allocate)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))

            emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
            emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));

      if (frame_pointer_needed)

          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));

  if (cfun->machine->force_align_arg_pointer)

      emit_insn (gen_addsi3 (stack_pointer_rtx,
                             cfun->machine->force_align_arg_pointer,

  /* Sibcall epilogues don't want a return instruction.  */

  if (current_function_pops_args && current_function_args_size)

      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
         return address, do explicit add, and jump indirectly to the
         caller.  */

      if (current_function_pops_args >= 65536)

          rtx ecx = gen_rtx_REG (SImode, 2);

          /* There is no "pascal" calling convention in 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));

        emit_jump_insn (gen_return_pop_internal (popc));

    emit_jump_insn (gen_return_internal ());
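/* Illustrative sketch (not part of the original file): for a 32-bit
   function with a frame pointer, one saved register and 8 bytes of
   callee-popped arguments, the code above typically boils down to one
   of these shapes, in AT&T syntax with hypothetical registers:

        popl    %ebx            ; pop saved registers
        leave                   ; or: movl %ebp, %esp; popl %ebp
        ret     $8              ; gen_return_pop_internal

   or, when the registers are restored with ix86_emit_restore_regs_using_mov,
   the pops are replaced by movl instructions addressed off %ebp or %esp
   followed by a single stack adjustment.  */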
/* Reset from the function's potential modifications.  */

ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)

  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;

  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */

      rtx insn = get_last_insn ();

             && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
        insn = PREV_INSN (insn);

                  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
        fputs ("\tnop\n", file);
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

ix86_decompose_address (rtx addr, struct ix86_address *out)

  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)

  else if (GET_CODE (addr) == PLUS)

          addends[n++] = XEXP (op, 1);

      while (GET_CODE (op) == PLUS);

      for (i = n; i >= 0; --i)

          switch (GET_CODE (op))

              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);

              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;

  else if (GET_CODE (addr) == MULT)

      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);

  else if (GET_CODE (addr) == ASHIFT)

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)

      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)

    disp = addr;                        /* displacement */

  /* Extract the integral value of scale.  */

      if (GET_CODE (scale_rtx) != CONST_INT)

      scale = INTVAL (scale_rtx);

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))

      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
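/* A minimal usage sketch (not part of the original file): callers such as
   ix86_address_cost and legitimate_address_p below consume the
   decomposition like this.  */
#if 0
  struct ix86_address parts;
  int ok = ix86_decompose_address (addr, &parts);

  if (ok)
    {
      /* ADDR matches base + index*scale + disp (+ optional segment):
         any of parts.base, parts.index or parts.disp may be NULL_RTX,
         parts.scale is 1, 2, 4 or 8, and parts.seg is SEG_DEFAULT,
         SEG_FS or SEG_GS.  A return value of -1 means an ASHIFT was
         accepted only for computing the length of an lea.  */
    }
#endif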
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

ix86_address_cost (rtx x)

  struct ix86_address parts;

  int ok = ix86_decompose_address (x, &parts);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)

  if (parts.seg != SEG_DEFAULT)

  /* Attempt to minimize number of registers in the address.  */

      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))

          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))

      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)

      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)

  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
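/* Illustration (not from the original sources) of the three penalized
   forms tested just above, written in AT&T syntax with hypothetical
   registers:

        (%eax,%ecx,4)           base + index, scale != 1, no displacement
        4(,%ecx,4)              disp + index, scale != 1, no base
        (%eax,%ecx)             base + index, scale == 1, no displacement

   all of which need a 00_xxx_100b ModR/M byte followed by a SIB byte.  */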
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

ix86_find_base_term (rtx x)

      if (GET_CODE (x) != CONST)

      if (GET_CODE (term) == PLUS
          && (GET_CODE (XEXP (term, 1)) == CONST_INT
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);

      if (GET_CODE (term) != UNSPEC
          || XINT (term, 1) != UNSPEC_GOTPCREL)

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
          && GET_CODE (term) != LABEL_REF)

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

darwin_local_data_pic (rtx disp)

  if (GET_CODE (disp) == MINUS)

      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)

            const char *sym_name = XSTR (XEXP (disp, 1), 0);
            if (! strcmp (sym_name, "<pic base>"))
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

legitimate_constant_p (rtx x)

  switch (GET_CODE (x))

      if (GET_CODE (x) == PLUS)

          if (GET_CODE (XEXP (x, 1)) != CONST_INT)

      if (TARGET_MACHO && darwin_local_data_pic (x))

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))

            return TARGET_64BIT;

            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);

            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)

      if (GET_CODE (x) != SYMBOL_REF)

      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))

      if (GET_MODE (x) == TImode
          && x != CONST0_RTX (TImode)

      if (x == CONST0_RTX (GET_MODE (x)))

  /* Otherwise we handle everything else in the move patterns.  */

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

ix86_cannot_force_const_mem (rtx x)

  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))

  return !legitimate_constant_p (x);

/* Determine if a given RTX is a valid constant address.  */

constant_address_p (rtx x)

  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

legitimate_pic_operand_p (rtx x)

  switch (GET_CODE (x))

      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && GET_CODE (XEXP (inner, 1)) == CONST_INT)
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))

            return TARGET_64BIT;

            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);

      return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

legitimate_pic_address_disp_p (rtx disp)

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */

      rtx op0 = disp, op1;

      switch (GET_CODE (disp))

          if (GET_CODE (XEXP (disp, 0)) != PLUS)

          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (GET_CODE (op1) != CONST_INT
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)

          if (GET_CODE (op0) == LABEL_REF)

          if (GET_CODE (op0) != SYMBOL_REF)

          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))

          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))

  if (GET_CODE (disp) != CONST)

  disp = XEXP (disp, 0);

      /* It is unsafe to allow PLUS expressions here; this limits the
         allowed distance of GOT tables.  We should not need these
         anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF))

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)

  if (GET_CODE (disp) == PLUS)

      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)

      disp = XEXP (disp, 0);

  if (TARGET_MACHO && darwin_local_data_pic (disp))

  if (GET_CODE (disp) != UNSPEC)

  switch (XINT (disp, 1))

      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;

      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI also specifies a 32bit relocation, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)

      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);

    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:

      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);

      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);

      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
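/* Illustration (not from the original sources): the displacements accepted
   above correspond to assembler operands such as

        foo@GOTOFF(%ebx)        UNSPEC_GOTOFF, 32-bit local data
        foo@GOT(%ebx)           UNSPEC_GOT, address loaded from the GOT
        foo@GOTPCREL(%rip)      UNSPEC_GOTPCREL, 64-bit RIP-relative GOT slot

   with the TLS unspecs (GOTTPOFF, NTPOFF, DTPOFF, ...) covering the
   thread-local variants.  */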
6327 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6328 memory address for an instruction. The MODE argument is the machine mode
6329 for the MEM expression that wants to use this address.
6331 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6332 convert common non-canonical forms to canonical form so that they will
6336 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6338 struct ix86_address parts
;
6339 rtx base
, index
, disp
;
6340 HOST_WIDE_INT scale
;
6341 const char *reason
= NULL
;
6342 rtx reason_rtx
= NULL_RTX
;
6344 if (TARGET_DEBUG_ADDR
)
6347 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6348 GET_MODE_NAME (mode
), strict
);
6352 if (ix86_decompose_address (addr
, &parts
) <= 0)
6354 reason
= "decomposition failed";
6359 index
= parts
.index
;
6361 scale
= parts
.scale
;
6363 /* Validate base register.
6365 Don't allow SUBREG's that span more than a word here. It can lead to spill
6366 failures when the base is one word out of a two word structure, which is
6367 represented internally as a DImode int. */
6376 else if (GET_CODE (base
) == SUBREG
6377 && REG_P (SUBREG_REG (base
))
6378 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6380 reg
= SUBREG_REG (base
);
6383 reason
= "base is not a register";
6387 if (GET_MODE (base
) != Pmode
)
6389 reason
= "base is not in Pmode";
6393 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6394 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6396 reason
= "base is not valid";
6401 /* Validate index register.
6403 Don't allow SUBREG's that span more than a word here -- same as above. */
6412 else if (GET_CODE (index
) == SUBREG
6413 && REG_P (SUBREG_REG (index
))
6414 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6416 reg
= SUBREG_REG (index
);
6419 reason
= "index is not a register";
6423 if (GET_MODE (index
) != Pmode
)
6425 reason
= "index is not in Pmode";
6429 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6430 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6432 reason
= "index is not valid";
6437 /* Validate scale factor. */
6440 reason_rtx
= GEN_INT (scale
);
6443 reason
= "scale without index";
6447 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6449 reason
= "scale is not a valid multiplier";
6454 /* Validate displacement. */
6459 if (GET_CODE (disp
) == CONST
6460 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6461 switch (XINT (XEXP (disp
, 0), 1))
6463 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6464 used. While ABI specify also 32bit relocations, we don't produce
6465 them at all and use IP relative instead. */
6468 gcc_assert (flag_pic
);
6470 goto is_legitimate_pic
;
6471 reason
= "64bit address unspec";
6474 case UNSPEC_GOTPCREL
:
6475 gcc_assert (flag_pic
);
6476 goto is_legitimate_pic
;
6478 case UNSPEC_GOTTPOFF
:
6479 case UNSPEC_GOTNTPOFF
:
6480 case UNSPEC_INDNTPOFF
:
6486 reason
= "invalid address unspec";
6490 else if (SYMBOLIC_CONST (disp
)
6494 && MACHOPIC_INDIRECT
6495 && !machopic_operand_p (disp
)
6501 if (TARGET_64BIT
&& (index
|| base
))
6503 /* foo@dtpoff(%rX) is ok. */
6504 if (GET_CODE (disp
) != CONST
6505 || GET_CODE (XEXP (disp
, 0)) != PLUS
6506 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6507 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6508 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6509 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6511 reason
= "non-constant pic memory reference";
6515 else if (! legitimate_pic_address_disp_p (disp
))
6517 reason
= "displacement is an invalid pic construct";
6521 /* This code used to verify that a symbolic pic displacement
6522 includes the pic_offset_table_rtx register.
6524 While this is good idea, unfortunately these constructs may
6525 be created by "adds using lea" optimization for incorrect
6534 This code is nonsensical, but results in addressing
6535 GOT table with pic_offset_table_rtx base. We can't
6536 just refuse it easily, since it gets matched by
6537 "addsi3" pattern, that later gets split to lea in the
6538 case output register differs from input. While this
6539 can be handled by separate addsi pattern for this case
6540 that never results in lea, this seems to be easier and
6541 correct fix for crash to disable this test. */
6543 else if (GET_CODE (disp
) != LABEL_REF
6544 && GET_CODE (disp
) != CONST_INT
6545 && (GET_CODE (disp
) != CONST
6546 || !legitimate_constant_p (disp
))
6547 && (GET_CODE (disp
) != SYMBOL_REF
6548 || !legitimate_constant_p (disp
)))
6550 reason
= "displacement is not constant";
6553 else if (TARGET_64BIT
6554 && !x86_64_immediate_operand (disp
, VOIDmode
))
6556 reason
= "displacement is out of range";
6561 /* Everything looks valid. */
6562 if (TARGET_DEBUG_ADDR
)
6563 fprintf (stderr
, "Success.\n");
6567 if (TARGET_DEBUG_ADDR
)
6569 fprintf (stderr
, "Error: %s\n", reason
);
6570 debug_rtx (reason_rtx
);
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)

  static HOST_WIDE_INT set = -1;

    set = new_alias_set ();
6586 /* Return a legitimate reference for ORIG (an address) using the
6587 register REG. If REG is 0, a new pseudo is generated.
6589 There are two types of references that must be handled:
6591 1. Global data references must load the address from the GOT, via
6592 the PIC reg. An insn is emitted to do this load, and the reg is
6595 2. Static data references, constant pool addresses, and code labels
6596 compute the address as an offset from the GOT, whose base is in
6597 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6598 differentiate them from global data objects. The returned
6599 address is the PIC reg + an unspec constant.
6601 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6602 reg also appears in the address. */
6605 legitimize_pic_address (rtx orig
, rtx reg
)
6612 if (TARGET_MACHO
&& !TARGET_64BIT
)
6615 reg
= gen_reg_rtx (Pmode
);
6616 /* Use the generic Mach-O PIC machinery. */
6617 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6621 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6623 else if (TARGET_64BIT
6624 && ix86_cmodel
!= CM_SMALL_PIC
6625 && local_symbolic_operand (addr
, Pmode
))
6628 /* This symbol may be referenced via a displacement from the PIC
6629 base address (@GOTOFF). */
6631 if (reload_in_progress
)
6632 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6633 if (GET_CODE (addr
) == CONST
)
6634 addr
= XEXP (addr
, 0);
6635 if (GET_CODE (addr
) == PLUS
)
6637 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6638 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6641 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6642 new = gen_rtx_CONST (Pmode
, new);
6644 tmpreg
= gen_reg_rtx (Pmode
);
6647 emit_move_insn (tmpreg
, new);
6651 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
6652 tmpreg
, 1, OPTAB_DIRECT
);
6655 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
6657 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6659 /* This symbol may be referenced via a displacement from the PIC
6660 base address (@GOTOFF). */
6662 if (reload_in_progress
)
6663 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6664 if (GET_CODE (addr
) == CONST
)
6665 addr
= XEXP (addr
, 0);
6666 if (GET_CODE (addr
) == PLUS
)
6668 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6669 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6672 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6673 new = gen_rtx_CONST (Pmode
, new);
6674 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6678 emit_move_insn (reg
, new);
6682 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
6686 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6687 new = gen_rtx_CONST (Pmode
, new);
6688 new = gen_const_mem (Pmode
, new);
6689 set_mem_alias_set (new, ix86_GOT_alias_set ());
6692 reg
= gen_reg_rtx (Pmode
);
6693 /* Use directly gen_movsi, otherwise the address is loaded
6694 into register for CSE. We don't want to CSE this addresses,
6695 instead we CSE addresses from the GOT table, so skip this. */
6696 emit_insn (gen_movsi (reg
, new));
6701 /* This symbol must be referenced via a load from the
6702 Global Offset Table (@GOT). */
6704 if (reload_in_progress
)
6705 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6706 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6707 new = gen_rtx_CONST (Pmode
, new);
6708 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6709 new = gen_const_mem (Pmode
, new);
6710 set_mem_alias_set (new, ix86_GOT_alias_set ());
6713 reg
= gen_reg_rtx (Pmode
);
6714 emit_move_insn (reg
, new);
6720 if (GET_CODE (addr
) == CONST_INT
6721 && !x86_64_immediate_operand (addr
, VOIDmode
))
6725 emit_move_insn (reg
, addr
);
6729 new = force_reg (Pmode
, addr
);
6731 else if (GET_CODE (addr
) == CONST
)
6733 addr
= XEXP (addr
, 0);
6735 /* We must match stuff we generate before. Assume the only
6736 unspecs that can get here are ours. Not that we could do
6737 anything with them anyway.... */
6738 if (GET_CODE (addr
) == UNSPEC
6739 || (GET_CODE (addr
) == PLUS
6740 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6742 gcc_assert (GET_CODE (addr
) == PLUS
);
6744 if (GET_CODE (addr
) == PLUS
)
6746 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6748 /* Check first to see if this is a constant offset from a @GOTOFF
6749 symbol reference. */
6750 if (local_symbolic_operand (op0
, Pmode
)
6751 && GET_CODE (op1
) == CONST_INT
)
6755 if (reload_in_progress
)
6756 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6757 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6759 new = gen_rtx_PLUS (Pmode
, new, op1
);
6760 new = gen_rtx_CONST (Pmode
, new);
6761 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6765 emit_move_insn (reg
, new);
6771 if (INTVAL (op1
) < -16*1024*1024
6772 || INTVAL (op1
) >= 16*1024*1024)
6774 if (!x86_64_immediate_operand (op1
, Pmode
))
6775 op1
= force_reg (Pmode
, op1
);
6776 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
6782 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6783 new = legitimize_pic_address (XEXP (addr
, 1),
6784 base
== reg
? NULL_RTX
: reg
);
6786 if (GET_CODE (new) == CONST_INT
)
6787 new = plus_constant (base
, INTVAL (new));
6790 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6792 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6793 new = XEXP (new, 1);
6795 new = gen_rtx_PLUS (Pmode
, base
, new);
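/* Illustration (not from the original sources) of the two reference styles
   produced above for 32-bit PIC, assuming %ebx holds the GOT pointer:

        movl    foo@GOT(%ebx), %eax     ; global data: load address from GOT
        leal    bar@GOTOFF(%ebx), %eax  ; local data: GOT-relative offset

   In 64-bit mode the first form becomes a movq from foo@GOTPCREL(%rip).  */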
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

get_thread_pointer (int to_reg)

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);
6821 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6822 false if we expect this to be used for a memory address and true if
6823 we expect to load the address into a register. */
6826 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6828 rtx dest
, base
, off
, pic
, tp
;
6833 case TLS_MODEL_GLOBAL_DYNAMIC
:
6834 dest
= gen_reg_rtx (Pmode
);
6835 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6837 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6839 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6842 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6843 insns
= get_insns ();
6846 emit_libcall_block (insns
, dest
, rax
, x
);
6848 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6849 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
6851 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6853 if (TARGET_GNU2_TLS
)
6855 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
6857 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6861 case TLS_MODEL_LOCAL_DYNAMIC
:
6862 base
= gen_reg_rtx (Pmode
);
6863 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6865 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6867 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6870 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6871 insns
= get_insns ();
6874 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6875 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6876 emit_libcall_block (insns
, base
, rax
, note
);
6878 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6879 emit_insn (gen_tls_local_dynamic_base_64 (base
));
6881 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6883 if (TARGET_GNU2_TLS
)
6885 rtx x
= ix86_tls_module_base ();
6887 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
6888 gen_rtx_MINUS (Pmode
, x
, tp
));
6891 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6892 off
= gen_rtx_CONST (Pmode
, off
);
6894 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
6896 if (TARGET_GNU2_TLS
)
6898 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
6900 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6905 case TLS_MODEL_INITIAL_EXEC
:
6909 type
= UNSPEC_GOTNTPOFF
;
6913 if (reload_in_progress
)
6914 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6915 pic
= pic_offset_table_rtx
;
6916 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6918 else if (!TARGET_ANY_GNU_TLS
)
6920 pic
= gen_reg_rtx (Pmode
);
6921 emit_insn (gen_set_got (pic
));
6922 type
= UNSPEC_GOTTPOFF
;
6927 type
= UNSPEC_INDNTPOFF
;
6930 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6931 off
= gen_rtx_CONST (Pmode
, off
);
6933 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6934 off
= gen_const_mem (Pmode
, off
);
6935 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6937 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6939 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6940 off
= force_reg (Pmode
, off
);
6941 return gen_rtx_PLUS (Pmode
, base
, off
);
6945 base
= get_thread_pointer (true);
6946 dest
= gen_reg_rtx (Pmode
);
6947 emit_insn (gen_subsi3 (dest
, base
, off
));
6951 case TLS_MODEL_LOCAL_EXEC
:
6952 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6953 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6954 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6955 off
= gen_rtx_CONST (Pmode
, off
);
6957 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6959 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6960 return gen_rtx_PLUS (Pmode
, base
, off
);
6964 base
= get_thread_pointer (true);
6965 dest
= gen_reg_rtx (Pmode
);
6966 emit_insn (gen_subsi3 (dest
, base
, off
));
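/* Illustration (not from the original sources), assuming the standard
   32-bit GNU TLS conventions: the local-exec and initial-exec models
   handled above correspond to sequences along the lines of

        movl    %gs:0, %eax
        leal    x@NTPOFF(%eax), %eax    ; local exec

        movl    x@INDNTPOFF, %ecx
        movl    %gs:(%ecx), %eax        ; initial exec, no PIC register

   while the global- and local-dynamic models go through __tls_get_addr.  */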
6977 /* Try machine-dependent ways of modifying an illegitimate address
6978 to be legitimate. If we find one, return the new, valid address.
6979 This macro is used in only one place: `memory_address' in explow.c.
6981 OLDX is the address as it was before break_out_memory_refs was called.
6982 In some cases it is useful to look at this to decide what needs to be done.
6984 MODE and WIN are passed so that this macro can use
6985 GO_IF_LEGITIMATE_ADDRESS.
6987 It is always safe for this macro to do nothing. It exists to recognize
6988 opportunities to optimize the output.
6990 For the 80386, we handle X+REG by loading X into a register R and
6991 using R+REG. R will go in a general reg and indexing will be used.
6992 However, if REG is a broken-out memory address or multiplication,
6993 nothing needs to be done because REG can certainly go in a general reg.
6995 When -fpic is used, special handling is needed for symbolic references.
6996 See comments by legitimize_pic_address in i386.c for details. */
6999 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7004 if (TARGET_DEBUG_ADDR
)
7006 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7007 GET_MODE_NAME (mode
));
7011 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7013 return legitimize_tls_address (x
, log
, false);
7014 if (GET_CODE (x
) == CONST
7015 && GET_CODE (XEXP (x
, 0)) == PLUS
7016 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7017 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7019 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7020 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7023 if (flag_pic
&& SYMBOLIC_CONST (x
))
7024 return legitimize_pic_address (x
, 0);
7026 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7027 if (GET_CODE (x
) == ASHIFT
7028 && GET_CODE (XEXP (x
, 1)) == CONST_INT
7029 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7032 log
= INTVAL (XEXP (x
, 1));
7033 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7034 GEN_INT (1 << log
));
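      /* For instance (an illustrative note, not in the original file):
         (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
         which ix86_decompose_address later recognizes as an index with
         scale 8.  */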
7037 if (GET_CODE (x
) == PLUS
)
7039 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7041 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7042 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
7043 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7046 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7047 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7048 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7049 GEN_INT (1 << log
));
7052 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7053 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
7054 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7057 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7058 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7059 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7060 GEN_INT (1 << log
));
7063 /* Put multiply first if it isn't already. */
7064 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7066 rtx tmp
= XEXP (x
, 0);
7067 XEXP (x
, 0) = XEXP (x
, 1);
7072 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7073 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7074 created by virtual register instantiation, register elimination, and
7075 similar optimizations. */
7076 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7079 x
= gen_rtx_PLUS (Pmode
,
7080 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7081 XEXP (XEXP (x
, 1), 0)),
7082 XEXP (XEXP (x
, 1), 1));
7086 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7087 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7088 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7089 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7090 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7091 && CONSTANT_P (XEXP (x
, 1)))
7094 rtx other
= NULL_RTX
;
7096 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7098 constant
= XEXP (x
, 1);
7099 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7101 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
7103 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7104 other
= XEXP (x
, 1);
7112 x
= gen_rtx_PLUS (Pmode
,
7113 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7114 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7115 plus_constant (other
, INTVAL (constant
)));
7119 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7122 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7125 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7128 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7131 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7135 && GET_CODE (XEXP (x
, 1)) == REG
7136 && GET_CODE (XEXP (x
, 0)) == REG
)
7139 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7142 x
= legitimize_pic_address (x
, 0);
7145 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7148 if (GET_CODE (XEXP (x
, 0)) == REG
)
7150 rtx temp
= gen_reg_rtx (Pmode
);
7151 rtx val
= force_operand (XEXP (x
, 1), temp
);
7153 emit_move_insn (temp
, val
);
7159 else if (GET_CODE (XEXP (x
, 1)) == REG
)
7161 rtx temp
= gen_reg_rtx (Pmode
);
7162 rtx val
= force_operand (XEXP (x
, 0), temp
);
7164 emit_move_insn (temp
, val
);
7174 /* Print an integer constant expression in assembler syntax. Addition
7175 and subtraction are the only arithmetic that may appear in these
7176 expressions. FILE is the stdio stream to write to, X is the rtx, and
7177 CODE is the operand print code from the output string. */
7180 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7184 switch (GET_CODE (x
))
7187 gcc_assert (flag_pic
);
7192 output_addr_const (file
, x
);
7193 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7194 fputs ("@PLT", file
);
7201 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7202 assemble_name (asm_out_file
, buf
);
7206 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7210 /* This used to output parentheses around the expression,
7211 but that does not work on the 386 (either ATT or BSD assembler). */
7212 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7216 if (GET_MODE (x
) == VOIDmode
)
7218 /* We can use %d if the number is <32 bits and positive. */
7219 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7220 fprintf (file
, "0x%lx%08lx",
7221 (unsigned long) CONST_DOUBLE_HIGH (x
),
7222 (unsigned long) CONST_DOUBLE_LOW (x
));
7224 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7227 /* We can't handle floating point constants;
7228 PRINT_OPERAND must handle them. */
7229 output_operand_lossage ("floating constant misused");
7233 /* Some assemblers need integer constants to appear first. */
7234 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
7236 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7238 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7242 gcc_assert (GET_CODE (XEXP (x
, 1)) == CONST_INT
);
7243 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7245 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7251 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7252 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7254 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7256 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7260 gcc_assert (XVECLEN (x
, 0) == 1);
7261 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7262 switch (XINT (x
, 1))
7265 fputs ("@GOT", file
);
7268 fputs ("@GOTOFF", file
);
7270 case UNSPEC_GOTPCREL
:
7271 fputs ("@GOTPCREL(%rip)", file
);
7273 case UNSPEC_GOTTPOFF
:
7274 /* FIXME: This might be @TPOFF in Sun ld too. */
7275 fputs ("@GOTTPOFF", file
);
7278 fputs ("@TPOFF", file
);
7282 fputs ("@TPOFF", file
);
7284 fputs ("@NTPOFF", file
);
7287 fputs ("@DTPOFF", file
);
7289 case UNSPEC_GOTNTPOFF
:
7291 fputs ("@GOTTPOFF(%rip)", file
);
7293 fputs ("@GOTNTPOFF", file
);
7295 case UNSPEC_INDNTPOFF
:
7296 fputs ("@INDNTPOFF", file
);
7299 output_operand_lossage ("invalid UNSPEC as operand");
7305 output_operand_lossage ("invalid expression as operand");
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

i386_output_dwarf_dtprel (FILE *file, int size, rtx x)

      fputs (ASM_LONG, file);
      output_addr_const (file, x);
      fputs ("@DTPOFF", file);

        fputs (", 0", file);
7330 /* In the name of slightly smaller debug output, and to cater to
7331 general assembler lossage, recognize PIC+GOTOFF and turn it back
7332 into a direct symbol reference.
7334 On Darwin, this is necessary to avoid a crash, because Darwin
7335 has a different PIC label for each routine but the DWARF debugging
7336 information is not associated with any particular routine, so it's
7337 necessary to remove references to the PIC label from RTL stored by
7338 the DWARF output code. */
7341 ix86_delegitimize_address (rtx orig_x
)
7344 /* reg_addend is NULL or a multiple of some register. */
7345 rtx reg_addend
= NULL_RTX
;
7346 /* const_addend is NULL or a const_int. */
7347 rtx const_addend
= NULL_RTX
;
7348 /* This is the result, or NULL. */
7349 rtx result
= NULL_RTX
;
7351 if (GET_CODE (x
) == MEM
)
7356 if (GET_CODE (x
) != CONST
7357 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7358 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7359 || GET_CODE (orig_x
) != MEM
)
7361 return XVECEXP (XEXP (x
, 0), 0, 0);
7364 if (GET_CODE (x
) != PLUS
7365 || GET_CODE (XEXP (x
, 1)) != CONST
)
7368 if (GET_CODE (XEXP (x
, 0)) == REG
7369 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7370 /* %ebx + GOT/GOTOFF */
7372 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7374 /* %ebx + %reg * scale + GOT/GOTOFF */
7375 reg_addend
= XEXP (x
, 0);
7376 if (GET_CODE (XEXP (reg_addend
, 0)) == REG
7377 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7378 reg_addend
= XEXP (reg_addend
, 1);
7379 else if (GET_CODE (XEXP (reg_addend
, 1)) == REG
7380 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7381 reg_addend
= XEXP (reg_addend
, 0);
7384 if (GET_CODE (reg_addend
) != REG
7385 && GET_CODE (reg_addend
) != MULT
7386 && GET_CODE (reg_addend
) != ASHIFT
)
7392 x
= XEXP (XEXP (x
, 1), 0);
7393 if (GET_CODE (x
) == PLUS
7394 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7396 const_addend
= XEXP (x
, 1);
7400 if (GET_CODE (x
) == UNSPEC
7401 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7402 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
7403 result
= XVECEXP (x
, 0, 0);
7405 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7406 && GET_CODE (orig_x
) != MEM
)
7407 result
= XEXP (x
, 0);
7413 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7415 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7420 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7425 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7427 enum rtx_code second_code
, bypass_code
;
7428 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7429 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7430 code
= ix86_fp_compare_code_to_integer (code
);
7434 code
= reverse_condition (code
);
7445 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7449 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7450 Those same assemblers have the same but opposite lossage on cmov. */
7451 gcc_assert (mode
== CCmode
);
7452 suffix
= fp
? "nbe" : "a";
7472 gcc_assert (mode
== CCmode
);
7494 gcc_assert (mode
== CCmode
);
7495 suffix
= fp
? "nb" : "ae";
7498 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7502 gcc_assert (mode
== CCmode
);
7506 suffix
= fp
? "u" : "p";
7509 suffix
= fp
? "nu" : "np";
7514 fputs (suffix
, file
);
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

print_reg (rtx x, int code, FILE *file)

  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
              && REGNO (x) != FRAME_POINTER_REGNUM
              && REGNO (x) != FLAGS_REG
              && REGNO (x) != FPSR_REG
              && REGNO (x) != FPCR_REG);

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)

  if (code == 'w' || MMX_REG_P (x))

  else if (code == 'b')

  else if (code == 'k')

  else if (code == 'q')

  else if (code == 'y')

  else if (code == 'h')

    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))

      gcc_assert (TARGET_64BIT);

          error ("extended registers have no high halves");

          fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);

          fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);

          fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);

          fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);

          error ("unsupported operand size for extended register");

      if (STACK_TOP_P (x))

          fputs ("st(0)", file);

      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);

      fputs (hi_reg_name[REGNO (x)], file);

      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))

      fputs (qi_reg_name[REGNO (x)], file);

      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))

      fputs (qi_high_reg_name[REGNO (x)], file);
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

get_some_local_dynamic_name (void)

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)

      cfun->machine->some_ld_name = XSTR (x, 0);
7651 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7652 C -- print opcode suffix for set/cmov insn.
7653 c -- like C, but print reversed condition
7654 F,f -- likewise, but for floating-point.
7655 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7657 R -- print the prefix for register names.
7658 z -- print the opcode suffix for the size of the current operand.
7659 * -- print a star (in certain assembler syntax)
7660 A -- print an absolute memory reference.
7661 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7662 s -- print a shift double count, followed by the assemblers argument
7664 b -- print the QImode name of the register for the indicated operand.
7665 %b0 would print %al if operands[0] is reg 0.
7666 w -- likewise, print the HImode name of the register.
7667 k -- likewise, print the SImode name of the register.
7668 q -- likewise, print the DImode name of the register.
7669 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7670 y -- print "st(0)" instead of "st" as a register.
7671 D -- print condition for SSE cmp instruction.
7672 P -- if PIC, print an @PLT suffix.
7673 X -- don't print any sort of PIC '@' suffix for a symbol.
7674 & -- print some in-use local-dynamic symbol name.
7675 H -- print a memory address offset by 8; used for sse high-parts
7679 print_operand (FILE *file
, rtx x
, int code
)
7686 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7691 assemble_name (file
, get_some_local_dynamic_name ());
7695 switch (ASSEMBLER_DIALECT
)
7702 /* Intel syntax. For absolute addresses, registers should not
7703 be surrounded by braces. */
7704 if (GET_CODE (x
) != REG
)
7707 PRINT_OPERAND (file
, x
, 0);
7717 PRINT_OPERAND (file
, x
, 0);
7722 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7727 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7732 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7737 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7742 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7747 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7752 /* 387 opcodes don't get size suffixes if the operands are
7754 if (STACK_REG_P (x
))
7757 /* Likewise if using Intel opcodes. */
7758 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7761 /* This is the size of op from size of operand. */
7762 switch (GET_MODE_SIZE (GET_MODE (x
)))
7765 #ifdef HAVE_GAS_FILDS_FISTS
7771 if (GET_MODE (x
) == SFmode
)
7786 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7788 #ifdef GAS_MNEMONICS
7814 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7816 PRINT_OPERAND (file
, x
, 0);
7822 /* Little bit of braindamage here. The SSE compare instructions
7823 does use completely different names for the comparisons that the
7824 fp conditional moves. */
7825 switch (GET_CODE (x
))
7840 fputs ("unord", file
);
7844 fputs ("neq", file
);
7848 fputs ("nlt", file
);
7852 fputs ("nle", file
);
7855 fputs ("ord", file
);
7862 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7863 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7865 switch (GET_MODE (x
))
7867 case HImode
: putc ('w', file
); break;
7869 case SFmode
: putc ('l', file
); break;
7871 case DFmode
: putc ('q', file
); break;
7872 default: gcc_unreachable ();
7879 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7882 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7883 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7886 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7889 /* Like above, but reverse condition */
7891 /* Check to see if argument to %c is really a constant
7892 and not a condition code which needs to be reversed. */
7893 if (!COMPARISON_P (x
))
7895 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7898 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7901 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7902 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7905 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7909 /* It doesn't actually matter what mode we use here, as we're
7910 only going to use this for printing. */
7911 x
= adjust_address_nv (x
, DImode
, 8);
7918 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7921 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7924 int pred_val
= INTVAL (XEXP (x
, 0));
7926 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7927 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7929 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7930 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7932 /* Emit hints only in the case default branch prediction
7933 heuristics would fail. */
7934 if (taken
!= cputaken
)
7936 /* We use 3e (DS) prefix for taken branches and
7937 2e (CS) prefix for not taken branches. */
7939 fputs ("ds ; ", file
);
7941 fputs ("cs ; ", file
);
7948 output_operand_lossage ("invalid operand code '%c'", code
);
7952 if (GET_CODE (x
) == REG
)
7953 print_reg (x
, code
, file
);
7955 else if (GET_CODE (x
) == MEM
)
7957 /* No `byte ptr' prefix for call instructions. */
7958 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7961 switch (GET_MODE_SIZE (GET_MODE (x
)))
7963 case 1: size
= "BYTE"; break;
7964 case 2: size
= "WORD"; break;
7965 case 4: size
= "DWORD"; break;
7966 case 8: size
= "QWORD"; break;
7967 case 12: size
= "XWORD"; break;
7968 case 16: size
= "XMMWORD"; break;
7973 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7976 else if (code
== 'w')
7978 else if (code
== 'k')
7982 fputs (" PTR ", file
);
7986 /* Avoid (%rip) for call operands. */
7987 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7988 && GET_CODE (x
) != CONST_INT
)
7989 output_addr_const (file
, x
);
7990 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7991 output_operand_lossage ("invalid constraints for operand");
7996 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8001 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8002 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8004 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8006 fprintf (file
, "0x%08lx", l
);
8009 /* These float cases don't actually occur as immediate operands. */
8010 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8014 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8015 fprintf (file
, "%s", dstr
);
8018 else if (GET_CODE (x
) == CONST_DOUBLE
8019 && GET_MODE (x
) == XFmode
)
8023 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8024 fprintf (file
, "%s", dstr
);
8029 /* We have patterns that allow zero sets of memory, for instance.
8030 In 64-bit mode, we should probably support all 8-byte vectors,
8031 since we can in fact encode that into an immediate. */
8032 if (GET_CODE (x
) == CONST_VECTOR
)
8034 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8040 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
8042 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8045 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8046 || GET_CODE (x
) == LABEL_REF
)
8048 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8051 fputs ("OFFSET FLAT:", file
);
8054 if (GET_CODE (x
) == CONST_INT
)
8055 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8057 output_pic_addr_const (file
, x
, code
);
8059 output_addr_const (file
, x
);
8063 /* Print a memory operand whose address is ADDR. */
8066 print_operand_address (FILE *file
, rtx addr
)
8068 struct ix86_address parts
;
8069 rtx base
, index
, disp
;
8071 int ok
= ix86_decompose_address (addr
, &parts
);
8076 index
= parts
.index
;
8078 scale
= parts
.scale
;
8086 if (USER_LABEL_PREFIX
[0] == 0)
8088 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8094 if (!base
&& !index
)
8096 /* Displacement only requires special attention. */
8098 if (GET_CODE (disp
) == CONST_INT
)
8100 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8102 if (USER_LABEL_PREFIX
[0] == 0)
8104 fputs ("ds:", file
);
8106 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8109 output_pic_addr_const (file
, disp
, 0);
8111 output_addr_const (file
, disp
);
8113 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8116 if (GET_CODE (disp
) == CONST
8117 && GET_CODE (XEXP (disp
, 0)) == PLUS
8118 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8119 disp
= XEXP (XEXP (disp
, 0), 0);
8120 if (GET_CODE (disp
) == LABEL_REF
8121 || (GET_CODE (disp
) == SYMBOL_REF
8122 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8123 fputs ("(%rip)", file
);
8128 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8133 output_pic_addr_const (file
, disp
, 0);
8134 else if (GET_CODE (disp
) == LABEL_REF
)
8135 output_asm_label (disp
);
8137 output_addr_const (file
, disp
);
8142 print_reg (base
, 0, file
);
8146 print_reg (index
, 0, file
);
8148 fprintf (file
, ",%d", scale
);
8154 rtx offset
= NULL_RTX
;
8158 /* Pull out the offset of a symbol; print any symbol itself. */
8159 if (GET_CODE (disp
) == CONST
8160 && GET_CODE (XEXP (disp
, 0)) == PLUS
8161 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8163 offset
= XEXP (XEXP (disp
, 0), 1);
8164 disp
= gen_rtx_CONST (VOIDmode
,
8165 XEXP (XEXP (disp
, 0), 0));
8169 output_pic_addr_const (file
, disp
, 0);
8170 else if (GET_CODE (disp
) == LABEL_REF
)
8171 output_asm_label (disp
);
8172 else if (GET_CODE (disp
) == CONST_INT
)
8175 output_addr_const (file
, disp
);
8181 print_reg (base
, 0, file
);
8184 if (INTVAL (offset
) >= 0)
8186 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8190 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8197 print_reg (index
, 0, file
);
8199 fprintf (file
, "*%d", scale
);
8207 output_addr_const_extra (FILE *file
, rtx x
)
8211 if (GET_CODE (x
) != UNSPEC
)
8214 op
= XVECEXP (x
, 0, 0);
8215 switch (XINT (x
, 1))
8217 case UNSPEC_GOTTPOFF
:
8218 output_addr_const (file
, op
);
8219 /* FIXME: This might be @TPOFF in Sun ld. */
8220 fputs ("@GOTTPOFF", file
);
8223 output_addr_const (file
, op
);
8224 fputs ("@TPOFF", file
);
8227 output_addr_const (file
, op
);
8229 fputs ("@TPOFF", file
);
8231 fputs ("@NTPOFF", file
);
8234 output_addr_const (file
, op
);
8235 fputs ("@DTPOFF", file
);
8237 case UNSPEC_GOTNTPOFF
:
8238 output_addr_const (file
, op
);
8240 fputs ("@GOTTPOFF(%rip)", file
);
8242 fputs ("@GOTNTPOFF", file
);
8244 case UNSPEC_INDNTPOFF
:
8245 output_addr_const (file
, op
);
8246 fputs ("@INDNTPOFF", file
);
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])

      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle it.  */
      if (GET_CODE (op) == MEM)

          lo_half[num] = adjust_address (op, SImode, 0);
          hi_half[num] = adjust_address (op, SImode, 4);

          lo_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 4);

/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])

      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
         still have to handle it.  */
      if (GET_CODE (op) == MEM)

          lo_half[num] = adjust_address (op, DImode, 0);
          hi_half[num] = adjust_address (op, DImode, 8);

          lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
          hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
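/* A minimal usage sketch (not part of the original file): a caller that
   needs to operate on the two SImode halves of a DImode operand.  */
#if 0
  rtx lo[1], hi[1];

  /* operands[0] is a DImode REG or offsettable MEM.  */
  split_di (operands, 1, lo, hi);
  emit_move_insn (lo[0], GEN_INT (0));  /* low 32 bits */
  emit_move_insn (hi[0], GEN_INT (1));  /* high 32 bits */
#endif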
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1

output_387_binary_op (rtx insn, rtx *operands)
  static char buf[30];
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
               || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))

      gcc_assert (is_sse);

      switch (GET_CODE (operands[3]))
          if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)

          if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)

          if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)

          if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)

      if (GET_MODE (operands[0]) == SFmode)
        strcat (buf, "ss\t{%2, %0|%0, %2}");
        strcat (buf, "sd\t{%2, %0|%0, %2}");

  switch (GET_CODE (operands[3]))
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
          rtx temp = operands[2];
          operands[2] = operands[1];

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */

      if (GET_CODE (operands[1]) == MEM)

      if (GET_CODE (operands[2]) == MEM)

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
            p = "{rp\t%2, %0|p\t%0, %2}";
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
            p = "{p\t%1, %0|rp\t%0, %1}";
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */

      if (STACK_TOP_P (operands[0]))
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
      else if (STACK_TOP_P (operands[1]))
            p = "{\t%1, %0|r\t%0, %1}";
            p = "r\t{%1, %0|%0, %1}";   /* st(r2) = st(0) op st(r2) */
            p = "{r\t%2, %0|\t%0, %2}";
            p = "\t{%2, %0|%0, %2}";    /* st(r1) = st(r1) op st(0) */
/* Return needed mode for entity in optimize_mode_switching pass.  */

ix86_mode_needed (int entity, rtx insn)
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
      || (NONJUMP_INSN_P (insn)
          && (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)

  mode = get_attr_i387_cw (insn);

      if (mode == I387_CW_TRUNC)

      if (mode == I387_CW_FLOOR)

      if (mode == I387_CW_CEIL)

      if (mode == I387_CW_MASK_PM)
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

emit_i387_cw_initialization (int mode)
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
          /* round toward zero (truncate) */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
          slot = SLOT_CW_TRUNC;

          /* round down toward -oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
          slot = SLOT_CW_FLOOR;

          /* round up toward +oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
          slot = SLOT_CW_CEIL;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;

          /* round toward zero (truncate) */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
          slot = SLOT_CW_TRUNC;

          /* round down toward -oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
          slot = SLOT_CW_FLOOR;

          /* round up toward +oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
          slot = SLOT_CW_CEIL;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
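/* Bits 10-11 of the x87 control word are the rounding-control field
   (00 = to nearest, 01 = toward -inf, 10 = toward +inf, 11 = toward zero),
   which is what the 0x0400/0x0800/0x0c00 operations above set; 0x0020 sets
   the precision-exception mask bit used for nearbyint.  */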
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

output_fix_trunc (rtx insn, rtx *operands, int fisttp)
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (GET_CODE (operands[0]) == MEM);

    output_asm_insn ("fisttp%z0\t%0", operands);

      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
        output_asm_insn ("fistp%z0\t%0", operands);
        output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%2", operands);
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
  if (TARGET_USE_FFREEP)
#if HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
      static char retval[] = ".word\t0xc_df";
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      retval[9] = '0' + (regno - FIRST_STACK_REG);

  return opno ? "fstp\t%y1" : "fstp\t%y0";
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

      cmp_op0 = operands[0];
      cmp_op1 = operands[1];

      cmp_op0 = operands[1];
      cmp_op1 = operands[2];

      if (GET_MODE (operands[0]) == SFmode)
          return "ucomiss\t{%1, %0|%0, %1}";
          return "comiss\t{%1, %0|%0, %1}";
          return "ucomisd\t{%1, %0|%0, %1}";
          return "comisd\t{%1, %0|%0, %1}";

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
          output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
          return output_387_ffreep (operands, 1);
        return "ftst\n\tfnstsw\t%0";

  if (STACK_REG_P (cmp_op1)
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare */

          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return output_387_ffreep (operands, 0);
            return "fucompp\n\tfnstsw\t%0";
            return "fcompp\n\tfnstsw\t%0";

      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
        "fcom%z2\t%y2\n\tfnstsw\t%0",
        "fcomp%z2\t%y2\n\tfnstsw\t%0",
        "fucom%z2\t%y2\n\tfnstsw\t%0",
        "fucomp%z2\t%y2\n\tfnstsw\t%0",

        "ficom%z2\t%y2\n\tfnstsw\t%0",
        "ficomp%z2\t%y2\n\tfnstsw\t%0",

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
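/* The 4-bit index built above selects an entry of ALT: bit 3 is eflags_p
   (the fcomi family), bit 2 marks an integer memory operand (ficom family),
   bit 1 is unordered_p (fucom family) and bit 0 is stack_top_dies, which
   picks the popping variant.  */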
ix86_output_addr_vec_elt (FILE *file, int value)
  const char *directive = ASM_LONG;

      directive = ASM_QUAD;
    gcc_assert (!TARGET_64BIT);

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);

ix86_output_addr_diff_elt (FILE *file, int value, int rel)
    fprintf (file, "%s%s%d-%s%d\n",
             ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
  else if (TARGET_MACHO)
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf (file, "\n");
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
                 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

ix86_expand_clear (rtx dest)
  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
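/* The CLOBBER is needed because "xor reg, reg", unlike "mov $0, reg",
   modifies EFLAGS; hard register 17 is the flags register on this
   target.  */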
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

maybe_get_pool_constant (rtx x)
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);
ix86_expand_move (enum machine_mode mode, rtx operands[])
  int strict = (reload_in_progress || reload_completed);
  enum tls_model model;

  if (GET_CODE (op1) == SYMBOL_REF)
      model = SYMBOL_REF_TLS_MODEL (op1);
          op1 = legitimize_tls_address (op1, model, true);
          op1 = force_operand (op1, op0);
  else if (GET_CODE (op1) == CONST
           && GET_CODE (XEXP (op1, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
          rtx addend = XEXP (XEXP (op1, 0), 1);
          op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
          op1 = force_operand (op1, NULL);
          op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
                                     op0, 1, OPTAB_DIRECT);

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
      if (TARGET_MACHO && !TARGET_64BIT)
          rtx temp = ((reload_in_progress
                       || ((op0 && GET_CODE (op0) == REG)
                      ? op0 : gen_reg_rtx (Pmode));
          op1 = machopic_indirect_data_reference (op1, temp);
          op1 = machopic_legitimize_pic_address (op1, mode,
                                                 temp == op1 ? 0 : temp);
      else if (MACHOPIC_INDIRECT)
        op1 = machopic_indirect_data_reference (op1, 0);

          if (GET_CODE (op0) == MEM)
            op1 = force_reg (Pmode, op1);
              op1 = legitimize_address (op1, op1, Pmode);

      if (GET_CODE (op0) == MEM
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && GET_CODE (op1) == MEM)
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
         to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
          && optimize && !reload_completed && !reload_in_progress)
        op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */
          else if (GET_CODE (op1) == CONST_DOUBLE)
              op1 = validize_mem (force_const_mem (mode, op1));
              if (!register_operand (op0, mode))
                  rtx temp = gen_reg_rtx (mode);
                  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
                  emit_move_insn (op0, temp);

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
  rtx op0 = operands[0], op1 = operands[1];

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* Make operand1 a register if it isn't already.  */
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */

ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
      /* If we're optimizing for size, movups is the smallest.  */
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));

      /* ??? If we have typed data, then it would appear that using
         movdqu is the only way to get unaligned data loaded with
         integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));

      if (TARGET_SSE2 && mode == V2DFmode)
          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
          if (TARGET_SSE_SPLIT_REGS)
              emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);

          m = adjust_address (op1, DFmode, 0);
          emit_insn (gen_sse2_loadlpd (op0, zero, m));
          m = adjust_address (op1, DFmode, 8);
          emit_insn (gen_sse2_loadhpd (op0, op0, m));

          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (op0, CONST0_RTX (mode));
            emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

          if (mode != V4SFmode)
            op0 = gen_lowpart (V4SFmode, op0);
          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (op0, op0, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (op0, op0, m));
  else if (MEM_P (op0))
      /* If we're optimizing for size, movups is the smallest.  */
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));

      /* ??? Similar to above, only less clear because of quote
         typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
          && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));

      if (TARGET_SSE2 && mode == V2DFmode)
          m = adjust_address (op0, DFmode, 0);
          emit_insn (gen_sse2_storelpd (m, op1));
          m = adjust_address (op0, DFmode, 8);
          emit_insn (gen_sse2_storehpd (m, op1));

          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);
          m = adjust_address (op0, V2SFmode, 0);
          emit_insn (gen_sse_storelps (m, op1));
          m = adjust_address (op0, V2SFmode, 8);
          emit_insn (gen_sse_storehps (m, op1));
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

ix86_expand_push (enum machine_mode mode, rtx x)
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
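/* This open-codes a push as an explicit stack-pointer decrement followed by
   a store to the new stack top, for modes (such as vector modes) that have
   no native push instruction.  */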
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
  int matching_memory;
  rtx src1, src2, dst;

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
        dst = gen_reg_rtx (mode);

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
        src1 = force_reg (mode, src1);

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  src1 = operands[1] = src1;
  src2 = operands[2] = src2;

/* Similarly, but assume that the destination has already been
   set up properly.  */

ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);

      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

ix86_binary_operator_ok (enum rtx_code code,
                         enum machine_mode mode ATTRIBUTE_UNUSED,
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)

  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)

  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
                && rtx_equal_p (operands[0], operands[2]))))

  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
  int matching_memory;
  rtx src, dst, op, clob;

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
        dst = gen_reg_rtx (mode);

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);

      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
   true, then replicate the mask for all elements of the vector register.
   If INVERT is true, then create a mask excluding the sign bit.  */

ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
  enum machine_mode vec_mode;
  HOST_WIDE_INT hi, lo;

  /* Find the sign bit, sign extended to 2*HWI.  */
    lo = 0x80000000, hi = lo < 0;
  else if (HOST_BITS_PER_WIDE_INT >= 64)
    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
  mask = gen_lowpart (mode, mask);

        v = gen_rtvec (4, mask, mask, mask, mask);
        v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      vec_mode = V4SFmode;

        v = gen_rtvec (2, mask, mask);
        v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
      vec_mode = V2DFmode;

  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9472 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
9475 rtx mask
, set
, use
, clob
, dst
, src
;
9476 bool matching_memory
;
9477 bool use_sse
= false;
9478 bool vector_mode
= VECTOR_MODE_P (mode
);
9479 enum machine_mode elt_mode
= mode
;
9483 elt_mode
= GET_MODE_INNER (mode
);
9486 else if (TARGET_SSE_MATH
)
9487 use_sse
= SSE_FLOAT_MODE_P (mode
);
9489 /* NEG and ABS performed with SSE use bitwise mask operations.
9490 Create the appropriate mask now. */
9492 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
9499 /* If the destination is memory, and we don't have matching source
9500 operands or we're using the x87, do things in registers. */
9501 matching_memory
= false;
9504 if (use_sse
&& rtx_equal_p (dst
, src
))
9505 matching_memory
= true;
9507 dst
= gen_reg_rtx (mode
);
9509 if (MEM_P (src
) && !matching_memory
)
9510 src
= force_reg (mode
, src
);
9514 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
9515 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9520 set
= gen_rtx_fmt_e (code
, mode
, src
);
9521 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9524 use
= gen_rtx_USE (VOIDmode
, mask
);
9525 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9526 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
9527 gen_rtvec (3, set
, use
, clob
)));
9533 if (dst
!= operands
[0])
9534 emit_move_insn (operands
[0], dst
);
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

ix86_expand_copysign (rtx operands[])
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
        op0 = CONST0_RTX (vmode);
            v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
            v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));

      mask = ix86_build_signbit_mask (mode, 0, 0);

        emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
        emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));

      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

        emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
        emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
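/* copysign (x, y) is computed bitwise as (x & ~signmask) | (y & signmask);
   the *_const expanders above are used when the magnitude operand is a
   compile-time constant (already folded to its absolute value), the *_var
   expanders otherwise.  */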
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

ix86_split_copysign_const (rtx operands[])
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

ix86_split_copysign_var (rtx operands[])
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  scratch = operands[1];
  nmask = operands[4];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));

      if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
          x = gen_rtx_AND (vmode, scratch, mask);
      else                                              /* alternative 2,4 */
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
      else                                              /* alternative 3,4 */
          gcc_assert (REGNO (nmask) == REGNO (dest));
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));

      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))

      if (req_mode == CCGCmode)

      if (req_mode == CCGOCmode || req_mode == CCNOmode)

      if (req_mode == CCZmode)

  return (GET_MODE (SET_SRC (set)) == set_mode);
/* Generate insn patterns to do an integer compare of OPERANDS.  */

ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
  enum machine_mode cmpmode;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;

ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    return ix86_fp_compare_mode (code);
      /* Only zero flag is needed.  */
    case NE:                    /* ZF!=0 */
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF or SF=0 */
    case LT:                    /* SF<>OF or SF=1 */
      if (op1 == const0_rtx)
        /* For other cases Carry flag is not required.  */
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
      /* strcmp pattern do (use flags) and combine may ask us for proper
         comparison.  */
/* Return the fixed registers used for condition codes.  */

ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))

      /* These are only compatible with themselves, which we already
         checked above.  */
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_use_fcomi_compare (code)))
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);

      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
          int tmp = standard_80387_constant_p (op1);
            op1 = validize_mem (force_const_mem (op_mode, op1));
              op1 = force_reg (op_mode, op1);
            op1 = force_reg (op_mode, op1);

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

ix86_fp_compare_code_to_integer (enum rtx_code code)

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to UNKNOWN.
   We never require more than two branches.  */

ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
                          enum rtx_code *first_code,
                          enum rtx_code *second_code)
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

         cmp    ZF PF CF
         >      0  0  0
         <      0  0  1
         =      1  0  0
         un     1  1  1  */

    case GT:                    /* GTU - CF=0 & ZF=0 */
    case GE:                    /* GEU - CF=0 */
    case ORDERED:               /* PF=0 */
    case UNORDERED:             /* PF=1 */
    case UNEQ:                  /* EQ - ZF=1 */
    case UNLT:                  /* LTU - CF=1 */
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
    case LTGT:                  /* EQ - ZF=0 */

    case LT:                    /* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;

    case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;

    case EQ:                    /* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;

    case NE:                    /* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;

    case UNGE:                  /* GEU - CF=0 - fails on unordered */
      *second_code = UNORDERED;

    case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
      *second_code = UNORDERED;

      gcc_unreachable ();

  if (!TARGET_IEEE_FP)
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
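/* The rewrites above exist because the ordered codes (LT, LE, EQ, NE, ...)
   would branch the wrong way when a NaN makes the comparison unordered;
   under TARGET_IEEE_FP they are therefore expressed through their UN*
   counterparts plus an UNORDERED bypass or second branch.  */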
/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
   All following functions use the number of instructions as the cost metric.
   In the future this should be tweaked to compute bytes for optimize_size and
   take into account the performance of various instructions on various CPUs.  */

ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
  if (!TARGET_IEEE_FP)
  /* The cost of code output by ix86_expand_fp_compare.  */
      gcc_unreachable ();

/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

ix86_fp_comparison_fcomi_cost (enum rtx_code code)
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;

/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

ix86_fp_comparison_sahf_cost (enum rtx_code code)
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     keeps gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;

/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

ix86_fp_comparison_cost (enum rtx_code code)
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
  if (min > fcomi_cost)
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
                        rtx *second_test, rtx *bypass_test)
  enum machine_mode fpcmp_mode, intcmp_mode;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

    *second_test = NULL_RTX;
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),

          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;

      if (bypass_code != UNKNOWN)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
      if (second_code != UNKNOWN)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),

      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
          if (code == GT || !TARGET_IEEE_FP)
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;

          if (code == LT && TARGET_IEEE_FP)
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));

          if (code == GE || !TARGET_IEEE_FP)
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,

          if (code == LE && TARGET_IEEE_FP)
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));

          if (code == EQ && TARGET_IEEE_FP)
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));

          if (code == NE && TARGET_IEEE_FP)
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));

          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));

          gcc_unreachable ();

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

    *second_test = NULL_RTX;
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
    ret = ix86_expand_int_compare (code, op0, op1);
/* Return true if the CODE will result in nontrivial jump sequence.  */

ix86_fp_jump_nontrivial_p (enum rtx_code code)
  enum rtx_code bypass_code, first_code, second_code;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
ix86_expand_branch (enum rtx_code code, rtx label)
  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non NULL.  */
  if (ix86_compare_emitted)

  switch (GET_MODE (ix86_compare_op0))
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand jump early.  Otherwise delay expansion by
           creating compound insn to not confuse optimizers.  */
        if (bypass_code == UNKNOWN && second_code == UNKNOWN
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX, NULL_RTX);

            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));

      /* Expand DImode branch into multiple compare+branch.  */
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
        if (GET_MODE (ix86_compare_op0) == DImode)
            split_di (&ix86_compare_op0, 1, lo+0, hi+0);
            split_di (&ix86_compare_op1, 1, lo+1, hi+1);
            split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
            split_ti (&ix86_compare_op1, 1, lo+1, hi+1);

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            || hi[1] == const0_rtx || lo[1] == const0_rtx))

            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

          case LT: case GT: case LTU: case GTU:

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

            gcc_unreachable ();

        /* if (hi(a) < hi(b)) goto true;
           if (hi(a) > hi(b)) goto false;
           if (lo(a) < lo(b)) goto true;  */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != UNKNOWN)
          emit_label (label2);

      gcc_unreachable ();
/* Split branch based on floating point condition.  */

ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
  rtx second, bypass;
  rtx label = NULL_RTX;
  int bypass_probability = -1, second_probability = -1, probability = -1;

  if (target2 != pc_rtx)
      code = reverse_condition_maybe_unordered (code);

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
      /* Distribute the probabilities across the jumps.
         Assume the BYPASS and SECOND to be always test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
         to be updated.  Later we may run some experiments and see
         if unordered values are more frequent in practice.  */
        bypass_probability = 1;
        second_probability = 1;

  if (bypass != NULL_RTX)
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                gen_rtx_LABEL_REF (VOIDmode,
      if (bypass_probability >= 0)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),

  i = emit_jump_insn (gen_rtx_SET
                      gen_rtx_IF_THEN_ELSE (VOIDmode,
                                            condition, target1, target2)));
  if (probability >= 0)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
  if (second != NULL_RTX)
      i = emit_jump_insn (gen_rtx_SET
                          gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
      if (second_probability >= 0)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
  if (label != NULL_RTX)
    emit_label (label);
ix86_expand_setcc (enum rtx_code code, rtx dest)
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
    return 0; /* FAIL */

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
      rtx test = second_test;
      rtx tmp2 = gen_reg_rtx (QImode);
          gcc_assert (!second_test);
          test = bypass_test;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));

  /* Attach a REG_EQUAL note describing the comparison result.  */
  if (ix86_compare_op0 && ix86_compare_op1)
      equiv = simplify_gen_relational (code, QImode,
                                       GET_MODE (ix86_compare_op0),
                                       ix86_compare_op0, ix86_compare_op1);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);

  return 1; /* DONE */
10712 /* Expand comparison setting or clearing carry flag. Return true when
10713 successful and set pop for the operation. */
10715 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
10717 enum machine_mode mode
=
10718 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
10720 /* Do not handle DImode compares that go through special path. Also we can't
10721 deal with FP compares yet. This is possible to add. */
10722 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
10724 if (FLOAT_MODE_P (mode
))
10726 rtx second_test
= NULL
, bypass_test
= NULL
;
10727 rtx compare_op
, compare_seq
;
10729 /* Shortcut: following common codes never translate into carry flag compares. */
10730 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
10731 || code
== ORDERED
|| code
== UNORDERED
)
10734 /* These comparisons require zero flag; swap operands so they won't. */
10735 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
10736 && !TARGET_IEEE_FP
)
10741 code
= swap_condition (code
);
10744 /* Try to expand the comparison and verify that we end up with carry flag
10745 based comparison. This is fails to be true only when we decide to expand
10746 comparison using arithmetic that is not too common scenario. */
10748 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10749 &second_test
, &bypass_test
);
10750 compare_seq
= get_insns ();
10753 if (second_test
|| bypass_test
)
10755 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10756 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10757 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
10759 code
= GET_CODE (compare_op
);
10760 if (code
!= LTU
&& code
!= GEU
)
10762 emit_insn (compare_seq
);
  if (!INTEGRAL_MODE_P (mode))

      /* Convert a==0 into (unsigned)a<1.  */
      if (op1 != const0_rtx)
      code = (code == EQ ? LTU : GEU);

      /* Convert a>b into b<a or a>=b-1.  */
      if (GET_CODE (op1) == CONST_INT)
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
          code = (code == GTU ? GEU : LTU);
      code = (code == GTU ? LTU : GEU);

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
      if (mode == DImode || op1 != const0_rtx)
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);

      if (mode == DImode || op1 != constm1_rtx)
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
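
  /* Illustrative examples of the conversions above (added commentary, not
     part of the original sources):

         a == 0   becomes  (unsigned) a < 1             i.e. EQ  -> LTU
         a >u 5   becomes  (unsigned) a >= 6            i.e. GTU -> GEU
         a >= 0   becomes  (unsigned) a < 0x80000000    (tests the sign bit)
         a <= -1  becomes  (unsigned) a >= 0x80000000

     In every case the result ends up in the carry flag, so it can feed the
     sbb/adc based sequences emitted by the callers below.  */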
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */
10861 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
10862 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
10863 && GET_CODE (operands
[2]) == CONST_INT
10864 && GET_CODE (operands
[3]) == CONST_INT
)
10866 rtx out
= operands
[0];
10867 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
10868 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
10869 HOST_WIDE_INT diff
;
10872 /* Sign bit compares are better done using shifts than we do by using
10874 if (sign_bit_compare_p
10875 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10876 ix86_compare_op1
, &compare_op
))
10878 /* Detect overlap between destination and compare sources. */
10881 if (!sign_bit_compare_p
)
10883 bool fpcmp
= false;
10885 compare_code
= GET_CODE (compare_op
);
10887 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10888 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10891 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
10894 /* To simplify rest of code, restrict to the GEU case. */
10895 if (compare_code
== LTU
)
10897 HOST_WIDE_INT tmp
= ct
;
10900 compare_code
= reverse_condition (compare_code
);
10901 code
= reverse_condition (code
);
10906 PUT_CODE (compare_op
,
10907 reverse_condition_maybe_unordered
10908 (GET_CODE (compare_op
)));
10910 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10914 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
10915 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
10916 tmp
= gen_reg_rtx (mode
);
10918 if (mode
== DImode
)
10919 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
10921 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
10925 if (code
== GT
|| code
== GE
)
10926 code
= reverse_condition (code
);
10929 HOST_WIDE_INT tmp
= ct
;
10934 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
10935 ix86_compare_op1
, VOIDmode
, 0, -1);
10948 tmp
= expand_simple_binop (mode
, PLUS
,
10950 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10961 tmp
= expand_simple_binop (mode
, IOR
,
10963 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10965 else if (diff
== -1 && ct
)
10975 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10977 tmp
= expand_simple_binop (mode
, PLUS
,
10978 copy_rtx (tmp
), GEN_INT (cf
),
10979 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10987 * andl cf - ct, dest
10997 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11000 tmp
= expand_simple_binop (mode
, AND
,
11002 gen_int_mode (cf
- ct
, mode
),
11003 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11005 tmp
= expand_simple_binop (mode
, PLUS
,
11006 copy_rtx (tmp
), GEN_INT (ct
),
11007 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11010 if (!rtx_equal_p (tmp
, out
))
11011 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11013 return 1; /* DONE */
11019 tmp
= ct
, ct
= cf
, cf
= tmp
;
11021 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11023 /* We may be reversing unordered compare to normal compare, that
11024 is not valid in general (we may convert non-trapping condition
11025 to trapping one), however on i386 we currently emit all
11026 comparisons unordered. */
11027 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11028 code
= reverse_condition_maybe_unordered (code
);
11032 compare_code
= reverse_condition (compare_code
);
11033 code
= reverse_condition (code
);
11037 compare_code
= UNKNOWN
;
11038 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11039 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
11041 if (ix86_compare_op1
== const0_rtx
11042 && (code
== LT
|| code
== GE
))
11043 compare_code
= code
;
11044 else if (ix86_compare_op1
== constm1_rtx
)
11048 else if (code
== GT
)
11053 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11054 if (compare_code
!= UNKNOWN
11055 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11056 && (cf
== -1 || ct
== -1))
11058 /* If lea code below could be used, only optimize
11059 if it results in a 2 insn sequence. */
11061 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11062 || diff
== 3 || diff
== 5 || diff
== 9)
11063 || (compare_code
== LT
&& ct
== -1)
11064 || (compare_code
== GE
&& cf
== -1))
11067 * notl op1 (if necessary)
11075 code
= reverse_condition (code
);
11078 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11079 ix86_compare_op1
, VOIDmode
, 0, -1);
11081 out
= expand_simple_binop (mode
, IOR
,
11083 out
, 1, OPTAB_DIRECT
);
11084 if (out
!= operands
[0])
11085 emit_move_insn (operands
[0], out
);
11087 return 1; /* DONE */
11092 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11093 || diff
== 3 || diff
== 5 || diff
== 9)
11094 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11096 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11102 * lea cf(dest*(ct-cf)),dest
11106 * This also catches the degenerate setcc-only case.
11112 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11113 ix86_compare_op1
, VOIDmode
, 0, 1);
11116 /* On x86_64 the lea instruction operates on Pmode, so we need
11117 to get arithmetics done in proper mode to match. */
11119 tmp
= copy_rtx (out
);
11123 out1
= copy_rtx (out
);
11124 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11128 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11134 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11137 if (!rtx_equal_p (tmp
, out
))
11140 out
= force_operand (tmp
, copy_rtx (out
));
11142 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11144 if (!rtx_equal_p (out
, operands
[0]))
11145 emit_move_insn (operands
[0], copy_rtx (out
));
11147 return 1; /* DONE */
          /* General case:                     Jumpful:
           *   xorl dest,dest                    cmpl op1, op2
           *   cmpl op1, op2                     movl ct, dest
           *   setcc dest                        jcc 1f
           *   decl dest                         movl cf, dest
           *   andl (cf-ct),dest               1:
           *
           *   Size 20.                          Size 14.
           *
           * This is reasonably steep, but branch mispredict costs are
           * high on modern cpus, so consider failing only if optimizing
           * for size.  */
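
          /* Added note (not in the original sources): the branchless column
             works because "setcc dest" leaves 1 in DEST when the condition
             holds and 0 otherwise, so the sequence computes

                 dest = (((cond ? 1 : 0) - 1) & (cf - ct)) + ct
                      = cond ? ct : cf

             i.e. decl turns 1/0 into 0/-1, the and keeps 0 or cf-ct, and the
             final addition of ct selects between ct and cf without a jump.  */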
11166 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11167 && BRANCH_COST
>= 2)
11173 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11174 /* We may be reversing unordered compare to normal compare,
11175 that is not valid in general (we may convert non-trapping
11176 condition to trapping one), however on i386 we currently
11177 emit all comparisons unordered. */
11178 code
= reverse_condition_maybe_unordered (code
);
11181 code
= reverse_condition (code
);
11182 if (compare_code
!= UNKNOWN
)
11183 compare_code
= reverse_condition (compare_code
);
11187 if (compare_code
!= UNKNOWN
)
11189 /* notl op1 (if needed)
11194 For x < 0 (resp. x <= -1) there will be no notl,
11195 so if possible swap the constants to get rid of the
11197 True/false will be -1/0 while code below (store flag
11198 followed by decrement) is 0/-1, so the constants need
11199 to be exchanged once more. */
11201 if (compare_code
== GE
|| !cf
)
11203 code
= reverse_condition (code
);
11208 HOST_WIDE_INT tmp
= cf
;
11213 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11214 ix86_compare_op1
, VOIDmode
, 0, -1);
11218 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11219 ix86_compare_op1
, VOIDmode
, 0, 1);
11221 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11222 copy_rtx (out
), 1, OPTAB_DIRECT
);
11225 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11226 gen_int_mode (cf
- ct
, mode
),
11227 copy_rtx (out
), 1, OPTAB_DIRECT
);
11229 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11230 copy_rtx (out
), 1, OPTAB_DIRECT
);
11231 if (!rtx_equal_p (out
, operands
[0]))
11232 emit_move_insn (operands
[0], copy_rtx (out
));
11234 return 1; /* DONE */
11238 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11240 /* Try a few things more with specific constants and a variable. */
11243 rtx var
, orig_out
, out
, tmp
;
11245 if (BRANCH_COST
<= 2)
11246 return 0; /* FAIL */
11248 /* If one of the two operands is an interesting constant, load a
11249 constant with the above and mask it in with a logical operation. */
11251 if (GET_CODE (operands
[2]) == CONST_INT
)
11254 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11255 operands
[3] = constm1_rtx
, op
= and_optab
;
11256 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11257 operands
[3] = const0_rtx
, op
= ior_optab
;
11259 return 0; /* FAIL */
11261 else if (GET_CODE (operands
[3]) == CONST_INT
)
11264 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11265 operands
[2] = constm1_rtx
, op
= and_optab
;
11266 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11267 operands
[2] = const0_rtx
, op
= ior_optab
;
11269 return 0; /* FAIL */
11272 return 0; /* FAIL */
11274 orig_out
= operands
[0];
11275 tmp
= gen_reg_rtx (mode
);
11278 /* Recurse to get the constant loaded. */
11279 if (ix86_expand_int_movcc (operands
) == 0)
11280 return 0; /* FAIL */
11282 /* Mask in the interesting variable. */
11283 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11285 if (!rtx_equal_p (out
, orig_out
))
11286 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11288 return 1; /* DONE */
11292 * For comparison with above,
11302 if (! nonimmediate_operand (operands
[2], mode
))
11303 operands
[2] = force_reg (mode
, operands
[2]);
11304 if (! nonimmediate_operand (operands
[3], mode
))
11305 operands
[3] = force_reg (mode
, operands
[3]);
11307 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11309 rtx tmp
= gen_reg_rtx (mode
);
11310 emit_move_insn (tmp
, operands
[3]);
11313 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11315 rtx tmp
= gen_reg_rtx (mode
);
11316 emit_move_insn (tmp
, operands
[2]);
11320 if (! register_operand (operands
[2], VOIDmode
)
11322 || ! register_operand (operands
[3], VOIDmode
)))
11323 operands
[2] = force_reg (mode
, operands
[2]);
11326 && ! register_operand (operands
[3], VOIDmode
))
11327 operands
[3] = force_reg (mode
, operands
[3]);
11329 emit_insn (compare_seq
);
11330 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11331 gen_rtx_IF_THEN_ELSE (mode
,
11332 compare_op
, operands
[2],
11335 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11336 gen_rtx_IF_THEN_ELSE (mode
,
11338 copy_rtx (operands
[3]),
11339 copy_rtx (operands
[0]))));
11341 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11342 gen_rtx_IF_THEN_ELSE (mode
,
11344 copy_rtx (operands
[2]),
11345 copy_rtx (operands
[0]))));
11347 return 1; /* DONE */
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */

      /* These are supported directly.  */

      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))

      /* These are not supported directly.  Swap the comparison operands
         to transform into something that is supported.  */
      code = swap_condition (code);

      gcc_unreachable ();
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;

  else if (code == UNGE)
      if_true = if_false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
}
/* Expand an sse vector comparison.  Return the register with the result.  */

ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);

  cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */
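
/* Added illustration (not part of the original sources): with a comparison
   mask CMP that is all-ones where the condition holds and all-zeros where it
   does not, the conditional move is plain bitwise arithmetic:

       dest = (cmp & op_true) | (~cmp & op_false);

   The special cases below drop the unneeded half when one of the two value
   operands is already the zero vector.  */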
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);

  if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
      t3 = gen_reg_rtx (mode);

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
11535 /* Expand a floating-point conditional move. Return true if successful. */
11538 ix86_expand_fp_movcc (rtx operands
[])
11540 enum machine_mode mode
= GET_MODE (operands
[0]);
11541 enum rtx_code code
= GET_CODE (operands
[1]);
11542 rtx tmp
, compare_op
, second_test
, bypass_test
;
11544 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
11546 enum machine_mode cmode
;
11548 /* Since we've no cmove for sse registers, don't force bad register
11549 allocation just to gain access to it. Deny movcc when the
11550 comparison mode doesn't match the move mode. */
11551 cmode
= GET_MODE (ix86_compare_op0
);
11552 if (cmode
== VOIDmode
)
11553 cmode
= GET_MODE (ix86_compare_op1
);
11557 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11559 &ix86_compare_op1
);
11560 if (code
== UNKNOWN
)
11563 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
11564 ix86_compare_op1
, operands
[2],
11568 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
11569 ix86_compare_op1
, operands
[2], operands
[3]);
11570 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
11574 /* The floating point conditional move instructions don't directly
11575 support conditions resulting from a signed integer comparison. */
11577 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11579 /* The floating point conditional move instructions don't directly
11580 support signed integer comparisons. */
11582 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
11584 gcc_assert (!second_test
&& !bypass_test
);
11585 tmp
= gen_reg_rtx (QImode
);
11586 ix86_expand_setcc (code
, tmp
);
11588 ix86_compare_op0
= tmp
;
11589 ix86_compare_op1
= const0_rtx
;
11590 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11592 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11594 tmp
= gen_reg_rtx (mode
);
11595 emit_move_insn (tmp
, operands
[3]);
11598 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11600 tmp
= gen_reg_rtx (mode
);
11601 emit_move_insn (tmp
, operands
[2]);
11605 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11606 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
11607 operands
[2], operands
[3])));
11609 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11610 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
11611 operands
[3], operands
[0])));
11613 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11614 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
11615 operands
[2], operands
[0])));
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
}
/* Expand a signed integral vector conditional move.  */

ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Canonicalize the comparison to EQ, GT, GTU.  */
      code = reverse_condition (code);
      code = reverse_condition (code);
      code = swap_condition (code);
      x = cop0, cop0 = cop1, cop1 = x;
      gcc_unreachable ();

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
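
  /* Added commentary (not in the original sources): one of the tricks used
     below, for element widths that have an unsigned saturating subtraction,
     is

         x = a -us b;            (saturating subtract)
         a >u b   <==>   x != 0

     Since the hardware only provides equality and signed-greater compares,
     the code tests x == 0 instead and compensates by swapping the two value
     operands via NEGATE (note the operands[1+negate] / operands[2-negate]
     indexing at the end of the function).  */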
      cop0 = force_reg (mode, cop0);

            /* Perform a parallel modulo subtraction.  */
            t1 = gen_reg_rtx (mode);
            emit_insn (gen_subv4si3 (t1, cop0, cop1));

            /* Extract the original sign bit of op0.  */
            mask = GEN_INT (-0x80000000);
            mask = gen_rtx_CONST_VECTOR (mode,
                                         gen_rtvec (4, mask, mask, mask, mask));
            mask = force_reg (mode, mask);
            t2 = gen_reg_rtx (mode);
            emit_insn (gen_andv4si3 (t2, cop0, mask));

            /* XOR it back into the result of the subtraction.  This results
               in the sign bit set iff we saw unsigned underflow.  */
            x = gen_reg_rtx (mode);
            emit_insn (gen_xorv4si3 (x, t1, t2));

            /* Perform a parallel unsigned saturating subtraction.  */
            x = gen_reg_rtx (mode);
            emit_insn (gen_rtx_SET (VOIDmode, x,
                                    gen_rtx_US_MINUS (mode, cop0, cop1)));

            gcc_unreachable ();

      cop1 = CONST0_RTX (mode);

  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                           operands[1+negate], operands[2-negate]);
  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
}
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx (*unpack) (rtx, rtx, rtx);

      unpack = gen_vec_interleave_highv16qi;
      unpack = gen_vec_interleave_lowv16qi;
      unpack = gen_vec_interleave_highv8hi;
      unpack = gen_vec_interleave_lowv8hi;
      unpack = gen_vec_interleave_highv4si;
      unpack = gen_vec_interleave_lowv4si;
      gcc_unreachable ();

  dest = gen_lowpart (imode, operands[0]);

  se = force_reg (imode, CONST0_RTX (imode));
  se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
                            operands[1], pc_rtx, pc_rtx);

  emit_insn (unpack (dest, operands[1], se));
}
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
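
/* Added illustration (not in the original sources): for something like

       r = (a < b) ? r + 1 : r;          (unsigned comparison)

   the compare leaves its result in the carry flag, so the whole conditional
   increment becomes just

       cmpl  %ebx, %eax
       adcl  $0, %ecx

   and the conditional decrement uses sbb the same way.  */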
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                       ix86_compare_op1, &compare_op))
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
      code = ix86_fp_compare_code_to_integer (code);

      PUT_CODE (compare_op,
                reverse_condition_maybe_unordered
                (GET_CODE (compare_op)));
      PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    switch (GET_MODE (operands[0]))
      {
        emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
        emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
        emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
        emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
        gcc_unreachable ();
      }

    switch (GET_MODE (operands[0]))
      {
        emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
        emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
        emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
        emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
        gcc_unreachable ();
      }

  return 1; /* DONE */
}
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */
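
/* Added example (not in the original sources): in 32-bit mode a DFmode
   operand is returned as two SImode parts (low word first) and an XFmode
   operand as three, so for instance

       double 1.0  ->  parts[0] = 0x00000000, parts[1] = 0x3ff00000

   matching the little-endian memory layout of the value.  */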
11884 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
11889 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
11891 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
11893 gcc_assert (GET_CODE (operand
) != REG
|| !MMX_REGNO_P (REGNO (operand
)));
11894 gcc_assert (size
>= 2 && size
<= 3);
11896 /* Optimize constant pool reference to immediates. This is used by fp
11897 moves, that force all constants to memory to allow combining. */
11898 if (GET_CODE (operand
) == MEM
&& MEM_READONLY_P (operand
))
11900 rtx tmp
= maybe_get_pool_constant (operand
);
11905 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
11907 /* The only non-offsetable memories we handle are pushes. */
11908 int ok
= push_operand (operand
, VOIDmode
);
11912 operand
= copy_rtx (operand
);
11913 PUT_MODE (operand
, Pmode
);
11914 parts
[0] = parts
[1] = parts
[2] = operand
;
11918 if (GET_CODE (operand
) == CONST_VECTOR
)
11920 enum machine_mode imode
= int_mode_for_mode (mode
);
11921 /* Caution: if we looked through a constant pool memory above,
11922 the operand may actually have a different mode now. That's
11923 ok, since we want to pun this all the way back to an integer. */
11924 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
11925 gcc_assert (operand
!= NULL
);
11931 if (mode
== DImode
)
11932 split_di (&operand
, 1, &parts
[0], &parts
[1]);
11935 if (REG_P (operand
))
11937 gcc_assert (reload_completed
);
11938 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
11939 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
11941 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
11943 else if (offsettable_memref_p (operand
))
11945 operand
= adjust_address (operand
, SImode
, 0);
11946 parts
[0] = operand
;
11947 parts
[1] = adjust_address (operand
, SImode
, 4);
11949 parts
[2] = adjust_address (operand
, SImode
, 8);
11951 else if (GET_CODE (operand
) == CONST_DOUBLE
)
11956 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
11960 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
11961 parts
[2] = gen_int_mode (l
[2], SImode
);
11964 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
11967 gcc_unreachable ();
11969 parts
[1] = gen_int_mode (l
[1], SImode
);
11970 parts
[0] = gen_int_mode (l
[0], SImode
);
11973 gcc_unreachable ();
11978 if (mode
== TImode
)
11979 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
11980 if (mode
== XFmode
|| mode
== TFmode
)
11982 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
11983 if (REG_P (operand
))
11985 gcc_assert (reload_completed
);
11986 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
11987 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
11989 else if (offsettable_memref_p (operand
))
11991 operand
= adjust_address (operand
, DImode
, 0);
11992 parts
[0] = operand
;
11993 parts
[1] = adjust_address (operand
, upper_mode
, 8);
11995 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12000 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12001 real_to_target (l
, &r
, mode
);
12003 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12004 if (HOST_BITS_PER_WIDE_INT
>= 64)
12007 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12008 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12011 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12013 if (upper_mode
== SImode
)
12014 parts
[1] = gen_int_mode (l
[2], SImode
);
12015 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12018 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12019 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12022 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12025 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
12038 ix86_split_long_move (rtx operands
[])
12043 int collisions
= 0;
12044 enum machine_mode mode
= GET_MODE (operands
[0]);
12046 /* The DFmode expanders may ask us to move double.
12047 For 64bit target this is single move. By hiding the fact
12048 here we simplify i386.md splitters. */
12049 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12051 /* Optimize constant pool reference to immediates. This is used by
12052 fp moves, that force all constants to memory to allow combining. */
12054 if (GET_CODE (operands
[1]) == MEM
12055 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12056 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12057 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12058 if (push_operand (operands
[0], VOIDmode
))
12060 operands
[0] = copy_rtx (operands
[0]);
12061 PUT_MODE (operands
[0], Pmode
);
12064 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12065 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12066 emit_move_insn (operands
[0], operands
[1]);
12070 /* The only non-offsettable memory we handle is push. */
12071 if (push_operand (operands
[0], VOIDmode
))
12074 gcc_assert (GET_CODE (operands
[0]) != MEM
12075 || offsettable_memref_p (operands
[0]));
12077 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12078 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12080 /* When emitting push, take care for source operands on the stack. */
12081 if (push
&& GET_CODE (operands
[1]) == MEM
12082 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12085 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12086 XEXP (part
[1][2], 0));
12087 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12088 XEXP (part
[1][1], 0));
12091 /* We need to do copy in the right order in case an address register
12092 of the source overlaps the destination. */
12093 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
12095 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12097 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12100 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12103 /* Collision in the middle part can be handled by reordering. */
12104 if (collisions
== 1 && nparts
== 3
12105 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12108 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12109 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12112 /* If there are more collisions, we can't handle it by reordering.
12113 Do an lea to the last part and use only one colliding move. */
12114 else if (collisions
> 1)
12120 base
= part
[0][nparts
- 1];
12122 /* Handle the case when the last part isn't valid for lea.
12123 Happens in 64-bit mode storing the 12-byte XFmode. */
12124 if (GET_MODE (base
) != Pmode
)
12125 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12127 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12128 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12129 part
[1][1] = replace_equiv_address (part
[1][1],
12130 plus_constant (base
, UNITS_PER_WORD
));
12132 part
[1][2] = replace_equiv_address (part
[1][2],
12133 plus_constant (base
, 8));
12143 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12144 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12145 emit_move_insn (part
[0][2], part
[1][2]);
12150 /* In 64bit mode we don't have 32bit push available. In case this is
12151 register, it is OK - we will just use larger counterpart. We also
12152 retype memory - these comes from attempt to avoid REX prefix on
12153 moving of second half of TFmode value. */
12154 if (GET_MODE (part
[1][1]) == SImode
)
12156 switch (GET_CODE (part
[1][1]))
12159 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12163 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12167 gcc_unreachable ();
12170 if (GET_MODE (part
[1][0]) == SImode
)
12171 part
[1][0] = part
[1][1];
12174 emit_move_insn (part
[0][1], part
[1][1]);
12175 emit_move_insn (part
[0][0], part
[1][0]);
12179 /* Choose correct order to not overwrite the source before it is copied. */
12180 if ((REG_P (part
[0][0])
12181 && REG_P (part
[1][1])
12182 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12184 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12186 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12190 operands
[2] = part
[0][2];
12191 operands
[3] = part
[0][1];
12192 operands
[4] = part
[0][0];
12193 operands
[5] = part
[1][2];
12194 operands
[6] = part
[1][1];
12195 operands
[7] = part
[1][0];
12199 operands
[2] = part
[0][1];
12200 operands
[3] = part
[0][0];
12201 operands
[5] = part
[1][1];
12202 operands
[6] = part
[1][0];
12209 operands
[2] = part
[0][0];
12210 operands
[3] = part
[0][1];
12211 operands
[4] = part
[0][2];
12212 operands
[5] = part
[1][0];
12213 operands
[6] = part
[1][1];
12214 operands
[7] = part
[1][2];
12218 operands
[2] = part
[0][0];
12219 operands
[3] = part
[0][1];
12220 operands
[5] = part
[1][0];
12221 operands
[6] = part
[1][1];
12225 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12228 if (GET_CODE (operands
[5]) == CONST_INT
12229 && operands
[5] != const0_rtx
12230 && REG_P (operands
[2]))
12232 if (GET_CODE (operands
[6]) == CONST_INT
12233 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12234 operands
[6] = operands
[2];
12237 && GET_CODE (operands
[7]) == CONST_INT
12238 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12239 operands
[7] = operands
[2];
12243 && GET_CODE (operands
[6]) == CONST_INT
12244 && operands
[6] != const0_rtx
12245 && REG_P (operands
[3])
12246 && GET_CODE (operands
[7]) == CONST_INT
12247 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12248 operands
[7] = operands
[3];
12251 emit_move_insn (operands
[2], operands
[5]);
12252 emit_move_insn (operands
[3], operands
[6]);
12254 emit_move_insn (operands
[4], operands
[7]);
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */
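
/* Added note (not in the original sources): "x << 2" can be emitted either
   as one shift or as repeated additions, since x + x == x << 1:

       sall  $2, %eax          versus          addl  %eax, %eax
                                               addl  %eax, %eax

   The choice below is purely a cost decision (count * add cost against one
   constant shift), except that a count of 1 is always done with a single add
   and that the single shift is used when optimizing for size.  */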
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
    emit_insn ((mode == DImode
                ? gen_addsi3
                : gen_adddi3) (operand, operand, operand));
  else if (!optimize_size
           && count * ix86_cost->add <= ix86_cost->shift_const)
      for (i = 0; i < count; i++)
          emit_insn ((mode == DImode
                      ? gen_addsi3
                      : gen_adddi3) (operand, operand, operand));
    emit_insn ((mode == DImode
                ? gen_ashlsi3
                : gen_ashldi3) (operand, operand, GEN_INT (count)));
}
12290 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12292 rtx low
[2], high
[2];
12294 const int single_width
= mode
== DImode
? 32 : 64;
12296 if (GET_CODE (operands
[2]) == CONST_INT
)
12298 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12299 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12301 if (count
>= single_width
)
12303 emit_move_insn (high
[0], low
[1]);
12304 emit_move_insn (low
[0], const0_rtx
);
12306 if (count
> single_width
)
12307 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12311 if (!rtx_equal_p (operands
[0], operands
[1]))
12312 emit_move_insn (operands
[0], operands
[1]);
12313 emit_insn ((mode
== DImode
12315 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12316 ix86_expand_ashl_const (low
[0], count
, mode
);
12321 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12323 if (operands
[1] == const1_rtx
)
          /* Assuming we've chosen QImode-capable registers, then 1 << N
             can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12327 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12329 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
12331 ix86_expand_clear (low
[0]);
12332 ix86_expand_clear (high
[0]);
12333 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
12335 d
= gen_lowpart (QImode
, low
[0]);
12336 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12337 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12338 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12340 d
= gen_lowpart (QImode
, high
[0]);
12341 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12342 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
12343 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
          /* Otherwise, we can get the same results by manually performing
             a bit extract operation on bit 5/6, and then performing the two
             shifts.  The two methods of getting 0/1 into low/high are exactly
             the same size.  Avoiding the shift in the bit extract case helps
             pentium4 a bit; no one else seems to care much either way.  */
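
          /* Added worked example (not in the original sources), for the
             DImode case split into 32-bit halves.  With a shift count N in
             0..63 the sequence below computes

                 high = (N >> 5) & 1;     1 iff N >= 32
                 low  = high ^ 1;         1 iff N <  32
                 low  <<= N;              hardware shift uses N mod 32
                 high <<= N;

             so exactly one half ends up holding 1 << (N mod 32) and the
             other is zero, which is 1 << N viewed as a 64-bit value.  */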
12355 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
12356 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
12358 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
12359 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
12361 emit_insn ((mode
== DImode
12363 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
12364 emit_insn ((mode
== DImode
12366 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
12367 emit_move_insn (low
[0], high
[0]);
12368 emit_insn ((mode
== DImode
12370 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
12373 emit_insn ((mode
== DImode
12375 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12376 emit_insn ((mode
== DImode
12378 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
12382 if (operands
[1] == constm1_rtx
)
12384 /* For -1 << N, we can avoid the shld instruction, because we
12385 know that we're shifting 0...31/63 ones into a -1. */
12386 emit_move_insn (low
[0], constm1_rtx
);
12388 emit_move_insn (high
[0], low
[0]);
12390 emit_move_insn (high
[0], constm1_rtx
);
12394 if (!rtx_equal_p (operands
[0], operands
[1]))
12395 emit_move_insn (operands
[0], operands
[1]);
12397 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12398 emit_insn ((mode
== DImode
12400 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
12403 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12405 if (TARGET_CMOVE
&& scratch
)
12407 ix86_expand_clear (scratch
);
12408 emit_insn ((mode
== DImode
12409 ? gen_x86_shift_adj_1
12410 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
12413 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
12417 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12419 rtx low
[2], high
[2];
12421 const int single_width
= mode
== DImode
? 32 : 64;
12423 if (GET_CODE (operands
[2]) == CONST_INT
)
12425 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12426 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12428 if (count
== single_width
* 2 - 1)
12430 emit_move_insn (high
[0], high
[1]);
12431 emit_insn ((mode
== DImode
12433 : gen_ashrdi3
) (high
[0], high
[0],
12434 GEN_INT (single_width
- 1)));
12435 emit_move_insn (low
[0], high
[0]);
12438 else if (count
>= single_width
)
12440 emit_move_insn (low
[0], high
[1]);
12441 emit_move_insn (high
[0], low
[0]);
12442 emit_insn ((mode
== DImode
12444 : gen_ashrdi3
) (high
[0], high
[0],
12445 GEN_INT (single_width
- 1)));
12446 if (count
> single_width
)
12447 emit_insn ((mode
== DImode
12449 : gen_ashrdi3
) (low
[0], low
[0],
12450 GEN_INT (count
- single_width
)));
12454 if (!rtx_equal_p (operands
[0], operands
[1]))
12455 emit_move_insn (operands
[0], operands
[1]);
12456 emit_insn ((mode
== DImode
12458 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12459 emit_insn ((mode
== DImode
12461 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12466 if (!rtx_equal_p (operands
[0], operands
[1]))
12467 emit_move_insn (operands
[0], operands
[1]);
12469 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12471 emit_insn ((mode
== DImode
12473 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12474 emit_insn ((mode
== DImode
12476 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
12478 if (TARGET_CMOVE
&& scratch
)
12480 emit_move_insn (scratch
, high
[0]);
12481 emit_insn ((mode
== DImode
12483 : gen_ashrdi3
) (scratch
, scratch
,
12484 GEN_INT (single_width
- 1)));
12485 emit_insn ((mode
== DImode
12486 ? gen_x86_shift_adj_1
12487 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12491 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
12496 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12498 rtx low
[2], high
[2];
12500 const int single_width
= mode
== DImode
? 32 : 64;
12502 if (GET_CODE (operands
[2]) == CONST_INT
)
12504 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12505 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12507 if (count
>= single_width
)
12509 emit_move_insn (low
[0], high
[1]);
12510 ix86_expand_clear (high
[0]);
12512 if (count
> single_width
)
12513 emit_insn ((mode
== DImode
12515 : gen_lshrdi3
) (low
[0], low
[0],
12516 GEN_INT (count
- single_width
)));
12520 if (!rtx_equal_p (operands
[0], operands
[1]))
12521 emit_move_insn (operands
[0], operands
[1]);
12522 emit_insn ((mode
== DImode
12524 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12525 emit_insn ((mode
== DImode
12527 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12532 if (!rtx_equal_p (operands
[0], operands
[1]))
12533 emit_move_insn (operands
[0], operands
[1]);
12535 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12537 emit_insn ((mode
== DImode
12539 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12540 emit_insn ((mode
== DImode
12542 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
12544 /* Heh. By reversing the arguments, we can reuse this pattern. */
12545 if (TARGET_CMOVE
&& scratch
)
12547 ix86_expand_clear (scratch
);
12548 emit_insn ((mode
== DImode
12549 ? gen_x86_shift_adj_1
12550 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12554 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
/* Helper function for the string operations below.  Tests whether VARIABLE
   is aligned to VALUE bytes.  If so, jump to the label.  */

ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  return label;
}
/* Adjust COUNTER by the VALUE.  */

ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */

ix86_zero_extend_to_Pmode (rtx exp)
{
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrmem contains similar code.  */
12601 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
12603 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
12604 enum machine_mode counter_mode
;
12605 HOST_WIDE_INT align
= 0;
12606 unsigned HOST_WIDE_INT count
= 0;
12608 if (GET_CODE (align_exp
) == CONST_INT
)
12609 align
= INTVAL (align_exp
);
12611 /* Can't use any of this if the user has appropriated esi or edi. */
12612 if (global_regs
[4] || global_regs
[5])
12615 /* This simple hack avoids all inlining code and simplifies code below. */
12616 if (!TARGET_ALIGN_STRINGOPS
)
12619 if (GET_CODE (count_exp
) == CONST_INT
)
12621 count
= INTVAL (count_exp
);
12622 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12626 /* Figure out proper mode for counter. For 32bits it is always SImode,
12627 for 64bits use SImode when possible, otherwise DImode.
12628 Set count to number of bytes copied when known at compile time. */
12630 || GET_MODE (count_exp
) == SImode
12631 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12632 counter_mode
= SImode
;
12634 counter_mode
= DImode
;
12636 gcc_assert (counter_mode
== SImode
|| counter_mode
== DImode
);
12638 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12639 if (destreg
!= XEXP (dst
, 0))
12640 dst
= replace_equiv_address_nv (dst
, destreg
);
12641 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
12642 if (srcreg
!= XEXP (src
, 0))
12643 src
= replace_equiv_address_nv (src
, srcreg
);
  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     Size of (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
     but we don't know whether upper 24 (resp. 56) bits of %ecx will be
     known to be zero or not.  The rep; movsb sequence causes higher
     register pressure though, so take that into account.  */
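
  /* Added worked example (not in the original sources): for count = 11 the
     unrolled form is two movsl, one movsw and one movsb, i.e.
     11 / 4 + (11 & 3) = 5 bytes of code, while "mov $11, %cl; rep; movsb"
     is 4 bytes and "mov $11, %ecx; rep; movsb" is 7, which is why the
     condition below weighs the two size estimates rather than always
     picking one form.  */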
12654 if ((!optimize
|| optimize_size
)
12659 || (count
& 3) + count
/ 4 > 6))))
12661 emit_insn (gen_cld ());
12662 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12663 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12664 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
12665 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
12669 /* For constant aligned (or small unaligned) copies use rep movsl
12670 followed by code copying the rest. For PentiumPro ensure 8 byte
12671 alignment to allow rep movsl acceleration. */
12673 else if (count
!= 0
12675 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12676 || optimize_size
|| count
< (unsigned int) 64))
12678 unsigned HOST_WIDE_INT offset
= 0;
12679 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12680 rtx srcmem
, dstmem
;
12682 emit_insn (gen_cld ());
12683 if (count
& ~(size
- 1))
12685 if ((TARGET_SINGLE_STRINGOP
|| optimize_size
) && count
< 5 * 4)
12687 enum machine_mode movs_mode
= size
== 4 ? SImode
: DImode
;
12689 while (offset
< (count
& ~(size
- 1)))
12691 srcmem
= adjust_automodify_address_nv (src
, movs_mode
,
12693 dstmem
= adjust_automodify_address_nv (dst
, movs_mode
,
12695 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12701 countreg
= GEN_INT ((count
>> (size
== 4 ? 2 : 3))
12702 & (TARGET_64BIT
? -1 : 0x3fffffff));
12703 countreg
= copy_to_mode_reg (counter_mode
, countreg
);
12704 countreg
= ix86_zero_extend_to_Pmode (countreg
);
12706 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
12707 GEN_INT (size
== 4 ? 2 : 3));
12708 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12709 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12711 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12712 countreg
, destexp
, srcexp
));
12713 offset
= count
& ~(size
- 1);
12716 if (size
== 8 && (count
& 0x04))
12718 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
12720 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
12722 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12727 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
12729 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
12731 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12736 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
12738 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
12740 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
      /* The generic code based on the glibc implementation:
         - align destination to 4 bytes (8 byte alignment is used for
           PentiumPro allowing accelerated copying there)
         - copy the data using rep movsl
         - copy the rest.  */
12752 rtx srcmem
, dstmem
;
12753 int desired_alignment
= (TARGET_PENTIUMPRO
12754 && (count
== 0 || count
>= (unsigned int) 260)
12755 ? 8 : UNITS_PER_WORD
);
12756 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12757 dst
= change_address (dst
, BLKmode
, destreg
);
12758 src
= change_address (src
, BLKmode
, srcreg
);
      /* In case we don't know anything about the alignment, default to
         library version, since it is usually equally fast and result in
         shorter code.

         Also emit call when we know that the count is large and call overhead
         will not be important.  */
12766 if (!TARGET_INLINE_ALL_STRINGOPS
12767 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
12770 if (TARGET_SINGLE_STRINGOP
)
12771 emit_insn (gen_cld ());
12773 countreg2
= gen_reg_rtx (Pmode
);
12774 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is often
         able to predict the branches) and also it is friendlier to the
         hardware branch prediction.

         Using loops is beneficial for generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */
12789 if (count
== 0 && align
< desired_alignment
)
12791 label
= gen_label_rtx ();
12792 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
12793 LEU
, 0, counter_mode
, 1, label
);
12797 rtx label
= ix86_expand_aligntest (destreg
, 1);
12798 srcmem
= change_address (src
, QImode
, srcreg
);
12799 dstmem
= change_address (dst
, QImode
, destreg
);
12800 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12801 ix86_adjust_counter (countreg
, 1);
12802 emit_label (label
);
12803 LABEL_NUSES (label
) = 1;
12807 rtx label
= ix86_expand_aligntest (destreg
, 2);
12808 srcmem
= change_address (src
, HImode
, srcreg
);
12809 dstmem
= change_address (dst
, HImode
, destreg
);
12810 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12811 ix86_adjust_counter (countreg
, 2);
12812 emit_label (label
);
12813 LABEL_NUSES (label
) = 1;
12815 if (align
<= 4 && desired_alignment
> 4)
12817 rtx label
= ix86_expand_aligntest (destreg
, 4);
12818 srcmem
= change_address (src
, SImode
, srcreg
);
12819 dstmem
= change_address (dst
, SImode
, destreg
);
12820 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12821 ix86_adjust_counter (countreg
, 4);
12822 emit_label (label
);
12823 LABEL_NUSES (label
) = 1;
12826 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
12828 emit_label (label
);
12829 LABEL_NUSES (label
) = 1;
12832 if (!TARGET_SINGLE_STRINGOP
)
12833 emit_insn (gen_cld ());
12836 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
12838 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
12842 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
12843 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
12845 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12846 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12847 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12848 countreg2
, destexp
, srcexp
));
12852 emit_label (label
);
12853 LABEL_NUSES (label
) = 1;
12855 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
12857 srcmem
= change_address (src
, SImode
, srcreg
);
12858 dstmem
= change_address (dst
, SImode
, destreg
);
12859 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12861 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
12863 rtx label
= ix86_expand_aligntest (countreg
, 4);
12864 srcmem
= change_address (src
, SImode
, srcreg
);
12865 dstmem
= change_address (dst
, SImode
, destreg
);
12866 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12867 emit_label (label
);
12868 LABEL_NUSES (label
) = 1;
12870 if (align
> 2 && count
!= 0 && (count
& 2))
12872 srcmem
= change_address (src
, HImode
, srcreg
);
12873 dstmem
= change_address (dst
, HImode
, destreg
);
12874 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12876 if (align
<= 2 || count
== 0)
12878 rtx label
= ix86_expand_aligntest (countreg
, 2);
12879 srcmem
= change_address (src
, HImode
, srcreg
);
12880 dstmem
= change_address (dst
, HImode
, destreg
);
12881 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12882 emit_label (label
);
12883 LABEL_NUSES (label
) = 1;
12885 if (align
> 1 && count
!= 0 && (count
& 1))
12887 srcmem
= change_address (src
, QImode
, srcreg
);
12888 dstmem
= change_address (dst
, QImode
, destreg
);
12889 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12891 if (align
<= 1 || count
== 0)
12893 rtx label
= ix86_expand_aligntest (countreg
, 1);
12894 srcmem
= change_address (src
, QImode
, srcreg
);
12895 dstmem
= change_address (dst
, QImode
, destreg
);
12896 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12897 emit_label (label
);
12898 LABEL_NUSES (label
) = 1;
12905 /* Expand string clear operation (bzero). Use i386 string operations when
12906 profitable. expand_movmem contains similar code. */
12908 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
12910 rtx destreg
, zeroreg
, countreg
, destexp
;
12911 enum machine_mode counter_mode
;
12912 HOST_WIDE_INT align
= 0;
12913 unsigned HOST_WIDE_INT count
= 0;
12915 if (GET_CODE (align_exp
) == CONST_INT
)
12916 align
= INTVAL (align_exp
);
12918 /* Can't use any of this if the user has appropriated esi. */
12919 if (global_regs
[4])
12922 /* This simple hack avoids all inlining code and simplifies code below. */
12923 if (!TARGET_ALIGN_STRINGOPS
)
12926 if (GET_CODE (count_exp
) == CONST_INT
)
12928 count
= INTVAL (count_exp
);
12929 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12932 /* Figure out proper mode for counter. For 32bits it is always SImode,
12933 for 64bits use SImode when possible, otherwise DImode.
12934 Set count to number of bytes copied when known at compile time. */
12936 || GET_MODE (count_exp
) == SImode
12937 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12938 counter_mode
= SImode
;
12940 counter_mode
= DImode
;
12942 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12943 if (destreg
!= XEXP (dst
, 0))
12944 dst
= replace_equiv_address_nv (dst
, destreg
);
  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.  */
12953 if ((!optimize
|| optimize_size
)
12956 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
12958 emit_insn (gen_cld ());
12960 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12961 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
12962 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12963 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  unsigned HOST_WIDE_INT repcount;
	  unsigned int max_nonrep;

	  repcount = count >> (size == 4 ? 2 : 3);
	  repcount &= 0x3fffffff;

	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
	     bytes.  In both cases the latter seems to be faster for small
	     values of N.  */
	  max_nonrep = size == 4 ? 7 : 4;
	  if (!optimize_size)
	    switch (ix86_tune)
	      {
	      case PROCESSOR_PENTIUM4:
	      case PROCESSOR_NOCONA:

	      }

	  if (repcount <= max_nonrep)
	    while (repcount-- > 0)
	      {
		rtx mem = adjust_automodify_address_nv (dst,
							GET_MODE (zeroreg),
							destreg, offset);
		emit_insn (gen_strset (destreg, mem, zeroreg));
		offset += size;
	      }
	  else
	    {
	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
	      countreg = ix86_zero_extend_to_Pmode (countreg);
	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
					GEN_INT (size == 4 ? 2 : 3));
	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
				       destexp));
	      offset = count & ~(size - 1);
	    }
	}
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and results in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
13074 if (count
== 0 && align
< desired_alignment
)
13076 label
= gen_label_rtx ();
13077 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
13078 LEU
, 0, counter_mode
, 1, label
);
13082 rtx label
= ix86_expand_aligntest (destreg
, 1);
13083 emit_insn (gen_strset (destreg
, dst
,
13084 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13085 ix86_adjust_counter (countreg
, 1);
13086 emit_label (label
);
13087 LABEL_NUSES (label
) = 1;
13091 rtx label
= ix86_expand_aligntest (destreg
, 2);
13092 emit_insn (gen_strset (destreg
, dst
,
13093 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13094 ix86_adjust_counter (countreg
, 2);
13095 emit_label (label
);
13096 LABEL_NUSES (label
) = 1;
13098 if (align
<= 4 && desired_alignment
> 4)
13100 rtx label
= ix86_expand_aligntest (destreg
, 4);
13101 emit_insn (gen_strset (destreg
, dst
,
13103 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
13105 ix86_adjust_counter (countreg
, 4);
13106 emit_label (label
);
13107 LABEL_NUSES (label
) = 1;
13110 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
13112 emit_label (label
);
13113 LABEL_NUSES (label
) = 1;
13117 if (!TARGET_SINGLE_STRINGOP
)
13118 emit_insn (gen_cld ());
13121 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
13123 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
13127 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
13128 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
13130 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
13131 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
13135 emit_label (label
);
13136 LABEL_NUSES (label
) = 1;
13139 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
13140 emit_insn (gen_strset (destreg
, dst
,
13141 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13142 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
13144 rtx label
= ix86_expand_aligntest (countreg
, 4);
13145 emit_insn (gen_strset (destreg
, dst
,
13146 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13147 emit_label (label
);
13148 LABEL_NUSES (label
) = 1;
13150 if (align
> 2 && count
!= 0 && (count
& 2))
13151 emit_insn (gen_strset (destreg
, dst
,
13152 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13153 if (align
<= 2 || count
== 0)
13155 rtx label
= ix86_expand_aligntest (countreg
, 2);
13156 emit_insn (gen_strset (destreg
, dst
,
13157 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13158 emit_label (label
);
13159 LABEL_NUSES (label
) = 1;
13161 if (align
> 1 && count
!= 0 && (count
& 1))
13162 emit_insn (gen_strset (destreg
, dst
,
13163 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13164 if (align
<= 1 || count
== 0)
13166 rtx label
= ix86_expand_aligntest (countreg
, 1);
13167 emit_insn (gen_strset (destreg
, dst
,
13168 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13169 emit_label (label
);
13170 LABEL_NUSES (label
) = 1;
/* Expand strlen.  */

ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo (strlen (bar), strlen (bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen (bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);
  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);

      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if it is aligned to 4 bytes.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}
      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
13327 /* Not needed with an alignment of 2 */
13330 emit_label (align_2_label
);
13332 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13336 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13338 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13340 emit_label (align_3_label
);
13343 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13347 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13349 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13352 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13353 align this loop. It gives only huge programs, but does not help to
13355 emit_label (align_4_label
);
13357 mem
= change_address (src
, SImode
, out
);
13358 emit_move_insn (scratch
, mem
);
13360 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
13362 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);
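  /* The comparison above implements the classic zero-byte test: for a
     32-bit word x, (x - 0x01010101) & ~x & 0x80808080 is nonzero exactly
     when some byte of x is zero, so a single compare-and-branch replaces
     four per-byte checks inside the loop.  */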
13377 rtx reg
= gen_reg_rtx (SImode
);
13378 rtx reg2
= gen_reg_rtx (Pmode
);
13379 emit_move_insn (reg
, tmpreg
);
13380 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
13382 /* If zero is not in the first two bytes, move two bytes forward. */
13383 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13384 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13385 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13386 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
13387 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
13390 /* Emit lea manually to avoid clobbering of flags. */
13391 emit_insn (gen_rtx_SET (SImode
, reg2
,
13392 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
13394 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13395 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13396 emit_insn (gen_rtx_SET (VOIDmode
, out
,
13397 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
13404 rtx end_2_label
= gen_label_rtx ();
13405 /* Is zero in the first two bytes? */
13407 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13408 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13409 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
13410 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
13411 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
13413 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
13414 JUMP_LABEL (tmp
) = end_2_label
;
13416 /* Not in the first two. Move two bytes forward. */
13417 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
13419 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
13421 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
13423 emit_label (end_2_label
);
13427 /* Avoid branch in fixing the byte. */
13428 tmpreg
= gen_lowpart (QImode
, tmpreg
);
13429 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
13430 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
13432 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
13434 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
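  /* The addqi3_cc above adds the low byte of TMPREG to itself, so the carry
     flag becomes bit 7 of that byte, i.e. it is set when the zero byte is
     the lower-addressed byte of the halfword being examined.  The
     subtract-with-borrow of 3 then moves OUT back by either 3 or 4 bytes,
     landing exactly on the terminating zero without a branch.  */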
13436 emit_label (end_0_label
);
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (! TARGET_64BIT && flag_pic
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
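  /* R11 is chosen because, in the x86-64 ABI, it is call-clobbered and is
     used neither for passing arguments nor as the static chain, so loading
     the target address into it cannot disturb anything the sibling call
     still needs.  */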
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return copy_rtx (s->rtl);

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));

  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
					    (TARGET_ANY_GNU_TLS
					     && !TARGET_64BIT)
					    ? "___tls_get_addr"
					    : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
							"_TLS_MODULE_BASE_");
      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
13585 /* Calculate the length of the memory address in the instruction
13586 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13589 memory_address_length (rtx addr
)
13591 struct ix86_address parts
;
13592 rtx base
, index
, disp
;
13596 if (GET_CODE (addr
) == PRE_DEC
13597 || GET_CODE (addr
) == POST_INC
13598 || GET_CODE (addr
) == PRE_MODIFY
13599 || GET_CODE (addr
) == POST_MODIFY
)
13602 ok
= ix86_decompose_address (addr
, &parts
);
13605 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
13606 parts
.base
= SUBREG_REG (parts
.base
);
13607 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
13608 parts
.index
= SUBREG_REG (parts
.index
);
13611 index
= parts
.index
;
13616 - esp as the base always wants an index,
13617 - ebp as the base always wants a displacement. */
13619 /* Register Indirect. */
13620 if (base
&& !index
&& !disp
)
13622 /* esp (for its index) and ebp (for its displacement) need
13623 the two-byte modrm form. */
13624 if (addr
== stack_pointer_rtx
13625 || addr
== arg_pointer_rtx
13626 || addr
== frame_pointer_rtx
13627 || addr
== hard_frame_pointer_rtx
)
13631 /* Direct Addressing. */
13632 else if (disp
&& !base
&& !index
)
13637 /* Find the length of the displacement constant. */
13640 if (base
&& satisfies_constraint_K (disp
))
13645 /* ebp always wants a displacement. */
13646 else if (base
== hard_frame_pointer_rtx
)
13649 /* An index requires the two-byte modrm form.... */
13651 /* ...like esp, which always wants an index. */
13652 || base
== stack_pointer_rtx
13653 || base
== arg_pointer_rtx
13654 || base
== frame_pointer_rtx
)
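  /* For reference: a simple register-indirect operand such as (%eax) needs
     no bytes beyond the modrm byte counted by the caller; %esp as a base
     always requires a SIB byte, and %ebp as a base always requires at least
     a one-byte displacement, which is why both get the special handling
     above.  */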
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
13664 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
13668 extract_insn_cached (insn
);
13669 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13670 if (CONSTANT_P (recog_data
.operand
[i
]))
13673 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
13677 switch (get_attr_mode (insn
))
13688 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13693 fatal_insn ("unknown insn mode", insn
);
13699 /* Compute default value for "length_address" attribute. */
13701 ix86_attr_length_address_default (rtx insn
)
13705 if (get_attr_type (insn
) == TYPE_LEA
)
13707 rtx set
= PATTERN (insn
);
13709 if (GET_CODE (set
) == PARALLEL
)
13710 set
= XVECEXP (set
, 0, 0);
13712 gcc_assert (GET_CODE (set
) == SET
);
13714 return memory_address_length (SET_SRC (set
));
13717 extract_insn_cached (insn
);
13718 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13719 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13721 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
13727 /* Return the maximum number of instructions a cpu can issue. */
13730 ix86_issue_rate (void)
13734 case PROCESSOR_PENTIUM
:
13738 case PROCESSOR_PENTIUMPRO
:
13739 case PROCESSOR_PENTIUM4
:
13740 case PROCESSOR_ATHLON
:
13742 case PROCESSOR_NOCONA
:
13743 case PROCESSOR_GENERIC32
:
13744 case PROCESSOR_GENERIC64
:
13752 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13753 by DEP_INSN and nothing set by DEP_INSN. */
13756 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13760 /* Simplify the test for uninteresting insns. */
13761 if (insn_type
!= TYPE_SETCC
13762 && insn_type
!= TYPE_ICMOV
13763 && insn_type
!= TYPE_FCMOV
13764 && insn_type
!= TYPE_IBR
)
13767 if ((set
= single_set (dep_insn
)) != 0)
13769 set
= SET_DEST (set
);
13772 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
13773 && XVECLEN (PATTERN (dep_insn
), 0) == 2
13774 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
13775 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13783 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
13786 /* This test is true if the dependent insn reads the flags but
13787 not any other potentially set register. */
13788 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
13791 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
13797 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13798 address with operands set by DEP_INSN. */
13801 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13805 if (insn_type
== TYPE_LEA
13808 addr
= PATTERN (insn
);
13810 if (GET_CODE (addr
) == PARALLEL
)
13811 addr
= XVECEXP (addr
, 0, 0);
13813 gcc_assert (GET_CODE (addr
) == SET
);
13815 addr
= SET_SRC (addr
);
13820 extract_insn_cached (insn
);
13821 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13822 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13824 addr
= XEXP (recog_data
.operand
[i
], 0);
13831 return modified_in_p (addr
, dep_insn
);
13835 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
13837 enum attr_type insn_type
, dep_insn_type
;
13838 enum attr_memory memory
;
13840 int dep_insn_code_number
;
13842 /* Anti and output dependencies have zero cost on all CPUs. */
13843 if (REG_NOTE_KIND (link
) != 0)
13846 dep_insn_code_number
= recog_memoized (dep_insn
);
13848 /* If we can't recognize the insns, we can't really do anything. */
13849 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
13852 insn_type
= get_attr_type (insn
);
13853 dep_insn_type
= get_attr_type (dep_insn
);
13857 case PROCESSOR_PENTIUM
:
13858 /* Address Generation Interlock adds a cycle of latency. */
13859 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13862 /* ??? Compares pair with jump/setcc. */
13863 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
13866 /* Floating point stores require value to be ready one cycle earlier. */
13867 if (insn_type
== TYPE_FMOV
13868 && get_attr_memory (insn
) == MEMORY_STORE
13869 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13873 case PROCESSOR_PENTIUMPRO
:
13874 memory
= get_attr_memory (insn
);
13876 /* INT->FP conversion is expensive. */
13877 if (get_attr_fp_int_src (dep_insn
))
13880 /* There is one cycle extra latency between an FP op and a store. */
13881 if (insn_type
== TYPE_FMOV
13882 && (set
= single_set (dep_insn
)) != NULL_RTX
13883 && (set2
= single_set (insn
)) != NULL_RTX
13884 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
13885 && GET_CODE (SET_DEST (set2
)) == MEM
)
13888 /* Show ability of reorder buffer to hide latency of load by executing
13889 in parallel with previous instruction in case
13890 previous instruction is not needed to compute the address. */
13891 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13892 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
	  /* Claim moves to take one cycle, as core can issue one load
	     at a time and the next load can start a cycle later.  */
13896 if (dep_insn_type
== TYPE_IMOV
13897 || dep_insn_type
== TYPE_FMOV
)
13905 memory
= get_attr_memory (insn
);
13907 /* The esp dependency is resolved before the instruction is really
13909 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
13910 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
13913 /* INT->FP conversion is expensive. */
13914 if (get_attr_fp_int_src (dep_insn
))
13917 /* Show ability of reorder buffer to hide latency of load by executing
13918 in parallel with previous instruction in case
13919 previous instruction is not needed to compute the address. */
13920 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13921 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
	  /* Claim moves to take one cycle, as core can issue one load
	     at a time and the next load can start a cycle later.  */
13925 if (dep_insn_type
== TYPE_IMOV
13926 || dep_insn_type
== TYPE_FMOV
)
13935 case PROCESSOR_ATHLON
:
13937 case PROCESSOR_GENERIC32
:
13938 case PROCESSOR_GENERIC64
:
13939 memory
= get_attr_memory (insn
);
13941 /* Show ability of reorder buffer to hide latency of load by executing
13942 in parallel with previous instruction in case
13943 previous instruction is not needed to compute the address. */
13944 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13945 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13947 enum attr_unit unit
= get_attr_unit (insn
);
	    /* Because of the difference between the length of integer and
	       floating unit pipeline preparation stages, the memory operands
	       for floating point are cheaper.

	       ??? For Athlon the difference is most probably 2.  */
13955 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
13958 loadcost
= TARGET_ATHLON
? 2 : 0;
13960 if (cost
>= loadcost
)
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
13978 ia32_multipass_dfa_lookahead (void)
13980 if (ix86_tune
== PROCESSOR_PENTIUM
)
13983 if (ix86_tune
== PROCESSOR_PENTIUMPRO
13984 || ix86_tune
== PROCESSOR_K6
)
13992 /* Compute the alignment given to a constant that is being placed in memory.
13993 EXP is the constant and ALIGN is the alignment that the object would
13995 The value of this function is used instead of that alignment to align
13999 ix86_constant_alignment (tree exp
, int align
)
14001 if (TREE_CODE (exp
) == REAL_CST
)
14003 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
14005 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
14008 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
14009 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
14010 return BITS_PER_WORD
;
14015 /* Compute the alignment for a static variable.
14016 TYPE is the data type, and ALIGN is the alignment that
14017 the object would ordinarily have. The value of this function is used
14018 instead of that alignment to align the object. */
14021 ix86_data_alignment (tree type
, int align
)
14023 int max_align
= optimize_size
? BITS_PER_WORD
: 256;
14025 if (AGGREGATE_TYPE_P (type
)
14026 && TYPE_SIZE (type
)
14027 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14028 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
14029 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
14030 && align
< max_align
)
14033 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14034 to 16byte boundary. */
14037 if (AGGREGATE_TYPE_P (type
)
14038 && TYPE_SIZE (type
)
14039 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14040 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
14041 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
14045 if (TREE_CODE (type
) == ARRAY_TYPE
)
14047 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
14049 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
14052 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
14055 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
14057 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
14060 else if ((TREE_CODE (type
) == RECORD_TYPE
14061 || TREE_CODE (type
) == UNION_TYPE
14062 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
14063 && TYPE_FIELDS (type
))
14065 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
14067 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
14070 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
14071 || TREE_CODE (type
) == INTEGER_TYPE
)
14073 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
14075 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
14082 /* Compute the alignment for a local variable.
14083 TYPE is the data type, and ALIGN is the alignment that
14084 the object would ordinarily have. The value of this macro is used
14085 instead of that alignment to align the object. */
14088 ix86_local_alignment (tree type
, int align
)
14090 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14091 to 16byte boundary. */
14094 if (AGGREGATE_TYPE_P (type
)
14095 && TYPE_SIZE (type
)
14096 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14097 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
14098 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
14101 if (TREE_CODE (type
) == ARRAY_TYPE
)
14103 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
14105 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
14108 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
14110 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
14112 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
14115 else if ((TREE_CODE (type
) == RECORD_TYPE
14116 || TREE_CODE (type
) == UNION_TYPE
14117 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
14118 && TYPE_FIELDS (type
))
14120 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
14122 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
14125 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
14126 || TREE_CODE (type
) == INTEGER_TYPE
)
14129 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
14131 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
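      /* The resulting 32-bit trampoline is the 10-byte sequence
	     b9 <cxt32>    movl $CXT, %ecx
	     e9 <disp32>   jmp  FNADDR
	 where the jump displacement is relative to the end of the trampoline
	 (tramp + 10) and %ecx carries the static chain.  */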
14159 /* Try to load address using shorter movl instead of movabs.
14160 We may want to support movq for kernel mode, but kernel does not use
14161 trampolines at the moment. */
14162 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
14164 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
14165 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14166 gen_int_mode (0xbb41, HImode
));
14167 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
14168 gen_lowpart (SImode
, fnaddr
));
14173 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14174 gen_int_mode (0xbb49, HImode
));
14175 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14179 /* Load static chain using movabs to r10. */
14180 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14181 gen_int_mode (0xba49, HImode
));
14182 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14185 /* Jump to the r11 */
14186 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14187 gen_int_mode (0xff49, HImode
));
14188 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
14189 gen_int_mode (0xe3, QImode
));
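      /* For 64-bit targets the stores above lay down (when the shorter movl
	 form is not usable)
	     49 bb <imm64>   movabs $FNADDR, %r11
	     49 ba <imm64>   movabs $CXT, %r10
	     49 ff e3        rex.WB jmp *%r11
	 0x49 is the REX.WB prefix, and the HImode constants are stored
	 little-endian, so e.g. 0xbb49 emits the bytes 49 bb.  */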
14191 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
14194 #ifdef ENABLE_EXECUTE_STACK
14195 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
14196 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
14200 /* Codes for all the SSE/MMX builtins. */
14203 IX86_BUILTIN_ADDPS
,
14204 IX86_BUILTIN_ADDSS
,
14205 IX86_BUILTIN_DIVPS
,
14206 IX86_BUILTIN_DIVSS
,
14207 IX86_BUILTIN_MULPS
,
14208 IX86_BUILTIN_MULSS
,
14209 IX86_BUILTIN_SUBPS
,
14210 IX86_BUILTIN_SUBSS
,
14212 IX86_BUILTIN_CMPEQPS
,
14213 IX86_BUILTIN_CMPLTPS
,
14214 IX86_BUILTIN_CMPLEPS
,
14215 IX86_BUILTIN_CMPGTPS
,
14216 IX86_BUILTIN_CMPGEPS
,
14217 IX86_BUILTIN_CMPNEQPS
,
14218 IX86_BUILTIN_CMPNLTPS
,
14219 IX86_BUILTIN_CMPNLEPS
,
14220 IX86_BUILTIN_CMPNGTPS
,
14221 IX86_BUILTIN_CMPNGEPS
,
14222 IX86_BUILTIN_CMPORDPS
,
14223 IX86_BUILTIN_CMPUNORDPS
,
14224 IX86_BUILTIN_CMPEQSS
,
14225 IX86_BUILTIN_CMPLTSS
,
14226 IX86_BUILTIN_CMPLESS
,
14227 IX86_BUILTIN_CMPNEQSS
,
14228 IX86_BUILTIN_CMPNLTSS
,
14229 IX86_BUILTIN_CMPNLESS
,
14230 IX86_BUILTIN_CMPNGTSS
,
14231 IX86_BUILTIN_CMPNGESS
,
14232 IX86_BUILTIN_CMPORDSS
,
14233 IX86_BUILTIN_CMPUNORDSS
,
14235 IX86_BUILTIN_COMIEQSS
,
14236 IX86_BUILTIN_COMILTSS
,
14237 IX86_BUILTIN_COMILESS
,
14238 IX86_BUILTIN_COMIGTSS
,
14239 IX86_BUILTIN_COMIGESS
,
14240 IX86_BUILTIN_COMINEQSS
,
14241 IX86_BUILTIN_UCOMIEQSS
,
14242 IX86_BUILTIN_UCOMILTSS
,
14243 IX86_BUILTIN_UCOMILESS
,
14244 IX86_BUILTIN_UCOMIGTSS
,
14245 IX86_BUILTIN_UCOMIGESS
,
14246 IX86_BUILTIN_UCOMINEQSS
,
14248 IX86_BUILTIN_CVTPI2PS
,
14249 IX86_BUILTIN_CVTPS2PI
,
14250 IX86_BUILTIN_CVTSI2SS
,
14251 IX86_BUILTIN_CVTSI642SS
,
14252 IX86_BUILTIN_CVTSS2SI
,
14253 IX86_BUILTIN_CVTSS2SI64
,
14254 IX86_BUILTIN_CVTTPS2PI
,
14255 IX86_BUILTIN_CVTTSS2SI
,
14256 IX86_BUILTIN_CVTTSS2SI64
,
14258 IX86_BUILTIN_MAXPS
,
14259 IX86_BUILTIN_MAXSS
,
14260 IX86_BUILTIN_MINPS
,
14261 IX86_BUILTIN_MINSS
,
14263 IX86_BUILTIN_LOADUPS
,
14264 IX86_BUILTIN_STOREUPS
,
14265 IX86_BUILTIN_MOVSS
,
14267 IX86_BUILTIN_MOVHLPS
,
14268 IX86_BUILTIN_MOVLHPS
,
14269 IX86_BUILTIN_LOADHPS
,
14270 IX86_BUILTIN_LOADLPS
,
14271 IX86_BUILTIN_STOREHPS
,
14272 IX86_BUILTIN_STORELPS
,
14274 IX86_BUILTIN_MASKMOVQ
,
14275 IX86_BUILTIN_MOVMSKPS
,
14276 IX86_BUILTIN_PMOVMSKB
,
14278 IX86_BUILTIN_MOVNTPS
,
14279 IX86_BUILTIN_MOVNTQ
,
14281 IX86_BUILTIN_LOADDQU
,
14282 IX86_BUILTIN_STOREDQU
,
14284 IX86_BUILTIN_PACKSSWB
,
14285 IX86_BUILTIN_PACKSSDW
,
14286 IX86_BUILTIN_PACKUSWB
,
14288 IX86_BUILTIN_PADDB
,
14289 IX86_BUILTIN_PADDW
,
14290 IX86_BUILTIN_PADDD
,
14291 IX86_BUILTIN_PADDQ
,
14292 IX86_BUILTIN_PADDSB
,
14293 IX86_BUILTIN_PADDSW
,
14294 IX86_BUILTIN_PADDUSB
,
14295 IX86_BUILTIN_PADDUSW
,
14296 IX86_BUILTIN_PSUBB
,
14297 IX86_BUILTIN_PSUBW
,
14298 IX86_BUILTIN_PSUBD
,
14299 IX86_BUILTIN_PSUBQ
,
14300 IX86_BUILTIN_PSUBSB
,
14301 IX86_BUILTIN_PSUBSW
,
14302 IX86_BUILTIN_PSUBUSB
,
14303 IX86_BUILTIN_PSUBUSW
,
14306 IX86_BUILTIN_PANDN
,
14310 IX86_BUILTIN_PAVGB
,
14311 IX86_BUILTIN_PAVGW
,
14313 IX86_BUILTIN_PCMPEQB
,
14314 IX86_BUILTIN_PCMPEQW
,
14315 IX86_BUILTIN_PCMPEQD
,
14316 IX86_BUILTIN_PCMPGTB
,
14317 IX86_BUILTIN_PCMPGTW
,
14318 IX86_BUILTIN_PCMPGTD
,
14320 IX86_BUILTIN_PMADDWD
,
14322 IX86_BUILTIN_PMAXSW
,
14323 IX86_BUILTIN_PMAXUB
,
14324 IX86_BUILTIN_PMINSW
,
14325 IX86_BUILTIN_PMINUB
,
14327 IX86_BUILTIN_PMULHUW
,
14328 IX86_BUILTIN_PMULHW
,
14329 IX86_BUILTIN_PMULLW
,
14331 IX86_BUILTIN_PSADBW
,
14332 IX86_BUILTIN_PSHUFW
,
14334 IX86_BUILTIN_PSLLW
,
14335 IX86_BUILTIN_PSLLD
,
14336 IX86_BUILTIN_PSLLQ
,
14337 IX86_BUILTIN_PSRAW
,
14338 IX86_BUILTIN_PSRAD
,
14339 IX86_BUILTIN_PSRLW
,
14340 IX86_BUILTIN_PSRLD
,
14341 IX86_BUILTIN_PSRLQ
,
14342 IX86_BUILTIN_PSLLWI
,
14343 IX86_BUILTIN_PSLLDI
,
14344 IX86_BUILTIN_PSLLQI
,
14345 IX86_BUILTIN_PSRAWI
,
14346 IX86_BUILTIN_PSRADI
,
14347 IX86_BUILTIN_PSRLWI
,
14348 IX86_BUILTIN_PSRLDI
,
14349 IX86_BUILTIN_PSRLQI
,
14351 IX86_BUILTIN_PUNPCKHBW
,
14352 IX86_BUILTIN_PUNPCKHWD
,
14353 IX86_BUILTIN_PUNPCKHDQ
,
14354 IX86_BUILTIN_PUNPCKLBW
,
14355 IX86_BUILTIN_PUNPCKLWD
,
14356 IX86_BUILTIN_PUNPCKLDQ
,
14358 IX86_BUILTIN_SHUFPS
,
14360 IX86_BUILTIN_RCPPS
,
14361 IX86_BUILTIN_RCPSS
,
14362 IX86_BUILTIN_RSQRTPS
,
14363 IX86_BUILTIN_RSQRTSS
,
14364 IX86_BUILTIN_SQRTPS
,
14365 IX86_BUILTIN_SQRTSS
,
14367 IX86_BUILTIN_UNPCKHPS
,
14368 IX86_BUILTIN_UNPCKLPS
,
14370 IX86_BUILTIN_ANDPS
,
14371 IX86_BUILTIN_ANDNPS
,
14373 IX86_BUILTIN_XORPS
,
14376 IX86_BUILTIN_LDMXCSR
,
14377 IX86_BUILTIN_STMXCSR
,
14378 IX86_BUILTIN_SFENCE
,
14380 /* 3DNow! Original */
14381 IX86_BUILTIN_FEMMS
,
14382 IX86_BUILTIN_PAVGUSB
,
14383 IX86_BUILTIN_PF2ID
,
14384 IX86_BUILTIN_PFACC
,
14385 IX86_BUILTIN_PFADD
,
14386 IX86_BUILTIN_PFCMPEQ
,
14387 IX86_BUILTIN_PFCMPGE
,
14388 IX86_BUILTIN_PFCMPGT
,
14389 IX86_BUILTIN_PFMAX
,
14390 IX86_BUILTIN_PFMIN
,
14391 IX86_BUILTIN_PFMUL
,
14392 IX86_BUILTIN_PFRCP
,
14393 IX86_BUILTIN_PFRCPIT1
,
14394 IX86_BUILTIN_PFRCPIT2
,
14395 IX86_BUILTIN_PFRSQIT1
,
14396 IX86_BUILTIN_PFRSQRT
,
14397 IX86_BUILTIN_PFSUB
,
14398 IX86_BUILTIN_PFSUBR
,
14399 IX86_BUILTIN_PI2FD
,
14400 IX86_BUILTIN_PMULHRW
,
14402 /* 3DNow! Athlon Extensions */
14403 IX86_BUILTIN_PF2IW
,
14404 IX86_BUILTIN_PFNACC
,
14405 IX86_BUILTIN_PFPNACC
,
14406 IX86_BUILTIN_PI2FW
,
14407 IX86_BUILTIN_PSWAPDSI
,
14408 IX86_BUILTIN_PSWAPDSF
,
14411 IX86_BUILTIN_ADDPD
,
14412 IX86_BUILTIN_ADDSD
,
14413 IX86_BUILTIN_DIVPD
,
14414 IX86_BUILTIN_DIVSD
,
14415 IX86_BUILTIN_MULPD
,
14416 IX86_BUILTIN_MULSD
,
14417 IX86_BUILTIN_SUBPD
,
14418 IX86_BUILTIN_SUBSD
,
14420 IX86_BUILTIN_CMPEQPD
,
14421 IX86_BUILTIN_CMPLTPD
,
14422 IX86_BUILTIN_CMPLEPD
,
14423 IX86_BUILTIN_CMPGTPD
,
14424 IX86_BUILTIN_CMPGEPD
,
14425 IX86_BUILTIN_CMPNEQPD
,
14426 IX86_BUILTIN_CMPNLTPD
,
14427 IX86_BUILTIN_CMPNLEPD
,
14428 IX86_BUILTIN_CMPNGTPD
,
14429 IX86_BUILTIN_CMPNGEPD
,
14430 IX86_BUILTIN_CMPORDPD
,
14431 IX86_BUILTIN_CMPUNORDPD
,
14432 IX86_BUILTIN_CMPNEPD
,
14433 IX86_BUILTIN_CMPEQSD
,
14434 IX86_BUILTIN_CMPLTSD
,
14435 IX86_BUILTIN_CMPLESD
,
14436 IX86_BUILTIN_CMPNEQSD
,
14437 IX86_BUILTIN_CMPNLTSD
,
14438 IX86_BUILTIN_CMPNLESD
,
14439 IX86_BUILTIN_CMPORDSD
,
14440 IX86_BUILTIN_CMPUNORDSD
,
14441 IX86_BUILTIN_CMPNESD
,
14443 IX86_BUILTIN_COMIEQSD
,
14444 IX86_BUILTIN_COMILTSD
,
14445 IX86_BUILTIN_COMILESD
,
14446 IX86_BUILTIN_COMIGTSD
,
14447 IX86_BUILTIN_COMIGESD
,
14448 IX86_BUILTIN_COMINEQSD
,
14449 IX86_BUILTIN_UCOMIEQSD
,
14450 IX86_BUILTIN_UCOMILTSD
,
14451 IX86_BUILTIN_UCOMILESD
,
14452 IX86_BUILTIN_UCOMIGTSD
,
14453 IX86_BUILTIN_UCOMIGESD
,
14454 IX86_BUILTIN_UCOMINEQSD
,
14456 IX86_BUILTIN_MAXPD
,
14457 IX86_BUILTIN_MAXSD
,
14458 IX86_BUILTIN_MINPD
,
14459 IX86_BUILTIN_MINSD
,
14461 IX86_BUILTIN_ANDPD
,
14462 IX86_BUILTIN_ANDNPD
,
14464 IX86_BUILTIN_XORPD
,
14466 IX86_BUILTIN_SQRTPD
,
14467 IX86_BUILTIN_SQRTSD
,
14469 IX86_BUILTIN_UNPCKHPD
,
14470 IX86_BUILTIN_UNPCKLPD
,
14472 IX86_BUILTIN_SHUFPD
,
14474 IX86_BUILTIN_LOADUPD
,
14475 IX86_BUILTIN_STOREUPD
,
14476 IX86_BUILTIN_MOVSD
,
14478 IX86_BUILTIN_LOADHPD
,
14479 IX86_BUILTIN_LOADLPD
,
14481 IX86_BUILTIN_CVTDQ2PD
,
14482 IX86_BUILTIN_CVTDQ2PS
,
14484 IX86_BUILTIN_CVTPD2DQ
,
14485 IX86_BUILTIN_CVTPD2PI
,
14486 IX86_BUILTIN_CVTPD2PS
,
14487 IX86_BUILTIN_CVTTPD2DQ
,
14488 IX86_BUILTIN_CVTTPD2PI
,
14490 IX86_BUILTIN_CVTPI2PD
,
14491 IX86_BUILTIN_CVTSI2SD
,
14492 IX86_BUILTIN_CVTSI642SD
,
14494 IX86_BUILTIN_CVTSD2SI
,
14495 IX86_BUILTIN_CVTSD2SI64
,
14496 IX86_BUILTIN_CVTSD2SS
,
14497 IX86_BUILTIN_CVTSS2SD
,
14498 IX86_BUILTIN_CVTTSD2SI
,
14499 IX86_BUILTIN_CVTTSD2SI64
,
14501 IX86_BUILTIN_CVTPS2DQ
,
14502 IX86_BUILTIN_CVTPS2PD
,
14503 IX86_BUILTIN_CVTTPS2DQ
,
14505 IX86_BUILTIN_MOVNTI
,
14506 IX86_BUILTIN_MOVNTPD
,
14507 IX86_BUILTIN_MOVNTDQ
,
14510 IX86_BUILTIN_MASKMOVDQU
,
14511 IX86_BUILTIN_MOVMSKPD
,
14512 IX86_BUILTIN_PMOVMSKB128
,
14514 IX86_BUILTIN_PACKSSWB128
,
14515 IX86_BUILTIN_PACKSSDW128
,
14516 IX86_BUILTIN_PACKUSWB128
,
14518 IX86_BUILTIN_PADDB128
,
14519 IX86_BUILTIN_PADDW128
,
14520 IX86_BUILTIN_PADDD128
,
14521 IX86_BUILTIN_PADDQ128
,
14522 IX86_BUILTIN_PADDSB128
,
14523 IX86_BUILTIN_PADDSW128
,
14524 IX86_BUILTIN_PADDUSB128
,
14525 IX86_BUILTIN_PADDUSW128
,
14526 IX86_BUILTIN_PSUBB128
,
14527 IX86_BUILTIN_PSUBW128
,
14528 IX86_BUILTIN_PSUBD128
,
14529 IX86_BUILTIN_PSUBQ128
,
14530 IX86_BUILTIN_PSUBSB128
,
14531 IX86_BUILTIN_PSUBSW128
,
14532 IX86_BUILTIN_PSUBUSB128
,
14533 IX86_BUILTIN_PSUBUSW128
,
14535 IX86_BUILTIN_PAND128
,
14536 IX86_BUILTIN_PANDN128
,
14537 IX86_BUILTIN_POR128
,
14538 IX86_BUILTIN_PXOR128
,
14540 IX86_BUILTIN_PAVGB128
,
14541 IX86_BUILTIN_PAVGW128
,
14543 IX86_BUILTIN_PCMPEQB128
,
14544 IX86_BUILTIN_PCMPEQW128
,
14545 IX86_BUILTIN_PCMPEQD128
,
14546 IX86_BUILTIN_PCMPGTB128
,
14547 IX86_BUILTIN_PCMPGTW128
,
14548 IX86_BUILTIN_PCMPGTD128
,
14550 IX86_BUILTIN_PMADDWD128
,
14552 IX86_BUILTIN_PMAXSW128
,
14553 IX86_BUILTIN_PMAXUB128
,
14554 IX86_BUILTIN_PMINSW128
,
14555 IX86_BUILTIN_PMINUB128
,
14557 IX86_BUILTIN_PMULUDQ
,
14558 IX86_BUILTIN_PMULUDQ128
,
14559 IX86_BUILTIN_PMULHUW128
,
14560 IX86_BUILTIN_PMULHW128
,
14561 IX86_BUILTIN_PMULLW128
,
14563 IX86_BUILTIN_PSADBW128
,
14564 IX86_BUILTIN_PSHUFHW
,
14565 IX86_BUILTIN_PSHUFLW
,
14566 IX86_BUILTIN_PSHUFD
,
14568 IX86_BUILTIN_PSLLW128
,
14569 IX86_BUILTIN_PSLLD128
,
14570 IX86_BUILTIN_PSLLQ128
,
14571 IX86_BUILTIN_PSRAW128
,
14572 IX86_BUILTIN_PSRAD128
,
14573 IX86_BUILTIN_PSRLW128
,
14574 IX86_BUILTIN_PSRLD128
,
14575 IX86_BUILTIN_PSRLQ128
,
14576 IX86_BUILTIN_PSLLDQI128
,
14577 IX86_BUILTIN_PSLLWI128
,
14578 IX86_BUILTIN_PSLLDI128
,
14579 IX86_BUILTIN_PSLLQI128
,
14580 IX86_BUILTIN_PSRAWI128
,
14581 IX86_BUILTIN_PSRADI128
,
14582 IX86_BUILTIN_PSRLDQI128
,
14583 IX86_BUILTIN_PSRLWI128
,
14584 IX86_BUILTIN_PSRLDI128
,
14585 IX86_BUILTIN_PSRLQI128
,
14587 IX86_BUILTIN_PUNPCKHBW128
,
14588 IX86_BUILTIN_PUNPCKHWD128
,
14589 IX86_BUILTIN_PUNPCKHDQ128
,
14590 IX86_BUILTIN_PUNPCKHQDQ128
,
14591 IX86_BUILTIN_PUNPCKLBW128
,
14592 IX86_BUILTIN_PUNPCKLWD128
,
14593 IX86_BUILTIN_PUNPCKLDQ128
,
14594 IX86_BUILTIN_PUNPCKLQDQ128
,
14596 IX86_BUILTIN_CLFLUSH
,
14597 IX86_BUILTIN_MFENCE
,
14598 IX86_BUILTIN_LFENCE
,
14600 /* Prescott New Instructions. */
14601 IX86_BUILTIN_ADDSUBPS
,
14602 IX86_BUILTIN_HADDPS
,
14603 IX86_BUILTIN_HSUBPS
,
14604 IX86_BUILTIN_MOVSHDUP
,
14605 IX86_BUILTIN_MOVSLDUP
,
14606 IX86_BUILTIN_ADDSUBPD
,
14607 IX86_BUILTIN_HADDPD
,
14608 IX86_BUILTIN_HSUBPD
,
14609 IX86_BUILTIN_LDDQU
,
14611 IX86_BUILTIN_MONITOR
,
14612 IX86_BUILTIN_MWAIT
,
14615 IX86_BUILTIN_PHADDW
,
14616 IX86_BUILTIN_PHADDD
,
14617 IX86_BUILTIN_PHADDSW
,
14618 IX86_BUILTIN_PHSUBW
,
14619 IX86_BUILTIN_PHSUBD
,
14620 IX86_BUILTIN_PHSUBSW
,
14621 IX86_BUILTIN_PMADDUBSW
,
14622 IX86_BUILTIN_PMULHRSW
,
14623 IX86_BUILTIN_PSHUFB
,
14624 IX86_BUILTIN_PSIGNB
,
14625 IX86_BUILTIN_PSIGNW
,
14626 IX86_BUILTIN_PSIGND
,
14627 IX86_BUILTIN_PALIGNR
,
14628 IX86_BUILTIN_PABSB
,
14629 IX86_BUILTIN_PABSW
,
14630 IX86_BUILTIN_PABSD
,
14632 IX86_BUILTIN_PHADDW128
,
14633 IX86_BUILTIN_PHADDD128
,
14634 IX86_BUILTIN_PHADDSW128
,
14635 IX86_BUILTIN_PHSUBW128
,
14636 IX86_BUILTIN_PHSUBD128
,
14637 IX86_BUILTIN_PHSUBSW128
,
14638 IX86_BUILTIN_PMADDUBSW128
,
14639 IX86_BUILTIN_PMULHRSW128
,
14640 IX86_BUILTIN_PSHUFB128
,
14641 IX86_BUILTIN_PSIGNB128
,
14642 IX86_BUILTIN_PSIGNW128
,
14643 IX86_BUILTIN_PSIGND128
,
14644 IX86_BUILTIN_PALIGNR128
,
14645 IX86_BUILTIN_PABSB128
,
14646 IX86_BUILTIN_PABSW128
,
14647 IX86_BUILTIN_PABSD128
,
14649 IX86_BUILTIN_VEC_INIT_V2SI
,
14650 IX86_BUILTIN_VEC_INIT_V4HI
,
14651 IX86_BUILTIN_VEC_INIT_V8QI
,
14652 IX86_BUILTIN_VEC_EXT_V2DF
,
14653 IX86_BUILTIN_VEC_EXT_V2DI
,
14654 IX86_BUILTIN_VEC_EXT_V4SF
,
14655 IX86_BUILTIN_VEC_EXT_V4SI
,
14656 IX86_BUILTIN_VEC_EXT_V8HI
,
14657 IX86_BUILTIN_VEC_EXT_V2SI
,
14658 IX86_BUILTIN_VEC_EXT_V4HI
,
14659 IX86_BUILTIN_VEC_SET_V8HI
,
14660 IX86_BUILTIN_VEC_SET_V4HI
,
14665 #define def_builtin(MASK, NAME, TYPE, CODE) \
14667 if ((MASK) & target_flags \
14668 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14669 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14670 NULL, NULL_TREE); \
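/* A typical use later in this file looks roughly like
     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
   (the type-node name here is only illustrative).  The builtin is
   registered only when the ISA bits in MASK are enabled, and MASK_64BIT
   entries only when compiling for 64-bit.  */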
14673 /* Bits for builtin_description.flag. */
14675 /* Set when we don't support the comparison natively, and should
14676 swap_comparison in order to support it. */
14677 #define BUILTIN_DESC_SWAP_OPERANDS 1
14679 struct builtin_description
14681 const unsigned int mask
;
14682 const enum insn_code icode
;
14683 const char *const name
;
14684 const enum ix86_builtins code
;
14685 const enum rtx_code comparison
;
14686 const unsigned int flag
;
14689 static const struct builtin_description bdesc_comi
[] =
14691 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
14692 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
14693 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
14694 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
14695 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
14696 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
14697 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
14698 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
14699 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
14700 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
14701 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
14702 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
14703 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
14704 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
14705 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
14706 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
14707 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
14708 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
14709 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
14710 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
14711 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
14712 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
14713 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
14714 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
14717 static const struct builtin_description bdesc_2arg
[] =
14720 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
14721 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
14722 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
14723 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
14724 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
14725 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
14726 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
14727 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
14729 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
14730 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
14731 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
14732 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
14733 BUILTIN_DESC_SWAP_OPERANDS
},
14734 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
14735 BUILTIN_DESC_SWAP_OPERANDS
},
14736 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
14737 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
14738 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
14739 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
14740 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
14741 BUILTIN_DESC_SWAP_OPERANDS
},
14742 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
14743 BUILTIN_DESC_SWAP_OPERANDS
},
14744 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
14745 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
14746 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
14747 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
14748 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
14749 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
14750 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
14751 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
14752 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
14753 BUILTIN_DESC_SWAP_OPERANDS
},
14754 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
14755 BUILTIN_DESC_SWAP_OPERANDS
},
14756 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
14758 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
14759 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
14760 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
14761 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
14763 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
14764 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
14765 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
14766 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
14768 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
14769 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
14770 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
14771 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
14772 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
14775 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
14776 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
14777 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
14778 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
14779 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
14780 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
14781 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
14782 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
14784 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
14785 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
14786 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
14787 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
14788 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
14789 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
14790 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
14791 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
14793 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
14794 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
14795 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
14797 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
14798 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
14799 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
14800 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
14802 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
14803 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
14805 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
14806 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
14807 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
14808 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
14809 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
14810 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
14812 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
14813 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
14814 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
14815 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
14817 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
14818 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
14819 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
14820 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
14821 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
14822 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
14825 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
14826 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
14827 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
14829 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
14830 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
14831 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
14833 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
14834 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
14835 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
14836 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
14837 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
14838 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
14840 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
14841 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
14842 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
14843 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
14844 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
14845 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
14847 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
14848 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
14849 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
14850 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
14852 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
14853 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
14856 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
14857 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
14858 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
14859 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
14860 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
14861 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
14862 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
14863 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
14865 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
14866 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
14867 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
14868 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
14869 BUILTIN_DESC_SWAP_OPERANDS
},
14870 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
14871 BUILTIN_DESC_SWAP_OPERANDS
},
14872 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
14873 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
14874 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
14875 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
14876 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
14877 BUILTIN_DESC_SWAP_OPERANDS
},
14878 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
14879 BUILTIN_DESC_SWAP_OPERANDS
},
14880 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
14881 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
14882 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
14883 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
14884 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
14885 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
14886 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
14887 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
14888 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
14890 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
14891 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
14892 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
14893 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
14895 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
14896 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
14897 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
14898 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
14900 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
14901 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
14902 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
14905 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
14906 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
14907 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
14908 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
14909 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
14910 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
14911 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
14912 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
14914 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
14915 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
14916 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
14917 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
14918 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
14919 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
14920 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
14921 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
14923 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
14924 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
14926 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
14927 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
14928 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
14929 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
14931 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
14932 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
14934 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
14935 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
14936 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
14937 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
14938 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
14939 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
14941 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
14942 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
14943 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
14944 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
14946 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
14947 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
14948 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
14949 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
14950 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
14951 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
14952 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
14953 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
14955 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
14956 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
14957 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
14959 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
14960 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
14962 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
14963 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
14965 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
14966 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
14967 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
14969 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
14970 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
14971 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
14973 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
14974 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
14976 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
14978 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
14979 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
14980 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
14981 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
14984 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
14985 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
14986 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
14987 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
14988 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
14989 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
14992 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
14993 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
14994 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
14995 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
14996 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
14997 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
14998 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
14999 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
15000 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
15001 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
15002 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
15003 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
15004 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
15005 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
15006 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
15007 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
15008 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
15009 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
15010 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
15011 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
15012 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
15013 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
15014 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
15015 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
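/* Illustrative note (not part of the original sources): each row above pairs
   one user-visible "__builtin_ia32_*" name with the insn pattern that
   implements it.  Assuming the usual mmintrin.h-style vector typedefs are in
   scope, a named 2-operand entry such as PMULLW is reached from user code
   roughly like this:

     __v4hi a, b;
     __v4hi prod = __builtin_ia32_pmullw (a, b);   // expands via CODE_FOR_mmx_mulv4hi3

   Rows whose name field is 0 carry only the icode; their user-visible names
   and prototypes are supplied by explicit def_builtin calls later in
   ix86_init_mmx_sse_builtins.  */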
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },

  /* SSSE3 */
  { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
};
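/* Illustrative note (assumption about usage, not taken verbatim from the
   sources): rows with a null name field, e.g. the SQRTPS row above, carry only
   the icode.  The user-visible spelling and prototype for those come from an
   explicit registration further down, for example

     def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);

   which pairs the hand-written function type with the bdesc_1arg entry that
   holds the insn code.  */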
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   portion of the builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node
    = build_pointer_type (build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node
    = build_pointer_type (build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node, V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node, V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node, V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node, V4SF_type_node, long_long_integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node, V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node, V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node, V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node, V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node, V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node, V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node, V4HI_type_node, long_long_unsigned_type_node, NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node, V2SI_type_node, long_long_unsigned_type_node, NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node, NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node, V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node, V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node, pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node, pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node, pdi_type_node, long_long_unsigned_type_node, NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node, pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node
    = build_pointer_type (build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node, V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node, V2DF_type_node, long_long_integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node, V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node, V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node, V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node, V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node, pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node, pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node, V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
				integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node, V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node, V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node, V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node, V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node, V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node, V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node, V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node, pchar_type_node, V16QI_type_node, NULL_TREE);
  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
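  /* Illustrative example (sketch, not from the sources): for the bdesc_2arg
     entry "__builtin_ia32_pmullw128" the insn CODE_FOR_mulv8hi3 has operand 1
     in V8HImode, so the switch above selects v8hi_ftype_v8hi_v8hi and the
     builtin is registered with the prototype

       v8hi __builtin_ia32_pmullw128 (v8hi, v8hi);   */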
  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_builtin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);

  /* SSSE3.  */
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
	       v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
	       IX86_BUILTIN_PALIGNR);
  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
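  /* Example usage of the vec_init builtins registered above (illustrative
     only, assuming an mmintrin.h-style __v2si typedef):

       __v2si v = __builtin_ia32_vec_init_v2si (1, 2);

     builds a two-element SImode vector from scalar arguments; the middle end
     maps the call onto the target's vec_init expander.  */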
  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);
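  /* Example usage (illustrative only): extracting lane 0 of a V4SF vector,

       float f = __builtin_ia32_vec_ext_v4sf (v, 0);

     The lane argument is expected to be a compile-time integer constant; see
     get_element_number later in this file for how it is validated.  */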
  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
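/* Usage sketch (illustrative): every vector operand in the expanders below is
   filtered through this helper, e.g.

     if (VECTOR_MODE_P (mode0))
       op0 = safe_vector_operand (op0, mode0);

   so that a scalar const0_rtx produced by expand_expr on erroneous source is
   replaced by CONST0_RTX (mode0), a zero vector of the expected mode.  */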
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
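/* Minimal sketch (assumption, not quoted from the sources) of how a caller
   would use the helper above when expanding one of the bdesc_2arg builtins:

     for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
       if (d->code == fcode)
	 return ix86_expand_binop_builtin (d->icode, arglist, target);

   i.e. the dispatcher looks the builtin code up in the table and delegates
   the actual RTL generation here.  */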
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
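
/* For example, for a V4HImode vector TYPE_VECTOR_SUBPARTS is 4, so the only
   selectors accepted here are the constants 0 through 3.  */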
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_normal (TREE_VALUE (arglist));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
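
/* For illustration: the MMX intrinsics in mmintrin.h are expected to funnel
   into these wrappers, e.g. an _mm_set_* style intrinsic expands to a
   __builtin_ia32_vec_init_* call that ends up here rather than in a
   vec_init pattern, precisely so that ordinary vector-looking code does
   not silently start using MMX registers.  */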
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2, mode3;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (TREE_VALUE (arglist));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
			     op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return 0;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;
    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
	emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
				       target, 1);

    case IX86_BUILTIN_PALIGNR:
    case IX86_BUILTIN_PALIGNR128:
      if (fcode == IX86_BUILTIN_PALIGNR)
	{
	  icode = CODE_FOR_ssse3_palignrdi;
	  mode = DImode;
	}
      else
	{
	  icode = CODE_FOR_ssse3_palignrti;
	  mode = V2DImode;
	}
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  op1 = copy_to_reg (op1);
	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (mode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
			     op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);

    default:
      break;
    }
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  gcc_unreachable ();
}
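
/* Any builtin not handled by the switch above is looked up in the
   bdesc_2arg, bdesc_1arg and bdesc_comi tables and dispatched to the
   generic binop/unop/comi expanders; reaching gcc_unreachable here would
   mean the builtin tables and this function disagree about FCODE.  */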
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
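
/* In other words: with a red zone (the 64-bit ABI) the operand is simply
   stored below the stack pointer, otherwise it is pushed; in both cases
   the returned MEM describes where the value now lives so the caller can
   reference it.  */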
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return class;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (class) ? class : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (class == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (class == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (class == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
	    return class;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
	return class;
      if (reg_class_subset_p (Q_REGS, class))
	return Q_REGS;
      return NO_REGS;
    }

  return class;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
enum reg_class
ix86_preferred_output_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;

  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
    {
      if (class == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (class == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (class) ? class : NO_REGS;
    }

  return class;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;

      /* ??? For the cost of one register reformat penalty, we could use
	 the same instructions to move SFmode and DFmode data, but the
	 relevant move patterns don't support those alternatives.  */
      if (mode == SFmode || mode == DFmode)
	return true;
    }

  return false;
}
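
/* For example, an SImode move between SSE_REGS and GENERAL_REGS can be done
   directly on SSE2 targets that allow inter-unit moves, while a DFmode
   value moving between the same classes is forced through memory by the
   checks above.  */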
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class class)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (class))
    return true;

  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */
      return (VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
	return 1;
      if (!TARGET_PARTIAL_REG_STALL)
	return 1;
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) >= 8
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (((int) GET_MODE_SIZE (mode)
		  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
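
/* As an example of the integer path above: a QImode load into a class
   outside Q_REGS is charged movzbl_load, while a QImode store outside
   Q_REGS is charged int_store[0] + 4.  */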
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (!GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      break;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	*total = 0;
      else
	switch (standard_80387_constant_p (x))
	  {
	  case 1: /* 0.0 */
	    *total = 1;
	    break;
	  default: /* Other constants */
	    *total = 2;
	    break;
	  case 0:
	  case -1:
	    /* Start with (MEM (SYMBOL_REF)), since that's where
	       it'll probably end up.  Add a penalty for size.  */
	    *total = (COSTS_N_INSNS (1)
		      + (flag_pic != 0 && !TARGET_64BIT)
		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
	    break;
	  }
      break;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = ix86_cost->add;
      else
	*total = ix86_cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = ix86_cost->movsx;
      break;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = ix86_cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && ix86_cost->lea <= ix86_cost->shift_const)
	    {
	      *total = ix86_cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = ix86_cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = ix86_cost->shift_var * 2;
	      else
		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    *total = ix86_cost->shift_const;
	  else
	    *total = ix86_cost->shift_var;
	}
      break;

    case MULT:
      if (FLOAT_MODE_P (mode))
	{
	  *total = ix86_cost->fmul;
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (GET_CODE (op1) == CONST_INT)
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
		    + nbits * ix86_cost->mult_bit
		    + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
	*total = ix86_cost->fdiv;
      else
	*total = ix86_cost->divide[MODE_INDEX (mode)];
      break;

    case PLUS:
      if (FLOAT_MODE_P (mode))
	*total = ix86_cost->fadd;
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = ix86_cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code);
		  *total += rtx_cost (XEXP (x, 1), outer_code);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = ix86_cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
		  *total += rtx_cost (XEXP (x, 1), outer_code);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = ix86_cost->lea;
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
	      *total += rtx_cost (XEXP (x, 1), outer_code);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
	{
	  *total = ix86_cost->fadd;
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
	{
	  *total = (ix86_cost->add * 2
		    + (rtx_cost (XEXP (x, 0), outer_code)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
	{
	  *total = ix86_cost->fchs;
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
	*total = ix86_cost->add * 2;
      else
	*total = ix86_cost->add;
      break;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (ix86_cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
		    + rtx_cost (const1_rtx, outer_code));
	  return true;
	}
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH
	  || mode == XFmode
	  || (mode == DFmode && !TARGET_SSE2))
	/* For standard 80387 constants, raise the cost to prevent
	   compress_float_constant() to generate load from memory.  */
	switch (standard_80387_constant_p (XEXP (x, 0)))
	  {
	  case -1:
	  case 0:
	    *total = 0;
	    break;
	  case 1: /* 0.0 */
	    *total = 1;
	    break;
	  default:
	    *total = (x86_ext_80387_constants & TUNEMASK
		      || optimize_size
		      ? 1
		      : 0);
	  }
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
	*total = ix86_cost->fabs;
      break;

    case SQRT:
      if (FLOAT_MODE_P (mode))
	*total = ix86_cost->fsqrt;
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    default:
      return false;
    }
  return false;
}
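
/* Worked example: (plus (mult (reg) (const_int 4)) (reg)) in SImode is
   recognized above as a single lea, so its cost is ix86_cost->lea plus the
   costs of the two register operands, rather than a multiply plus an
   add.  */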
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%edx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl\t%%eax\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");

  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}

static void
darwin_x86_file_end (void)
{
  darwin_file_end ();
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_MS_BITFIELD_LAYOUT &&
	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  int regno = 0;
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
	    tmp = (gen_rtx_SYMBOL_REF
		   (Pmode,
		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp, NULL_RTX);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
		    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
   window.  */

ix86_avoid_jump_misspredicts (void)

  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16 byte page.

     The smallest offset in the page INSN can start at is the case where START
     ends on the offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to a 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))

      nbytes += min_insn_size (insn);
      fprintf (dump_file, "Insn %i estimated to %i bytes\n",
               INSN_UID (insn), min_insn_size (insn));

      if ((GET_CODE (insn) == JUMP_INSN
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || GET_CODE (insn) == CALL_INSN)

          start = NEXT_INSN (start);
          if ((GET_CODE (start) == JUMP_INSN
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || GET_CODE (start) == CALL_INSN)
            njumps--, isjump = 1;

          nbytes -= min_insn_size (start);

      gcc_assert (njumps >= 0);
      fprintf (dump_file, "Interval %i to %i has %i bytes\n",
               INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)

          int padsize = 15 - nbytes + min_insn_size (insn);
          fprintf (dump_file, "Padding insn %i by %i bytes!\n",
                   INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
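/* Worked example of the padding computation above (illustrative numbers,
   not taken from the source): suppose the interval START..INSN holds four
   jumps/calls in nbytes = 12 bytes and INSN itself is
   min_insn_size (insn) = 2 bytes.  Then

     padsize = 15 - nbytes + min_insn_size (insn) = 15 - 12 + 2 = 5.

   In the worst case START's last byte sits at offset 0 of a 16 byte window,
   so INSN begins at offset nbytes - 2 + 1 = 11; up to 5 bytes of padding
   move it to offset 16, i.e. into the next window, so the four branches no
   longer share one 16 byte fetch window.  */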
/* The AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly preceded
   by another jump instruction.  We avoid the penalty by inserting a NOP just
   before the RET instructions in such cases.  */

ix86_pad_returns (void)

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)

      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
          || !maybe_hot_bb_p (bb))

      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)

      if (prev && GET_CODE (prev) == CODE_LABEL)

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))

          prev = prev_active_insn (ret);
          && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
              || GET_CODE (prev) == CALL_INSN))

      /* Empty functions get branch mispredict even when the jump destination
         is not visible to us.  */
      if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)

          emit_insn_before (gen_return_internal_long (), ret);
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */

  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
/* Return nonzero when QImode register that must be represented via REX prefix
   ...  */

x86_extended_QIreg_mentioned_p (rtx insn)

  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */

extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)

  unsigned int regno;

  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
/* Return true when INSN mentions register that must be encoded using REX
   ...  */

x86_extended_reg_mentioned_p (rtx insn)

  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

x86_emit_floatuns (rtx operands[2])

  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
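/* Illustrative C equivalent of the sequence emitted above (a sketch with a
   hypothetical helper name, not code from this file), for a DImode input
   converted to double:

     double u64_to_f (uint64_t u)
     {
       if ((int64_t) u >= 0)                 -- sign bit clear: plain signed
         return (double) (int64_t) u;        -- conversion is correct
       uint64_t half = (u >> 1) | (u & 1);   -- halve, keep low bit as sticky
       double d = (double) (int64_t) half;
       return d + d;                         -- double the converted half
     }

   The IOR of the discarded low bit keeps round-to-nearest correct when the
   halved value itself has to be rounded on conversion.  */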
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)

  enum machine_mode smode, wsmode, wvmode;

      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));

      if (TARGET_SSE || TARGET_3DNOW_A)

          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));

      /* Extend HImode to SImode using a paradoxical SUBREG.  */
      tmp1 = gen_reg_rtx (SImode);
      emit_move_insn (tmp1, gen_lowpart (SImode, val));
      /* Insert the SImode value as low element of V4SImode vector.  */
      tmp2 = gen_reg_rtx (V4SImode);
      tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                CONST0_RTX (V4SImode),
                                const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
      /* Cast the V4SImode vector back to a V8HImode vector.  */
      tmp1 = gen_reg_rtx (V8HImode);
      emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
      /* Duplicate the low short through the whole low SImode word.  */
      emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
      /* Cast the V8HImode vector back to a V4SImode vector.  */
      tmp2 = gen_reg_rtx (V4SImode);
      emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
      /* Replicate the low element of the V4SImode vector.  */
      emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
      /* Cast the V2SImode back to V8HImode, and store in target.  */
      emit_move_insn (target, gen_lowpart (V8HImode, tmp2));

      /* Extend QImode to SImode using a paradoxical SUBREG.  */
      tmp1 = gen_reg_rtx (SImode);
      emit_move_insn (tmp1, gen_lowpart (SImode, val));
      /* Insert the SImode value as low element of V4SImode vector.  */
      tmp2 = gen_reg_rtx (V4SImode);
      tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                CONST0_RTX (V4SImode),
                                const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
      /* Cast the V4SImode vector back to a V16QImode vector.  */
      tmp1 = gen_reg_rtx (V16QImode);
      emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
      /* Duplicate the low byte through the whole low SImode word.  */
      emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
      emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
      /* Cast the V16QImode vector back to a V4SImode vector.  */
      tmp2 = gen_reg_rtx (V4SImode);
      emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
      /* Replicate the low element of the V4SImode vector.  */
      emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
      /* Cast the V2SImode back to V16QImode, and store in target.  */
      emit_move_insn (target, gen_lowpart (V16QImode, tmp2));

      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
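/* Example of the widening recursion above (illustrative modes and value): to
   broadcast an HImode value v into V4HImode when the pshufw path is not
   available, v is first doubled up in SImode,

     w = ((uint32_t) v << 16) | (uint32_t) v;

   and w is then broadcast into V2SImode; reinterpreting that vector as
   V4HImode yields four copies of v.  */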
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var, int one_var)

  enum machine_mode vsimode;

      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));

      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
        new_target = gen_reg_rtx (mode);
      else
        new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));

      /* We need to shuffle the value to the correct position, so
         create a new pseudo to store the intermediate result.  */

      /* With SSE2, we can use the integer shuffle insns.  */
      if (mode != V4SFmode && TARGET_SSE2)

          emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
                                        GEN_INT (one_var == 1 ? 0 : 1),
                                        GEN_INT (one_var == 2 ? 0 : 1),
                                        GEN_INT (one_var == 3 ? 0 : 1)));
          if (target != new_target)
            emit_move_insn (target, new_target);

      /* Otherwise convert the intermediate result to V4SFmode and
         use the SSE1 shuffle instructions.  */
      if (mode != V4SFmode)

          tmp = gen_reg_rtx (V4SFmode);
          emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));

      emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
                                   GEN_INT (one_var == 1 ? 0 : 1),
                                   GEN_INT (one_var == 2 ? 0+4 : 1+4),
                                   GEN_INT (one_var == 3 ? 0+4 : 1+4)));

      if (mode != V4SFmode)
        emit_move_insn (target, gen_lowpart (V4SImode, tmp));
      else if (tmp != target)
        emit_move_insn (target, tmp);
      else if (target != new_target)
        emit_move_insn (target, new_target);

      vsimode = V4SImode;
      vsimode = V2SImode;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)

  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

      /* For the two element vectors, it's just as easy to use
         the general case.  */

      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);

          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);

          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);

      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));

      emit_move_insn (target, const_vec);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)

  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

      if (!mmx_ok && !TARGET_SSE)

      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;

      half_mode = V2SFmode;
      half_mode = V2SImode;

      /* For V4SF and V4SI, we implement a concat of two V2 vectors.
         Recurse to load the two halves.  */

      op0 = gen_reg_rtx (half_mode);
      v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
      ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

      op1 = gen_reg_rtx (half_mode);
      v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
      ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

      use_vec_concat = true;

      gcc_unreachable ();

  if (use_vec_concat)

      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));

      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)

          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)

              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);

        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)

          rtx tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);

      else if (n_words == 4)

          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));

        gcc_unreachable ();
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)

  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;

  for (i = 0; i < n_elts; ++i)

      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))

  /* Constants are best loaded from the constant pool.  */

      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));

  /* If all values are identical, broadcast the value.  */

      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */

      && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
                                              XVECEXP (vals, 0, one_var),

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)

  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (true, tmp, target, 1 - elt);
      tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      /* For the two element vectors, we implement a VEC_CONCAT with
         the extraction of the other element.  */

      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        op0 = val, op1 = tmp;
        op0 = tmp, op1 = val;
      tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      use_vec_merge = true;

          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_sse_unpcklps (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (1), GEN_INT (0),
                                       GEN_INT (2+4), GEN_INT (3+4)));

          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (0+4), GEN_INT (3+4)));

          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (2+4), GEN_INT (0+4)));

          gcc_unreachable ();

      /* Element 0 handled by vec_merge below.  */

      use_vec_merge = true;

          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);

      use_vec_merge = TARGET_SSE2;
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)

  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;

      use_vec_extr = true;

          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt+4), GEN_INT (elt+4)));

          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));

          gcc_unreachable ();

      use_vec_extr = true;

          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                        GEN_INT (elt), GEN_INT (elt),
                                        GEN_INT (elt), GEN_INT (elt)));

          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));

          gcc_unreachable ();

      use_vec_extr = true;

      /* For SSE1, we have to reuse the V4SF code.  */
      ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                  gen_lowpart (V4SFmode, vec), elt);

      use_vec_extr = TARGET_SSE2;
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

      /* ??? Could extract the appropriate HImode element and shift.  */

      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)

          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)

  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
                               GEN_INT (1), GEN_INT (1),
                               GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
/* Target hook for scalar_mode_supported_p.  */

ix86_scalar_mode_supported_p (enum machine_mode mode)

  if (DECIMAL_FLOAT_MODE_P (mode))

  return default_scalar_mode_supported_p (mode);
/* Implements target hook vector_mode_supported_p.  */

ix86_vector_mode_supported_p (enum machine_mode mode)

  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)

  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
                        clobbers);
/* Return true if this goes in small data/bss.  */

ix86_in_large_data_p (tree exp)

  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))

      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
          || strcmp (section, ".lbss") == 0)

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
ix86_encode_section_info (tree decl, rtx rtl, int first)

  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
/* Worker function for REVERSE_CONDITION.  */

ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)

  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   ...  */

output_387_reg_move (rtx insn, rtx *operands)

  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))

      if (REGNO (operands[0]) == FIRST_STACK_REG)
        return output_387_ffreep (operands, 0);
      return "fstp\t%y0";

  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

ix86_emit_fp_unordered_jump (rtx label)

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)

      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);

      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)

  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
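/* Notes on the threshold used above (informational): 0.2928932188134524...
   is 1 - sqrt(2)/2, the documented input range limit of the x87 fyl2xp1
   instruction (|x| < 1 - sqrt(2)/2).  Inside that range the expansion
   computes

     log1p (x) = ln (2) * log2 (1 + x)    -- fldln2 constant plus fyl2xp1

   which avoids the cancellation of forming 1 + x explicitly for small x;
   outside the range it falls back to fyl2x applied to the explicitly
   formed 1 + x.  */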
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

i386_solaris_elf_named_section (const char *name, unsigned int flags,

  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     ...  */
      && strcmp (name, ".eh_frame") == 0)

      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");

  default_elf_asm_named_section (name, flags, decl);
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)

  switch (TYPE_MODE (type))

      /* __float128 is "g".  */

      /* "long double" or __float80 is "e".  */
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

ix86_stack_protect_fail (void)

  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

asm_preferred_eh_data_format (int code, int global)

      int type = DW_EH_PE_sdata8;
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;

  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   ...  */

ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)

  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)

      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))

          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));

    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
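/* The two SETs above amount to plain bit operations (illustrative DFmode
   view, treating values as their 64-bit patterns):

     sgn    = sign      & 0x8000000000000000;   -- isolate the sign bit
     result = abs_value | sgn;                  -- attach it to the magnitude

   When MASK is supplied by ix86_expand_sse_fabs it is the complement of the
   sign-bit mask, which is why a NOT is applied to it before the AND.  */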
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   ...  */

ix86_expand_sse_fabs (rtx op0, rtx *smask)

  enum machine_mode mode = GET_MODE (op0);

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))

      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));

  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */

ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */

ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)

  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
  else
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
                                   gen_rtx_fmt_ee (code, mode, op0, op1)));
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */

ix86_gen_TWO52 (enum machine_mode mode)

  REAL_VALUE_TYPE TWO52r;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);
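/* Why 2**52 (and 2**23 for SFmode): adding TWO52 to a nonnegative double
   below 2**52 pushes the fraction bits out of the significand, so the
   addition itself rounds to an integer in the current rounding mode, and
   subtracting TWO52 recovers that integer.  Worked example (illustrative):

     x = 3.7
     x + 2**52  ->  4503599627370500.0   (rounded to nearest)
        - 2**52 ->  4.0                  == rint (3.7)  */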
/* Expand SSE sequence for computing lround from OP1 storing
   ...  */

ix86_expand_lround (rtx op0, rtx op1)

  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
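/* The constant loaded above is nextafter (0.5, 0.0), i.e. 0.5 - 2**-54 for
   DFmode, rather than 0.5 itself.  Illustrative reason: for
   op1 = 0.49999999999999994 (the largest double below 0.5), adding an exact
   0.5 would round up to 1.0 and truncate to 1, while adding the predecessor
   of 0.5 gives 0.9999999999999999, which truncates to the correct result 0.  */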
/* Expand SSE2 sequence for computing lround from OPERAND1 storing
   ...  */

ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)

  /* C code for the stuff we're doing below (for do_floor):
       xi -= (double)xi > op1 ? 1 : 0;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                       ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
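/* Compensation step example (illustrative values): for floor with
   op1 = -2.5 the truncating conversion gives xi = -2; since (double) -2
   is greater than -2.5 the code subtracts 1, yielding floor (-2.5) = -3.
   For ceil with op1 = 2.5, xi = 2 and (double) 2 is less than 2.5, so 1
   is added, yielding ceil (2.5) = 3.  */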
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */

ix86_expand_rint (rtx operand0, rtx operand1)

  /* C code for the stuff we're doing below:
       xa = fabs (operand1);
       if (!isless (xa, 2**52))
       xa = xa + 2**52 - 2**52;
       return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
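/* The fabs / copysign pair above keeps the sign of the input even when the
   rounded magnitude is zero.  Illustrative case: operand1 = -0.3 gives
   xa = 0.3, which the TWO52 add/subtract rounds to 0.0; copying the sign
   back yields -0.0, matching rint (-0.3).  Inputs with magnitude >= 2**52
   are already integers and are left untouched via the early branch.  */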
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   ...  */

ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)

  /* C code for the stuff we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
       xa = xa + TWO52 - TWO52;
       x2 = copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  expand_simple_binop (mode, MINUS,
                       xa, tmp, res, 0, OPTAB_DIRECT);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   ...  */

ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)

  /* C code for the stuff we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
       x2 = (double)(long)x;
       if (HONOR_SIGNED_ZEROS (mode))
         return copysign (x2, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                       xa, tmp, res, 0, OPTAB_DIRECT);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */

ix86_expand_rounddf_32 (rtx operand0, rtx operand1)

  /* C code for the stuff we expand below.
       double xa = fabs (x), xa2, x2;
       if (!isless (xa, TWO52))
       Using the absolute value and copying back sign makes
       -0.0 -> -0.0 correct.
       xa2 = xa + TWO52 - TWO52;
       else if (dxa > 0.5)
       x2 = copysign (xa2, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = gen_reg_rtx (mode);
  expand_simple_binop (mode, PLUS, xa, TWO52, xa2, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = gen_reg_rtx (mode);
  expand_simple_binop (mode, MINUS, xa2, xa, dxa, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = gen_reg_rtx (mode);
  expand_simple_binop (mode, PLUS, half, half, one, 0, OPTAB_DIRECT);
  mhalf = gen_reg_rtx (mode);
  expand_simple_binop (mode, MINUS, half, one, mhalf, 0, OPTAB_DIRECT);

  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  expand_simple_binop (mode, MINUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  expand_simple_binop (mode, PLUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
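/* The two compensations above turn the round-to-nearest-even result of the
   TWO52 trick into round-half-away-from-zero.  Illustrative case: x = 2.5
   gives xa2 = 2.0 (ties to even) and dxa = xa2 - xa = -0.5; since
   dxa <= -0.5 the code adds 1, producing 3.0, and the final copysign
   restores the sign for negative inputs, so round (-2.5) = -3.0.  */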
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   ...  */

ix86_expand_trunc (rtx operand0, rtx operand1)

  /* C code for SSE variant we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
       x2 = (double)(long)x;
       if (HONOR_SIGNED_ZEROS (mode))
         return copysign (x2, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   ...  */

ix86_expand_truncdf_32 (rtx operand0, rtx operand1)

  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask;

  /* C code for SSE variant we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
       xa2 = xa + TWO52 - TWO52;
       x2 = copysign (xa2, x);
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  expand_simple_binop (mode, PLUS, xa, TWO52, res, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);

  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  expand_simple_binop (mode, MINUS,
                       res, mask, res, 0, OPTAB_DIRECT);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   ...  */

ix86_expand_round (rtx operand0, rtx operand1)

  /* C code for the stuff we're doing below:
       double xa = fabs (x);
       if (!isless (xa, TWO52))
       xa = (double)(long)(xa + nextafter (0.5, 0.0));
       return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  expand_simple_binop (mode, PLUS, xa, half, xa, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
#include "gt-i386.h"