/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx
legitimize_dllimport_symbol (rtx
, bool);
/* Default stack-checking limit: -1 means no limit.  A target header may
   override this before we get here.  (The #endif was missing in the
   extracted text; restored.)  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The final
   ": 4" arm (index for all remaining modes) was missing in the extracted
   text, leaving the conditional expression unterminated; restored.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy table entry: always fall back to libcall.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
132 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
133 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
134 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost
= { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
203 DUMMY_STRINGOP_ALGS
},
204 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
205 DUMMY_STRINGOP_ALGS
},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost
= { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
275 DUMMY_STRINGOP_ALGS
},
276 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
277 DUMMY_STRINGOP_ALGS
},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost
= {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
345 DUMMY_STRINGOP_ALGS
},
346 {{libcall
, {{-1, rep_prefix_4_byte
}}},
347 DUMMY_STRINGOP_ALGS
},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost
= {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
420 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
421 DUMMY_STRINGOP_ALGS
},
422 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
423 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
424 DUMMY_STRINGOP_ALGS
},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost
= {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
493 DUMMY_STRINGOP_ALGS
},
494 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
495 DUMMY_STRINGOP_ALGS
},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost
= {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
566 DUMMY_STRINGOP_ALGS
},
567 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
568 DUMMY_STRINGOP_ALGS
},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost
= {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
639 DUMMY_STRINGOP_ALGS
},
640 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
641 DUMMY_STRINGOP_ALGS
},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost
= {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
717 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
718 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
719 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
720 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost
= {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
804 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
805 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
806 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
807 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Instruction cost model for the Intel Pentium 4 (NetBurst).  All cycle
   counts are expressed via COSTS_N_INSNS relative to a one-cycle add;
   move costs are relative to a reg-reg move of cost 2.
   NOTE(review): this extract appears corrupted — the embedded original
   line numbering jumps (840->842, 866->868, 876->878) and the closing
   "};" is absent, so fields such as MOVE_RATIO, the branch cost, and the
   tail of the memset stringop descriptor look dropped.  Verify against
   upstream gcc/config/i386/i386.c before relying on this table.  */
822 struct processor_costs pentium4_cost
= {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy stringop strategy: sized thresholds mapping block size to
   algorithm; DUMMY_STRINGOP_ALGS fills the unused 64-bit slot.  */
874 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
875 DUMMY_STRINGOP_ALGS
},
/* memset stringop strategy.  NOTE(review): the "{-1, libcall}" tail of
   this entry (original line 877) appears missing here.  */
876 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
878 DUMMY_STRINGOP_ALGS
},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Instruction cost model for Intel Nocona (64-bit Pentium 4 / Xeon).
   Cycle counts via COSTS_N_INSNS relative to a one-cycle add.
   NOTE(review): extraction appears lossy — embedded numbering jumps
   (911->913, 937->939, 948->950) and no closing "};" is visible;
   verify against upstream gcc/config/i386/i386.c.  */
893 struct processor_costs nocona_cost
= {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy stringop strategy: {max-size, algorithm} thresholds for the
   32-bit and 64-bit variants respectively.  */
945 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
946 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
947 {100000, unrolled_loop
}, {-1, libcall
}}}},
/* memset stringop strategy.  NOTE(review): the tail of the 32-bit entry
   (original line 949) appears dropped before the 64-bit entry.  */
948 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
950 {libcall
, {{24, loop
}, {64, unrolled_loop
},
951 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Instruction cost model for Intel Core 2.  Cycle counts via
   COSTS_N_INSNS relative to a one-cycle add; lea is modelled as
   slightly more than one add (COSTS_N_INSNS (1) + 1).
   NOTE(review): numbering jumps 984->986 and no closing "};" is
   visible — verify against upstream gcc/config/i386/i386.c.  */
966 struct processor_costs core2_cost
= {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy stringop strategy (32-bit then 64-bit variant).  */
1018 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1019 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1020 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
/* memset stringop strategy (32-bit then 64-bit variant).  */
1021 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1022 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1023 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1024 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
/* Instruction cost model for Intel Atom (in-order Bonnell core).
   Cycle counts via COSTS_N_INSNS relative to a one-cycle add.
   NOTE(review): no closing "};" is visible in this extract — verify
   against upstream gcc/config/i386/i386.c.  */
1039 struct processor_costs atom_cost
= {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy stringop strategy (32-bit then 64-bit variant).  */
1091 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1092 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1093 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
/* memset stringop strategy (32-bit then 64-bit variant).  */
1094 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1095 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1096 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1097 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
/* Blended 64-bit tuning ("generic64"): costs chosen as a compromise
   between Nocona and K8 (see comment below).  The 32-bit stringop
   slots are DUMMY_STRINGOP_ALGS since this tuning is 64-bit only.
   NOTE(review): no closing "};" visible in this extract.  */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
1113 struct processor_costs generic64_cost
= {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy stringop strategy: 32-bit slot unused (dummy).  */
1171 {DUMMY_STRINGOP_ALGS
,
1172 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
/* memset stringop strategy: 32-bit slot unused (dummy).  */
1173 {DUMMY_STRINGOP_ALGS
,
1174 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
/* Blended 32-bit tuning ("generic32"): a compromise across the common
   32-bit targets listed below.  The 64-bit stringop slots are
   DUMMY_STRINGOP_ALGS since this tuning is 32-bit only.
   NOTE(review): no closing "};" visible in this extract.  */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1190 struct processor_costs generic32_cost
= {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy stringop strategy: 64-bit slot unused (dummy).  */
1242 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1243 DUMMY_STRINGOP_ALGS
},
/* memset stringop strategy: 64-bit slot unused (dummy).  */
1244 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1245 DUMMY_STRINGOP_ALGS
},
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
/* Pointer to the cost table for the processor currently being tuned
   for; re-pointed at option-processing time.  Statically initialized
   to pentium_cost (defined earlier in this file, outside this view).  */
1259 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* One-bit-per-processor masks used to build the tuning/arch feature
   tables below.  Each m_* is (1 << PROCESSOR_*); the compound masks
   (m_K6_GEODE, m_ATHLON_K8, m_AMD_MULTIPLE, m_GENERIC) OR together
   related processors so a feature can be enabled for a family at once.  */
1261 /* Processor feature/optimization bitmasks. */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Per-feature boolean array, indexed by X86_TUNE_*; filled in from
   initial_ix86_tune_features (below) for the selected processor.  */
1287 /* Feature tests against the various tunings. */
1288 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
/* Table of processor-mask initializers, one entry per X86_TUNE_* flag,
   in enum order.  An entry is an OR of m_* masks naming the processors
   for which the tuning applies; ~(...) enables it for all but the named
   processors.
   NOTE(review): this extract appears heavily corrupted — the embedded
   original line numbering skips many lines (e.g. 1297->1299, 1303->1306,
   1316->1319, 1426->1429, 1467->1470, 1532->...), so entries whose value
   was a bare constant (e.g. 0 or ~0) and several trailing commas and the
   closing "};" look dropped.  Do not hand-edit this table from this
   copy; verify each entry against upstream gcc/config/i386/i386.c.  */
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
1292 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
m_386
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_CORE2
| m_GENERIC64
,
1299 /* X86_TUNE_PUSH_MEMORY */
m_386
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_PENT4
1301 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486
| m_PENT
| m_ATOM
| m_PPRO
| m_AMD_MULTIPLE
| m_K6
1308 | m_CORE2
| m_GENERIC
,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM
| m_PPRO
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_PENT4
| m_GENERIC
,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM
| m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1324 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE
| m_ATOM
| m_PPRO
| m_PENT4
| m_NOCONA
1329 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2
| m_GENERIC
,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386
| m_486
| m_K6_GEODE
,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO
| m_AMD_MULTIPLE
| m_PENT
| m_ATOM
| m_CORE2
| m_GENERIC
),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT
| m_ATOM
| m_K6
| m_CORE2
| m_GENERIC
),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE
| m_PENT
| m_ATOM
| m_386
| m_486
| m_AMD_MULTIPLE
1370 | m_CORE2
| m_GENERIC
/* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT
| m_486
| m_386
),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386
| m_PENT4
| m_NOCONA
,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM
| m_AMD_MULTIPLE
| m_K6_GEODE
| m_PENT4
| m_NOCONA
1395 | m_CORE2
| m_GENERIC
,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE
| m_ATOM
| m_PPRO
| m_K6_GEODE
| m_386
1399 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE
| m_ATOM
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE
| m_ATOM
| m_PPRO
| m_386
| m_486
1407 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE
| m_ATOM
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1412 | m_GENERIC
| m_GEODE
),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE
| m_ATOM
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO
| m_PENT4
| m_NOCONA
,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE
| m_ATOM
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8
| m_ATOM
| m_PPRO
| m_CORE2
| m_GENERIC
,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8
| m_ATOM
| m_PPRO
| m_CORE2
| m_GENERIC
,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM
| m_PPRO
| m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO
| m_AMD_MULTIPLE
| m_K6_GEODE
| m_PENT
| m_ATOM
| m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE
| m_ATOM
| m_CORE2
| m_GENERIC
,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4
| m_NOCONA
| m_GENERIC
| m_ATOM
),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE
| m_CORE2
| m_GENERIC
,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE
| m_ATHLON_K8
| m_ATOM
| m_PENT4
| m_NOCONA
| m_PPRO
1485 | m_CORE2
| m_GENERIC
,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10
| m_GENERIC
,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
/* Per-feature boolean array, indexed by X86_ARCH_*; filled in from
   initial_ix86_arch_features (below) for the selected architecture.  */
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
/* Processor-mask initializers for the X86_ARCH_* architecture flags,
   in enum order.  NOTE(review): the value lines for the CMPXCHG,
   CMPXCHG8B, XADD and BSWAP entries and the closing "};" are not
   visible in this extract (embedded numbering skips 1546-1547,
   1549-1550, 1552-1553, 1555-1556) — verify against upstream
   gcc/config/i386/i386.c.  */
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
1541 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386
| m_486
| m_PENT
| m_K6
),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors for which outgoing argument space is accumulated in the
   prologue rather than pushed per call.
   NOTE(review): this initializer is visibly truncated — the expression
   ends at "| m_CORE2" with no terminating ";" (original line 1560,
   presumably "| m_GENERIC;", is missing from this extract).  */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE
| m_ATOM
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
/* Processors on which the x87 "fancy math" instructions are always
   usable (mask of m_* processor bits).  */
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT
| m_ATOM
| m_PPRO
| m_AMD_MULTIPLE
| m_PENT4
1564 | m_NOCONA
| m_CORE2
| m_GENERIC
;
/* Stringop algorithm forced by command-line option; no_stringop means
   "not forced, pick from the cost tables".  */
1566 static enum stringop_alg stringop_alg
= no_stringop
;
/* NOTE(review): the comment below is cut off mid-sentence in this
   extract (original line 1570 missing).  */
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1571 #define FAST_PROLOGUE_INSN_COUNT 20
/* Printable register-name tables; the *_REGISTER_NAMES initializer
   macros come from i386.h.  */
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1574 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1575 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1576 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
/* Smallest register class per hard register number.
   NOTE(review): the opening "{" of the initializer and the closing
   "};" lines appear to have been dropped in this extract (embedded
   numbering skips 1580/1582 and ends at 1604) — verify against
   upstream gcc/config/i386/i386.c before editing.  */
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1581 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1583 /* ax, dx, cx, bx */
1584 AREG
, DREG
, CREG
, BREG
,
1585 /* si, di, bp, sp */
1586 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
/* x87 floating-point stack registers */
1588 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1589 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
/* SSE registers xmm0-xmm5 (xmm0 gets its own class) */
1595 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
/* MMX registers */
1598 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
/* REX extended integer registers */
1601 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1602 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1603 /* SSE REX registers */
1604 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
/* Debug-register maps: for each GCC hard register number, the register
   number to emit in stabs/DWARF debug info (-1 = no debug encoding).
   NOTE(review): the braces around both initializers are not visible in
   this chunk.  */
1608 /* The "default" register map used in 32bit mode. */
1610 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
1623 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
/* SVR4 DWARF register numbering (see the long comment above for the
   rationale).  Indexed by GCC hard register number; -1 = no encoding.
   NOTE(review): the braces around this initializer are not visible in
   this chunk.  */
1688 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
/* Operands of the pending comparison; set by the cmp/test expanders and
   consumed later when a branch or setcc pattern is expanded.  */
1702 rtx ix86_compare_op0
= NULL_RTX
;
1703 rtx ix86_compare_op1
= NULL_RTX
;
1705 /* Define parameter passing and return registers. */
/* Integer argument registers for the SysV x86-64 ABI, in argument order
   (rdi, rsi, rdx, rcx, r8, r9).  NOTE(review): the braces around these
   three initializers are not visible in this chunk.  */
1707 static int const x86_64_int_parameter_registers
[6] =
1709 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
/* Integer argument registers for the Microsoft x64 ABI, in argument
   order (rcx, rdx, r8, r9).  */
1712 static int const x86_64_ms_abi_int_parameter_registers
[4] =
1714 CX_REG
, DX_REG
, R8_REG
, R9_REG
/* Integer value-return registers for the SysV x86-64 ABI.  */
1717 static int const x86_64_int_return_registers
[4] =
1719 AX_REG
, DX_REG
, DI_REG
, SI_REG
1722 /* Define the structure for the machine field in struct function. */
/* One cached stack slot; linked list hangs off struct machine_function.
   NOTE(review): some members of this struct (between "mode" and "next")
   and its closing brace are not visible in this chunk.  */
1724 struct GTY(()) stack_local_entry
{
1725 unsigned short mode
;
1728 struct stack_local_entry
*next
;
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1761 HOST_WIDE_INT frame
;
1763 int outgoing_arguments_size
;
1766 HOST_WIDE_INT to_allocate
;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset
;
1769 HOST_WIDE_INT hard_frame_pointer_offset
;
1770 HOST_WIDE_INT stack_pointer_offset
;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov
;
/* Global option/target state set up by override_options and the option
   handlers below.  Non-static variables are read from i386.h macros and
   from the .md files.  */
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel
;
/* -masm= dialect (AT&T vs Intel syntax); defaults to AT&T.  */
1780 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
/* -mtls-dialect=; defaults to the GNU TLS sequences.  */
1782 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath
;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule
;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune
;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch
;
1796 /* true if sse prefetch instruction is not NOOP. */
1797 int x86_prefetch_sse
;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm
;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer
;
1804 static const char ix86_force_align_arg_pointer_string
[]
1805 = "force_align_arg_pointer";
/* Insn generator hooks selected once per function depending on whether
   we are emitting 32-bit (SImode) or 64-bit (DImode) pointer arithmetic;
   avoids testing TARGET_64BIT at every emission site.  */
1807 static rtx (*ix86_gen_leave
) (void);
1808 static rtx (*ix86_gen_pop1
) (rtx
);
1809 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
1810 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
1811 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
);
1812 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
1813 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
1814 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary
;
1819 /* Alignment for incoming stack boundary in bits specified at
1821 static unsigned int ix86_user_incoming_stack_boundary
;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary
;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary
;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi
;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost
;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node
;
1837 static GTY(()) tree ms_va_list_type_node
;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold
= 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix
[16];
1846 int internal_label_prefix_len
;
1848 /* Fence to use after loop using movnt. */
1851 /* Register class used for passing given 64bit part of the argument.
1852 These represent classes as documented by the PS ABI, with the exception
1853 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1854 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (upper half does contain padding). */
/* NOTE(review): several enumerators of this enum (NO_CLASS, SSE*, X87*,
   MEMORY_CLASS) and its closing brace are not visible in this chunk.  */
1858 enum x86_64_reg_class
1861 X86_64_INTEGER_CLASS
,
1862 X86_64_INTEGERSI_CLASS
,
1869 X86_64_COMPLEX_X87_CLASS
,
/* Maximum number of eightbytes (64-bit chunks) a classified argument can
   occupy under the psABI classification algorithm.  */
1873 #define MAX_CLASSES 4
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
/* Lazily-initialized guard for the table above.  */
1877 static bool ext_80387_constants_init
= 0;
1880 static struct machine_function
* ix86_init_machine_status (void);
1881 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
1882 static int ix86_function_regparm (const_tree
, const_tree
);
1883 static void ix86_compute_frame_layout (struct ix86_frame
*);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1886 static void ix86_add_new_builtins (int);
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH
,
1891 IX86_FUNCTION_SPECIFIC_TUNE
,
1892 IX86_FUNCTION_SPECIFIC_FPMATH
,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
1899 static void ix86_function_specific_save (struct cl_target_option
*);
1900 static void ix86_function_specific_restore (struct cl_target_option
*);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option
*);
1903 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree
, char *[]);
1905 static bool ix86_can_inline_p (tree
, tree
);
1906 static void ix86_set_current_function (tree
);
1908 static enum calling_abi
ix86_function_abi (const_tree
);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted
;
1919 static int ix86_arch_specified
;
1921 /* Bit flags that specify the ISA we are compiling for. */
/* Seeded from the configure-time defaults; individual OPTION_MASK_ISA_*
   bits are then set/cleared by ix86_handle_option.  */
1922 int ix86_isa_flags
= TARGET_64BIT_DEFAULT
| TARGET_SUBTARGET_ISA_DEFAULT
;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
/* Lets override_options distinguish "user asked for this" from "implied
   by -march=" when resolving conflicts.  */
1926 static int ix86_isa_flags_explicit
;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
/* Each *_SET macro is "this ISA plus everything it implies"; they chain
   so that, e.g., enabling SSE4.2 transitively enables SSE4.1 ... SSE.  */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
/* SSE4A (AMD) only implies SSE3, not SSSE3 -- it is a separate branch of
   the implication chain from the Intel SSE4.x line.  */
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
/* ABM implies POPCNT (POPCNT_SET is just POPCNT, so this is equivalent
   to using OPTION_MASK_ISA_POPCNT_SET).  */
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1973 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1975 /* Define a set of ISAs which aren't available when a given ISA is
1976 disabled. MMX and SSE ISAs are handled separately. */
/* The *_UNSET macros chain in the opposite direction: disabling an ISA
   also disables everything that depends on it.  */
1978 #define OPTION_MASK_ISA_MMX_UNSET \
1979 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_UNSET \
1981 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1982 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1984 #define OPTION_MASK_ISA_SSE_UNSET \
1985 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1986 #define OPTION_MASK_ISA_SSE2_UNSET \
1987 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1988 #define OPTION_MASK_ISA_SSE3_UNSET \
1989 (OPTION_MASK_ISA_SSE3 \
1990 | OPTION_MASK_ISA_SSSE3_UNSET \
1991 | OPTION_MASK_ISA_SSE4A_UNSET )
1992 #define OPTION_MASK_ISA_SSSE3_UNSET \
1993 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1995 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1996 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1997 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1998 #define OPTION_MASK_ISA_AVX_UNSET \
1999 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
2000 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2002 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
2004 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2006 #define OPTION_MASK_ISA_SSE4A_UNSET \
2007 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2008 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2009 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2010 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2011 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2012 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2013 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2014 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2015 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2018 /* Vectorization library interface and handlers. */
/* Hook consulted by the vectorizer to map a scalar built-in to a
   vectorized library routine; NULL unless -mveclibabi= selected one of
   the two implementations declared below.  */
2019 tree (*ix86_veclib_handler
)(enum built_in_function
, tree
, tree
) = NULL
;
2020 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2021 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2023 /* Processor target table, indexed by processor number */
2026 const struct processor_costs
*cost
; /* Processor costs */
2027 const int align_loop
; /* Default alignments. */
2028 const int align_loop_max_skip
;
2029 const int align_jump
;
2030 const int align_jump_max_skip
;
2031 const int align_func
;
2034 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2036 {&i386_cost
, 4, 3, 4, 3, 4},
2037 {&i486_cost
, 16, 15, 16, 15, 16},
2038 {&pentium_cost
, 16, 7, 16, 7, 16},
2039 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2040 {&geode_cost
, 0, 0, 0, 0, 0},
2041 {&k6_cost
, 32, 7, 32, 7, 32},
2042 {&athlon_cost
, 16, 7, 16, 7, 16},
2043 {&pentium4_cost
, 0, 0, 0, 0, 0},
2044 {&k8_cost
, 16, 7, 16, 7, 16},
2045 {&nocona_cost
, 0, 0, 0, 0, 0},
2046 {&core2_cost
, 16, 10, 16, 10, 16},
2047 {&generic32_cost
, 16, 7, 16, 7, 16},
2048 {&generic64_cost
, 16, 10, 16, 10, 16},
2049 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2050 {&atom_cost
, 16, 7, 16, 7, 16}
2053 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2079 /* Implement TARGET_HANDLE_OPTION. */
2082 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
2089 ix86_isa_flags
|= OPTION_MASK_ISA_MMX_SET
;
2090 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX_SET
;
2094 ix86_isa_flags
&= ~OPTION_MASK_ISA_MMX_UNSET
;
2095 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX_UNSET
;
2102 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_SET
;
2103 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW_SET
;
2107 ix86_isa_flags
&= ~OPTION_MASK_ISA_3DNOW_UNSET
;
2108 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW_UNSET
;
2118 ix86_isa_flags
|= OPTION_MASK_ISA_SSE_SET
;
2119 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE_SET
;
2123 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE_UNSET
;
2124 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE_UNSET
;
2131 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2_SET
;
2132 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2_SET
;
2136 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE2_UNSET
;
2137 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2_UNSET
;
2144 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3_SET
;
2145 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3_SET
;
2149 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE3_UNSET
;
2150 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3_UNSET
;
2157 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3_SET
;
2158 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3_SET
;
2162 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSSE3_UNSET
;
2163 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3_UNSET
;
2170 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1_SET
;
2171 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1_SET
;
2175 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_1_UNSET
;
2176 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1_UNSET
;
2183 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2_SET
;
2184 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2_SET
;
2188 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_2_UNSET
;
2189 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2_UNSET
;
2196 ix86_isa_flags
|= OPTION_MASK_ISA_AVX_SET
;
2197 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_AVX_SET
;
2201 ix86_isa_flags
&= ~OPTION_MASK_ISA_AVX_UNSET
;
2202 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_AVX_UNSET
;
2209 ix86_isa_flags
|= OPTION_MASK_ISA_FMA_SET
;
2210 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_FMA_SET
;
2214 ix86_isa_flags
&= ~OPTION_MASK_ISA_FMA_UNSET
;
2215 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_FMA_UNSET
;
2220 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_SET
;
2221 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_SET
;
2225 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_UNSET
;
2226 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_UNSET
;
2232 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A_SET
;
2233 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A_SET
;
2237 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4A_UNSET
;
2238 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A_UNSET
;
2245 ix86_isa_flags
|= OPTION_MASK_ISA_SSE5_SET
;
2246 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE5_SET
;
2250 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE5_UNSET
;
2251 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE5_UNSET
;
2258 ix86_isa_flags
|= OPTION_MASK_ISA_ABM_SET
;
2259 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_ABM_SET
;
2263 ix86_isa_flags
&= ~OPTION_MASK_ISA_ABM_UNSET
;
2264 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_ABM_UNSET
;
2271 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT_SET
;
2272 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_POPCNT_SET
;
2276 ix86_isa_flags
&= ~OPTION_MASK_ISA_POPCNT_UNSET
;
2277 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_POPCNT_UNSET
;
2284 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF_SET
;
2285 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SAHF_SET
;
2289 ix86_isa_flags
&= ~OPTION_MASK_ISA_SAHF_UNSET
;
2290 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SAHF_UNSET
;
2297 ix86_isa_flags
|= OPTION_MASK_ISA_CX16_SET
;
2298 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_CX16_SET
;
2302 ix86_isa_flags
&= ~OPTION_MASK_ISA_CX16_UNSET
;
2303 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_CX16_UNSET
;
2310 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE_SET
;
2311 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MOVBE_SET
;
2315 ix86_isa_flags
&= ~OPTION_MASK_ISA_MOVBE_UNSET
;
2316 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MOVBE_UNSET
;
2323 ix86_isa_flags
|= OPTION_MASK_ISA_CRC32_SET
;
2324 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_CRC32_SET
;
2328 ix86_isa_flags
&= ~OPTION_MASK_ISA_CRC32_UNSET
;
2329 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_CRC32_UNSET
;
2336 ix86_isa_flags
|= OPTION_MASK_ISA_AES_SET
;
2337 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_AES_SET
;
2341 ix86_isa_flags
&= ~OPTION_MASK_ISA_AES_UNSET
;
2342 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_AES_UNSET
;
2349 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL_SET
;
2350 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_PCLMUL_SET
;
2354 ix86_isa_flags
&= ~OPTION_MASK_ISA_PCLMUL_UNSET
;
2355 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_PCLMUL_UNSET
;
2364 /* Return a string the documents the current -m options. The caller is
2365 responsible for freeing the string. */
2368 ix86_target_string (int isa
, int flags
, const char *arch
, const char *tune
,
2369 const char *fpmath
, bool add_nl_p
)
2371 struct ix86_target_opts
2373 const char *option
; /* option string */
2374 int mask
; /* isa mask options */
2377 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2378 preceding options while match those first. */
2379 static struct ix86_target_opts isa_opts
[] =
2381 { "-m64", OPTION_MASK_ISA_64BIT
},
2382 { "-msse5", OPTION_MASK_ISA_SSE5
},
2383 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2384 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2385 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2386 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2387 { "-msse3", OPTION_MASK_ISA_SSE3
},
2388 { "-msse2", OPTION_MASK_ISA_SSE2
},
2389 { "-msse", OPTION_MASK_ISA_SSE
},
2390 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2391 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2392 { "-mmmx", OPTION_MASK_ISA_MMX
},
2393 { "-mabm", OPTION_MASK_ISA_ABM
},
2394 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2395 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2396 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2397 { "-maes", OPTION_MASK_ISA_AES
},
2398 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2402 static struct ix86_target_opts flag_opts
[] =
2404 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2405 { "-m80387", MASK_80387
},
2406 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2407 { "-malign-double", MASK_ALIGN_DOUBLE
},
2408 { "-mcld", MASK_CLD
},
2409 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2410 { "-mieee-fp", MASK_IEEE_FP
},
2411 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2412 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2413 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2414 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2415 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2416 { "-mno-fused-madd", MASK_NO_FUSED_MADD
},
2417 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2418 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2419 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2420 { "-mrecip", MASK_RECIP
},
2421 { "-mrtd", MASK_RTD
},
2422 { "-msseregparm", MASK_SSEREGPARM
},
2423 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2424 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2427 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2430 char target_other
[40];
2439 memset (opts
, '\0', sizeof (opts
));
2441 /* Add -march= option. */
2444 opts
[num
][0] = "-march=";
2445 opts
[num
++][1] = arch
;
2448 /* Add -mtune= option. */
2451 opts
[num
][0] = "-mtune=";
2452 opts
[num
++][1] = tune
;
2455 /* Pick out the options in isa options. */
2456 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2458 if ((isa
& isa_opts
[i
].mask
) != 0)
2460 opts
[num
++][0] = isa_opts
[i
].option
;
2461 isa
&= ~ isa_opts
[i
].mask
;
2465 if (isa
&& add_nl_p
)
2467 opts
[num
++][0] = isa_other
;
2468 sprintf (isa_other
, "(other isa: 0x%x)", isa
);
2471 /* Add flag options. */
2472 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2474 if ((flags
& flag_opts
[i
].mask
) != 0)
2476 opts
[num
++][0] = flag_opts
[i
].option
;
2477 flags
&= ~ flag_opts
[i
].mask
;
2481 if (flags
&& add_nl_p
)
2483 opts
[num
++][0] = target_other
;
2484 sprintf (target_other
, "(other flags: 0x%x)", isa
);
2487 /* Add -fpmath= option. */
2490 opts
[num
][0] = "-mfpmath=";
2491 opts
[num
++][1] = fpmath
;
2498 gcc_assert (num
< ARRAY_SIZE (opts
));
2500 /* Size the string. */
2502 sep_len
= (add_nl_p
) ? 3 : 1;
2503 for (i
= 0; i
< num
; i
++)
2506 for (j
= 0; j
< 2; j
++)
2508 len
+= strlen (opts
[i
][j
]);
2511 /* Build the string. */
2512 ret
= ptr
= (char *) xmalloc (len
);
2515 for (i
= 0; i
< num
; i
++)
2519 for (j
= 0; j
< 2; j
++)
2520 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2527 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2535 for (j
= 0; j
< 2; j
++)
2538 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2540 line_len
+= len2
[j
];
2545 gcc_assert (ret
+ len
>= ptr
);
2550 /* Function that is callable from the debugger to print the current
2553 ix86_debug_options (void)
2555 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2556 ix86_arch_string
, ix86_tune_string
,
2557 ix86_fpmath_string
, true);
2561 fprintf (stderr
, "%s\n\n", opts
);
2565 fprintf (stderr
, "<no options>\n\n");
2570 /* Sometimes certain combinations of command options do not make
2571 sense on a particular target machine. You can define a macro
2572 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2573 defined, is executed once just after all the command options have
2576 Don't use this macro to turn on various extra optimizations for
2577 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2580 override_options (bool main_args_p
)
2583 unsigned int ix86_arch_mask
, ix86_tune_mask
;
2588 /* Comes from final.c -- no real reason to change it. */
2589 #define MAX_CODE_ALIGN 16
2597 PTA_PREFETCH_SSE
= 1 << 4,
2599 PTA_3DNOW_A
= 1 << 6,
2603 PTA_POPCNT
= 1 << 10,
2605 PTA_SSE4A
= 1 << 12,
2606 PTA_NO_SAHF
= 1 << 13,
2607 PTA_SSE4_1
= 1 << 14,
2608 PTA_SSE4_2
= 1 << 15,
2611 PTA_PCLMUL
= 1 << 18,
2619 const char *const name
; /* processor name or nickname. */
2620 const enum processor_type processor
;
2621 const enum attr_cpu schedule
;
2622 const unsigned /*enum pta_flags*/ flags
;
2624 const processor_alias_table
[] =
2626 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2627 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2628 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2629 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2630 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2631 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2632 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2633 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2634 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2635 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2636 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2637 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
2638 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2640 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2642 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2643 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2644 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2645 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2646 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2647 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2648 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2649 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2650 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2651 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2652 | PTA_CX16
| PTA_NO_SAHF
},
2653 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
2654 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2655 | PTA_SSSE3
| PTA_CX16
},
2656 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2657 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2658 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
2659 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2660 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
2661 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2662 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2663 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2664 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
2665 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2666 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
2667 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2668 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
2669 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2670 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2671 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2672 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2673 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2674 {"x86-64", PROCESSOR_K8
, CPU_K8
,
2675 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
2676 {"k8", PROCESSOR_K8
, CPU_K8
,
2677 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2678 | PTA_SSE2
| PTA_NO_SAHF
},
2679 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
2680 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2681 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2682 {"opteron", PROCESSOR_K8
, CPU_K8
,
2683 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2684 | PTA_SSE2
| PTA_NO_SAHF
},
2685 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
2686 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2687 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2688 {"athlon64", PROCESSOR_K8
, CPU_K8
,
2689 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2690 | PTA_SSE2
| PTA_NO_SAHF
},
2691 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
2692 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2693 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2694 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
2695 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2696 | PTA_SSE2
| PTA_NO_SAHF
},
2697 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2698 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2699 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2700 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2701 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2702 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2703 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
2704 0 /* flags are only used for -march switch. */ },
2705 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
2706 PTA_64BIT
/* flags are only used for -march switch. */ },
2709 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
2711 /* Set up prefix/suffix so the error messages refer to either the command
2712 line argument, or the attribute(target). */
2721 prefix
= "option(\"";
2726 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2727 SUBTARGET_OVERRIDE_OPTIONS
;
2730 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2731 SUBSUBTARGET_OVERRIDE_OPTIONS
;
2734 /* -fPIC is the default for x86_64. */
2735 if (TARGET_MACHO
&& TARGET_64BIT
)
2738 /* Set the default values for switches whose default depends on TARGET_64BIT
2739 in case they weren't overwritten by command line options. */
2742 /* Mach-O doesn't support omitting the frame pointer for now. */
2743 if (flag_omit_frame_pointer
== 2)
2744 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
2745 if (flag_asynchronous_unwind_tables
== 2)
2746 flag_asynchronous_unwind_tables
= 1;
2747 if (flag_pcc_struct_return
== 2)
2748 flag_pcc_struct_return
= 0;
2752 if (flag_omit_frame_pointer
== 2)
2753 flag_omit_frame_pointer
= 0;
2754 if (flag_asynchronous_unwind_tables
== 2)
2755 flag_asynchronous_unwind_tables
= 0;
2756 if (flag_pcc_struct_return
== 2)
2757 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
2760 /* Need to check -mtune=generic first. */
2761 if (ix86_tune_string
)
2763 if (!strcmp (ix86_tune_string
, "generic")
2764 || !strcmp (ix86_tune_string
, "i686")
2765 /* As special support for cross compilers we read -mtune=native
2766 as -mtune=generic. With native compilers we won't see the
2767 -mtune=native, as it was changed by the driver. */
2768 || !strcmp (ix86_tune_string
, "native"))
2771 ix86_tune_string
= "generic64";
2773 ix86_tune_string
= "generic32";
2775 /* If this call is for setting the option attribute, allow the
2776 generic32/generic64 that was previously set. */
2777 else if (!main_args_p
2778 && (!strcmp (ix86_tune_string
, "generic32")
2779 || !strcmp (ix86_tune_string
, "generic64")))
2781 else if (!strncmp (ix86_tune_string
, "generic", 7))
2782 error ("bad value (%s) for %stune=%s %s",
2783 ix86_tune_string
, prefix
, suffix
, sw
);
2787 if (ix86_arch_string
)
2788 ix86_tune_string
= ix86_arch_string
;
2789 if (!ix86_tune_string
)
2791 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
2792 ix86_tune_defaulted
= 1;
2795 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2796 need to use a sensible tune option. */
2797 if (!strcmp (ix86_tune_string
, "generic")
2798 || !strcmp (ix86_tune_string
, "x86-64")
2799 || !strcmp (ix86_tune_string
, "i686"))
2802 ix86_tune_string
= "generic64";
2804 ix86_tune_string
= "generic32";
2807 if (ix86_stringop_string
)
2809 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2810 stringop_alg
= rep_prefix_1_byte
;
2811 else if (!strcmp (ix86_stringop_string
, "libcall"))
2812 stringop_alg
= libcall
;
2813 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2814 stringop_alg
= rep_prefix_4_byte
;
2815 else if (!strcmp (ix86_stringop_string
, "rep_8byte")
2817 /* rep; movq isn't available in 32-bit code. */
2818 stringop_alg
= rep_prefix_8_byte
;
2819 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2820 stringop_alg
= loop_1_byte
;
2821 else if (!strcmp (ix86_stringop_string
, "loop"))
2822 stringop_alg
= loop
;
2823 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2824 stringop_alg
= unrolled_loop
;
2826 error ("bad value (%s) for %sstringop-strategy=%s %s",
2827 ix86_stringop_string
, prefix
, suffix
, sw
);
2829 if (!strcmp (ix86_tune_string
, "x86-64"))
2830 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated. Use "
2831 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2832 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
2834 if (!ix86_arch_string
)
2835 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2837 ix86_arch_specified
= 1;
2839 if (!strcmp (ix86_arch_string
, "generic"))
2840 error ("generic CPU can be used only for %stune=%s %s",
2841 prefix
, suffix
, sw
);
2842 if (!strncmp (ix86_arch_string
, "generic", 7))
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string
, prefix
, suffix
, sw
);
2846 /* Validate -mabi= value. */
2847 if (ix86_abi_string
)
2849 if (strcmp (ix86_abi_string
, "sysv") == 0)
2850 ix86_abi
= SYSV_ABI
;
2851 else if (strcmp (ix86_abi_string
, "ms") == 0)
2854 error ("unknown ABI (%s) for %sabi=%s %s",
2855 ix86_abi_string
, prefix
, suffix
, sw
);
2858 ix86_abi
= DEFAULT_ABI
;
2860 if (ix86_cmodel_string
!= 0)
2862 if (!strcmp (ix86_cmodel_string
, "small"))
2863 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2864 else if (!strcmp (ix86_cmodel_string
, "medium"))
2865 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2866 else if (!strcmp (ix86_cmodel_string
, "large"))
2867 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2869 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2870 else if (!strcmp (ix86_cmodel_string
, "32"))
2871 ix86_cmodel
= CM_32
;
2872 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2873 ix86_cmodel
= CM_KERNEL
;
2875 error ("bad value (%s) for %scmodel=%s %s",
2876 ix86_cmodel_string
, prefix
, suffix
, sw
);
2880 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2881 use of rip-relative addressing. This eliminates fixups that
2882 would otherwise be needed if this object is to be placed in a
2883 DLL, and is essentially just as efficient as direct addressing. */
2884 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
2885 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
2886 else if (TARGET_64BIT
)
2887 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2889 ix86_cmodel
= CM_32
;
2891 if (ix86_asm_string
!= 0)
2894 && !strcmp (ix86_asm_string
, "intel"))
2895 ix86_asm_dialect
= ASM_INTEL
;
2896 else if (!strcmp (ix86_asm_string
, "att"))
2897 ix86_asm_dialect
= ASM_ATT
;
2899 error ("bad value (%s) for %sasm=%s %s",
2900 ix86_asm_string
, prefix
, suffix
, sw
);
2902 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2903 error ("code model %qs not supported in the %s bit mode",
2904 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2905 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
2906 sorry ("%i-bit mode not compiled in",
2907 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
2909 for (i
= 0; i
< pta_size
; i
++)
2910 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2912 ix86_schedule
= processor_alias_table
[i
].schedule
;
2913 ix86_arch
= processor_alias_table
[i
].processor
;
2914 /* Default cpu tuning to the architecture. */
2915 ix86_tune
= ix86_arch
;
2917 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2918 error ("CPU you selected does not support x86-64 "
2921 if (processor_alias_table
[i
].flags
& PTA_MMX
2922 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
2923 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2924 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2925 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
2926 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
2927 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2928 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
2929 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
2930 if (processor_alias_table
[i
].flags
& PTA_SSE
2931 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
2932 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2933 if (processor_alias_table
[i
].flags
& PTA_SSE2
2934 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
2935 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2936 if (processor_alias_table
[i
].flags
& PTA_SSE3
2937 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
2938 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2939 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2940 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
2941 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2942 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
2943 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
2944 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2945 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
2946 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
2947 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
2948 if (processor_alias_table
[i
].flags
& PTA_AVX
2949 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
2950 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
2951 if (processor_alias_table
[i
].flags
& PTA_FMA
2952 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
2953 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
2954 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2955 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
2956 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2957 if (processor_alias_table
[i
].flags
& PTA_SSE5
2958 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE5
))
2959 ix86_isa_flags
|= OPTION_MASK_ISA_SSE5
;
2960 if (processor_alias_table
[i
].flags
& PTA_ABM
2961 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
2962 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
2963 if (processor_alias_table
[i
].flags
& PTA_CX16
2964 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
2965 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
2966 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
2967 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
2968 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
2969 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
2970 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
2971 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
2972 if (processor_alias_table
[i
].flags
& PTA_MOVBE
2973 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
2974 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
2975 if (processor_alias_table
[i
].flags
& PTA_AES
2976 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
2977 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
2978 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
2979 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
2980 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
2981 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
2982 x86_prefetch_sse
= true;
2988 error ("bad value (%s) for %sarch=%s %s",
2989 ix86_arch_string
, prefix
, suffix
, sw
);
2991 ix86_arch_mask
= 1u << ix86_arch
;
2992 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2993 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
2995 for (i
= 0; i
< pta_size
; i
++)
2996 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2998 ix86_schedule
= processor_alias_table
[i
].schedule
;
2999 ix86_tune
= processor_alias_table
[i
].processor
;
3000 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3002 if (ix86_tune_defaulted
)
3004 ix86_tune_string
= "x86-64";
3005 for (i
= 0; i
< pta_size
; i
++)
3006 if (! strcmp (ix86_tune_string
,
3007 processor_alias_table
[i
].name
))
3009 ix86_schedule
= processor_alias_table
[i
].schedule
;
3010 ix86_tune
= processor_alias_table
[i
].processor
;
3013 error ("CPU you selected does not support x86-64 "
3016 /* Intel CPUs have always interpreted SSE prefetch instructions as
3017 NOPs; so, we can enable SSE prefetch instructions even when
3018 -mtune (rather than -march) points us to a processor that has them.
3019 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3020 higher processors. */
3022 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3023 x86_prefetch_sse
= true;
3027 error ("bad value (%s) for %stune=%s %s",
3028 ix86_tune_string
, prefix
, suffix
, sw
);
3030 ix86_tune_mask
= 1u << ix86_tune
;
3031 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3032 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3035 ix86_cost
= &ix86_size_cost
;
3037 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
3039 /* Arrange to set up i386_stack_locals for all functions. */
3040 init_machine_status
= ix86_init_machine_status
;
3042 /* Validate -mregparm= value. */
3043 if (ix86_regparm_string
)
3046 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix
, suffix
);
3047 i
= atoi (ix86_regparm_string
);
3048 if (i
< 0 || i
> REGPARM_MAX
)
3049 error ("%sregparm=%d%s is not between 0 and %d",
3050 prefix
, i
, suffix
, REGPARM_MAX
);
3055 ix86_regparm
= REGPARM_MAX
;
3057 /* If the user has provided any of the -malign-* options,
3058 warn and use that value only if -falign-* is not set.
3059 Remove this code in GCC 3.2 or later. */
3060 if (ix86_align_loops_string
)
3062 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3063 prefix
, suffix
, suffix
);
3064 if (align_loops
== 0)
3066 i
= atoi (ix86_align_loops_string
);
3067 if (i
< 0 || i
> MAX_CODE_ALIGN
)
3068 error ("%salign-loops=%d%s is not between 0 and %d",
3069 prefix
, i
, suffix
, MAX_CODE_ALIGN
);
3071 align_loops
= 1 << i
;
3075 if (ix86_align_jumps_string
)
3077 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3078 prefix
, suffix
, suffix
);
3079 if (align_jumps
== 0)
3081 i
= atoi (ix86_align_jumps_string
);
3082 if (i
< 0 || i
> MAX_CODE_ALIGN
)
3083 error ("%salign-loops=%d%s is not between 0 and %d",
3084 prefix
, i
, suffix
, MAX_CODE_ALIGN
);
3086 align_jumps
= 1 << i
;
3090 if (ix86_align_funcs_string
)
3092 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3093 prefix
, suffix
, suffix
);
3094 if (align_functions
== 0)
3096 i
= atoi (ix86_align_funcs_string
);
3097 if (i
< 0 || i
> MAX_CODE_ALIGN
)
3098 error ("%salign-loops=%d%s is not between 0 and %d",
3099 prefix
, i
, suffix
, MAX_CODE_ALIGN
);
3101 align_functions
= 1 << i
;
3105 /* Default align_* from the processor table. */
3106 if (align_loops
== 0)
3108 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3109 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3111 if (align_jumps
== 0)
3113 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3114 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3116 if (align_functions
== 0)
3118 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3121 /* Validate -mbranch-cost= value, or provide default. */
3122 ix86_branch_cost
= ix86_cost
->branch_cost
;
3123 if (ix86_branch_cost_string
)
3125 i
= atoi (ix86_branch_cost_string
);
3127 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix
, i
, suffix
);
3129 ix86_branch_cost
= i
;
3131 if (ix86_section_threshold_string
)
3133 i
= atoi (ix86_section_threshold_string
);
3135 error ("%slarge-data-threshold=%d%s is negative", prefix
, i
, suffix
);
3137 ix86_section_threshold
= i
;
3140 if (ix86_tls_dialect_string
)
3142 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
3143 ix86_tls_dialect
= TLS_DIALECT_GNU
;
3144 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
3145 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
3146 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
3147 ix86_tls_dialect
= TLS_DIALECT_SUN
;
3149 error ("bad value (%s) for %stls-dialect=%s %s",
3150 ix86_tls_dialect_string
, prefix
, suffix
, sw
);
3153 if (ix87_precision_string
)
3155 i
= atoi (ix87_precision_string
);
3156 if (i
!= 32 && i
!= 64 && i
!= 80)
3157 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
3162 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3164 /* Enable by default the SSE and MMX builtins. Do allow the user to
3165 explicitly disable any of these. In particular, disabling SSE and
3166 MMX for kernel code is extremely useful. */
3167 if (!ix86_arch_specified
)
3169 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3170 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3173 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3177 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3179 if (!ix86_arch_specified
)
3181 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3183 /* i386 ABI does not specify red zone. It still makes sense to use it
3184 when programmer takes care to stack from being destroyed. */
3185 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3186 target_flags
|= MASK_NO_RED_ZONE
;
3189 /* Keep nonleaf frame pointers. */
3190 if (flag_omit_frame_pointer
)
3191 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3192 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3193 flag_omit_frame_pointer
= 1;
3195 /* If we're doing fast math, we don't care about comparison order
3196 wrt NaNs. This lets us use a shorter comparison sequence. */
3197 if (flag_finite_math_only
)
3198 target_flags
&= ~MASK_IEEE_FP
;
3200 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3201 since the insns won't need emulation. */
3202 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3203 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3205 /* Likewise, if the target doesn't have a 387, or we've specified
3206 software floating point, don't use 387 inline intrinsics. */
3208 target_flags
|= MASK_NO_FANCY_MATH_387
;
3210 /* Turn on MMX builtins for -msse. */
3213 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3214 x86_prefetch_sse
= true;
3217 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3218 if (TARGET_SSE4_2
|| TARGET_ABM
)
3219 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3221 /* Validate -mpreferred-stack-boundary= value or default it to
3222 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3223 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3224 if (ix86_preferred_stack_boundary_string
)
3226 i
= atoi (ix86_preferred_stack_boundary_string
);
3227 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
3228 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3229 prefix
, i
, suffix
, TARGET_64BIT
? 4 : 2);
3231 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
3234 /* Set the default value for -mstackrealign. */
3235 if (ix86_force_align_arg_pointer
== -1)
3236 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3238 /* Validate -mincoming-stack-boundary= value or default it to
3239 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3240 if (ix86_force_align_arg_pointer
)
3241 ix86_default_incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
3243 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3244 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3245 if (ix86_incoming_stack_boundary_string
)
3247 i
= atoi (ix86_incoming_stack_boundary_string
);
3248 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
3249 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3250 i
, TARGET_64BIT
? 4 : 2);
3253 ix86_user_incoming_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
3254 ix86_incoming_stack_boundary
3255 = ix86_user_incoming_stack_boundary
;
3259 /* Accept -msseregparm only if at least SSE support is enabled. */
3260 if (TARGET_SSEREGPARM
3262 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3264 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3265 if (ix86_fpmath_string
!= 0)
3267 if (! strcmp (ix86_fpmath_string
, "387"))
3268 ix86_fpmath
= FPMATH_387
;
3269 else if (! strcmp (ix86_fpmath_string
, "sse"))
3273 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3274 ix86_fpmath
= FPMATH_387
;
3277 ix86_fpmath
= FPMATH_SSE
;
3279 else if (! strcmp (ix86_fpmath_string
, "387,sse")
3280 || ! strcmp (ix86_fpmath_string
, "387+sse")
3281 || ! strcmp (ix86_fpmath_string
, "sse,387")
3282 || ! strcmp (ix86_fpmath_string
, "sse+387")
3283 || ! strcmp (ix86_fpmath_string
, "both"))
3287 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3288 ix86_fpmath
= FPMATH_387
;
3290 else if (!TARGET_80387
)
3292 warning (0, "387 instruction set disabled, using SSE arithmetics");
3293 ix86_fpmath
= FPMATH_SSE
;
3296 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
3299 error ("bad value (%s) for %sfpmath=%s %s",
3300 ix86_fpmath_string
, prefix
, suffix
, sw
);
3303 /* If the i387 is disabled, then do not return values in it. */
3305 target_flags
&= ~MASK_FLOAT_RETURNS
;
3307 /* Use external vectorized library in vectorizing intrinsics. */
3308 if (ix86_veclibabi_string
)
3310 if (strcmp (ix86_veclibabi_string
, "svml") == 0)
3311 ix86_veclib_handler
= ix86_veclibabi_svml
;
3312 else if (strcmp (ix86_veclibabi_string
, "acml") == 0)
3313 ix86_veclib_handler
= ix86_veclibabi_acml
;
3315 error ("unknown vectorization library ABI type (%s) for "
3316 "%sveclibabi=%s %s", ix86_veclibabi_string
,
3317 prefix
, suffix
, sw
);
3320 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
3321 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3323 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3325 /* ??? Unwind info is not correct around the CFG unless either a frame
3326 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3327 unwind info generation to be aware of the CFG and propagating states
3329 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3330 || flag_exceptions
|| flag_non_call_exceptions
)
3331 && flag_omit_frame_pointer
3332 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3334 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3335 warning (0, "unwind tables currently require either a frame pointer "
3336 "or %saccumulate-outgoing-args%s for correctness",
3338 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3341 /* If stack probes are required, the space used for large function
3342 arguments on the stack must also be probed, so enable
3343 -maccumulate-outgoing-args so this happens in the prologue. */
3344 if (TARGET_STACK_PROBE
3345 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3347 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3348 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3349 "for correctness", prefix
, suffix
);
3350 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3353 /* For sane SSE instruction set generation we need fcomi instruction.
3354 It is safe to enable all CMOVE instructions. */
3358 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3361 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3362 p
= strchr (internal_label_prefix
, 'X');
3363 internal_label_prefix_len
= p
- internal_label_prefix
;
3367 /* When scheduling description is not available, disable scheduler pass
3368 so it won't slow down the compilation and make x87 code slower. */
3369 if (!TARGET_SCHEDULE
)
3370 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3372 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
3373 set_param_value ("simultaneous-prefetches",
3374 ix86_cost
->simultaneous_prefetches
);
3375 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
3376 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
3377 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE
))
3378 set_param_value ("l1-cache-size", ix86_cost
->l1_cache_size
);
3379 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE
))
3380 set_param_value ("l2-cache-size", ix86_cost
->l2_cache_size
);
3382 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3383 can be optimized to ap = __builtin_next_arg (0). */
3385 targetm
.expand_builtin_va_start
= NULL
;
3389 ix86_gen_leave
= gen_leave_rex64
;
3390 ix86_gen_pop1
= gen_popdi1
;
3391 ix86_gen_add3
= gen_adddi3
;
3392 ix86_gen_sub3
= gen_subdi3
;
3393 ix86_gen_sub3_carry
= gen_subdi3_carry_rex64
;
3394 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3395 ix86_gen_monitor
= gen_sse3_monitor64
;
3396 ix86_gen_andsp
= gen_anddi3
;
3400 ix86_gen_leave
= gen_leave
;
3401 ix86_gen_pop1
= gen_popsi1
;
3402 ix86_gen_add3
= gen_addsi3
;
3403 ix86_gen_sub3
= gen_subsi3
;
3404 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3405 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3406 ix86_gen_monitor
= gen_sse3_monitor
;
3407 ix86_gen_andsp
= gen_andsi3
;
3411 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3413 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3416 /* Save the initial options in case the user does function specific options */
3418 target_option_default_node
= target_option_current_node
3419 = build_target_option_node ();
3422 /* Save the current options */
/* Save the current x86 target-option state into *PTR (the per-function
   cl_target_option save area used by attribute((target)) /
   TARGET_OPTION_SAVE).  Copies the arch/tune/schedule selections, the
   fpmath unit, branch cost, the "tune was defaulted" / "arch was given
   explicitly" markers, and the explicit-flag masks.
   NOTE(review): this extraction has lost the return-type line and the
   surrounding braces; tokens below are kept byte-identical.  */
3425 ix86_function_specific_save (struct cl_target_option
*ptr
)
3427 ptr
->arch
= ix86_arch
;
3428 ptr
->schedule
= ix86_schedule
;
3429 ptr
->tune
= ix86_tune
;
3430 ptr
->fpmath
= ix86_fpmath
;
3431 ptr
->branch_cost
= ix86_branch_cost
;
3432 ptr
->tune_defaulted
= ix86_tune_defaulted
;
3433 ptr
->arch_specified
= ix86_arch_specified
;
3434 ptr
->ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
3435 ptr
->target_flags_explicit
= target_flags_explicit
;
3437 /* The fields are char but the variables are not; make sure the
3438 values fit in the fields. */
/* The asserts below detect truncation: each round-trips the narrow
   struct field against the wide global it was copied from.  */
3439 gcc_assert (ptr
->arch
== ix86_arch
);
3440 gcc_assert (ptr
->schedule
== ix86_schedule
);
3441 gcc_assert (ptr
->tune
== ix86_tune
);
3442 gcc_assert (ptr
->fpmath
== ix86_fpmath
);
3443 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
3446 /* Restore the current options */
/* Restore the x86 target-option state previously captured by
   ix86_function_specific_save from *PTR back into the global option
   variables, then rebuild the per-arch and per-tune feature bitmaps if
   the arch or tune selection actually changed.
   NOTE(review): braces and the declaration of `i` are elided in this
   extraction; code tokens below are kept byte-identical.  */
3449 ix86_function_specific_restore (struct cl_target_option
*ptr
)
/* Remember the old selections so we only recompute the (loop-built)
   feature arrays when they really changed.  */
3451 enum processor_type old_tune
= ix86_tune
;
3452 enum processor_type old_arch
= ix86_arch
;
3453 unsigned int ix86_arch_mask
, ix86_tune_mask
;
/* Widen the char-sized save-area fields back into the enum globals.  */
3456 ix86_arch
= (enum processor_type
) ptr
->arch
;
3457 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
3458 ix86_tune
= (enum processor_type
) ptr
->tune
;
3459 ix86_fpmath
= (enum fpmath_unit
) ptr
->fpmath
;
3460 ix86_branch_cost
= ptr
->branch_cost
;
3461 ix86_tune_defaulted
= ptr
->tune_defaulted
;
3462 ix86_arch_specified
= ptr
->arch_specified
;
3463 ix86_isa_flags_explicit
= ptr
->ix86_isa_flags_explicit
;
3464 target_flags_explicit
= ptr
->target_flags_explicit
;
3466 /* Recreate the arch feature tests if the arch changed */
3467 if (old_arch
!= ix86_arch
)
/* Each feature table entry is a bitmask over processors; select the
   bit for the restored arch.  */
3469 ix86_arch_mask
= 1u << ix86_arch
;
3470 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3471 ix86_arch_features
[i
]
3472 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3475 /* Recreate the tune optimization tests */
3476 if (old_tune
!= ix86_tune
)
3478 ix86_tune_mask
= 1u << ix86_tune
;
3479 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3480 ix86_tune_features
[i
]
3481 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3485 /* Print the current options */
/* Debug helper: print the contents of a cl_target_option save area *PTR
   to FILE, indented by INDENT columns — the ISA/flag string, arch, tune,
   fpmath unit(s) and branch cost.
   NOTE(review): several fprintf argument lines (including the fallback
   strings when arch/tune are out of cpu_names range) are elided in this
   extraction; tokens below are kept byte-identical.  */
3488 ix86_function_specific_print (FILE *file
, int indent
,
3489 struct cl_target_option
*ptr
)
/* Build a human-readable "-m..." style string from the saved ISA and
   target flags; it is heap-allocated and freed at the end.  */
3492 = ix86_target_string (ptr
->ix86_isa_flags
, ptr
->target_flags
,
3493 NULL
, NULL
, NULL
, false);
3495 fprintf (file
, "%*sarch = %d (%s)\n",
3498 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
3499 ? cpu_names
[ptr
->arch
]
3502 fprintf (file
, "%*stune = %d (%s)\n",
3505 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
3506 ? cpu_names
[ptr
->tune
]
/* fpmath is a bitmask; both ", 387" and ", sse" may print.  */
3509 fprintf (file
, "%*sfpmath = %d%s%s\n", indent
, "", ptr
->fpmath
,
3510 (ptr
->fpmath
& FPMATH_387
) ? ", 387" : "",
3511 (ptr
->fpmath
& FPMATH_SSE
) ? ", sse" : "");
3512 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
3516 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
3517 free (target_string
);
3522 /* Inner function to process the attribute((target(...))), take an argument and
3523 set the current options from the argument. If we have a list, recursively go
/* Worker for attribute((target("..."))): parse ARGS (a TREE_LIST of
   strings, or a single STRING_CST of comma-separated options) and apply
   each recognized option.  ISA options go through ix86_handle_option,
   yes/no options toggle bits in target_flags, and string options
   (arch=/tune=/fpmath=) are duplicated into P_STRINGS for the caller.
   NOTE(review): many lines (locals, return statements, braces, some
   table entries) are elided in this extraction; code tokens below are
   kept byte-identical.  */
3527 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[])
/* Table-entry builders: string, its length, option kind, OPT_* enum,
   and (for yes/no options) the target_flags mask.  */
3532 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3533 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3534 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3535 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3550 enum ix86_opt_type type
;
/* ISA options — each maps an attribute name to its -m option.  */
3555 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
3556 IX86_ATTR_ISA ("abm", OPT_mabm
),
3557 IX86_ATTR_ISA ("aes", OPT_maes
),
3558 IX86_ATTR_ISA ("avx", OPT_mavx
),
3559 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
3560 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
3561 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
3562 IX86_ATTR_ISA ("sse", OPT_msse
),
3563 IX86_ATTR_ISA ("sse2", OPT_msse2
),
3564 IX86_ATTR_ISA ("sse3", OPT_msse3
),
3565 IX86_ATTR_ISA ("sse4", OPT_msse4
),
3566 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
3567 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
3568 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
3569 IX86_ATTR_ISA ("sse5", OPT_msse5
),
3570 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
3572 /* string options */
3573 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
3574 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH
),
3575 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
/* Flag options — yes/no entries carry the target_flags mask.  */
3578 IX86_ATTR_YES ("cld",
3582 IX86_ATTR_NO ("fancy-math-387",
3583 OPT_mfancy_math_387
,
3584 MASK_NO_FANCY_MATH_387
),
3586 IX86_ATTR_NO ("fused-madd",
3588 MASK_NO_FUSED_MADD
),
3590 IX86_ATTR_YES ("ieee-fp",
3594 IX86_ATTR_YES ("inline-all-stringops",
3595 OPT_minline_all_stringops
,
3596 MASK_INLINE_ALL_STRINGOPS
),
3598 IX86_ATTR_YES ("inline-stringops-dynamically",
3599 OPT_minline_stringops_dynamically
,
3600 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
3602 IX86_ATTR_NO ("align-stringops",
3603 OPT_mno_align_stringops
,
3604 MASK_NO_ALIGN_STRINGOPS
),
3606 IX86_ATTR_YES ("recip",
3612 /* If this is a list, recurse to get the options. */
3613 if (TREE_CODE (args
) == TREE_LIST
)
/* Each TREE_VALUE is itself an args node; a failure in any element
   fails the whole attribute.  */
3617 for (; args
; args
= TREE_CHAIN (args
))
3618 if (TREE_VALUE (args
)
3619 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
), p_strings
))
3625 else if (TREE_CODE (args
) != STRING_CST
)
3628 /* Handle multiple arguments separated by commas. */
/* ASTRDUP makes a scratch copy so the string can be split in place.  */
3629 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
3631 while (next_optstr
&& *next_optstr
!= '\0')
3633 char *p
= next_optstr
;
3635 char *comma
= strchr (next_optstr
, ',');
3636 const char *opt_string
;
3637 size_t len
, opt_len
;
3642 enum ix86_opt_type type
= ix86_opt_unknown
;
/* Split off the token before the comma and advance past it.  */
3648 len
= comma
- next_optstr
;
3649 next_optstr
= comma
+ 1;
3657 /* Recognize no-xxx. */
3658 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
3667 /* Find the option. */
/* Linear scan of the attrs table; ix86_opt_str entries match as a
   prefix (len > opt_len), others must match exactly.  */
3670 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
3672 type
= attrs
[i
].type
;
3673 opt_len
= attrs
[i
].len
;
3674 if (ch
== attrs
[i
].string
[0]
3675 && ((type
!= ix86_opt_str
) ? len
== opt_len
: len
> opt_len
)
3676 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
3679 mask
= attrs
[i
].mask
;
3680 opt_string
= attrs
[i
].string
;
3685 /* Process the option. */
3688 error ("attribute(target(\"%s\")) is unknown", orig_p
)
;
3692 else if (type
== ix86_opt_isa
)
3693 ix86_handle_option (opt
, p
, opt_set_p
);
3695 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
/* An ix86_opt_no entry stores the inverted sense, so flip the
   set/clear decision before applying the mask.  */
3697 if (type
== ix86_opt_no
)
3698 opt_set_p
= !opt_set_p
;
3701 target_flags
|= mask
;
3703 target_flags
&= ~mask
;
3706 else if (type
== ix86_opt_str
)
/* String options may only appear once per attribute.  */
3710 error ("option(\"%s\") was already specified", opt_string
)
;
/* Stash the value after the "xxx=" prefix for the caller to apply;
   caller owns (and frees) the xstrdup'd copy.  */
3714 p_strings
[opt
] = xstrdup (p
+ opt_len
);
3724 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Build and return a TARGET_OPTION_NODE for the attribute arguments in
   ARGS, or NULL on error.  Saves the current global option strings,
   applies the attribute's options, reruns override_options to
   recompute derived state, snapshots the result into a tree node, and
   then restores the original globals.
   NOTE(review): return statements, braces and the `t`/`i` declarations
   are elided in this extraction; tokens below are kept byte-identical.  */
3727 ix86_valid_target_attribute_tree (tree args
)
/* Remember the global option strings so they can be put back after the
   temporary override below.  */
3729 const char *orig_arch_string
= ix86_arch_string
;
3730 const char *orig_tune_string
= ix86_tune_string
;
3731 const char *orig_fpmath_string
= ix86_fpmath_string
;
3732 int orig_tune_defaulted
= ix86_tune_defaulted
;
3733 int orig_arch_specified
= ix86_arch_specified
;
/* One slot per IX86_FUNCTION_SPECIFIC_* string option (arch, tune,
   fpmath), filled in by ix86_valid_target_attribute_inner_p.  */
3734 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
, NULL
};
3737 struct cl_target_option
*def
3738 = TREE_TARGET_OPTION (target_option_default_node
);
3740 /* Process each of the options on the chain. */
3741 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
))
3744 /* If the changed options are different from the default, rerun override_options,
3745 and then save the options away. The string options are are attribute options,
3746 and will be undone when we copy the save structure. */
3747 if (ix86_isa_flags
!= def
->ix86_isa_flags
3748 || target_flags
!= def
->target_flags
3749 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
3750 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
3751 || option_strings
[IX86_FUNCTION_SPECIFIC_FPMATH
])
3753 /* If we are using the default tune= or arch=, undo the string assigned,
3754 and use the default. */
3755 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
3756 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
3757 else if (!orig_arch_specified
)
3758 ix86_arch_string
= NULL
;
3760 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
3761 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
3762 else if (orig_tune_defaulted
)
3763 ix86_tune_string
= NULL
;
3765 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3766 if (option_strings
[IX86_FUNCTION_SPECIFIC_FPMATH
])
3767 ix86_fpmath_string
= option_strings
[IX86_FUNCTION_SPECIFIC_FPMATH
];
3768 else if (!TARGET_64BIT
&& TARGET_SSE
)
3769 ix86_fpmath_string
= "sse,387";
3771 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
/* false => not the initial command-line pass; error prefixes will
   name the attribute instead of the option.  */
3772 override_options (false);
3774 /* Add any builtin functions with the new isa if any. */
3775 ix86_add_new_builtins (ix86_isa_flags
);
3777 /* Save the current options unless we are validating options for
3779 t
= build_target_option_node ();
/* Put the original global strings back; the overridden state lives on
   only inside the saved tree node `t`.  */
3781 ix86_arch_string
= orig_arch_string
;
3782 ix86_tune_string
= orig_tune_string
;
3783 ix86_fpmath_string
= orig_fpmath_string
;
3785 /* Free up memory allocated to hold the strings */
3786 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
3787 if (option_strings
[i
])
3788 free (option_strings
[i
]);
3794 /* Hook to validate attribute((target("string"))). */
3797 ix86_valid_target_attribute_p (tree fndecl
,
3798 tree
ARG_UNUSED (name
),
3800 int ARG_UNUSED (flags
))
3802 struct cl_target_option cur_target
;
3804 tree old_optimize
= build_optimization_node ();
3805 tree new_target
, new_optimize
;
3806 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
3808 /* If the function changed the optimization levels as well as setting target
3809 options, start with the optimizations specified. */
3810 if (func_optimize
&& func_optimize
!= old_optimize
)
3811 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize
));
3813 /* The target attributes may also change some optimization flags, so update
3814 the optimization options if necessary. */
3815 cl_target_option_save (&cur_target
);
3816 new_target
= ix86_valid_target_attribute_tree (args
);
3817 new_optimize
= build_optimization_node ();
3824 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
3826 if (old_optimize
!= new_optimize
)
3827 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
3830 cl_target_option_restore (&cur_target
);
3832 if (old_optimize
!= new_optimize
)
3833 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize
));
3839 /* Hook to determine if one function can safely inline another. */
3842 ix86_can_inline_p (tree caller
, tree callee
)
3845 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
3846 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
3848 /* If callee has no option attributes, then it is ok to inline. */
3852 /* If caller has no option attributes, but callee does then it is not ok to
3854 else if (!caller_tree
)
3859 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
3860 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
3862 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function
3863 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3865 if ((caller_opts
->ix86_isa_flags
& callee_opts
->ix86_isa_flags
)
3866 != callee_opts
->ix86_isa_flags
)
3869 /* See if we have the same non-isa options. */
3870 else if (caller_opts
->target_flags
!= callee_opts
->target_flags
)
3873 /* See if arch, tune, etc. are the same. */
3874 else if (caller_opts
->arch
!= callee_opts
->arch
)
3877 else if (caller_opts
->tune
!= callee_opts
->tune
)
3880 else if (caller_opts
->fpmath
!= callee_opts
->fpmath
)
3883 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
3894 /* Remember the last target of ix86_set_current_function. */
3895 static GTY(()) tree ix86_previous_fndecl
;
3897 /* Establish appropriate back-end context for processing the function
3898 FNDECL. The argument might be NULL to indicate processing at top
3899 level, outside of any function scope. */
3901 ix86_set_current_function (tree fndecl
)
3903 /* Only change the context if the function changes. This hook is called
3904 several times in the course of compiling a function, and we don't want to
3905 slow things down too much or call target_reinit when it isn't safe. */
3906 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
3908 tree old_tree
= (ix86_previous_fndecl
3909 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
3912 tree new_tree
= (fndecl
3913 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
3916 ix86_previous_fndecl
= fndecl
;
3917 if (old_tree
== new_tree
)
3922 cl_target_option_restore (TREE_TARGET_OPTION (new_tree
));
3928 struct cl_target_option
*def
3929 = TREE_TARGET_OPTION (target_option_current_node
);
3931 cl_target_option_restore (def
);
3938 /* Return true if this goes in large data/bss. */
3941 ix86_in_large_data_p (tree exp
)
3943 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
3946 /* Functions are never large data. */
3947 if (TREE_CODE (exp
) == FUNCTION_DECL
)
3950 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
3952 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
3953 if (strcmp (section
, ".ldata") == 0
3954 || strcmp (section
, ".lbss") == 0)
3960 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
3962 /* If this is an incomplete type with size 0, then we can't put it
3963 in data because it might be too big when completed. */
3964 if (!size
|| size
> ix86_section_threshold
)
3971 /* Switch to the appropriate section for output of DECL.
3972 DECL is either a `VAR_DECL' node or a constant of some sort.
3973 RELOC indicates whether forming the initial value of DECL requires
3974 link-time relocations. */
3976 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
3980 x86_64_elf_select_section (tree decl
, int reloc
,
3981 unsigned HOST_WIDE_INT align
)
3983 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
3984 && ix86_in_large_data_p (decl
))
3986 const char *sname
= NULL
;
3987 unsigned int flags
= SECTION_WRITE
;
3988 switch (categorize_decl_for_section (decl
, reloc
))
3993 case SECCAT_DATA_REL
:
3994 sname
= ".ldata.rel";
3996 case SECCAT_DATA_REL_LOCAL
:
3997 sname
= ".ldata.rel.local";
3999 case SECCAT_DATA_REL_RO
:
4000 sname
= ".ldata.rel.ro";
4002 case SECCAT_DATA_REL_RO_LOCAL
:
4003 sname
= ".ldata.rel.ro.local";
4007 flags
|= SECTION_BSS
;
4010 case SECCAT_RODATA_MERGE_STR
:
4011 case SECCAT_RODATA_MERGE_STR_INIT
:
4012 case SECCAT_RODATA_MERGE_CONST
:
4016 case SECCAT_SRODATA
:
4023 /* We don't split these for medium model. Place them into
4024 default sections and hope for best. */
4026 case SECCAT_EMUTLS_VAR
:
4027 case SECCAT_EMUTLS_TMPL
:
4032 /* We might get called with string constants, but get_named_section
4033 doesn't like them as they are not DECLs. Also, we need to set
4034 flags in that case. */
4036 return get_section (sname
, flags
, NULL
);
4037 return get_named_section (decl
, sname
, reloc
);
4040 return default_elf_select_section (decl
, reloc
, align
);
4043 /* Build up a unique section name, expressed as a
4044 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4045 RELOC indicates whether the initial value of EXP requires
4046 link-time relocations. */
4048 static void ATTRIBUTE_UNUSED
4049 x86_64_elf_unique_section (tree decl
, int reloc
)
4051 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4052 && ix86_in_large_data_p (decl
))
4054 const char *prefix
= NULL
;
4055 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4056 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4058 switch (categorize_decl_for_section (decl
, reloc
))
4061 case SECCAT_DATA_REL
:
4062 case SECCAT_DATA_REL_LOCAL
:
4063 case SECCAT_DATA_REL_RO
:
4064 case SECCAT_DATA_REL_RO_LOCAL
:
4065 prefix
= one_only
? ".ld" : ".ldata";
4068 prefix
= one_only
? ".lb" : ".lbss";
4071 case SECCAT_RODATA_MERGE_STR
:
4072 case SECCAT_RODATA_MERGE_STR_INIT
:
4073 case SECCAT_RODATA_MERGE_CONST
:
4074 prefix
= one_only
? ".lr" : ".lrodata";
4076 case SECCAT_SRODATA
:
4083 /* We don't split these for medium model. Place them into
4084 default sections and hope for best. */
4086 case SECCAT_EMUTLS_VAR
:
4087 prefix
= targetm
.emutls
.var_section
;
4089 case SECCAT_EMUTLS_TMPL
:
4090 prefix
= targetm
.emutls
.tmpl_section
;
4095 const char *name
, *linkonce
;
4098 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4099 name
= targetm
.strip_name_encoding (name
);
4101 /* If we're using one_only, then there needs to be a .gnu.linkonce
4102 prefix to the section name. */
4103 linkonce
= one_only
? ".gnu.linkonce" : "";
4105 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4107 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4111 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Large objects in the medium model go to .largecomm rather than
     the default common section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
4136 /* Utility function for targets to use in implementing
4137 ASM_OUTPUT_ALIGNED_BSS. */
4140 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4141 const char *name
, unsigned HOST_WIDE_INT size
,
4144 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4145 && size
> (unsigned int)ix86_section_threshold
)
4146 switch_to_section (get_named_section (decl
, ".lbss", 0));
4148 switch_to_section (bss_section
);
4149 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4150 #ifdef ASM_DECLARE_OBJECT_NAME
4151 last_assemble_variable_decl
= decl
;
4152 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4154 /* Standard thing is just output label for the object. */
4155 ASM_OUTPUT_LABEL (file
, name
);
4156 #endif /* ASM_DECLARE_OBJECT_NAME */
4157 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4161 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
4163 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4164 make the problem with not enough registers even worse. */
4165 #ifdef INSN_SCHEDULING
4167 flag_schedule_insns
= 0;
4171 /* The Darwin libraries never set errno, so we might as well
4172 avoid calling them when that's the only reason we would. */
4173 flag_errno_math
= 0;
4175 /* The default values of these switches depend on the TARGET_64BIT
4176 that is not known at this moment. Mark these values with 2 and
4177 let user the to override these. In case there is no command line option
4178 specifying them, we will set the defaults in override_options. */
4180 flag_omit_frame_pointer
= 2;
4181 flag_pcc_struct_return
= 2;
4182 flag_asynchronous_unwind_tables
= 2;
4183 flag_vect_cost_model
= 1;
4184 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4185 SUBTARGET_OPTIMIZATION_OPTIONS
;
4189 /* Decide whether we can make a sibling call to a function. DECL is the
4190 declaration of the function being targeted by the call and EXP is the
4191 CALL_EXPR representing the call. */
4194 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4199 /* If we are generating position-independent code, we cannot sibcall
4200 optimize any indirect call, or a direct call to a global function,
4201 as the PLT requires %ebx be live. */
4202 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
4209 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
4210 if (POINTER_TYPE_P (func
))
4211 func
= TREE_TYPE (func
);
4214 /* Check that the return value locations are the same. Like
4215 if we are returning floats on the 80387 register stack, we cannot
4216 make a sibcall from a function that doesn't return a float to a
4217 function that does or, conversely, from a function that does return
4218 a float to a function that doesn't; the necessary stack adjustment
4219 would not be executed. This is also the place we notice
4220 differences in the return value ABI. Note that it is ok for one
4221 of the functions to have void return type as long as the return
4222 value of the other is passed in a register. */
4223 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
4224 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4226 if (STACK_REG_P (a
) || STACK_REG_P (b
))
4228 if (!rtx_equal_p (a
, b
))
4231 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4233 else if (!rtx_equal_p (a
, b
))
4236 /* If this call is indirect, we'll need to be able to use a call-clobbered
4237 register for the address of the target function. Make sure that all
4238 such registers are not used for passing parameters. */
4239 if (!decl
&& !TARGET_64BIT
)
4243 /* We're looking at the CALL_EXPR, we need the type of the function. */
4244 type
= CALL_EXPR_FN (exp
); /* pointer expression */
4245 type
= TREE_TYPE (type
); /* pointer type */
4246 type
= TREE_TYPE (type
); /* function type */
4248 if (ix86_function_regparm (type
, NULL
) >= 3)
4250 /* ??? Need to count the actual number of registers to be used,
4251 not the possible number of registers. Fix later. */
4256 /* Dllimport'd functions are also called indirectly. */
4257 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4259 && decl
&& DECL_DLLIMPORT_P (decl
)
4260 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
4263 /* If we need to align the outgoing stack, then sibcalling would
4264 unalign the stack, which may break the called function. */
4265 if (ix86_incoming_stack_boundary
< PREFERRED_STACK_BOUNDARY
)
4268 /* Otherwise okay. That also includes certain types of indirect calls. */
4272 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4273 calling convention attributes;
4274 arguments as in struct attribute_spec.handler. */
4277 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4279 int flags ATTRIBUTE_UNUSED
,
4282 if (TREE_CODE (*node
) != FUNCTION_TYPE
4283 && TREE_CODE (*node
) != METHOD_TYPE
4284 && TREE_CODE (*node
) != FIELD_DECL
4285 && TREE_CODE (*node
) != TYPE_DECL
)
4287 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4289 *no_add_attrs
= true;
4293 /* Can combine regparm with all attributes but fastcall. */
4294 if (is_attribute_p ("regparm", name
))
4298 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4300 error ("fastcall and regparm attributes are not compatible");
4303 cst
= TREE_VALUE (args
);
4304 if (TREE_CODE (cst
) != INTEGER_CST
)
4306 warning (OPT_Wattributes
,
4307 "%qE attribute requires an integer constant argument",
4309 *no_add_attrs
= true;
4311 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4313 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4315 *no_add_attrs
= true;
4323 /* Do not warn when emulating the MS ABI. */
4324 if (TREE_CODE (*node
) != FUNCTION_TYPE
|| ix86_function_type_abi (*node
)!=MS_ABI
)
4325 warning (OPT_Wattributes
, "%qE attribute ignored",
4327 *no_add_attrs
= true;
4331 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4332 if (is_attribute_p ("fastcall", name
))
4334 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4336 error ("fastcall and cdecl attributes are not compatible");
4338 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4340 error ("fastcall and stdcall attributes are not compatible");
4342 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
4344 error ("fastcall and regparm attributes are not compatible");
4348 /* Can combine stdcall with fastcall (redundant), regparm and
4350 else if (is_attribute_p ("stdcall", name
))
4352 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4354 error ("stdcall and cdecl attributes are not compatible");
4356 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4358 error ("stdcall and fastcall attributes are not compatible");
4362 /* Can combine cdecl with regparm and sseregparm. */
4363 else if (is_attribute_p ("cdecl", name
))
4365 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4367 error ("stdcall and cdecl attributes are not compatible");
4369 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4371 error ("fastcall and cdecl attributes are not compatible");
4375 /* Can combine sseregparm with all attributes. */
4380 /* Return 0 if the attributes for two types are incompatible, 1 if they
4381 are compatible, and 2 if they are nearly compatible (which causes a
4382 warning to be generated). */
4385 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
4387 /* Check for mismatch of non-default calling convention. */
4388 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
4390 if (TREE_CODE (type1
) != FUNCTION_TYPE
4391 && TREE_CODE (type1
) != METHOD_TYPE
)
4394 /* Check for mismatched fastcall/regparm types. */
4395 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
4396 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
4397 || (ix86_function_regparm (type1
, NULL
)
4398 != ix86_function_regparm (type2
, NULL
)))
4401 /* Check for mismatched sseregparm types. */
4402 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
4403 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
4406 /* Check for mismatched return types (cdecl vs stdcall). */
4407 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
4408 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
4414 /* Return the regparm value for a function with the indicated TYPE and DECL.
4415 DECL may be NULL when calling function indirectly
4416 or considering a libcall. */
4419 ix86_function_regparm (const_tree type
, const_tree decl
)
4424 static bool error_issued
;
4427 return (ix86_function_type_abi (type
) == SYSV_ABI
4428 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
4430 regparm
= ix86_regparm
;
4431 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
4435 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
4437 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
)
4439 /* We can't use regparm(3) for nested functions because
4440 these pass static chain pointer in %ecx register. */
4441 if (!error_issued
&& regparm
== 3
4442 && decl_function_context (decl
)
4443 && !DECL_NO_STATIC_CHAIN (decl
))
4445 error ("nested functions are limited to 2 register parameters");
4446 error_issued
= true;
4454 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
4457 /* Use register calling convention for local functions when possible. */
4459 && TREE_CODE (decl
) == FUNCTION_DECL
4463 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4464 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
4467 int local_regparm
, globals
= 0, regno
;
4470 /* Make sure no regparm register is taken by a
4471 fixed register variable. */
4472 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
4473 if (fixed_regs
[local_regparm
])
4476 /* We can't use regparm(3) for nested functions as these use
4477 static chain pointer in third argument. */
4478 if (local_regparm
== 3
4479 && decl_function_context (decl
)
4480 && !DECL_NO_STATIC_CHAIN (decl
))
4483 /* If the function realigns its stackpointer, the prologue will
4484 clobber %ecx. If we've already generated code for the callee,
4485 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4486 scanning the attributes for the self-realigning property. */
4487 f
= DECL_STRUCT_FUNCTION (decl
);
4488 /* Since current internal arg pointer won't conflict with
4489 parameter passing regs, so no need to change stack
4490 realignment and adjust regparm number.
4492 Each fixed register usage increases register pressure,
4493 so less registers should be used for argument passing.
4494 This functionality can be overriden by an explicit
4496 for (regno
= 0; regno
<= DI_REG
; regno
++)
4497 if (fixed_regs
[regno
])
4501 = globals
< local_regparm
? local_regparm
- globals
: 0;
4503 if (local_regparm
> regparm
)
4504 regparm
= local_regparm
;
4511 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4512 DFmode (2) arguments in SSE registers for a function with the
4513 indicated TYPE and DECL. DECL may be NULL when calling function
4514 indirectly or considering a libcall. Otherwise return 0. */
4517 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
4519 gcc_assert (!TARGET_64BIT
);
4521 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4522 by the sseregparm attribute. */
4523 if (TARGET_SSEREGPARM
4524 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
4531 error ("Calling %qD with attribute sseregparm without "
4532 "SSE/SSE2 enabled", decl
);
4534 error ("Calling %qT with attribute sseregparm without "
4535 "SSE/SSE2 enabled", type
);
4543 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4544 (and DFmode for SSE2) arguments in SSE registers. */
4545 if (decl
&& TARGET_SSE_MATH
&& optimize
&& !profile_flag
)
4547 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4548 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
4550 return TARGET_SSE2
? 2 : 1;
4556 /* Return true if EAX is live at the start of the function. Used by
4557 ix86_expand_prologue to determine if we need special help before
4558 calling allocate_stack_worker. */
4561 ix86_eax_live_at_start_p (void)
4563 /* Cheat. Don't bother working forward from ix86_function_regparm
4564 to the function type to whether an actual argument is located in
4565 eax. Instead just look at cfg info, which is still close enough
4566 to correct at this point. This gives false positives for broken
4567 functions that might use uninitialized data that happens to be
4568 allocated in eax, but who cares? */
4569 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
4572 /* Value is the number of bytes of arguments automatically
4573 popped when returning from a subroutine call.
4574 FUNDECL is the declaration node of the function (as a tree),
4575 FUNTYPE is the data type of the function (as a tree),
4576 or for a library call it is an identifier node for the subroutine name.
4577 SIZE is the number of bytes of arguments passed on the stack.
4579 On the 80386, the RTD insn may be used to pop them if the number
4580 of args is fixed, but if the number is variable then the caller
4581 must pop them all. RTD can't be used for library calls now
4582 because the library is compiled with the Unix compiler.
4583 Use of RTD is a selectable option, since it is incompatible with
4584 standard Unix calling sequences. If the option is not selected,
4585 the caller must always pop the args.
4587 The attribute stdcall is equivalent to RTD on a per module basis. */
4590 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
4594 /* None of the 64-bit ABIs pop arguments. */
4598 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
4600 /* Cdecl functions override -mrtd, and never pop the stack. */
4601 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
4603 /* Stdcall and fastcall functions will pop the stack if not
4605 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
4606 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
4609 if (rtd
&& ! stdarg_p (funtype
))
4613 /* Lose any fake structure return argument if it is passed on the stack. */
4614 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
4615 && !KEEP_AGGREGATE_RETURN_POINTER
)
4617 int nregs
= ix86_function_regparm (funtype
, fundecl
);
4619 return GET_MODE_SIZE (Pmode
);
4625 /* Argument support functions. */
4627 /* Return true when register may be used to pass function parameters. */
4629 ix86_function_arg_regno_p (int regno
)
4632 const int *parm_regs
;
4637 return (regno
< REGPARM_MAX
4638 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
4640 return (regno
< REGPARM_MAX
4641 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
4642 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
4643 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
4644 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
4649 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
4654 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
4655 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
4659 /* TODO: The function should depend on current function ABI but
4660 builtins.c would need updating then. Therefore we use the
4663 /* RAX is used as hidden argument to va_arg functions. */
4664 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
4667 if (ix86_abi
== MS_ABI
)
4668 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
4670 parm_regs
= x86_64_int_parameter_registers
;
4671 for (i
= 0; i
< (ix86_abi
== MS_ABI
4672 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
4673 if (regno
== parm_regs
[i
])
4678 /* Return if we do not know how to pass TYPE solely in registers. */
4681 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
4683 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
4686 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4687 The layout_type routine is crafty and tries to trick us into passing
4688 currently unsupported vector types on the stack by using TImode. */
4689 return (!TARGET_64BIT
&& mode
== TImode
4690 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
4693 /* It returns the size, in bytes, of the area reserved for arguments passed
4694 in registers for the function represented by fndecl dependent to the used
4697 ix86_reg_parm_stack_space (const_tree fndecl
)
4699 enum calling_abi call_abi
= SYSV_ABI
;
4700 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
4701 call_abi
= ix86_function_abi (fndecl
);
4703 call_abi
= ix86_function_type_abi (fndecl
);
4704 if (call_abi
== MS_ABI
)
4709 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4712 ix86_function_type_abi (const_tree fntype
)
4714 if (TARGET_64BIT
&& fntype
!= NULL
)
4716 enum calling_abi abi
= ix86_abi
;
4717 if (abi
== SYSV_ABI
)
4719 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
4722 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
4729 static enum calling_abi
4730 ix86_function_abi (const_tree fndecl
)
4734 return ix86_function_type_abi (TREE_TYPE (fndecl
));
4737 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4740 ix86_cfun_abi (void)
4742 if (! cfun
|| ! TARGET_64BIT
)
4744 return cfun
->machine
->call_abi
;
4748 extern void init_regs (void);
4750 /* Implementation of call abi switching target hook. Specific to FNDECL
4751 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4752 for more details. */
4754 ix86_call_abi_override (const_tree fndecl
)
4756 if (fndecl
== NULL_TREE
)
4757 cfun
->machine
->call_abi
= ix86_abi
;
4759 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
4762 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4763 re-initialization of init_regs each time we switch function context since
4764 this is needed only during RTL expansion. */
4766 ix86_maybe_switch_abi (void)
4769 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
4773 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4774 for a call to a function whose data type is FNTYPE.
4775 For a library call, FNTYPE is 0. */
4778 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
4779 tree fntype
, /* tree ptr for function decl */
4780 rtx libname
, /* SYMBOL_REF of library name or 0 */
4783 struct cgraph_local_info
*i
= fndecl
? cgraph_local_info (fndecl
) : NULL
;
4784 memset (cum
, 0, sizeof (*cum
));
4787 cum
->call_abi
= ix86_function_abi (fndecl
);
4789 cum
->call_abi
= ix86_function_type_abi (fntype
);
4790 /* Set up the number of registers to use for passing arguments. */
4792 if (cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
4793 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4794 cum
->nregs
= ix86_regparm
;
4797 if (cum
->call_abi
!= ix86_abi
)
4798 cum
->nregs
= (ix86_abi
!= SYSV_ABI
4799 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
4803 cum
->sse_nregs
= SSE_REGPARM_MAX
;
4806 if (cum
->call_abi
!= ix86_abi
)
4807 cum
->sse_nregs
= (ix86_abi
!= SYSV_ABI
4808 ? X86_64_SSE_REGPARM_MAX
4809 : X86_64_MS_SSE_REGPARM_MAX
);
4813 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
4814 cum
->warn_avx
= true;
4815 cum
->warn_sse
= true;
4816 cum
->warn_mmx
= true;
4818 /* Because type might mismatch in between caller and callee, we need to
4819 use actual type of function for local calls.
4820 FIXME: cgraph_analyze can be told to actually record if function uses
4821 va_start so for local functions maybe_vaarg can be made aggressive
4823 FIXME: once typesytem is fixed, we won't need this code anymore. */
4825 fntype
= TREE_TYPE (fndecl
);
4826 cum
->maybe_vaarg
= (fntype
4827 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
4832 /* If there are variable arguments, then we won't pass anything
4833 in registers in 32-bit mode. */
4834 if (stdarg_p (fntype
))
4845 /* Use ecx and edx registers if function has fastcall attribute,
4846 else look for regparm information. */
4849 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
4855 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
4858 /* Set up the number of SSE registers used for passing SFmode
4859 and DFmode arguments. Warn for mismatching ABI. */
4860 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
4864 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4865 But in the case of vector types, it is some vector mode.
4867 When we have only some of our vector isa extensions enabled, then there
4868 are some modes for which vector_mode_supported_p is false. For these
4869 modes, the generic vector support in gcc will choose some non-vector mode
4870 in order to implement the type. By computing the natural mode, we'll
4871 select the proper ABI location for the operand and not depend on whatever
4872 the middle-end decides to do with these vector types.
4874 The midde-end can't deal with the vector types > 16 bytes. In this
4875 case, we return the original mode and warn ABI change if CUM isn't
4878 static enum machine_mode
4879 type_natural_mode (const_tree type
, CUMULATIVE_ARGS
*cum
)
4881 enum machine_mode mode
= TYPE_MODE (type
);
4883 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
4885 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4886 if ((size
== 8 || size
== 16 || size
== 32)
4887 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4888 && TYPE_VECTOR_SUBPARTS (type
) > 1)
4890 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
4892 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
4893 mode
= MIN_MODE_VECTOR_FLOAT
;
4895 mode
= MIN_MODE_VECTOR_INT
;
4897 /* Get the mode which has this inner mode and number of units. */
4898 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
4899 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
4900 && GET_MODE_INNER (mode
) == innermode
)
4902 if (size
== 32 && !TARGET_AVX
)
4904 static bool warnedavx
;
4911 warning (0, "AVX vector argument without AVX "
4912 "enabled changes the ABI");
4914 return TYPE_MODE (type
);
4927 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4928 this may not agree with the mode that the type system has chosen for the
4929 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4930 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4933 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
4938 if (orig_mode
!= BLKmode
)
4939 tmp
= gen_rtx_REG (orig_mode
, regno
);
4942 tmp
= gen_rtx_REG (mode
, regno
);
4943 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
4944 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
4950 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4951 of this code is to classify each 8bytes of incoming argument by the register
4952 class and assign registers accordingly. */
4954 /* Return the union class of CLASS1 and CLASS2.
4955 See the x86-64 PS ABI for details. */
4957 static enum x86_64_reg_class
4958 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
4960 /* Rule #1: If both classes are equal, this is the resulting class. */
4961 if (class1
== class2
)
4964 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4966 if (class1
== X86_64_NO_CLASS
)
4968 if (class2
== X86_64_NO_CLASS
)
4971 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4972 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
4973 return X86_64_MEMORY_CLASS
;
4975 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4976 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
4977 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
4978 return X86_64_INTEGERSI_CLASS
;
4979 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
4980 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
4981 return X86_64_INTEGER_CLASS
;
4983 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4985 if (class1
== X86_64_X87_CLASS
4986 || class1
== X86_64_X87UP_CLASS
4987 || class1
== X86_64_COMPLEX_X87_CLASS
4988 || class2
== X86_64_X87_CLASS
4989 || class2
== X86_64_X87UP_CLASS
4990 || class2
== X86_64_COMPLEX_X87_CLASS
)
4991 return X86_64_MEMORY_CLASS
;
4993 /* Rule #6: Otherwise class SSE is used. */
4994 return X86_64_SSE_CLASS
;
4997 /* Classify the argument of type TYPE and mode MODE.
4998 CLASSES will be filled by the register class used to pass each word
4999 of the operand. The number of words is returned. In case the parameter
5000 should be passed in memory, 0 is returned. As a special case for zero
5001 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5003 BIT_OFFSET is used internally for handling records and specifies offset
5004 of the offset in bits modulo 256 to avoid overflow cases.
5006 See the x86-64 PS ABI for details.
5010 classify_argument (enum machine_mode mode
, const_tree type
,
5011 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5013 HOST_WIDE_INT bytes
=
5014 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5015 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5017 /* Variable sized entities are always passed/returned in memory. */
5021 if (mode
!= VOIDmode
5022 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5025 if (type
&& AGGREGATE_TYPE_P (type
))
5029 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5031 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5035 for (i
= 0; i
< words
; i
++)
5036 classes
[i
] = X86_64_NO_CLASS
;
5038 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5039 signalize memory class, so handle it as special case. */
5042 classes
[0] = X86_64_NO_CLASS
;
5046 /* Classify each field of record and merge classes. */
5047 switch (TREE_CODE (type
))
5050 /* And now merge the fields of structure. */
5051 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
5053 if (TREE_CODE (field
) == FIELD_DECL
)
5057 if (TREE_TYPE (field
) == error_mark_node
)
5060 /* Bitfields are always classified as integer. Handle them
5061 early, since later code would consider them to be
5062 misaligned integers. */
5063 if (DECL_BIT_FIELD (field
))
5065 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
5066 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5067 + tree_low_cst (DECL_SIZE (field
), 0)
5070 merge_classes (X86_64_INTEGER_CLASS
,
5077 type
= TREE_TYPE (field
);
5079 /* Flexible array member is ignored. */
5080 if (TYPE_MODE (type
) == BLKmode
5081 && TREE_CODE (type
) == ARRAY_TYPE
5082 && TYPE_SIZE (type
) == NULL_TREE
5083 && TYPE_DOMAIN (type
) != NULL_TREE
5084 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
5089 if (!warned
&& warn_psabi
)
5092 inform (input_location
,
5093 "The ABI of passing struct with"
5094 " a flexible array member has"
5095 " changed in GCC 4.4");
5099 num
= classify_argument (TYPE_MODE (type
), type
,
5101 (int_bit_position (field
)
5102 + bit_offset
) % 256);
5105 pos
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
5106 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
5108 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
5115 /* Arrays are handled as small records. */
5118 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
5119 TREE_TYPE (type
), subclasses
, bit_offset
);
5123 /* The partial classes are now full classes. */
5124 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
5125 subclasses
[0] = X86_64_SSE_CLASS
;
5126 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
5127 && !((bit_offset
% 64) == 0 && bytes
== 4))
5128 subclasses
[0] = X86_64_INTEGER_CLASS
;
5130 for (i
= 0; i
< words
; i
++)
5131 classes
[i
] = subclasses
[i
% num
];
5136 case QUAL_UNION_TYPE
:
5137 /* Unions are similar to RECORD_TYPE but offset is always 0.
5139 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
5141 if (TREE_CODE (field
) == FIELD_DECL
)
5145 if (TREE_TYPE (field
) == error_mark_node
)
5148 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
5149 TREE_TYPE (field
), subclasses
,
5153 for (i
= 0; i
< num
; i
++)
5154 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
5165 /* When size > 16 bytes, if the first one isn't
5166 X86_64_SSE_CLASS or any other ones aren't
5167 X86_64_SSEUP_CLASS, everything should be passed in
5169 if (classes
[0] != X86_64_SSE_CLASS
)
5172 for (i
= 1; i
< words
; i
++)
5173 if (classes
[i
] != X86_64_SSEUP_CLASS
)
5177 /* Final merger cleanup. */
5178 for (i
= 0; i
< words
; i
++)
5180 /* If one class is MEMORY, everything should be passed in
5182 if (classes
[i
] == X86_64_MEMORY_CLASS
)
5185 /* The X86_64_SSEUP_CLASS should be always preceded by
5186 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5187 if (classes
[i
] == X86_64_SSEUP_CLASS
5188 && classes
[i
- 1] != X86_64_SSE_CLASS
5189 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
5191 /* The first one should never be X86_64_SSEUP_CLASS. */
5192 gcc_assert (i
!= 0);
5193 classes
[i
] = X86_64_SSE_CLASS
;
5196 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5197 everything should be passed in memory. */
5198 if (classes
[i
] == X86_64_X87UP_CLASS
5199 && (classes
[i
- 1] != X86_64_X87_CLASS
))
5203 /* The first one should never be X86_64_X87UP_CLASS. */
5204 gcc_assert (i
!= 0);
5205 if (!warned
&& warn_psabi
)
5208 inform (input_location
,
5209 "The ABI of passing union with long double"
5210 " has changed in GCC 4.4");
5218 /* Compute alignment needed. We align all types to natural boundaries with
5219 exception of XFmode that is aligned to 64bits. */
5220 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
5222 int mode_alignment
= GET_MODE_BITSIZE (mode
);
5225 mode_alignment
= 128;
5226 else if (mode
== XCmode
)
5227 mode_alignment
= 256;
5228 if (COMPLEX_MODE_P (mode
))
5229 mode_alignment
/= 2;
5230 /* Misaligned fields are always returned in memory. */
5231 if (bit_offset
% mode_alignment
)
5235 /* for V1xx modes, just use the base mode */
5236 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
5237 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
5238 mode
= GET_MODE_INNER (mode
);
5240 /* Classification of atomic types. */
5245 classes
[0] = X86_64_SSE_CLASS
;
5248 classes
[0] = X86_64_SSE_CLASS
;
5249 classes
[1] = X86_64_SSEUP_CLASS
;
5259 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
5263 classes
[0] = X86_64_INTEGERSI_CLASS
;
5266 else if (size
<= 64)
5268 classes
[0] = X86_64_INTEGER_CLASS
;
5271 else if (size
<= 64+32)
5273 classes
[0] = X86_64_INTEGER_CLASS
;
5274 classes
[1] = X86_64_INTEGERSI_CLASS
;
5277 else if (size
<= 64+64)
5279 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
5287 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
5291 /* OImode shouldn't be used directly. */
5296 if (!(bit_offset
% 64))
5297 classes
[0] = X86_64_SSESF_CLASS
;
5299 classes
[0] = X86_64_SSE_CLASS
;
5302 classes
[0] = X86_64_SSEDF_CLASS
;
5305 classes
[0] = X86_64_X87_CLASS
;
5306 classes
[1] = X86_64_X87UP_CLASS
;
5309 classes
[0] = X86_64_SSE_CLASS
;
5310 classes
[1] = X86_64_SSEUP_CLASS
;
5313 classes
[0] = X86_64_SSE_CLASS
;
5314 if (!(bit_offset
% 64))
5320 if (!warned
&& warn_psabi
)
5323 inform (input_location
,
5324 "The ABI of passing structure with complex float"
5325 " member has changed in GCC 4.4");
5327 classes
[1] = X86_64_SSESF_CLASS
;
5331 classes
[0] = X86_64_SSEDF_CLASS
;
5332 classes
[1] = X86_64_SSEDF_CLASS
;
5335 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
5338 /* This modes is larger than 16 bytes. */
5346 classes
[0] = X86_64_SSE_CLASS
;
5347 classes
[1] = X86_64_SSEUP_CLASS
;
5348 classes
[2] = X86_64_SSEUP_CLASS
;
5349 classes
[3] = X86_64_SSEUP_CLASS
;
5357 classes
[0] = X86_64_SSE_CLASS
;
5358 classes
[1] = X86_64_SSEUP_CLASS
;
5365 classes
[0] = X86_64_SSE_CLASS
;
5371 gcc_assert (VECTOR_MODE_P (mode
));
5376 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
5378 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
5379 classes
[0] = X86_64_INTEGERSI_CLASS
;
5381 classes
[0] = X86_64_INTEGER_CLASS
;
5382 classes
[1] = X86_64_INTEGER_CLASS
;
5383 return 1 + (bytes
> 8);
5387 /* Examine the argument and return set number of register required in each
5388 class. Return 0 iff parameter should be passed in memory. */
5390 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
5391 int *int_nregs
, int *sse_nregs
)
5393 enum x86_64_reg_class regclass
[MAX_CLASSES
];
5394 int n
= classify_argument (mode
, type
, regclass
, 0);
5400 for (n
--; n
>= 0; n
--)
5401 switch (regclass
[n
])
5403 case X86_64_INTEGER_CLASS
:
5404 case X86_64_INTEGERSI_CLASS
:
5407 case X86_64_SSE_CLASS
:
5408 case X86_64_SSESF_CLASS
:
5409 case X86_64_SSEDF_CLASS
:
5412 case X86_64_NO_CLASS
:
5413 case X86_64_SSEUP_CLASS
:
5415 case X86_64_X87_CLASS
:
5416 case X86_64_X87UP_CLASS
:
5420 case X86_64_COMPLEX_X87_CLASS
:
5421 return in_return
? 2 : 0;
5422 case X86_64_MEMORY_CLASS
:
5428 /* Construct container for the argument used by GCC interface. See
5429 FUNCTION_ARG for the detailed description. */
5432 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
5433 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
5434 const int *intreg
, int sse_regno
)
5436 /* The following variables hold the static issued_error state. */
5437 static bool issued_sse_arg_error
;
5438 static bool issued_sse_ret_error
;
5439 static bool issued_x87_ret_error
;
5441 enum machine_mode tmpmode
;
5443 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5444 enum x86_64_reg_class regclass
[MAX_CLASSES
];
5448 int needed_sseregs
, needed_intregs
;
5449 rtx exp
[MAX_CLASSES
];
5452 n
= classify_argument (mode
, type
, regclass
, 0);
5455 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
5458 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
5461 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5462 some less clueful developer tries to use floating-point anyway. */
5463 if (needed_sseregs
&& !TARGET_SSE
)
5467 if (!issued_sse_ret_error
)
5469 error ("SSE register return with SSE disabled");
5470 issued_sse_ret_error
= true;
5473 else if (!issued_sse_arg_error
)
5475 error ("SSE register argument with SSE disabled");
5476 issued_sse_arg_error
= true;
5481 /* Likewise, error if the ABI requires us to return values in the
5482 x87 registers and the user specified -mno-80387. */
5483 if (!TARGET_80387
&& in_return
)
5484 for (i
= 0; i
< n
; i
++)
5485 if (regclass
[i
] == X86_64_X87_CLASS
5486 || regclass
[i
] == X86_64_X87UP_CLASS
5487 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
5489 if (!issued_x87_ret_error
)
5491 error ("x87 register return with x87 disabled");
5492 issued_x87_ret_error
= true;
5497 /* First construct simple cases. Avoid SCmode, since we want to use
5498 single register to pass this type. */
5499 if (n
== 1 && mode
!= SCmode
)
5500 switch (regclass
[0])
5502 case X86_64_INTEGER_CLASS
:
5503 case X86_64_INTEGERSI_CLASS
:
5504 return gen_rtx_REG (mode
, intreg
[0]);
5505 case X86_64_SSE_CLASS
:
5506 case X86_64_SSESF_CLASS
:
5507 case X86_64_SSEDF_CLASS
:
5508 if (mode
!= BLKmode
)
5509 return gen_reg_or_parallel (mode
, orig_mode
,
5510 SSE_REGNO (sse_regno
));
5512 case X86_64_X87_CLASS
:
5513 case X86_64_COMPLEX_X87_CLASS
:
5514 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
5515 case X86_64_NO_CLASS
:
5516 /* Zero sized array, struct or class. */
5521 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
5522 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
5523 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
5525 && regclass
[0] == X86_64_SSE_CLASS
5526 && regclass
[1] == X86_64_SSEUP_CLASS
5527 && regclass
[2] == X86_64_SSEUP_CLASS
5528 && regclass
[3] == X86_64_SSEUP_CLASS
5530 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
5533 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
5534 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
5535 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
5536 && regclass
[1] == X86_64_INTEGER_CLASS
5537 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
5538 && intreg
[0] + 1 == intreg
[1])
5539 return gen_rtx_REG (mode
, intreg
[0]);
5541 /* Otherwise figure out the entries of the PARALLEL. */
5542 for (i
= 0; i
< n
; i
++)
5546 switch (regclass
[i
])
5548 case X86_64_NO_CLASS
:
5550 case X86_64_INTEGER_CLASS
:
5551 case X86_64_INTEGERSI_CLASS
:
5552 /* Merge TImodes on aligned occasions here too. */
5553 if (i
* 8 + 8 > bytes
)
5554 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
5555 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
5559 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5560 if (tmpmode
== BLKmode
)
5562 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
5563 gen_rtx_REG (tmpmode
, *intreg
),
5567 case X86_64_SSESF_CLASS
:
5568 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
5569 gen_rtx_REG (SFmode
,
5570 SSE_REGNO (sse_regno
)),
5574 case X86_64_SSEDF_CLASS
:
5575 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
5576 gen_rtx_REG (DFmode
,
5577 SSE_REGNO (sse_regno
)),
5581 case X86_64_SSE_CLASS
:
5589 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
5599 && regclass
[1] == X86_64_SSEUP_CLASS
5600 && regclass
[2] == X86_64_SSEUP_CLASS
5601 && regclass
[3] == X86_64_SSEUP_CLASS
);
5608 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
5609 gen_rtx_REG (tmpmode
,
5610 SSE_REGNO (sse_regno
)),
5619 /* Empty aligned struct, union or class. */
5623 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
5624 for (i
= 0; i
< nexps
; i
++)
5625 XVECEXP (ret
, 0, i
) = exp
[i
];
5629 /* Update the data in CUM to advance over an argument of mode MODE
5630 and data type TYPE. (TYPE is null for libcalls where that information
5631 may not be available.) */
5634 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
5635 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
5651 cum
->words
+= words
;
5652 cum
->nregs
-= words
;
5653 cum
->regno
+= words
;
5655 if (cum
->nregs
<= 0)
5663 /* OImode shouldn't be used directly. */
5667 if (cum
->float_in_sse
< 2)
5670 if (cum
->float_in_sse
< 1)
5687 if (!type
|| !AGGREGATE_TYPE_P (type
))
5689 cum
->sse_words
+= words
;
5690 cum
->sse_nregs
-= 1;
5691 cum
->sse_regno
+= 1;
5692 if (cum
->sse_nregs
<= 0)
5705 if (!type
|| !AGGREGATE_TYPE_P (type
))
5707 cum
->mmx_words
+= words
;
5708 cum
->mmx_nregs
-= 1;
5709 cum
->mmx_regno
+= 1;
5710 if (cum
->mmx_nregs
<= 0)
5721 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
5722 tree type
, HOST_WIDE_INT words
, int named
)
5724 int int_nregs
, sse_nregs
;
5726 /* Unnamed 256bit vector mode parameters are passed on stack. */
5727 if (!named
&& VALID_AVX256_REG_MODE (mode
))
5730 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
5731 cum
->words
+= words
;
5732 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
5734 cum
->nregs
-= int_nregs
;
5735 cum
->sse_nregs
-= sse_nregs
;
5736 cum
->regno
+= int_nregs
;
5737 cum
->sse_regno
+= sse_nregs
;
5740 cum
->words
+= words
;
5744 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
5745 HOST_WIDE_INT words
)
5747 /* Otherwise, this should be passed indirect. */
5748 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
5750 cum
->words
+= words
;
5759 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
5760 tree type
, int named
)
5762 HOST_WIDE_INT bytes
, words
;
5764 if (mode
== BLKmode
)
5765 bytes
= int_size_in_bytes (type
);
5767 bytes
= GET_MODE_SIZE (mode
);
5768 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5771 mode
= type_natural_mode (type
, NULL
);
5773 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
5774 function_arg_advance_ms_64 (cum
, bytes
, words
);
5775 else if (TARGET_64BIT
)
5776 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
5778 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
5781 /* Define where to put the arguments to a function.
5782 Value is zero to push the argument on the stack,
5783 or a hard register in which to store the argument.
5785 MODE is the argument's machine mode.
5786 TYPE is the data type of the argument (as a tree).
5787 This is null for libcalls where that information may
5789 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5790 the preceding args and about the function being called.
5791 NAMED is nonzero if this argument is a named parameter
5792 (otherwise it is an extra parameter matching an ellipsis). */
5795 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
5796 enum machine_mode orig_mode
, tree type
,
5797 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
5799 static bool warnedsse
, warnedmmx
;
5801 /* Avoid the AL settings for the Unix64 ABI. */
5802 if (mode
== VOIDmode
)
5818 if (words
<= cum
->nregs
)
5820 int regno
= cum
->regno
;
5822 /* Fastcall allocates the first two DWORD (SImode) or
5823 smaller arguments to ECX and EDX if it isn't an
5829 || (type
&& AGGREGATE_TYPE_P (type
)))
5832 /* ECX not EAX is the first allocated register. */
5833 if (regno
== AX_REG
)
5836 return gen_rtx_REG (mode
, regno
);
5841 if (cum
->float_in_sse
< 2)
5844 if (cum
->float_in_sse
< 1)
5848 /* In 32bit, we pass TImode in xmm registers. */
5855 if (!type
|| !AGGREGATE_TYPE_P (type
))
5857 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
5860 warning (0, "SSE vector argument without SSE enabled "
5864 return gen_reg_or_parallel (mode
, orig_mode
,
5865 cum
->sse_regno
+ FIRST_SSE_REG
);
5870 /* OImode shouldn't be used directly. */
5879 if (!type
|| !AGGREGATE_TYPE_P (type
))
5882 return gen_reg_or_parallel (mode
, orig_mode
,
5883 cum
->sse_regno
+ FIRST_SSE_REG
);
5892 if (!type
|| !AGGREGATE_TYPE_P (type
))
5894 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
5897 warning (0, "MMX vector argument without MMX enabled "
5901 return gen_reg_or_parallel (mode
, orig_mode
,
5902 cum
->mmx_regno
+ FIRST_MMX_REG
);
5911 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
5912 enum machine_mode orig_mode
, tree type
, int named
)
5914 /* Handle a hidden AL argument containing number of registers
5915 for varargs x86-64 functions. */
5916 if (mode
== VOIDmode
)
5917 return GEN_INT (cum
->maybe_vaarg
5918 ? (cum
->sse_nregs
< 0
5919 ? (cum
->call_abi
== ix86_abi
5921 : (ix86_abi
!= SYSV_ABI
5922 ? X86_64_SSE_REGPARM_MAX
5923 : X86_64_MS_SSE_REGPARM_MAX
))
5938 /* Unnamed 256bit vector mode parameters are passed on stack. */
5944 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
5946 &x86_64_int_parameter_registers
[cum
->regno
],
5951 function_arg_ms_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
5952 enum machine_mode orig_mode
, int named
,
5953 HOST_WIDE_INT bytes
)
5957 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5958 We use value of -2 to specify that current function call is MSABI. */
5959 if (mode
== VOIDmode
)
5960 return GEN_INT (-2);
5962 /* If we've run out of registers, it goes on the stack. */
5963 if (cum
->nregs
== 0)
5966 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
5968 /* Only floating point modes are passed in anything but integer regs. */
5969 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
5972 regno
= cum
->regno
+ FIRST_SSE_REG
;
5977 /* Unnamed floating parameters are passed in both the
5978 SSE and integer registers. */
5979 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
5980 t2
= gen_rtx_REG (mode
, regno
);
5981 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
5982 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
5983 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
5986 /* Handle aggregated types passed in register. */
5987 if (orig_mode
== BLKmode
)
5989 if (bytes
> 0 && bytes
<= 8)
5990 mode
= (bytes
> 4 ? DImode
: SImode
);
5991 if (mode
== BLKmode
)
5995 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
5999 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
6000 tree type
, int named
)
6002 enum machine_mode mode
= omode
;
6003 HOST_WIDE_INT bytes
, words
;
6005 if (mode
== BLKmode
)
6006 bytes
= int_size_in_bytes (type
);
6008 bytes
= GET_MODE_SIZE (mode
);
6009 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6011 /* To simplify the code below, represent vector types with a vector mode
6012 even if MMX/SSE are not active. */
6013 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6014 mode
= type_natural_mode (type
, cum
);
6016 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6017 return function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
6018 else if (TARGET_64BIT
)
6019 return function_arg_64 (cum
, mode
, omode
, type
, named
);
6021 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
6024 /* A C expression that indicates when an argument must be passed by
6025 reference. If nonzero for an argument, a copy of that argument is
6026 made in memory and a pointer to the argument is passed instead of
6027 the argument itself. The pointer is passed in whatever way is
6028 appropriate for passing a pointer to that type. */
6031 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
6032 enum machine_mode mode ATTRIBUTE_UNUSED
,
6033 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6035 /* See Windows x64 Software Convention. */
6036 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6038 int msize
= (int) GET_MODE_SIZE (mode
);
6041 /* Arrays are passed by reference. */
6042 if (TREE_CODE (type
) == ARRAY_TYPE
)
6045 if (AGGREGATE_TYPE_P (type
))
6047 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6048 are passed by reference. */
6049 msize
= int_size_in_bytes (type
);
6053 /* __m128 is passed by reference. */
6055 case 1: case 2: case 4: case 8:
6061 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
6067 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6070 contains_aligned_value_p (tree type
)
6072 enum machine_mode mode
= TYPE_MODE (type
);
6073 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
6077 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
6079 if (TYPE_ALIGN (type
) < 128)
6082 if (AGGREGATE_TYPE_P (type
))
6084 /* Walk the aggregates recursively. */
6085 switch (TREE_CODE (type
))
6089 case QUAL_UNION_TYPE
:
6093 /* Walk all the structure fields. */
6094 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6096 if (TREE_CODE (field
) == FIELD_DECL
6097 && contains_aligned_value_p (TREE_TYPE (field
)))
6104 /* Just for use if some languages passes arrays by value. */
6105 if (contains_aligned_value_p (TREE_TYPE (type
)))
6116 /* Gives the alignment boundary, in bits, of an argument with the
6117 specified mode and type. */
6120 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
6125 /* Since canonical type is used for call, we convert it to
6126 canonical type if needed. */
6127 if (!TYPE_STRUCTURAL_EQUALITY_P (type
))
6128 type
= TYPE_CANONICAL (type
);
6129 align
= TYPE_ALIGN (type
);
6132 align
= GET_MODE_ALIGNMENT (mode
);
6133 if (align
< PARM_BOUNDARY
)
6134 align
= PARM_BOUNDARY
;
6135 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6136 natural boundaries. */
6137 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
6139 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6140 make an exception for SSE modes since these require 128bit
6143 The handling here differs from field_alignment. ICC aligns MMX
6144 arguments to 4 byte boundaries, while structure fields are aligned
6145 to 8 byte boundaries. */
6148 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
6149 align
= PARM_BOUNDARY
;
6153 if (!contains_aligned_value_p (type
))
6154 align
= PARM_BOUNDARY
;
6157 if (align
> BIGGEST_ALIGNMENT
)
6158 align
= BIGGEST_ALIGNMENT
;
6162 /* Return true if N is a possible register number of function value. */
6165 ix86_function_value_regno_p (int regno
)
6172 case FIRST_FLOAT_REG
:
6173 /* TODO: The function should depend on current function ABI but
6174 builtins.c would need updating then. Therefore we use the
6176 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
6178 return TARGET_FLOAT_RETURNS_IN_80387
;
6184 if (TARGET_MACHO
|| TARGET_64BIT
)
6192 /* Define how to find the value returned by a function.
6193 VALTYPE is the data type of the value (as a tree).
6194 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6195 otherwise, FUNC is 0. */
6198 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
6199 const_tree fntype
, const_tree fn
)
6203 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6204 we normally prevent this case when mmx is not available. However
6205 some ABIs may require the result to be returned like DImode. */
6206 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
6207 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
6209 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6210 we prevent this case when sse is not available. However some ABIs
6211 may require the result to be returned like integer TImode. */
6212 else if (mode
== TImode
6213 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
6214 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
6216 /* 32-byte vector modes in %ymm0. */
6217 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
6218 regno
= TARGET_AVX
? FIRST_SSE_REG
: 0;
6220 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6221 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
6222 regno
= FIRST_FLOAT_REG
;
6224 /* Most things go in %eax. */
6227 /* Override FP return register with %xmm0 for local functions when
6228 SSE math is enabled or for functions with sseregparm attribute. */
6229 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
6231 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
6232 if ((sse_level
>= 1 && mode
== SFmode
)
6233 || (sse_level
== 2 && mode
== DFmode
))
6234 regno
= FIRST_SSE_REG
;
6237 /* OImode shouldn't be used directly. */
6238 gcc_assert (mode
!= OImode
);
6240 return gen_rtx_REG (orig_mode
, regno
);
6244 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
6249 /* Handle libcalls, which don't provide a type node. */
6250 if (valtype
== NULL
)
6262 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
6265 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
6269 return gen_rtx_REG (mode
, AX_REG
);
6273 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
6274 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
6275 x86_64_int_return_registers
, 0);
6277 /* For zero sized structures, construct_container returns NULL, but we
6278 need to keep rest of compiler happy by returning meaningful value. */
6280 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
6286 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
6288 unsigned int regno
= AX_REG
;
6292 switch (GET_MODE_SIZE (mode
))
6295 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
6296 && !COMPLEX_MODE_P (mode
))
6297 regno
= FIRST_SSE_REG
;
6301 if (mode
== SFmode
|| mode
== DFmode
)
6302 regno
= FIRST_SSE_REG
;
6308 return gen_rtx_REG (orig_mode
, regno
);
6312 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
6313 enum machine_mode orig_mode
, enum machine_mode mode
)
6315 const_tree fn
, fntype
;
6318 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
6319 fn
= fntype_or_decl
;
6320 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
6322 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
6323 return function_value_ms_64 (orig_mode
, mode
);
6324 else if (TARGET_64BIT
)
6325 return function_value_64 (orig_mode
, mode
, valtype
);
6327 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
6331 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
6332 bool outgoing ATTRIBUTE_UNUSED
)
6334 enum machine_mode mode
, orig_mode
;
6336 orig_mode
= TYPE_MODE (valtype
);
6337 mode
= type_natural_mode (valtype
, NULL
);
6338 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
6342 ix86_libcall_value (enum machine_mode mode
)
6344 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
6347 /* Return true iff type is returned in memory. */
6349 static int ATTRIBUTE_UNUSED
6350 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
6354 if (mode
== BLKmode
)
6357 size
= int_size_in_bytes (type
);
6359 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
6362 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
6364 /* User-created vectors small enough to fit in EAX. */
6368 /* MMX/3dNow values are returned in MM0,
6369 except when it doesn't exits. */
6371 return (TARGET_MMX
? 0 : 1);
6373 /* SSE values are returned in XMM0, except when it doesn't exist. */
6375 return (TARGET_SSE
? 0 : 1);
6377 /* AVX values are returned in YMM0, except when it doesn't exist. */
6379 return TARGET_AVX
? 0 : 1;
6388 /* OImode shouldn't be used directly. */
6389 gcc_assert (mode
!= OImode
);
6394 static int ATTRIBUTE_UNUSED
6395 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
6397 int needed_intregs
, needed_sseregs
;
6398 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
6401 static int ATTRIBUTE_UNUSED
6402 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
6404 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6406 /* __m128 is returned in xmm0. */
6407 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
6408 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
6411 /* Otherwise, the size must be exactly in [1248]. */
6412 return (size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8);
6416 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
6418 #ifdef SUBTARGET_RETURN_IN_MEMORY
6419 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
6421 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
6425 if (ix86_function_type_abi (fntype
) == MS_ABI
)
6426 return return_in_memory_ms_64 (type
, mode
);
6428 return return_in_memory_64 (type
, mode
);
6431 return return_in_memory_32 (type
, mode
);
6435 /* Return false iff TYPE is returned in memory. This version is used
6436 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6437 but differs notably in that when MMX is available, 8-byte vectors
6438 are returned in memory, rather than in MMX registers. */
6441 ix86_sol10_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
6444 enum machine_mode mode
= type_natural_mode (type
, NULL
);
6447 return return_in_memory_64 (type
, mode
);
6449 if (mode
== BLKmode
)
6452 size
= int_size_in_bytes (type
);
6454 if (VECTOR_MODE_P (mode
))
6456 /* Return in memory only if MMX registers *are* available. This
6457 seems backwards, but it is consistent with the existing
6464 else if (mode
== TImode
)
6466 else if (mode
== XFmode
)
6472 /* When returning SSE vector types, we have a choice of either
6473 (1) being abi incompatible with a -march switch, or
6474 (2) generating an error.
6475 Given no good solution, I think the safest thing is one warning.
6476 The user won't be able to use -Werror, but....
6478 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6479 called in response to actually generating a caller or callee that
6480 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6481 via aggregate_value_p for general type probing from tree-ssa. */
6484 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
6486 static bool warnedsse
, warnedmmx
;
6488 if (!TARGET_64BIT
&& type
)
6490 /* Look at the return type of the function, not the function type. */
6491 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
6493 if (!TARGET_SSE
&& !warnedsse
)
6496 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
6499 warning (0, "SSE vector return without SSE enabled "
6504 if (!TARGET_MMX
&& !warnedmmx
)
6506 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
6509 warning (0, "MMX vector return without MMX enabled "
6519 /* Create the va_list data type. */
6521 /* Returns the calling convention specific va_list date type.
6522 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6525 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
6527 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
6529 /* For i386 we use plain pointer to argument area. */
6530 if (!TARGET_64BIT
|| abi
== MS_ABI
)
6531 return build_pointer_type (char_type_node
);
6533 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
6534 type_decl
= build_decl (BUILTINS_LOCATION
,
6535 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
6537 f_gpr
= build_decl (BUILTINS_LOCATION
,
6538 FIELD_DECL
, get_identifier ("gp_offset"),
6539 unsigned_type_node
);
6540 f_fpr
= build_decl (BUILTINS_LOCATION
,
6541 FIELD_DECL
, get_identifier ("fp_offset"),
6542 unsigned_type_node
);
6543 f_ovf
= build_decl (BUILTINS_LOCATION
,
6544 FIELD_DECL
, get_identifier ("overflow_arg_area"),
6546 f_sav
= build_decl (BUILTINS_LOCATION
,
6547 FIELD_DECL
, get_identifier ("reg_save_area"),
6550 va_list_gpr_counter_field
= f_gpr
;
6551 va_list_fpr_counter_field
= f_fpr
;
6553 DECL_FIELD_CONTEXT (f_gpr
) = record
;
6554 DECL_FIELD_CONTEXT (f_fpr
) = record
;
6555 DECL_FIELD_CONTEXT (f_ovf
) = record
;
6556 DECL_FIELD_CONTEXT (f_sav
) = record
;
6558 TREE_CHAIN (record
) = type_decl
;
6559 TYPE_NAME (record
) = type_decl
;
6560 TYPE_FIELDS (record
) = f_gpr
;
6561 TREE_CHAIN (f_gpr
) = f_fpr
;
6562 TREE_CHAIN (f_fpr
) = f_ovf
;
6563 TREE_CHAIN (f_ovf
) = f_sav
;
6565 layout_type (record
);
6567 /* The correct type is an array type of one element. */
6568 return build_array_type (record
, build_index_type (size_zero_node
));
6571 /* Setup the builtin va_list data type and for 64-bit the additional
6572 calling convention specific va_list data types. */
6575 ix86_build_builtin_va_list (void)
6577 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
6579 /* Initialize abi specific va_list builtin types. */
6583 if (ix86_abi
== MS_ABI
)
6585 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
6586 if (TREE_CODE (t
) != RECORD_TYPE
)
6587 t
= build_variant_type_copy (t
);
6588 sysv_va_list_type_node
= t
;
6593 if (TREE_CODE (t
) != RECORD_TYPE
)
6594 t
= build_variant_type_copy (t
);
6595 sysv_va_list_type_node
= t
;
6597 if (ix86_abi
!= MS_ABI
)
6599 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
6600 if (TREE_CODE (t
) != RECORD_TYPE
)
6601 t
= build_variant_type_copy (t
);
6602 ms_va_list_type_node
= t
;
6607 if (TREE_CODE (t
) != RECORD_TYPE
)
6608 t
= build_variant_type_copy (t
);
6609 ms_va_list_type_node
= t
;
6616 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6619 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
6628 int regparm
= ix86_regparm
;
6630 if (cum
->call_abi
!= ix86_abi
)
6631 regparm
= (ix86_abi
!= SYSV_ABI
6632 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
6634 /* GPR size of varargs save area. */
6635 if (cfun
->va_list_gpr_size
)
6636 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
6638 ix86_varargs_gpr_size
= 0;
6640 /* FPR size of varargs save area. We don't need it if we don't pass
6641 anything in SSE registers. */
6642 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
6643 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
6645 ix86_varargs_fpr_size
= 0;
6647 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
6650 save_area
= frame_pointer_rtx
;
6651 set
= get_varargs_alias_set ();
6653 for (i
= cum
->regno
;
6655 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
6658 mem
= gen_rtx_MEM (Pmode
,
6659 plus_constant (save_area
, i
* UNITS_PER_WORD
));
6660 MEM_NOTRAP_P (mem
) = 1;
6661 set_mem_alias_set (mem
, set
);
6662 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
6663 x86_64_int_parameter_registers
[i
]));
6666 if (ix86_varargs_fpr_size
)
6668 /* Now emit code to save SSE registers. The AX parameter contains number
6669 of SSE parameter registers used to call this function. We use
6670 sse_prologue_save insn template that produces computed jump across
6671 SSE saves. We need some preparation work to get this working. */
6673 label
= gen_label_rtx ();
6674 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
6676 /* Compute address to jump to :
6677 label - eax*4 + nnamed_sse_arguments*4 Or
6678 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6679 tmp_reg
= gen_reg_rtx (Pmode
);
6680 nsse_reg
= gen_reg_rtx (Pmode
);
6681 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, AX_REG
)));
6682 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
6683 gen_rtx_MULT (Pmode
, nsse_reg
,
6686 /* vmovaps is one byte longer than movaps. */
6688 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
6689 gen_rtx_PLUS (Pmode
, tmp_reg
,
6695 gen_rtx_CONST (DImode
,
6696 gen_rtx_PLUS (DImode
,
6698 GEN_INT (cum
->sse_regno
6699 * (TARGET_AVX
? 5 : 4)))));
6701 emit_move_insn (nsse_reg
, label_ref
);
6702 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
6704 /* Compute address of memory block we save into. We always use pointer
6705 pointing 127 bytes after first byte to store - this is needed to keep
6706 instruction size limited by 4 bytes (5 bytes for AVX) with one
6707 byte displacement. */
6708 tmp_reg
= gen_reg_rtx (Pmode
);
6709 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
6710 plus_constant (save_area
,
6711 ix86_varargs_gpr_size
+ 127)));
6712 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
6713 MEM_NOTRAP_P (mem
) = 1;
6714 set_mem_alias_set (mem
, set
);
6715 set_mem_align (mem
, BITS_PER_WORD
);
6717 /* And finally do the dirty job! */
6718 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
6719 GEN_INT (cum
->sse_regno
), label
));
6724 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
6726 alias_set_type set
= get_varargs_alias_set ();
6729 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
6733 mem
= gen_rtx_MEM (Pmode
,
6734 plus_constant (virtual_incoming_args_rtx
,
6735 i
* UNITS_PER_WORD
));
6736 MEM_NOTRAP_P (mem
) = 1;
6737 set_mem_alias_set (mem
, set
);
6739 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
6740 emit_move_insn (mem
, reg
);
6745 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6746 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
6749 CUMULATIVE_ARGS next_cum
;
6752 /* This argument doesn't appear to be used anymore. Which is good,
6753 because the old code here didn't suppress rtl generation. */
6754 gcc_assert (!no_rtl
);
6759 fntype
= TREE_TYPE (current_function_decl
);
6761 /* For varargs, we do not want to skip the dummy va_dcl argument.
6762 For stdargs, we do want to skip the last named argument. */
6764 if (stdarg_p (fntype
))
6765 function_arg_advance (&next_cum
, mode
, type
, 1);
6767 if (cum
->call_abi
== MS_ABI
)
6768 setup_incoming_varargs_ms_64 (&next_cum
);
6770 setup_incoming_varargs_64 (&next_cum
);
6773 /* Checks if TYPE is of kind va_list char *. */
6776 is_va_list_char_pointer (tree type
)
6780 /* For 32-bit it is always true. */
6783 canonic
= ix86_canonical_va_list_type (type
);
6784 return (canonic
== ms_va_list_type_node
6785 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
6788 /* Implement va_start. */
6791 ix86_va_start (tree valist
, rtx nextarg
)
6793 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
6794 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
6795 tree gpr
, fpr
, ovf
, sav
, t
;
6798 /* Only 64bit target needs something special. */
6799 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
6801 std_expand_builtin_va_start (valist
, nextarg
);
6805 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
6806 f_fpr
= TREE_CHAIN (f_gpr
);
6807 f_ovf
= TREE_CHAIN (f_fpr
);
6808 f_sav
= TREE_CHAIN (f_ovf
);
6810 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
6811 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
6812 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
6813 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
6814 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
6816 /* Count number of gp and fp argument registers used. */
6817 words
= crtl
->args
.info
.words
;
6818 n_gpr
= crtl
->args
.info
.regno
;
6819 n_fpr
= crtl
->args
.info
.sse_regno
;
6821 if (cfun
->va_list_gpr_size
)
6823 type
= TREE_TYPE (gpr
);
6824 t
= build2 (MODIFY_EXPR
, type
,
6825 gpr
, build_int_cst (type
, n_gpr
* 8));
6826 TREE_SIDE_EFFECTS (t
) = 1;
6827 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6830 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
6832 type
= TREE_TYPE (fpr
);
6833 t
= build2 (MODIFY_EXPR
, type
, fpr
,
6834 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
6835 TREE_SIDE_EFFECTS (t
) = 1;
6836 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6839 /* Find the overflow area. */
6840 type
= TREE_TYPE (ovf
);
6841 t
= make_tree (type
, crtl
->args
.internal_arg_pointer
);
6843 t
= build2 (POINTER_PLUS_EXPR
, type
, t
,
6844 size_int (words
* UNITS_PER_WORD
));
6845 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
6846 TREE_SIDE_EFFECTS (t
) = 1;
6847 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6849 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
6851 /* Find the register save area.
6852 Prologue of the function save it right above stack frame. */
6853 type
= TREE_TYPE (sav
);
6854 t
= make_tree (type
, frame_pointer_rtx
);
6855 if (!ix86_varargs_gpr_size
)
6856 t
= build2 (POINTER_PLUS_EXPR
, type
, t
,
6857 size_int (-8 * X86_64_REGPARM_MAX
));
6858 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
6859 TREE_SIDE_EFFECTS (t
) = 1;
6860 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6864 /* Implement va_arg. */
6867 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
6870 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
6871 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
6872 tree gpr
, fpr
, ovf
, sav
, t
;
6874 tree lab_false
, lab_over
= NULL_TREE
;
6879 enum machine_mode nat_mode
;
6882 /* Only 64bit target needs something special. */
6883 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
6884 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
6886 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
6887 f_fpr
= TREE_CHAIN (f_gpr
);
6888 f_ovf
= TREE_CHAIN (f_fpr
);
6889 f_sav
= TREE_CHAIN (f_ovf
);
6891 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
6892 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
6893 valist
= build_va_arg_indirect_ref (valist
);
6894 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
6895 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
6896 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
6898 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
6900 type
= build_pointer_type (type
);
6901 size
= int_size_in_bytes (type
);
6902 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6904 nat_mode
= type_natural_mode (type
, NULL
);
6913 /* Unnamed 256bit vector mode parameters are passed on stack. */
6914 if (ix86_cfun_abi () == SYSV_ABI
)
6921 container
= construct_container (nat_mode
, TYPE_MODE (type
),
6922 type
, 0, X86_64_REGPARM_MAX
,
6923 X86_64_SSE_REGPARM_MAX
, intreg
,
6928 /* Pull the value out of the saved registers. */
6930 addr
= create_tmp_var (ptr_type_node
, "addr");
6931 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
6935 int needed_intregs
, needed_sseregs
;
6937 tree int_addr
, sse_addr
;
6939 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
6940 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
6942 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
6944 need_temp
= (!REG_P (container
)
6945 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
6946 || TYPE_ALIGN (type
) > 128));
6948 /* In case we are passing structure, verify that it is consecutive block
6949 on the register save area. If not we need to do moves. */
6950 if (!need_temp
&& !REG_P (container
))
6952 /* Verify that all registers are strictly consecutive */
6953 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
6957 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
6959 rtx slot
= XVECEXP (container
, 0, i
);
6960 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
6961 || INTVAL (XEXP (slot
, 1)) != i
* 16)
6969 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
6971 rtx slot
= XVECEXP (container
, 0, i
);
6972 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
6973 || INTVAL (XEXP (slot
, 1)) != i
* 8)
6985 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
6986 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
6987 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
6988 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
6991 /* First ensure that we fit completely in registers. */
6994 t
= build_int_cst (TREE_TYPE (gpr
),
6995 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
6996 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
6997 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
6998 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
6999 gimplify_and_add (t
, pre_p
);
7003 t
= build_int_cst (TREE_TYPE (fpr
),
7004 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
7005 + X86_64_REGPARM_MAX
* 8);
7006 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
7007 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
7008 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
7009 gimplify_and_add (t
, pre_p
);
7012 /* Compute index to start of area used for integer regs. */
7015 /* int_addr = gpr + sav; */
7016 t
= fold_convert (sizetype
, gpr
);
7017 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
7018 gimplify_assign (int_addr
, t
, pre_p
);
7022 /* sse_addr = fpr + sav; */
7023 t
= fold_convert (sizetype
, fpr
);
7024 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
7025 gimplify_assign (sse_addr
, t
, pre_p
);
7030 tree temp
= create_tmp_var (type
, "va_arg_tmp");
7033 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
7034 gimplify_assign (addr
, t
, pre_p
);
7036 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
7038 rtx slot
= XVECEXP (container
, 0, i
);
7039 rtx reg
= XEXP (slot
, 0);
7040 enum machine_mode mode
= GET_MODE (reg
);
7041 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
7042 tree addr_type
= build_pointer_type (piece_type
);
7043 tree daddr_type
= build_pointer_type_for_mode (piece_type
,
7047 tree dest_addr
, dest
;
7049 if (SSE_REGNO_P (REGNO (reg
)))
7051 src_addr
= sse_addr
;
7052 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
7056 src_addr
= int_addr
;
7057 src_offset
= REGNO (reg
) * 8;
7059 src_addr
= fold_convert (addr_type
, src_addr
);
7060 src_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, src_addr
,
7061 size_int (src_offset
));
7062 src
= build_va_arg_indirect_ref (src_addr
);
7064 dest_addr
= fold_convert (daddr_type
, addr
);
7065 dest_addr
= fold_build2 (POINTER_PLUS_EXPR
, daddr_type
, dest_addr
,
7066 size_int (INTVAL (XEXP (slot
, 1))));
7067 dest
= build_va_arg_indirect_ref (dest_addr
);
7069 gimplify_assign (dest
, src
, pre_p
);
7075 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
7076 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
7077 gimplify_assign (gpr
, t
, pre_p
);
7082 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
7083 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
7084 gimplify_assign (fpr
, t
, pre_p
);
7087 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
7089 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
7092 /* ... otherwise out of the overflow area. */
7094 /* When we align parameter on stack for caller, if the parameter
7095 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7096 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7097 here with caller. */
7098 arg_boundary
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
);
7099 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
7100 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
7102 /* Care for on-stack alignment if needed. */
7103 if (arg_boundary
<= 64
7104 || integer_zerop (TYPE_SIZE (type
)))
7108 HOST_WIDE_INT align
= arg_boundary
/ 8;
7109 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
7110 size_int (align
- 1));
7111 t
= fold_convert (sizetype
, t
);
7112 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
7114 t
= fold_convert (TREE_TYPE (ovf
), t
);
7116 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
7117 gimplify_assign (addr
, t
, pre_p
);
7119 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
,
7120 size_int (rsize
* UNITS_PER_WORD
));
7121 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
7124 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
7126 ptrtype
= build_pointer_type (type
);
7127 addr
= fold_convert (ptrtype
, addr
);
7130 addr
= build_va_arg_indirect_ref (addr
);
7131 return build_va_arg_indirect_ref (addr
);
7134 /* Return nonzero if OPNUM's MEM should be matched
7135 in movabs* patterns. */
7138 ix86_check_movabs (rtx insn
, int opnum
)
7142 set
= PATTERN (insn
);
7143 if (GET_CODE (set
) == PARALLEL
)
7144 set
= XVECEXP (set
, 0, 0);
7145 gcc_assert (GET_CODE (set
) == SET
);
7146 mem
= XEXP (set
, opnum
);
7147 while (GET_CODE (mem
) == SUBREG
)
7148 mem
= SUBREG_REG (mem
);
7149 gcc_assert (MEM_P (mem
));
7150 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
7153 /* Initialize the table of extra 80387 mathematical constants. */
7156 init_ext_80387_constants (void)
7158 static const char * cst
[5] =
7160 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7161 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7162 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7163 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7164 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7168 for (i
= 0; i
< 5; i
++)
7170 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
7171 /* Ensure each constant is rounded to XFmode precision. */
7172 real_convert (&ext_80387_constants_table
[i
],
7173 XFmode
, &ext_80387_constants_table
[i
]);
7176 ext_80387_constants_init
= 1;
7179 /* Return true if the constant is something that can be loaded with
7180 a special instruction. */
7183 standard_80387_constant_p (rtx x
)
7185 enum machine_mode mode
= GET_MODE (x
);
7189 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
7192 if (x
== CONST0_RTX (mode
))
7194 if (x
== CONST1_RTX (mode
))
7197 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7199 /* For XFmode constants, try to find a special 80387 instruction when
7200 optimizing for size or on those CPUs that benefit from them. */
7202 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
7206 if (! ext_80387_constants_init
)
7207 init_ext_80387_constants ();
7209 for (i
= 0; i
< 5; i
++)
7210 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
7214 /* Load of the constant -0.0 or -1.0 will be split as
7215 fldz;fchs or fld1;fchs sequence. */
7216 if (real_isnegzero (&r
))
7218 if (real_identical (&r
, &dconstm1
))
7224 /* Return the opcode of the special instruction to be used to load
7228 standard_80387_constant_opcode (rtx x
)
7230 switch (standard_80387_constant_p (x
))
7254 /* Return the CONST_DOUBLE representing the 80387 constant that is
7255 loaded by the specified special instruction. The argument IDX
7256 matches the return value from standard_80387_constant_p. */
7259 standard_80387_constant_rtx (int idx
)
7263 if (! ext_80387_constants_init
)
7264 init_ext_80387_constants ();
7280 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
7284 /* Return 1 if mode is a valid mode for sse. */
7286 standard_sse_mode_p (enum machine_mode mode
)
7303 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7304 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7305 modes and AVX is enabled. */
7308 standard_sse_constant_p (rtx x
)
7310 enum machine_mode mode
= GET_MODE (x
);
7312 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
7314 if (vector_all_ones_operand (x
, mode
))
7316 if (standard_sse_mode_p (mode
))
7317 return TARGET_SSE2
? 2 : -2;
7318 else if (VALID_AVX256_REG_MODE (mode
))
7319 return TARGET_AVX
? 3 : -3;
7325 /* Return the opcode of the special instruction to be used to load
7329 standard_sse_constant_opcode (rtx insn
, rtx x
)
7331 switch (standard_sse_constant_p (x
))
7334 switch (get_attr_mode (insn
))
7337 return TARGET_AVX
? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7339 return TARGET_AVX
? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7341 return TARGET_AVX
? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7343 return "vxorps\t%x0, %x0, %x0";
7345 return "vxorpd\t%x0, %x0, %x0";
7347 return "vpxor\t%x0, %x0, %x0";
7353 switch (get_attr_mode (insn
))
7358 return "vpcmpeqd\t%0, %0, %0";
7364 return "pcmpeqd\t%0, %0";
7369 /* Returns 1 if OP contains a symbol reference */
7372 symbolic_reference_mentioned_p (rtx op
)
7377 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
7380 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
7381 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
7387 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
7388 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
7392 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
7399 /* Return 1 if it is appropriate to emit `ret' instructions in the
7400 body of a function. Do this only if the epilogue is simple, needing a
7401 couple of insns. Prior to reloading, we can't tell how many registers
7402 must be saved, so return 0 then. Return 0 if there is no frame
7403 marker to de-allocate. */
7406 ix86_can_use_return_insn_p (void)
7408 struct ix86_frame frame
;
7410 if (! reload_completed
|| frame_pointer_needed
)
7413 /* Don't allow more than 32 pop, since that's all we can do
7414 with one instruction. */
7415 if (crtl
->args
.pops_args
7416 && crtl
->args
.size
>= 32768)
7419 ix86_compute_frame_layout (&frame
);
7420 return frame
.to_allocate
== 0 && (frame
.nregs
+ frame
.nsseregs
) == 0;
7423 /* Value should be nonzero if functions must have frame pointers.
7424 Zero means the frame pointer need not be set up (and parms may
7425 be accessed via the stack pointer) in functions that seem suitable. */
7428 ix86_frame_pointer_required (void)
7430 /* If we accessed previous frames, then the generated code expects
7431 to be able to access the saved ebp value in our frame. */
7432 if (cfun
->machine
->accesses_prev_frame
)
7435 /* Several x86 os'es need a frame pointer for other reasons,
7436 usually pertaining to setjmp. */
7437 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
7440 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7441 the frame pointer by default. Turn it back on now if we've not
7442 got a leaf function. */
7443 if (TARGET_OMIT_LEAF_FRAME_POINTER
7444 && (!current_function_is_leaf
7445 || ix86_current_function_calls_tls_descriptor
))
7454 /* Record that the current function accesses previous call frames. */
7457 ix86_setup_frame_addresses (void)
7459 cfun
->machine
->accesses_prev_frame
= 1;
7462 #ifndef USE_HIDDEN_LINKONCE
7463 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7464 # define USE_HIDDEN_LINKONCE 1
7466 # define USE_HIDDEN_LINKONCE 0
7470 static int pic_labels_used
;
7472 /* Fills in the label name that should be used for a pc thunk for
7473 the given register. */
7476 get_pc_thunk_name (char name
[32], unsigned int regno
)
7478 gcc_assert (!TARGET_64BIT
);
7480 if (USE_HIDDEN_LINKONCE
)
7481 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
7483 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
7487 /* This function generates code for -fpic that loads %ebx with
7488 the return address of the caller and then returns. */
7491 ix86_file_end (void)
7496 for (regno
= 0; regno
< 8; ++regno
)
7500 if (! ((pic_labels_used
>> regno
) & 1))
7503 get_pc_thunk_name (name
, regno
);
7508 switch_to_section (darwin_sections
[text_coal_section
]);
7509 fputs ("\t.weak_definition\t", asm_out_file
);
7510 assemble_name (asm_out_file
, name
);
7511 fputs ("\n\t.private_extern\t", asm_out_file
);
7512 assemble_name (asm_out_file
, name
);
7513 fputs ("\n", asm_out_file
);
7514 ASM_OUTPUT_LABEL (asm_out_file
, name
);
7518 if (USE_HIDDEN_LINKONCE
)
7522 decl
= build_decl (BUILTINS_LOCATION
,
7523 FUNCTION_DECL
, get_identifier (name
),
7525 TREE_PUBLIC (decl
) = 1;
7526 TREE_STATIC (decl
) = 1;
7527 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
7529 (*targetm
.asm_out
.unique_section
) (decl
, 0);
7530 switch_to_section (get_named_section (decl
, NULL
, 0));
7532 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
7533 fputs ("\t.hidden\t", asm_out_file
);
7534 assemble_name (asm_out_file
, name
);
7535 fputc ('\n', asm_out_file
);
7536 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
7540 switch_to_section (text_section
);
7541 ASM_OUTPUT_LABEL (asm_out_file
, name
);
7544 xops
[0] = gen_rtx_REG (Pmode
, regno
);
7545 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
7546 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
7547 output_asm_insn ("ret", xops
);
7550 if (NEED_INDICATE_EXEC_STACK
)
7551 file_end_indicate_exec_stack ();
7554 /* Emit code for the SET_GOT patterns. */
7557 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
7563 if (TARGET_VXWORKS_RTP
&& flag_pic
)
7565 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7566 xops
[2] = gen_rtx_MEM (Pmode
,
7567 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
7568 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
7570 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7571 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7572 an unadorned address. */
7573 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7574 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
7575 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
7579 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
7581 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
7583 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
7586 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
7588 output_asm_insn ("call\t%a2", xops
);
7591 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7592 is what will be referenced by the Mach-O PIC subsystem. */
7594 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
7597 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
7598 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
7601 output_asm_insn ("pop%z0\t%0", xops
);
7606 get_pc_thunk_name (name
, REGNO (dest
));
7607 pic_labels_used
|= 1 << REGNO (dest
);
7609 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
7610 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
7611 output_asm_insn ("call\t%X2", xops
);
7612 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7613 is what will be referenced by the Mach-O PIC subsystem. */
7616 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
7618 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7619 CODE_LABEL_NUMBER (label
));
7626 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
7627 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
7629 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
7634 /* Generate an "push" pattern for input ARG. */
7639 if (ix86_cfa_state
->reg
== stack_pointer_rtx
)
7640 ix86_cfa_state
->offset
+= UNITS_PER_WORD
;
7642 return gen_rtx_SET (VOIDmode
,
7644 gen_rtx_PRE_DEC (Pmode
,
7645 stack_pointer_rtx
)),
7649 /* Return >= 0 if there is an unused call-clobbered register available
7650 for the entire function. */
7653 ix86_select_alt_pic_regnum (void)
7655 if (current_function_is_leaf
&& !crtl
->profile
7656 && !ix86_current_function_calls_tls_descriptor
)
7659 /* Can't use the same register for both PIC and DRAP. */
7661 drap
= REGNO (crtl
->drap_reg
);
7664 for (i
= 2; i
>= 0; --i
)
7665 if (i
!= drap
&& !df_regs_ever_live_p (i
))
7669 return INVALID_REGNUM
;
7672 /* Return 1 if we need to save REGNO. */
7674 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
7676 if (pic_offset_table_rtx
7677 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
7678 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
7680 || crtl
->calls_eh_return
7681 || crtl
->uses_const_pool
))
7683 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
7688 if (crtl
->calls_eh_return
&& maybe_eh_return
)
7693 unsigned test
= EH_RETURN_DATA_REGNO (i
);
7694 if (test
== INVALID_REGNUM
)
7701 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
7704 return (df_regs_ever_live_p (regno
)
7705 && !call_used_regs
[regno
]
7706 && !fixed_regs
[regno
]
7707 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
7710 /* Return number of saved general prupose registers. */
7713 ix86_nsaved_regs (void)
7718 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
7719 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
7724 /* Return number of saved SSE registrers. */
7727 ix86_nsaved_sseregs (void)
7732 if (ix86_cfun_abi () != MS_ABI
)
7734 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
7735 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
7740 /* Given FROM and TO register numbers, say whether this elimination is
7741 allowed. If stack alignment is needed, we can only replace argument
7742 pointer with hard frame pointer, or replace frame pointer with stack
7743 pointer. Otherwise, frame pointer elimination is automatically
7744 handled and all other eliminations are valid. */
7747 ix86_can_eliminate (int from
, int to
)
7749 if (stack_realign_fp
)
7750 return ((from
== ARG_POINTER_REGNUM
7751 && to
== HARD_FRAME_POINTER_REGNUM
)
7752 || (from
== FRAME_POINTER_REGNUM
7753 && to
== STACK_POINTER_REGNUM
));
7755 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: 1;
7758 /* Return the offset between two registers, one to be eliminated, and the other
7759 its replacement, at the start of a routine.  Offsets come from the
   frame layout computed by ix86_compute_frame_layout.  */
7762 ix86_initial_elimination_offset (int from
, int to
)
7764 struct ix86_frame frame
;
7765 ix86_compute_frame_layout (&frame
);
/* arg pointer -> hard frame pointer.  */
7767 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
7768 return frame
.hard_frame_pointer_offset
;
/* soft frame pointer -> hard frame pointer.  */
7769 else if (from
== FRAME_POINTER_REGNUM
7770 && to
== HARD_FRAME_POINTER_REGNUM
)
7771 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
/* Remaining cases eliminate to the stack pointer.  */
7774 gcc_assert (to
== STACK_POINTER_REGNUM
);
7776 if (from
== ARG_POINTER_REGNUM
)
7777 return frame
.stack_pointer_offset
;
7779 gcc_assert (from
== FRAME_POINTER_REGNUM
);
7780 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
7784 /* In a dynamically-aligned function, we can't know the offset from
7785 stack pointer to frame pointer, so we must ensure that setjmp
7786 eliminates fp against the hard fp (%ebp) rather than trying to
7787 index from %esp up to the top of the frame across a gap that is
7788 of unknown (at compile-time) size.  Returns the rtx setjmp should
   record as the frame value.  */
7790 ix86_builtin_setjmp_frame_value (void)
7792 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
7795 /* Fill structure ix86_frame about frame of currently computed function.
   Computes register-save counts, paddings, red-zone size and the offsets
   (frame pointer, hard frame pointer, stack pointer) used by the
   prologue/epilogue expanders and by ix86_initial_elimination_offset.
   NOTE(review): this extraction is lossy -- braces, else-keywords and
   several lines are missing; code below is byte-identical, comments only.  */
7798 ix86_compute_frame_layout (struct ix86_frame
*frame
)
7800 HOST_WIDE_INT total_size
;
7801 unsigned int stack_alignment_needed
;
7802 HOST_WIDE_INT offset
;
7803 unsigned int preferred_alignment
;
7804 HOST_WIDE_INT size
= get_frame_size ();
7806 frame
->nregs
= ix86_nsaved_regs ();
7807 frame
->nsseregs
= ix86_nsaved_sseregs ();
/* Convert the bit-based alignment bookkeeping in crtl to bytes.  */
7810 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
7811 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
7813 /* MS ABI seem to require stack alignment to be always 16 except for function
   prologues (rest of this sentence lost in extraction).  Force 16-byte
   (128-bit) alignment both locally and in crtl.  */
7815 if (ix86_cfun_abi () == MS_ABI
&& preferred_alignment
< 16)
7817 preferred_alignment
= 16;
7818 stack_alignment_needed
= 16;
7819 crtl
->preferred_stack_boundary
= 128;
7820 crtl
->stack_alignment_needed
= 128;
7823 gcc_assert (!size
|| stack_alignment_needed
);
7824 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
7825 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
7827 /* During reload iteration the amount of registers saved can change.
7828 Recompute the value as needed. Do not recompute when amount of registers
7829 didn't change as reload does multiple calls to the function and does not
7830 expect the decision to change within single iteration. */
7831 if (!optimize_function_for_size_p (cfun
)
7832 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
7834 int count
= frame
->nregs
;
7836 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
7837 /* The fast prologue uses move instead of push to save registers. This
7838 is significantly longer, but also executes faster as modern hardware
7839 can execute the moves in parallel, but can't do that for push/pop.
7841 Be careful about choosing what prologue to emit: When function takes
7842 many instructions to execute we may use slow version as well as in
7843 case function is known to be outside hot spot (this is known with
7844 feedback only). Weight the size of function by number of registers
7845 to save as it is cheap to use one or two push instructions but very
7846 slow to use many of them. */
7848 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
7849 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
7850 || (flag_branch_probabilities
7851 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
7852 cfun
->machine
->use_fast_prologue_epilogue
= false;
7854 cfun
->machine
->use_fast_prologue_epilogue
7855 = !expensive_function_p (count
);
7857 if (TARGET_PROLOGUE_USING_MOVE
7858 && cfun
->machine
->use_fast_prologue_epilogue
)
7859 frame
->save_regs_using_mov
= true;
7861 frame
->save_regs_using_mov
= false;
7864 /* Skip return address and saved base pointer. */
7865 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
7867 frame
->hard_frame_pointer_offset
= offset
;
7869 /* Set offset to aligned because the realigned frame starts from
   an aligned boundary (tail of comment lost in extraction).  */
7871 if (stack_realign_fp
)
7872 offset
= (offset
+ stack_alignment_needed
-1) & -stack_alignment_needed
;
7874 /* Register save area */
7875 offset
+= frame
->nregs
* UNITS_PER_WORD
;
7877 /* Align SSE reg save area. */
/* padding0 rounds the offset up to 16 bytes only when SSE regs exist.  */
7878 if (frame
->nsseregs
)
7879 frame
->padding0
= ((offset
+ 16 - 1) & -16) - offset
;
7881 frame
->padding0
= 0;
7883 /* SSE register save area. */
7884 offset
+= frame
->padding0
+ frame
->nsseregs
* 16;
/* Varargs register-save area (GPR + FPR spill for va_arg).  */
7887 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
7888 offset
+= frame
->va_arg_size
;
7890 /* Align start of frame for local function. */
7891 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
7892 & -stack_alignment_needed
) - offset
;
7894 offset
+= frame
->padding1
;
7896 /* Frame pointer points here. */
7897 frame
->frame_pointer_offset
= offset
;
7901 /* Add outgoing arguments area. Can be skipped if we eliminated
7902 all the function calls as dead code.
7903 Skipping is however impossible when function calls alloca. Alloca
7904 expander assumes that last crtl->outgoing_args_size
7905 of stack frame are unused. */
7906 if (ACCUMULATE_OUTGOING_ARGS
7907 && (!current_function_is_leaf
|| cfun
->calls_alloca
7908 || ix86_current_function_calls_tls_descriptor
))
7910 offset
+= crtl
->outgoing_args_size
;
7911 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
7914 frame
->outgoing_arguments_size
= 0;
7916 /* Align stack boundary. Only needed if we're calling another function
   or using alloca/tls descriptors (tail lost in extraction).  */
7918 if (!current_function_is_leaf
|| cfun
->calls_alloca
7919 || ix86_current_function_calls_tls_descriptor
)
7920 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
7921 & -preferred_alignment
) - offset
;
7923 frame
->padding2
= 0;
7925 offset
+= frame
->padding2
;
7927 /* We've reached end of stack frame. */
7928 frame
->stack_pointer_offset
= offset
;
7930 /* Size prologue needs to allocate. */
7931 frame
->to_allocate
=
7932 (size
+ frame
->padding1
+ frame
->padding2
7933 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
)
;
/* Fall back to push-based saves for trivial frames, or when the
   allocation does not fit a 32-bit displacement on x86-64.  */
7935 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
7936 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
7937 frame
->save_regs_using_mov
= false;
/* Red zone: usable only for leaf functions on non-MS 64-bit ABIs with
   an unchanging stack pointer and no TLS-descriptor calls.  */
7939 if (!TARGET_64BIT_MS_ABI
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
7940 && current_function_is_leaf
7941 && !ix86_current_function_calls_tls_descriptor
)
7943 frame
->red_zone_size
= frame
->to_allocate
;
7944 if (frame
->save_regs_using_mov
)
7945 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
7946 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
7947 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
7950 frame
->red_zone_size
= 0;
7951 frame
->to_allocate
-= frame
->red_zone_size
;
7952 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
/* Debug dump of the computed layout -- presumably guarded by an #if 0
   or flag in the full source (guard not visible in this extraction).  */
7954 fprintf (stderr
, "\n");
7955 fprintf (stderr
, "size: %ld\n", (long)size
);
7956 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
7957 fprintf (stderr
, "nsseregs: %ld\n", (long)frame
->nsseregs
);
7958 fprintf (stderr
, "padding0: %ld\n", (long)frame
->padding0
);
7959 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
7960 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
7961 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
7962 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
7963 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
7964 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
7965 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
7966 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
7967 (long)frame
->hard_frame_pointer_offset
);
7968 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
7969 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
7970 fprintf (stderr
, "cfun->calls_alloca: %ld\n", (long)cfun
->calls_alloca
);
7971 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
7975 /* Emit code to save registers in the prologue.  Uses PUSH insns,
   iterating from the highest hard register downward; each push is
   marked frame-related so DWARF CFI is generated for it.  */
7978 ix86_emit_save_regs (void)
7983 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
7984 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
7986 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
7987 RTX_FRAME_RELATED_P (insn
) = 1;
7991 /* Emit code to save general registers using MOV insns.  First register
7992 is saved at POINTER + OFFSET; subsequent ones at word increments.
   NOTE(review): the adjust_address mode/offset arguments (original line
   8003) are missing from this lossy extraction.  */
7994 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
7999 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8000 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8002 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
8004 gen_rtx_REG (Pmode
, regno
));
8005 RTX_FRAME_RELATED_P (insn
) = 1;
8006 offset
+= UNITS_PER_WORD
;
8010 /* Emit code to save SSE registers using MOV insns.  First register
8011 is saved at POINTER + OFFSET; saves are TImode and the destination
   is marked 128-bit aligned.  NOTE(review): the offset increment at
   the loop tail is missing from this lossy extraction.  */
8013 ix86_emit_save_sse_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
8019 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8020 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8022 mem
= adjust_address (gen_rtx_MEM (TImode
, pointer
), TImode
, offset
);
8023 set_mem_align (mem
, 128);
8024 insn
= emit_move_insn (mem
, gen_rtx_REG (TImode
, regno
));
8025 RTX_FRAME_RELATED_P (insn
) = 1;
/* List of pending REG_CFA_RESTORE notes, flushed onto the next stack
   manipulation insn by ix86_add_queued_cfa_restore_notes.  */
8030 static GTY(()) rtx queued_cfa_restores
;
8032 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
8033 manipulation insn. Don't add it if the previously
8034 saved value will be left untouched within stack red-zone till return,
8035 as unwinders can find the same value in the register and
   on the stack (tail of comment lost in extraction).  */
8039 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT red_offset
)
/* Condition head (original lines 8040-8041) missing from this view;
   the visible conjuncts require a non-MS 64-bit ABI, a slot inside the
   red zone, and fewer than 64K popped argument bytes.  */
8042 && !TARGET_64BIT_MS_ABI
8043 && red_offset
+ RED_ZONE_SIZE
>= 0
8044 && crtl
->args
.pops_args
< 65536)
8049 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
8050 RTX_FRAME_RELATED_P (insn
) = 1;
/* Otherwise queue the note (assignment target lost in extraction;
   presumably queued_cfa_restores).  */
8054 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
8057 /* Add queued REG_CFA_RESTORE notes if any to INSN.  Splices the whole
   queued list in front of INSN's existing notes and clears the queue.  */
8060 ix86_add_queued_cfa_restore_notes (rtx insn
)
8063 if (!queued_cfa_restores
)
/* Walk to the last queued note so its tail can be linked to INSN's
   current note list.  */
8065 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
8067 XEXP (last
, 1) = REG_NOTES (insn
);
8068 REG_NOTES (insn
) = queued_cfa_restores
;
8069 queued_cfa_restores
= NULL_RTX
;
8070 RTX_FRAME_RELATED_P (insn
) = 1;
8073 /* Expand prologue or epilogue stack adjustment.
8074 The pattern exist to put a dependency on all ebp-based memory accesses.
8075 STYLE should be negative if instructions should be marked as frame related,
8076 zero if %r11 register is live and cannot be freely used and positive
   otherwise (tail of comment lost in extraction).  SET_CFA indicates the
   adjustment moves the CFA and must be tracked in ix86_cfa_state.  */
8080 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
8081 int style
, bool set_cfa
)
/* 32-bit path (guard lost in extraction).  */
8086 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
/* 64-bit path when OFFSET fits a sign-extended 32-bit immediate.  */
8087 else if (x86_64_immediate_operand (offset
, DImode
))
8088 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
8092 /* r11 is used by indirect sibcall return as well, set before the
8093 epilogue and used after the epilogue. ATM indirect sibcall
8094 shouldn't be used together with huge frame sizes in one
8095 function because of the frame_size check in sibcall.c. */
8097 r11
= gen_rtx_REG (DImode
, R11_REG
);
8098 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
8100 RTX_FRAME_RELATED_P (insn
) = 1;
8101 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
8106 ix86_add_queued_cfa_restore_notes (insn
);
/* Track the CFA move: offset accumulates, register becomes DEST.  */
8112 gcc_assert (ix86_cfa_state
->reg
== src
);
8113 ix86_cfa_state
->offset
+= INTVAL (offset
);
8114 ix86_cfa_state
->reg
= dest
;
/* Attach an explicit REG_CFA_ADJUST_CFA note describing dest = src + offset.  */
8116 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
8117 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
8118 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
8119 RTX_FRAME_RELATED_P (insn
) = 1;
8122 RTX_FRAME_RELATED_P (insn
) = 1;
8125 /* Find an available register to be used as dynamic realign argument
8126 pointer register. Such a register will be written in prologue and
8127 used in begin of body, so it must not be
8128 1. parameter passing register.
8130 We reuse static-chain register if it is available. Otherwise, we
8131 use DI for i386 and R13 for x86-64. We chose R13 since it has
8134 Return: the regno of chosen register. */
8137 find_drap_reg (void)
8139 tree decl
= cfun
->decl
;
/* 64-bit branch (guard lost in extraction).  */
8143 /* Use R13 for nested function or function need static chain.
8144 Since function with tail call may use any caller-saved
8145 registers in epilogue, DRAP must not use caller-saved
8146 register in such case. */
8147 if ((decl_function_context (decl
)
8148 && !DECL_NO_STATIC_CHAIN (decl
))
8149 || crtl
->tail_call_emit
)
/* 32-bit branch.  */
8156 /* Use DI for nested function or function need static chain.
8157 Since function with tail call may use any caller-saved
8158 registers in epilogue, DRAP must not use caller-saved
8159 register in such case. */
8160 if ((decl_function_context (decl
)
8161 && !DECL_NO_STATIC_CHAIN (decl
))
8162 || crtl
->tail_call_emit
)
8165 /* Reuse static chain register if it isn't used for parameter
   passing (tail of comment lost in extraction); fastcall also uses ECX.  */
8167 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2
8168 && !lookup_attribute ("fastcall",
8169 TYPE_ATTRIBUTES (TREE_TYPE (decl
))))
8176 /* Update incoming stack boundary and estimated stack alignment.
   Reconciles user options, function attributes, main()'s runtime
   alignment and varargs requirements into ix86_incoming_stack_boundary
   and crtl->stack_alignment_estimated.  */
8179 ix86_update_stack_boundary (void)
8181 /* Prefer the one specified at command line. */
8182 ix86_incoming_stack_boundary
8183 = (ix86_user_incoming_stack_boundary
8184 ? ix86_user_incoming_stack_boundary
8185 : ix86_default_incoming_stack_boundary
);
8187 /* Incoming stack alignment can be changed on individual functions
8188 via force_align_arg_pointer attribute. We use the smallest
8189 incoming stack boundary. */
8190 if (ix86_incoming_stack_boundary
> MIN_STACK_BOUNDARY
8191 && lookup_attribute (ix86_force_align_arg_pointer_string
,
8192 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
8193 ix86_incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
8195 /* The incoming stack frame has to be aligned at least at
8196 parm_stack_boundary. */
8197 if (ix86_incoming_stack_boundary
< crtl
->parm_stack_boundary
)
8198 ix86_incoming_stack_boundary
= crtl
->parm_stack_boundary
;
8200 /* Stack at entrance of main is aligned by runtime. We use the
8201 smallest incoming stack boundary. */
8202 if (ix86_incoming_stack_boundary
> MAIN_STACK_BOUNDARY
8203 && DECL_NAME (current_function_decl
)
8204 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
8205 && DECL_FILE_SCOPE_P (current_function_decl
))
8206 ix86_incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
8208 /* x86_64 vararg needs 16byte stack alignment for register save
   area (condition head, original lines 8209-8211, lost in extraction).  */
8212 && crtl
->stack_alignment_estimated
< 128)
8213 crtl
->stack_alignment_estimated
= 128;
8216 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8217 needed or an rtx for DRAP otherwise. */
8220 ix86_get_drap_rtx (void)
/* Forcing DRAP (or not accumulating outgoing args) means the frame
   needs a dynamic realign argument pointer.  */
8222 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
8223 crtl
->need_drap
= true;
8225 if (stack_realign_drap
)
8227 /* Assign DRAP to vDRAP and returns vDRAP */
8228 unsigned int regno
= find_drap_reg ();
8233 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
8234 crtl
->drap_reg
= arg_ptr
;
/* Copy the hard DRAP into a pseudo; the emitted sequence (captured in
   SEQ -- capture lines lost in extraction) is placed right after the
   function entry.  */
8237 drap_vreg
= copy_to_reg (arg_ptr
);
8241 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
8242 RTX_FRAME_RELATED_P (insn
) = 1;
8249 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  The virtual incoming
   args pointer is later eliminated per ix86_can_eliminate.  */
8252 ix86_internal_arg_pointer (void)
8254 return virtual_incoming_args_rtx
;
8257 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8258 to be generated in correct form. */
8260 ix86_finalize_stack_realign_flags (void)
8262 /* Check if stack realign is really needed after reload, and
8263 stores result in cfun */
8264 unsigned int incoming_stack_boundary
8265 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
8266 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
/* Leaf functions only need the alignment their stack slots actually
   use; non-leaf functions must honor stack_alignment_needed.  */
8267 unsigned int stack_realign
= (incoming_stack_boundary
8268 < (current_function_is_leaf
8269 ? crtl
->max_used_stack_slot_alignment
8270 : crtl
->stack_alignment_needed
));
8272 if (crtl
->stack_realign_finalized
)
8274 /* After stack_realign_needed is finalized, we can no longer
   change it (tail of comment lost in extraction) -- assert stability.  */
8276 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
8280 crtl
->stack_realign_needed
= stack_realign
;
8281 crtl
->stack_realign_finalized
= true;
8285 /* Expand the prologue into a bunch of separate insns.  Handles DRAP
   setup, frame pointer establishment, stack realignment, stack
   allocation (with optional probing), register saves and PIC register
   setup, annotating everything with DWARF CFI notes.
   NOTE(review): lossy extraction -- braces, else-keywords and several
   lines are missing; code below is byte-identical, comments only.  */
8288 ix86_expand_prologue (void)
8292 struct ix86_frame frame
;
8293 HOST_WIDE_INT allocate
;
8295 ix86_finalize_stack_realign_flags ();
8297 /* DRAP should not coexist with stack_realign_fp */
8298 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
8300 /* Initialize CFA state for before the prologue. */
8301 ix86_cfa_state
->reg
= stack_pointer_rtx
;
8302 ix86_cfa_state
->offset
= INCOMING_FRAME_SP_OFFSET
;
8304 ix86_compute_frame_layout (&frame
);
8306 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8307 of DRAP is needed and stack realignment is really needed after reload */
8308 if (crtl
->drap_reg
&& crtl
->stack_realign_needed
)
8311 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8312 int param_ptr_offset
= (call_used_regs
[REGNO (crtl
->drap_reg
)]
8313 ? 0 : UNITS_PER_WORD
);
8315 gcc_assert (stack_realign_drap
);
8317 /* Grab the argument pointer. */
8318 x
= plus_constant (stack_pointer_rtx
,
8319 (UNITS_PER_WORD
+ param_ptr_offset
));
8322 /* Only need to push parameter pointer reg if it is caller
   saved (tail of comment lost in extraction).  */
8324 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
8326 /* Push arg pointer reg */
8327 insn
= emit_insn (gen_push (y
));
8328 RTX_FRAME_RELATED_P (insn
) = 1;
8331 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
8332 RTX_FRAME_RELATED_P (insn
) = 1;
8333 ix86_cfa_state
->reg
= crtl
->drap_reg
;
8335 /* Align the stack. */
8336 insn
= emit_insn ((*ix86_gen_andsp
) (stack_pointer_rtx
,
8338 GEN_INT (-align_bytes
)));
8339 RTX_FRAME_RELATED_P (insn
) = 1;
8341 /* Replicate the return address on the stack so that return
8342 address can be reached via (argp - 1) slot. This is needed
8343 to implement macro RETURN_ADDR_RTX and intrinsic function
8344 expand_builtin_return_addr etc. */
8346 x
= gen_frame_mem (Pmode
,
8347 plus_constant (x
, -UNITS_PER_WORD
));
8348 insn
= emit_insn (gen_push (x
));
8349 RTX_FRAME_RELATED_P (insn
) = 1;
8352 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8353 slower on all targets. Also sdb doesn't like it. */
8355 if (frame_pointer_needed
)
8357 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
8358 RTX_FRAME_RELATED_P (insn
) = 1;
8360 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
8361 RTX_FRAME_RELATED_P (insn
) = 1;
/* Once %ebp is established it becomes the CFA register.  */
8363 if (ix86_cfa_state
->reg
== stack_pointer_rtx
)
8364 ix86_cfa_state
->reg
= hard_frame_pointer_rtx
;
8367 if (stack_realign_fp
)
8369 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8370 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
8372 /* Align the stack. */
8373 insn
= emit_insn ((*ix86_gen_andsp
) (stack_pointer_rtx
,
8375 GEN_INT (-align_bytes
)));
8376 RTX_FRAME_RELATED_P (insn
) = 1;
/* Total allocation: frame data plus the SSE save area.  */
8379 allocate
= frame
.to_allocate
+ frame
.nsseregs
* 16 + frame
.padding0
;
8381 if (!frame
.save_regs_using_mov
)
8382 ix86_emit_save_regs ();
8384 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
8386 /* When using red zone we may start register saving before allocating
8387 the stack frame saving one cycle of the prologue. However I will
8388 avoid doing this if I am going to have to probe the stack since
8389 at least on x86_64 the stack probe can turn into a call that clobbers
8390 a red zone location */
8391 if (!TARGET_64BIT_MS_ABI
&& TARGET_RED_ZONE
&& frame
.save_regs_using_mov
8392 && (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
))
8393 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8394 && !crtl
->stack_realign_needed
)
8395 ? hard_frame_pointer_rtx
8396 : stack_pointer_rtx
,
8397 -frame
.nregs
* UNITS_PER_WORD
);
/* Small allocation: single stack adjustment, no probing needed.  */
8401 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
8402 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8403 GEN_INT (-allocate
), -1,
8404 ix86_cfa_state
->reg
== stack_pointer_rtx
);
/* Large allocation: go through the stack-probing worker.  */
8407 /* Only valid for Win32. */
8408 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
8412 gcc_assert (!TARGET_64BIT
|| cfun
->machine
->call_abi
== MS_ABI
);
8414 if (cfun
->machine
->call_abi
== MS_ABI
)
8417 eax_live
= ix86_eax_live_at_start_p ();
/* Preserve a live incoming %eax around the probe call.  */
8421 emit_insn (gen_push (eax
));
8422 allocate
-= UNITS_PER_WORD
;
8425 emit_move_insn (eax
, GEN_INT (allocate
));
8428 insn
= gen_allocate_stack_worker_64 (eax
, eax
);
8430 insn
= gen_allocate_stack_worker_32 (eax
, eax
);
8431 insn
= emit_insn (insn
);
8433 if (ix86_cfa_state
->reg
== stack_pointer_rtx
)
8435 ix86_cfa_state
->offset
+= allocate
;
8436 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
8437 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
8438 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
8439 RTX_FRAME_RELATED_P (insn
) = 1;
/* Restore the saved %eax from its stack slot (eax_live path).  */
8444 if (frame_pointer_needed
)
8445 t
= plus_constant (hard_frame_pointer_rtx
,
8448 - frame
.nregs
* UNITS_PER_WORD
);
8450 t
= plus_constant (stack_pointer_rtx
, allocate
);
8451 emit_move_insn (eax
, gen_rtx_MEM (Pmode
, t
));
/* Move-based saves that could not be done before allocation.  */
8455 if (frame
.save_regs_using_mov
8456 && !(!TARGET_64BIT_MS_ABI
&& TARGET_RED_ZONE
8457 && (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)))
8459 if (!frame_pointer_needed
8460 || !frame
.to_allocate
8461 || crtl
->stack_realign_needed
)
8462 ix86_emit_save_regs_using_mov (stack_pointer_rtx
,
8464 + frame
.nsseregs
* 16 + frame
.padding0
);
8466 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
8467 -frame
.nregs
* UNITS_PER_WORD
);
8469 if (!frame_pointer_needed
8470 || !frame
.to_allocate
8471 || crtl
->stack_realign_needed
)
8472 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx
,
8475 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx
,
8476 - frame
.nregs
* UNITS_PER_WORD
8477 - frame
.nsseregs
* 16
/* PIC register setup.  */
8480 pic_reg_used
= false;
8481 if (pic_offset_table_rtx
8482 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8485 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
8487 if (alt_pic_reg_used
!= INVALID_REGNUM
)
8488 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
8490 pic_reg_used
= true;
8497 if (ix86_cmodel
== CM_LARGE_PIC
)
8499 rtx tmp_reg
= gen_rtx_REG (DImode
, R11_REG
);
8500 rtx label
= gen_label_rtx ();
8502 LABEL_PRESERVE_P (label
) = 1;
8503 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
8504 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
8505 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
8506 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
8507 pic_offset_table_rtx
, tmp_reg
));
8510 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
8513 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
8516 /* In the pic_reg_used case, make sure that the got load isn't deleted
8517 when mcount needs it. Blockage to avoid call movement across mcount
8518 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
   note (tail of comment lost in extraction).  */
8520 if (crtl
->profile
&& pic_reg_used
)
8521 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
8523 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
8525 /* vDRAP is setup but after reload it turns out stack realign
8526 isn't necessary, here we will emit prologue to setup DRAP
8527 without stack realign adjustment */
8528 int drap_bp_offset
= UNITS_PER_WORD
* 2;
8529 rtx x
= plus_constant (hard_frame_pointer_rtx
, drap_bp_offset
);
8530 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, x
));
8533 /* Prevent instructions from being scheduled into register save push
8534 sequence when access to the redzone area is done through frame pointer.
8535 The offset between the frame pointer and the stack pointer is calculated
8536 relative to the value of the stack pointer at the end of the function
8537 prologue, and moving instructions that access redzone area via frame
8538 pointer inside push sequence violates this assumption. */
8539 if (frame_pointer_needed
&& frame
.red_zone_size
)
8540 emit_insn (gen_memory_blockage ());
8542 /* Emit cld instruction if stringops are used in the function. */
8543 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
8544 emit_insn (gen_cld ());
8547 /* Emit code to restore REG using a POP insn, updating ix86_cfa_state
   and attaching the DWARF CFI notes the unwinder needs.  RED_OFFSET is
   the slot's offset relative to the red zone, forwarded to
   ix86_add_cfa_restore_note.  */
8550 ix86_emit_restore_reg_using_pop (rtx reg
, HOST_WIDE_INT red_offset
)
8552 rtx insn
= emit_insn (ix86_gen_pop1 (reg
));
8554 if (ix86_cfa_state
->reg
== crtl
->drap_reg
8555 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
8557 /* Previously we'd represented the CFA as an expression
8558 like *(%ebp - 8). We've just popped that value from
8559 the stack, which means we need to reset the CFA to
8560 the drap register. This will remain until we restore
8561 the stack pointer. */
8562 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
8563 RTX_FRAME_RELATED_P (insn
) = 1;
/* POP shrinks the SP-relative CFA offset by one word.  */
8567 if (ix86_cfa_state
->reg
== stack_pointer_rtx
)
8569 ix86_cfa_state
->offset
-= UNITS_PER_WORD
;
8570 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
8571 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
8572 RTX_FRAME_RELATED_P (insn
) = 1;
8575 /* When the frame pointer is the CFA, and we pop it, we are
8576 swapping back to the stack pointer as the CFA. This happens
8577 for stack frames that don't allocate other data, so we assume
8578 the stack pointer is now pointing at the return address, i.e.
8579 the function entry state, which makes the offset be 1 word. */
8580 else if (ix86_cfa_state
->reg
== hard_frame_pointer_rtx
8581 && reg
== hard_frame_pointer_rtx
)
8583 ix86_cfa_state
->reg
= stack_pointer_rtx
;
8584 ix86_cfa_state
->offset
= UNITS_PER_WORD
;
8586 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8587 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
8588 GEN_INT (UNITS_PER_WORD
)));
8589 RTX_FRAME_RELATED_P (insn
) = 1;
8592 ix86_add_cfa_restore_note (insn
, reg
, red_offset
);
8595 /* Emit code to restore saved registers using POP insns.  Walks hard
   registers in ascending order (the reverse of the descending-push
   save order) and advances RED_OFFSET a word per register.  */
8598 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset
)
8602 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8603 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
8605 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode
, regno
),
8607 red_offset
+= UNITS_PER_WORD
;
8611 /* Emit code and notes for the LEAVE instruction.  LEAVE restores both
   %esp and %ebp, so any queued CFA-restore notes are flushed onto it,
   and when %ebp was the CFA the CFA switch is recorded as well.  */
8614 ix86_emit_leave (HOST_WIDE_INT red_offset
)
8616 rtx insn
= emit_insn (ix86_gen_leave ());
8618 ix86_add_queued_cfa_restore_notes (insn
);
8620 if (ix86_cfa_state
->reg
== hard_frame_pointer_rtx
)
8622 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
8623 copy_rtx (XVECEXP (PATTERN (insn
), 0, 0)));
8624 RTX_FRAME_RELATED_P (insn
) = 1;
8625 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
, red_offset
);
8629 /* Emit code to restore saved general registers using MOV insns.  First register
8630 is restored from POINTER + OFFSET; subsequent ones at word increments.
   MAYBE_EH_RETURN is forwarded to ix86_save_reg to include the
   eh_return scratch registers when restoring along that path.  */
8632 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
8633 HOST_WIDE_INT red_offset
,
8634 int maybe_eh_return
)
8637 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
8640 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8641 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
8643 rtx reg
= gen_rtx_REG (Pmode
, regno
);
8645 /* Ensure that adjust_address won't be forced to produce pointer
8646 out of range allowed by x86-64 instruction set. */
8647 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
/* Materialize POINTER + OFFSET in %r11 so the displacement fits.  */
8651 r11
= gen_rtx_REG (DImode
, R11_REG
);
8652 emit_move_insn (r11
, GEN_INT (offset
));
8653 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
8654 base_address
= gen_rtx_MEM (Pmode
, r11
);
8657 insn
= emit_move_insn (reg
,
8658 adjust_address (base_address
, Pmode
, offset
));
8659 offset
+= UNITS_PER_WORD
;
8661 if (ix86_cfa_state
->reg
== crtl
->drap_reg
8662 && regno
== REGNO (crtl
->drap_reg
))
8664 /* Previously we'd represented the CFA as an expression
8665 like *(%ebp - 8). We've just popped that value from
8666 the stack, which means we need to reset the CFA to
8667 the drap register. This will remain until we restore
8668 the stack pointer. */
8669 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
8670 RTX_FRAME_RELATED_P (insn
) = 1;
8673 ix86_add_cfa_restore_note (NULL_RTX
, reg
, red_offset
);
8675 red_offset
+= UNITS_PER_WORD
;
8679 /* Emit code to restore saved SSE registers using MOV insns.  First register
8680 is restored from POINTER + OFFSET; loads are TImode with 128-bit
   alignment asserted on the memory.  NOTE(review): the offset/red_offset
   increments at the loop tail are missing from this lossy extraction.  */
8682 ix86_emit_restore_sse_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
8683 HOST_WIDE_INT red_offset
,
8684 int maybe_eh_return
)
8687 rtx base_address
= gen_rtx_MEM (TImode
, pointer
);
8690 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8691 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
8693 rtx reg
= gen_rtx_REG (TImode
, regno
);
8695 /* Ensure that adjust_address won't be forced to produce pointer
8696 out of range allowed by x86-64 instruction set. */
8697 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
/* Materialize POINTER + OFFSET in %r11 so the displacement fits.  */
8701 r11
= gen_rtx_REG (DImode
, R11_REG
);
8702 emit_move_insn (r11
, GEN_INT (offset
));
8703 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
8704 base_address
= gen_rtx_MEM (TImode
, r11
);
8707 mem
= adjust_address (base_address
, TImode
, offset
);
8708 set_mem_align (mem
, 128);
8709 insn
= emit_move_insn (reg
, mem
);
8712 ix86_add_cfa_restore_note (NULL_RTX
, reg
, red_offset
);
8718 /* Restore function stack, frame, and registers. */
8721 ix86_expand_epilogue (int style
)
8724 struct ix86_frame frame
;
8725 HOST_WIDE_INT offset
, red_offset
;
8726 struct machine_cfa_state cfa_state_save
= *ix86_cfa_state
;
8729 ix86_finalize_stack_realign_flags ();
8731 /* When stack is realigned, SP must be valid. */
8732 sp_valid
= (!frame_pointer_needed
8733 || current_function_sp_is_unchanging
8734 || stack_realign_fp
);
8736 ix86_compute_frame_layout (&frame
);
8738 /* See the comment about red zone and frame
8739 pointer usage in ix86_expand_prologue. */
8740 if (frame_pointer_needed
&& frame
.red_zone_size
)
8741 emit_insn (gen_memory_blockage ());
8743 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
8744 gcc_assert (!using_drap
|| ix86_cfa_state
->reg
== crtl
->drap_reg
);
8746 /* Calculate start of saved registers relative to ebp. Special care
8747 must be taken for the normal return case of a function using
8748 eh_return: the eax and edx registers are marked as saved, but not
8749 restored along this path. */
8750 offset
= frame
.nregs
;
8751 if (crtl
->calls_eh_return
&& style
!= 2)
8753 offset
*= -UNITS_PER_WORD
;
8754 offset
-= frame
.nsseregs
* 16 + frame
.padding0
;
8756 /* Calculate start of saved registers relative to esp on entry of the
8757 function. When realigning stack, this needs to be the most negative
8758 value possible at runtime. */
8759 red_offset
= offset
;
8761 red_offset
-= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
8763 else if (stack_realign_fp
)
8764 red_offset
-= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
8766 if (frame_pointer_needed
)
8767 red_offset
-= UNITS_PER_WORD
;
8769 /* If we're only restoring one register and sp is not valid then
8770 using a move instruction to restore the register since it's
8771 less work than reloading sp and popping the register.
8773 The default code result in stack adjustment using add/lea instruction,
8774 while this code results in LEAVE instruction (or discrete equivalent),
8775 so it is profitable in some other cases as well. Especially when there
8776 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8777 and there is exactly one register to pop. This heuristic may need some
8778 tuning in future. */
8779 if ((!sp_valid
&& (frame
.nregs
+ frame
.nsseregs
) <= 1)
8780 || (TARGET_EPILOGUE_USING_MOVE
8781 && cfun
->machine
->use_fast_prologue_epilogue
8782 && ((frame
.nregs
+ frame
.nsseregs
) > 1 || frame
.to_allocate
))
8783 || (frame_pointer_needed
&& !(frame
.nregs
+ frame
.nsseregs
)
8784 && frame
.to_allocate
)
8785 || (frame_pointer_needed
&& TARGET_USE_LEAVE
8786 && cfun
->machine
->use_fast_prologue_epilogue
8787 && (frame
.nregs
+ frame
.nsseregs
) == 1)
8788 || crtl
->calls_eh_return
)
8790 /* Restore registers. We can use ebp or esp to address the memory
8791 locations. If both are available, default to ebp, since offsets
8792 are known to be small. Only exception is esp pointing directly
8793 to the end of block of saved registers, where we may simplify
8796 If we are realigning stack with bp and sp, regs restore can't
8797 be addressed by bp. sp must be used instead. */
8799 if (!frame_pointer_needed
8800 || (sp_valid
&& !frame
.to_allocate
)
8801 || stack_realign_fp
)
8803 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx
,
8804 frame
.to_allocate
, red_offset
,
8806 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
8808 + frame
.nsseregs
* 16
8811 + frame
.nsseregs
* 16
8812 + frame
.padding0
, style
== 2);
8816 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx
,
8819 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
8821 + frame
.nsseregs
* 16
8824 + frame
.nsseregs
* 16
8825 + frame
.padding0
, style
== 2);
8828 red_offset
-= offset
;
8830 /* eh_return epilogues need %ecx added to the stack pointer. */
8833 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
8835 /* Stack align doesn't work with eh_return. */
8836 gcc_assert (!crtl
->stack_realign_needed
);
8838 if (frame_pointer_needed
)
8840 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
8841 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
8842 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
8844 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
8845 tmp
= emit_move_insn (hard_frame_pointer_rtx
, tmp
);
8847 /* Note that we use SA as a temporary CFA, as the return
8848 address is at the proper place relative to it. We
8849 pretend this happens at the FP restore insn because
8850 prior to this insn the FP would be stored at the wrong
8851 offset relative to SA, and after this insn we have no
8852 other reasonable register to use for the CFA. We don't
8853 bother resetting the CFA to the SP for the duration of
8855 add_reg_note (tmp
, REG_CFA_DEF_CFA
,
8856 plus_constant (sa
, UNITS_PER_WORD
));
8857 ix86_add_queued_cfa_restore_notes (tmp
);
8858 add_reg_note (tmp
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
8859 RTX_FRAME_RELATED_P (tmp
) = 1;
8860 ix86_cfa_state
->reg
= sa
;
8861 ix86_cfa_state
->offset
= UNITS_PER_WORD
;
8863 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
8864 const0_rtx
, style
, false);
8868 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
8869 tmp
= plus_constant (tmp
, (frame
.to_allocate
8870 + frame
.nregs
* UNITS_PER_WORD
8871 + frame
.nsseregs
* 16
8873 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
8874 ix86_add_queued_cfa_restore_notes (tmp
);
8876 gcc_assert (ix86_cfa_state
->reg
== stack_pointer_rtx
);
8877 if (ix86_cfa_state
->offset
!= UNITS_PER_WORD
)
8879 ix86_cfa_state
->offset
= UNITS_PER_WORD
;
8880 add_reg_note (tmp
, REG_CFA_DEF_CFA
,
8881 plus_constant (stack_pointer_rtx
,
8883 RTX_FRAME_RELATED_P (tmp
) = 1;
8887 else if (!frame_pointer_needed
)
8888 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8889 GEN_INT (frame
.to_allocate
8890 + frame
.nregs
* UNITS_PER_WORD
8891 + frame
.nsseregs
* 16
8893 style
, !using_drap
);
8894 /* If not an i386, mov & pop is faster than "leave". */
8895 else if (TARGET_USE_LEAVE
|| optimize_function_for_size_p (cfun
)
8896 || !cfun
->machine
->use_fast_prologue_epilogue
)
8897 ix86_emit_leave (red_offset
);
8900 pro_epilogue_adjust_stack (stack_pointer_rtx
,
8901 hard_frame_pointer_rtx
,
8902 const0_rtx
, style
, !using_drap
);
8904 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
, red_offset
);
8909 /* First step is to deallocate the stack frame so that we can
8912 If we realign stack with frame pointer, then stack pointer
8913 won't be able to recover via lea $offset(%bp), %sp, because
8914 there is a padding area between bp and sp for realign.
8915 "add $to_allocate, %sp" must be used instead. */
8918 gcc_assert (frame_pointer_needed
);
8919 gcc_assert (!stack_realign_fp
);
8920 pro_epilogue_adjust_stack (stack_pointer_rtx
,
8921 hard_frame_pointer_rtx
,
8922 GEN_INT (offset
), style
, false);
8923 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx
,
8924 frame
.to_allocate
, red_offset
,
8926 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8927 GEN_INT (frame
.nsseregs
* 16),
8930 else if (frame
.to_allocate
|| frame
.nsseregs
)
8932 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx
,
8933 frame
.to_allocate
, red_offset
,
8935 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8936 GEN_INT (frame
.to_allocate
8937 + frame
.nsseregs
* 16
8938 + frame
.padding0
), style
,
8939 !using_drap
&& !frame_pointer_needed
);
8942 ix86_emit_restore_regs_using_pop (red_offset
+ frame
.nsseregs
* 16
8944 red_offset
-= offset
;
8946 if (frame_pointer_needed
)
8948 /* Leave results in shorter dependency chains on CPUs that are
8949 able to grok it fast. */
8950 if (TARGET_USE_LEAVE
)
8951 ix86_emit_leave (red_offset
);
8954 /* For stack realigned really happens, recover stack
8955 pointer to hard frame pointer is a must, if not using
8957 if (stack_realign_fp
)
8958 pro_epilogue_adjust_stack (stack_pointer_rtx
,
8959 hard_frame_pointer_rtx
,
8960 const0_rtx
, style
, !using_drap
);
8961 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
,
8969 int param_ptr_offset
= (call_used_regs
[REGNO (crtl
->drap_reg
)]
8970 ? 0 : UNITS_PER_WORD
);
8973 gcc_assert (stack_realign_drap
);
8975 insn
= emit_insn ((*ix86_gen_add3
) (stack_pointer_rtx
,
8977 GEN_INT (-(UNITS_PER_WORD
8978 + param_ptr_offset
))));
8980 ix86_cfa_state
->reg
= stack_pointer_rtx
;
8981 ix86_cfa_state
->offset
= UNITS_PER_WORD
+ param_ptr_offset
;
8983 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8984 gen_rtx_PLUS (Pmode
, ix86_cfa_state
->reg
,
8985 GEN_INT (ix86_cfa_state
->offset
)));
8986 RTX_FRAME_RELATED_P (insn
) = 1;
8988 if (param_ptr_offset
)
8989 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
, -UNITS_PER_WORD
);
8992 /* Sibcall epilogues don't want a return instruction. */
8995 *ix86_cfa_state
= cfa_state_save
;
8999 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
9001 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
9003 /* i386 can only pop 64K bytes. If asked to pop more, pop return
9004 address, do explicit add, and jump indirectly to the caller. */
9006 if (crtl
->args
.pops_args
>= 65536)
9008 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
9011 /* There is no "pascal" calling convention in any 64bit ABI. */
9012 gcc_assert (!TARGET_64BIT
);
9014 insn
= emit_insn (gen_popsi1 (ecx
));
9015 ix86_cfa_state
->offset
-= UNITS_PER_WORD
;
9017 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
9018 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
9019 add_reg_note (insn
, REG_CFA_REGISTER
,
9020 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
9021 RTX_FRAME_RELATED_P (insn
) = 1;
9023 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9025 emit_jump_insn (gen_return_indirect_internal (ecx
));
9028 emit_jump_insn (gen_return_pop_internal (popc
));
9031 emit_jump_insn (gen_return_internal ());
9033 /* Restore the state back to the state from the prologue,
9034 so that it's correct for the next epilogue. */
9035 *ix86_cfa_state
= cfa_state_save
;
9038 /* Reset from the function's potential modifications. */
9041 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
9042 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
9044 if (pic_offset_table_rtx
)
9045 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
9047 /* Mach-O doesn't support labels at the end of objects, so if
9048 it looks like we might want one, insert a NOP. */
9050 rtx insn
= get_last_insn ();
9053 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
9054 insn
= PREV_INSN (insn
);
9058 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
9059 fputs ("\tnop\n", file
);
9065 /* Extract the parts of an RTL expression that is a valid memory address
9066 for an instruction. Return 0 if the structure of the address is
9067 grossly off. Return -1 if the address contains ASHIFT, so it is not
9068 strictly valid, but still used for computing length of lea instruction. */
9071 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
9073 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
9074 rtx base_reg
, index_reg
;
9075 HOST_WIDE_INT scale
= 1;
9076 rtx scale_rtx
= NULL_RTX
;
9078 enum ix86_address_seg seg
= SEG_DEFAULT
;
9080 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
9082 else if (GET_CODE (addr
) == PLUS
)
9092 addends
[n
++] = XEXP (op
, 1);
9095 while (GET_CODE (op
) == PLUS
);
9100 for (i
= n
; i
>= 0; --i
)
9103 switch (GET_CODE (op
))
9108 index
= XEXP (op
, 0);
9109 scale_rtx
= XEXP (op
, 1);
9113 if (XINT (op
, 1) == UNSPEC_TP
9114 && TARGET_TLS_DIRECT_SEG_REFS
9115 && seg
== SEG_DEFAULT
)
9116 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
9145 else if (GET_CODE (addr
) == MULT
)
9147 index
= XEXP (addr
, 0); /* index*scale */
9148 scale_rtx
= XEXP (addr
, 1);
9150 else if (GET_CODE (addr
) == ASHIFT
)
9154 /* We're called for lea too, which implements ashift on occasion. */
9155 index
= XEXP (addr
, 0);
9156 tmp
= XEXP (addr
, 1);
9157 if (!CONST_INT_P (tmp
))
9159 scale
= INTVAL (tmp
);
9160 if ((unsigned HOST_WIDE_INT
) scale
> 3)
9166 disp
= addr
; /* displacement */
9168 /* Extract the integral value of scale. */
9171 if (!CONST_INT_P (scale_rtx
))
9173 scale
= INTVAL (scale_rtx
);
9176 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
9177 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
9179 /* Avoid useless 0 displacement. */
9180 if (disp
== const0_rtx
&& (base
|| index
))
9183 /* Allow arg pointer and stack pointer as index if there is not scaling. */
9184 if (base_reg
&& index_reg
&& scale
== 1
9185 && (index_reg
== arg_pointer_rtx
9186 || index_reg
== frame_pointer_rtx
9187 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
9190 tmp
= base
, base
= index
, index
= tmp
;
9191 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
9194 /* Special case: %ebp cannot be encoded as a base without a displacement.
9198 && (base_reg
== hard_frame_pointer_rtx
9199 || base_reg
== frame_pointer_rtx
9200 || base_reg
== arg_pointer_rtx
9201 || (REG_P (base_reg
)
9202 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
9203 || REGNO (base_reg
) == R13_REG
))))
9206 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9207 Avoid this by transforming to [%esi+0].
9208 Reload calls address legitimization without cfun defined, so we need
9209 to test cfun for being non-NULL. */
9210 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
9211 && base_reg
&& !index_reg
&& !disp
9213 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
9216 /* Special case: encode reg+reg instead of reg*2. */
9217 if (!base
&& index
&& scale
== 2)
9218 base
= index
, base_reg
= index_reg
, scale
= 1;
9220 /* Special case: scaling cannot be encoded without base or displacement. */
9221 if (!base
&& !disp
&& index
&& scale
!= 1)
9233 /* Return cost of the memory address x.
9234 For i386, it is better to use a complex address than let gcc copy
9235 the address into a reg and make a new pseudo. But not if the address
9236 requires two regs - that would mean more pseudos with longer
9239 ix86_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
9241 struct ix86_address parts
;
9243 int ok
= ix86_decompose_address (x
, &parts
);
9247 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
9248 parts
.base
= SUBREG_REG (parts
.base
);
9249 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
9250 parts
.index
= SUBREG_REG (parts
.index
);
9252 /* Attempt to minimize number of registers in the address. */
9254 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
9256 && (!REG_P (parts
.index
)
9257 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
9261 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
9263 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
9264 && parts
.base
!= parts
.index
)
9267 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
9268 since it's predecode logic can't detect the length of instructions
9269 and it degenerates to vector decoded. Increase cost of such
9270 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
9271 to split such addresses or even refuse such addresses at all.
9273 Following addressing modes are affected:
9278 The first and last case may be avoidable by explicitly coding the zero in
9279 memory address, but I don't have AMD-K6 machine handy to check this
9283 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
9284 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
9285 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
9291 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9292 this is used for to form addresses to local data when -fPIC is in
9296 darwin_local_data_pic (rtx disp
)
9298 return (GET_CODE (disp
) == UNSPEC
9299 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
9302 /* Determine if a given RTX is a valid constant. We already know this
9303 satisfies CONSTANT_P. */
9306 legitimate_constant_p (rtx x
)
9308 switch (GET_CODE (x
))
9313 if (GET_CODE (x
) == PLUS
)
9315 if (!CONST_INT_P (XEXP (x
, 1)))
9320 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
9323 /* Only some unspecs are valid as "constants". */
9324 if (GET_CODE (x
) == UNSPEC
)
9325 switch (XINT (x
, 1))
9330 return TARGET_64BIT
;
9333 x
= XVECEXP (x
, 0, 0);
9334 return (GET_CODE (x
) == SYMBOL_REF
9335 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
9337 x
= XVECEXP (x
, 0, 0);
9338 return (GET_CODE (x
) == SYMBOL_REF
9339 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
9344 /* We must have drilled down to a symbol. */
9345 if (GET_CODE (x
) == LABEL_REF
)
9347 if (GET_CODE (x
) != SYMBOL_REF
)
9352 /* TLS symbols are never valid. */
9353 if (SYMBOL_REF_TLS_MODEL (x
))
9356 /* DLLIMPORT symbols are never valid. */
9357 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9358 && SYMBOL_REF_DLLIMPORT_P (x
))
9363 if (GET_MODE (x
) == TImode
9364 && x
!= CONST0_RTX (TImode
)
9370 if (!standard_sse_constant_p (x
))
9377 /* Otherwise we handle everything else in the move patterns. */
9381 /* Determine if it's legal to put X into the constant pool. This
9382 is not possible for the address of thread-local symbols, which
9383 is checked above. */
/* NOTE(review): the case labels of the switch (original lines
   9390-9398) were dropped by the extraction; only the switch head and
   the final fall-through to !legitimate_constant_p are visible here.
   Confirm the full case list against the original file.  */
9386 ix86_cannot_force_const_mem (rtx x
)
9388 /* We can always put integral constants and vectors in memory. */
9389 switch (GET_CODE (x
))
9399 return !legitimate_constant_p (x
);
9403 /* Nonzero if the constant value X is a legitimate general operand
9404 when generating PIC code. It is given that flag_pic is on and
9405 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9408 legitimate_pic_operand_p (rtx x
)
9412 switch (GET_CODE (x
))
9415 inner
= XEXP (x
, 0);
9416 if (GET_CODE (inner
) == PLUS
9417 && CONST_INT_P (XEXP (inner
, 1)))
9418 inner
= XEXP (inner
, 0);
9420 /* Only some unspecs are valid as "constants". */
9421 if (GET_CODE (inner
) == UNSPEC
)
9422 switch (XINT (inner
, 1))
9427 return TARGET_64BIT
;
9429 x
= XVECEXP (inner
, 0, 0);
9430 return (GET_CODE (x
) == SYMBOL_REF
9431 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
9432 case UNSPEC_MACHOPIC_OFFSET
:
9433 return legitimate_pic_address_disp_p (x
);
9441 return legitimate_pic_address_disp_p (x
);
9448 /* Determine if a given CONST RTX is a valid memory displacement
9452 legitimate_pic_address_disp_p (rtx disp
)
9456 /* In 64bit mode we can allow direct addresses of symbols and labels
9457 when they are not dynamic symbols. */
9460 rtx op0
= disp
, op1
;
9462 switch (GET_CODE (disp
))
9468 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
9470 op0
= XEXP (XEXP (disp
, 0), 0);
9471 op1
= XEXP (XEXP (disp
, 0), 1);
9472 if (!CONST_INT_P (op1
)
9473 || INTVAL (op1
) >= 16*1024*1024
9474 || INTVAL (op1
) < -16*1024*1024)
9476 if (GET_CODE (op0
) == LABEL_REF
)
9478 if (GET_CODE (op0
) != SYMBOL_REF
)
9483 /* TLS references should always be enclosed in UNSPEC. */
9484 if (SYMBOL_REF_TLS_MODEL (op0
))
9486 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
9487 && ix86_cmodel
!= CM_LARGE_PIC
)
9495 if (GET_CODE (disp
) != CONST
)
9497 disp
= XEXP (disp
, 0);
9501 /* We are unsafe to allow PLUS expressions. This limit allowed distance
9502 of GOT tables. We should not need these anyway. */
9503 if (GET_CODE (disp
) != UNSPEC
9504 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
9505 && XINT (disp
, 1) != UNSPEC_GOTOFF
9506 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
9509 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
9510 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
9516 if (GET_CODE (disp
) == PLUS
)
9518 if (!CONST_INT_P (XEXP (disp
, 1)))
9520 disp
= XEXP (disp
, 0);
9524 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
9527 if (GET_CODE (disp
) != UNSPEC
)
9530 switch (XINT (disp
, 1))
9535 /* We need to check for both symbols and labels because VxWorks loads
9536 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9538 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
9539 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
9541 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9542 While ABI specify also 32bit relocation but we don't produce it in
9543 small PIC model at all. */
9544 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
9545 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
9547 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
9549 case UNSPEC_GOTTPOFF
:
9550 case UNSPEC_GOTNTPOFF
:
9551 case UNSPEC_INDNTPOFF
:
9554 disp
= XVECEXP (disp
, 0, 0);
9555 return (GET_CODE (disp
) == SYMBOL_REF
9556 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
9558 disp
= XVECEXP (disp
, 0, 0);
9559 return (GET_CODE (disp
) == SYMBOL_REF
9560 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
9562 disp
= XVECEXP (disp
, 0, 0);
9563 return (GET_CODE (disp
) == SYMBOL_REF
9564 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
9570 /* Recognizes RTL expressions that are valid memory addresses for an
9571 instruction. The MODE argument is the machine mode for the MEM
9572 expression that wants to use this address.
9574 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
9575 convert common non-canonical forms to canonical form so that they will
9579 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
9580 rtx addr
, bool strict
)
9582 struct ix86_address parts
;
9583 rtx base
, index
, disp
;
9584 HOST_WIDE_INT scale
;
9585 const char *reason
= NULL
;
9586 rtx reason_rtx
= NULL_RTX
;
9588 if (ix86_decompose_address (addr
, &parts
) <= 0)
9590 reason
= "decomposition failed";
9595 index
= parts
.index
;
9597 scale
= parts
.scale
;
9599 /* Validate base register.
9601 Don't allow SUBREG's that span more than a word here. It can lead to spill
9602 failures when the base is one word out of a two word structure, which is
9603 represented internally as a DImode int. */
9612 else if (GET_CODE (base
) == SUBREG
9613 && REG_P (SUBREG_REG (base
))
9614 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
9616 reg
= SUBREG_REG (base
);
9619 reason
= "base is not a register";
9623 if (GET_MODE (base
) != Pmode
)
9625 reason
= "base is not in Pmode";
9629 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
9630 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
9632 reason
= "base is not valid";
9637 /* Validate index register.
9639 Don't allow SUBREG's that span more than a word here -- same as above. */
9648 else if (GET_CODE (index
) == SUBREG
9649 && REG_P (SUBREG_REG (index
))
9650 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
9652 reg
= SUBREG_REG (index
);
9655 reason
= "index is not a register";
9659 if (GET_MODE (index
) != Pmode
)
9661 reason
= "index is not in Pmode";
9665 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
9666 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
9668 reason
= "index is not valid";
9673 /* Validate scale factor. */
9676 reason_rtx
= GEN_INT (scale
);
9679 reason
= "scale without index";
9683 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
9685 reason
= "scale is not a valid multiplier";
9690 /* Validate displacement. */
9695 if (GET_CODE (disp
) == CONST
9696 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
9697 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
9698 switch (XINT (XEXP (disp
, 0), 1))
9700 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9701 used. While ABI specify also 32bit relocations, we don't produce
9702 them at all and use IP relative instead. */
9705 gcc_assert (flag_pic
);
9707 goto is_legitimate_pic
;
9708 reason
= "64bit address unspec";
9711 case UNSPEC_GOTPCREL
:
9712 gcc_assert (flag_pic
);
9713 goto is_legitimate_pic
;
9715 case UNSPEC_GOTTPOFF
:
9716 case UNSPEC_GOTNTPOFF
:
9717 case UNSPEC_INDNTPOFF
:
9723 reason
= "invalid address unspec";
9727 else if (SYMBOLIC_CONST (disp
)
9731 && MACHOPIC_INDIRECT
9732 && !machopic_operand_p (disp
)
9738 if (TARGET_64BIT
&& (index
|| base
))
9740 /* foo@dtpoff(%rX) is ok. */
9741 if (GET_CODE (disp
) != CONST
9742 || GET_CODE (XEXP (disp
, 0)) != PLUS
9743 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
9744 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
9745 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
9746 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
9748 reason
= "non-constant pic memory reference";
9752 else if (! legitimate_pic_address_disp_p (disp
))
9754 reason
= "displacement is an invalid pic construct";
9758 /* This code used to verify that a symbolic pic displacement
9759 includes the pic_offset_table_rtx register.
9761 While this is good idea, unfortunately these constructs may
9762 be created by "adds using lea" optimization for incorrect
9771 This code is nonsensical, but results in addressing
9772 GOT table with pic_offset_table_rtx base. We can't
9773 just refuse it easily, since it gets matched by
9774 "addsi3" pattern, that later gets split to lea in the
9775 case output register differs from input. While this
9776 can be handled by separate addsi pattern for this case
9777 that never results in lea, this seems to be easier and
9778 correct fix for crash to disable this test. */
9780 else if (GET_CODE (disp
) != LABEL_REF
9781 && !CONST_INT_P (disp
)
9782 && (GET_CODE (disp
) != CONST
9783 || !legitimate_constant_p (disp
))
9784 && (GET_CODE (disp
) != SYMBOL_REF
9785 || !legitimate_constant_p (disp
)))
9787 reason
= "displacement is not constant";
9790 else if (TARGET_64BIT
9791 && !x86_64_immediate_operand (disp
, VOIDmode
))
9793 reason
= "displacement is out of range";
9798 /* Everything looks valid. */
9805 /* Determine if a given RTX is a valid constant address. */
9808 constant_address_p (rtx x
)
9810 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
9813 /* Return a unique alias set for the GOT. */
/* NOTE(review): interior lines (original 9817, 9819 and 9821) are
   missing from this extraction -- presumably the lazy-initialization
   guard around new_alias_set and the return of SET, given the
   `set = -1` sentinel visible below.  Confirm against the original
   file.  */
9815 static alias_set_type
9816 ix86_GOT_alias_set (void)
9818 static alias_set_type set
= -1;
9820 set
= new_alias_set ();
9824 /* Return a legitimate reference for ORIG (an address) using the
9825 register REG. If REG is 0, a new pseudo is generated.
9827 There are two types of references that must be handled:
9829 1. Global data references must load the address from the GOT, via
9830 the PIC reg. An insn is emitted to do this load, and the reg is
9833 2. Static data references, constant pool addresses, and code labels
9834 compute the address as an offset from the GOT, whose base is in
9835 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9836 differentiate them from global data objects. The returned
9837 address is the PIC reg + an unspec constant.
9839 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9840 reg also appears in the address. */
9843 legitimize_pic_address (rtx orig
, rtx reg
)
9850 if (TARGET_MACHO
&& !TARGET_64BIT
)
9853 reg
= gen_reg_rtx (Pmode
);
9854 /* Use the generic Mach-O PIC machinery. */
9855 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
9859 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
9861 else if (TARGET_64BIT
9862 && ix86_cmodel
!= CM_SMALL_PIC
9863 && gotoff_operand (addr
, Pmode
))
9866 /* This symbol may be referenced via a displacement from the PIC
9867 base address (@GOTOFF). */
9869 if (reload_in_progress
)
9870 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
9871 if (GET_CODE (addr
) == CONST
)
9872 addr
= XEXP (addr
, 0);
9873 if (GET_CODE (addr
) == PLUS
)
9875 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
9877 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
9880 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
9881 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
9883 tmpreg
= gen_reg_rtx (Pmode
);
9886 emit_move_insn (tmpreg
, new_rtx
);
9890 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
9891 tmpreg
, 1, OPTAB_DIRECT
);
9894 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
9896 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
9898 /* This symbol may be referenced via a displacement from the PIC
9899 base address (@GOTOFF). */
9901 if (reload_in_progress
)
9902 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
9903 if (GET_CODE (addr
) == CONST
)
9904 addr
= XEXP (addr
, 0);
9905 if (GET_CODE (addr
) == PLUS
)
9907 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
9909 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
9912 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
9913 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
9914 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
9918 emit_move_insn (reg
, new_rtx
);
9922 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
9923 /* We can't use @GOTOFF for text labels on VxWorks;
9924 see gotoff_operand. */
9925 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
9927 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
9929 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
9930 return legitimize_dllimport_symbol (addr
, true);
9931 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
9932 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
9933 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
9935 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
9936 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
9940 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
9942 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
9943 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
9944 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
9945 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
9948 reg
= gen_reg_rtx (Pmode
);
9949 /* Use directly gen_movsi, otherwise the address is loaded
9950 into register for CSE. We don't want to CSE this addresses,
9951 instead we CSE addresses from the GOT table, so skip this. */
9952 emit_insn (gen_movsi (reg
, new_rtx
));
9957 /* This symbol must be referenced via a load from the
9958 Global Offset Table (@GOT). */
9960 if (reload_in_progress
)
9961 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
9962 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
9963 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
9965 new_rtx
= force_reg (Pmode
, new_rtx
);
9966 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
9967 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
9968 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
9971 reg
= gen_reg_rtx (Pmode
);
9972 emit_move_insn (reg
, new_rtx
);
9978 if (CONST_INT_P (addr
)
9979 && !x86_64_immediate_operand (addr
, VOIDmode
))
9983 emit_move_insn (reg
, addr
);
9987 new_rtx
= force_reg (Pmode
, addr
);
9989 else if (GET_CODE (addr
) == CONST
)
9991 addr
= XEXP (addr
, 0);
9993 /* We must match stuff we generate before. Assume the only
9994 unspecs that can get here are ours. Not that we could do
9995 anything with them anyway.... */
9996 if (GET_CODE (addr
) == UNSPEC
9997 || (GET_CODE (addr
) == PLUS
9998 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
10000 gcc_assert (GET_CODE (addr
) == PLUS
);
10002 if (GET_CODE (addr
) == PLUS
)
10004 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
10006 /* Check first to see if this is a constant offset from a @GOTOFF
10007 symbol reference. */
10008 if (gotoff_operand (op0
, Pmode
)
10009 && CONST_INT_P (op1
))
10013 if (reload_in_progress
)
10014 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
10015 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
10017 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
10018 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10019 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
10023 emit_move_insn (reg
, new_rtx
);
10029 if (INTVAL (op1
) < -16*1024*1024
10030 || INTVAL (op1
) >= 16*1024*1024)
10032 if (!x86_64_immediate_operand (op1
, Pmode
))
10033 op1
= force_reg (Pmode
, op1
);
10034 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
10040 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
10041 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
10042 base
== reg
? NULL_RTX
: reg
);
10044 if (CONST_INT_P (new_rtx
))
10045 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
10048 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
10050 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
10051 new_rtx
= XEXP (new_rtx
, 1);
10053 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
10061 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Builds a (unspec [const0] UNSPEC_TP) rtx for the thread pointer and,
   as visible below, copies it into a fresh Pmode pseudo via an emitted
   SET insn.
   NOTE(review): interior lines (original 10062-10067, 10069-10071 and
   10075-10078) were dropped by the extraction -- the local
   declarations, the presumable early return of TP when !TO_REG, and
   the final return are not visible here.  Confirm against the
   original file.  */
10064 get_thread_pointer (int to_reg
)
10068 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
10072 reg
= gen_reg_rtx (Pmode
);
10073 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
10074 insn
= emit_insn (insn
);
10079 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10080 false if we expect this to be used for a memory address and true if
10081 we expect to load the address into a register. */
10084 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
10086 rtx dest
, base
, off
, pic
, tp
;
10091 case TLS_MODEL_GLOBAL_DYNAMIC
:
10092 dest
= gen_reg_rtx (Pmode
);
10093 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
10095 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
10097 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
10100 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
10101 insns
= get_insns ();
10104 RTL_CONST_CALL_P (insns
) = 1;
10105 emit_libcall_block (insns
, dest
, rax
, x
);
10107 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
10108 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
10110 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
10112 if (TARGET_GNU2_TLS
)
10114 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
10116 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
10120 case TLS_MODEL_LOCAL_DYNAMIC
:
10121 base
= gen_reg_rtx (Pmode
);
10122 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
10124 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
10126 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, note
;
10129 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
10130 insns
= get_insns ();
10133 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
10134 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
10135 RTL_CONST_CALL_P (insns
) = 1;
10136 emit_libcall_block (insns
, base
, rax
, note
);
10138 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
10139 emit_insn (gen_tls_local_dynamic_base_64 (base
));
10141 emit_insn (gen_tls_local_dynamic_base_32 (base
));
10143 if (TARGET_GNU2_TLS
)
10145 rtx x
= ix86_tls_module_base ();
10147 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
10148 gen_rtx_MINUS (Pmode
, x
, tp
));
10151 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
10152 off
= gen_rtx_CONST (Pmode
, off
);
10154 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
10156 if (TARGET_GNU2_TLS
)
10158 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
10160 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
10165 case TLS_MODEL_INITIAL_EXEC
:
10169 type
= UNSPEC_GOTNTPOFF
;
10173 if (reload_in_progress
)
10174 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
10175 pic
= pic_offset_table_rtx
;
10176 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
10178 else if (!TARGET_ANY_GNU_TLS
)
10180 pic
= gen_reg_rtx (Pmode
);
10181 emit_insn (gen_set_got (pic
));
10182 type
= UNSPEC_GOTTPOFF
;
10187 type
= UNSPEC_INDNTPOFF
;
10190 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
10191 off
= gen_rtx_CONST (Pmode
, off
);
10193 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
10194 off
= gen_const_mem (Pmode
, off
);
10195 set_mem_alias_set (off
, ix86_GOT_alias_set ());
10197 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
10199 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
10200 off
= force_reg (Pmode
, off
);
10201 return gen_rtx_PLUS (Pmode
, base
, off
);
10205 base
= get_thread_pointer (true);
10206 dest
= gen_reg_rtx (Pmode
);
10207 emit_insn (gen_subsi3 (dest
, base
, off
));
10211 case TLS_MODEL_LOCAL_EXEC
:
10212 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
10213 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
10214 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
10215 off
= gen_rtx_CONST (Pmode
, off
);
10217 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
10219 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
10220 return gen_rtx_PLUS (Pmode
, base
, off
);
10224 base
= get_thread_pointer (true);
10225 dest
= gen_reg_rtx (Pmode
);
10226 emit_insn (gen_subsi3 (dest
, base
, off
));
10231 gcc_unreachable ();
10237 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10240 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
10241 htab_t dllimport_map
;
10244 get_dllimport_decl (tree decl
)
10246 struct tree_map
*h
, in
;
10249 const char *prefix
;
10250 size_t namelen
, prefixlen
;
10255 if (!dllimport_map
)
10256 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
10258 in
.hash
= htab_hash_pointer (decl
);
10259 in
.base
.from
= decl
;
10260 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
10261 h
= (struct tree_map
*) *loc
;
10265 *loc
= h
= GGC_NEW (struct tree_map
);
10267 h
->base
.from
= decl
;
10268 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
10269 VAR_DECL
, NULL
, ptr_type_node
);
10270 DECL_ARTIFICIAL (to
) = 1;
10271 DECL_IGNORED_P (to
) = 1;
10272 DECL_EXTERNAL (to
) = 1;
10273 TREE_READONLY (to
) = 1;
10275 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
10276 name
= targetm
.strip_name_encoding (name
);
10277 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
10278 ? "*__imp_" : "*__imp__";
10279 namelen
= strlen (name
);
10280 prefixlen
= strlen (prefix
);
10281 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
10282 memcpy (imp_name
, prefix
, prefixlen
);
10283 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
10285 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
10286 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
10287 SET_SYMBOL_REF_DECL (rtl
, to
);
10288 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
10290 rtl
= gen_const_mem (Pmode
, rtl
);
10291 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
10293 SET_DECL_RTL (to
, rtl
);
10294 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
10299 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10300 true if we require the result be a register. */
10303 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
10308 gcc_assert (SYMBOL_REF_DECL (symbol
));
10309 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
10311 x
= DECL_RTL (imp_decl
);
10313 x
= force_reg (Pmode
, x
);
10317 /* Try machine-dependent ways of modifying an illegitimate address
10318 to be legitimate. If we find one, return the new, valid address.
10319 This macro is used in only one place: `memory_address' in explow.c.
10321 OLDX is the address as it was before break_out_memory_refs was called.
10322 In some cases it is useful to look at this to decide what needs to be done.
10324 It is always safe for this macro to do nothing. It exists to recognize
10325 opportunities to optimize the output.
10327 For the 80386, we handle X+REG by loading X into a register R and
10328 using R+REG. R will go in a general reg and indexing will be used.
10329 However, if REG is a broken-out memory address or multiplication,
10330 nothing needs to be done because REG can certainly go in a general reg.
10332 When -fpic is used, special handling is needed for symbolic references.
10333 See comments by legitimize_pic_address in i386.c for details. */
10336 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
10337 enum machine_mode mode
)
10342 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
10344 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
10345 if (GET_CODE (x
) == CONST
10346 && GET_CODE (XEXP (x
, 0)) == PLUS
10347 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
10348 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
10350 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
10351 (enum tls_model
) log
, false);
10352 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
10355 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
10357 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
10358 return legitimize_dllimport_symbol (x
, true);
10359 if (GET_CODE (x
) == CONST
10360 && GET_CODE (XEXP (x
, 0)) == PLUS
10361 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
10362 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
10364 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
10365 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
10369 if (flag_pic
&& SYMBOLIC_CONST (x
))
10370 return legitimize_pic_address (x
, 0);
10372 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10373 if (GET_CODE (x
) == ASHIFT
10374 && CONST_INT_P (XEXP (x
, 1))
10375 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
10378 log
= INTVAL (XEXP (x
, 1));
10379 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
10380 GEN_INT (1 << log
));
10383 if (GET_CODE (x
) == PLUS
)
10385 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10387 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
10388 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10389 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
10392 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
10393 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
10394 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
10395 GEN_INT (1 << log
));
10398 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
10399 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10400 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
10403 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
10404 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
10405 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
10406 GEN_INT (1 << log
));
10409 /* Put multiply first if it isn't already. */
10410 if (GET_CODE (XEXP (x
, 1)) == MULT
)
10412 rtx tmp
= XEXP (x
, 0);
10413 XEXP (x
, 0) = XEXP (x
, 1);
10418 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10419 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10420 created by virtual register instantiation, register elimination, and
10421 similar optimizations. */
10422 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
10425 x
= gen_rtx_PLUS (Pmode
,
10426 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
10427 XEXP (XEXP (x
, 1), 0)),
10428 XEXP (XEXP (x
, 1), 1));
10432 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10433 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10434 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
10435 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10436 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
10437 && CONSTANT_P (XEXP (x
, 1)))
10440 rtx other
= NULL_RTX
;
10442 if (CONST_INT_P (XEXP (x
, 1)))
10444 constant
= XEXP (x
, 1);
10445 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
10447 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
10449 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
10450 other
= XEXP (x
, 1);
10458 x
= gen_rtx_PLUS (Pmode
,
10459 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
10460 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
10461 plus_constant (other
, INTVAL (constant
)));
10465 if (changed
&& ix86_legitimate_address_p (mode
, x
, FALSE
))
10468 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10471 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
10474 if (GET_CODE (XEXP (x
, 1)) == MULT
)
10477 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
10481 && REG_P (XEXP (x
, 1))
10482 && REG_P (XEXP (x
, 0)))
10485 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
10488 x
= legitimize_pic_address (x
, 0);
10491 if (changed
&& ix86_legitimate_address_p (mode
, x
, FALSE
))
10494 if (REG_P (XEXP (x
, 0)))
10496 rtx temp
= gen_reg_rtx (Pmode
);
10497 rtx val
= force_operand (XEXP (x
, 1), temp
);
10499 emit_move_insn (temp
, val
);
10501 XEXP (x
, 1) = temp
;
10505 else if (REG_P (XEXP (x
, 1)))
10507 rtx temp
= gen_reg_rtx (Pmode
);
10508 rtx val
= force_operand (XEXP (x
, 0), temp
);
10510 emit_move_insn (temp
, val
);
10512 XEXP (x
, 0) = temp
;
10520 /* Print an integer constant expression in assembler syntax. Addition
10521 and subtraction are the only arithmetic that may appear in these
10522 expressions. FILE is the stdio stream to write to, X is the rtx, and
10523 CODE is the operand print code from the output string. */
10526 output_pic_addr_const (FILE *file
, rtx x
, int code
)
10530 switch (GET_CODE (x
))
10533 gcc_assert (flag_pic
);
10538 if (! TARGET_MACHO
|| TARGET_64BIT
)
10539 output_addr_const (file
, x
);
10542 const char *name
= XSTR (x
, 0);
10544 /* Mark the decl as referenced so that cgraph will
10545 output the function. */
10546 if (SYMBOL_REF_DECL (x
))
10547 mark_decl_referenced (SYMBOL_REF_DECL (x
));
10550 if (MACHOPIC_INDIRECT
10551 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
10552 name
= machopic_indirection_name (x
, /*stub_p=*/true);
10554 assemble_name (file
, name
);
10556 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
10557 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
10558 fputs ("@PLT", file
);
10565 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
10566 assemble_name (asm_out_file
, buf
);
10570 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
10574 /* This used to output parentheses around the expression,
10575 but that does not work on the 386 (either ATT or BSD assembler). */
10576 output_pic_addr_const (file
, XEXP (x
, 0), code
);
10580 if (GET_MODE (x
) == VOIDmode
)
10582 /* We can use %d if the number is <32 bits and positive. */
10583 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
10584 fprintf (file
, "0x%lx%08lx",
10585 (unsigned long) CONST_DOUBLE_HIGH (x
),
10586 (unsigned long) CONST_DOUBLE_LOW (x
));
10588 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
10591 /* We can't handle floating point constants;
10592 PRINT_OPERAND must handle them. */
10593 output_operand_lossage ("floating constant misused");
10597 /* Some assemblers need integer constants to appear first. */
10598 if (CONST_INT_P (XEXP (x
, 0)))
10600 output_pic_addr_const (file
, XEXP (x
, 0), code
);
10602 output_pic_addr_const (file
, XEXP (x
, 1), code
);
10606 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
10607 output_pic_addr_const (file
, XEXP (x
, 1), code
);
10609 output_pic_addr_const (file
, XEXP (x
, 0), code
);
10615 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
10616 output_pic_addr_const (file
, XEXP (x
, 0), code
);
10618 output_pic_addr_const (file
, XEXP (x
, 1), code
);
10620 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
10624 gcc_assert (XVECLEN (x
, 0) == 1);
10625 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
10626 switch (XINT (x
, 1))
10629 fputs ("@GOT", file
);
10631 case UNSPEC_GOTOFF
:
10632 fputs ("@GOTOFF", file
);
10634 case UNSPEC_PLTOFF
:
10635 fputs ("@PLTOFF", file
);
10637 case UNSPEC_GOTPCREL
:
10638 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
10639 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
10641 case UNSPEC_GOTTPOFF
:
10642 /* FIXME: This might be @TPOFF in Sun ld too. */
10643 fputs ("@GOTTPOFF", file
);
10646 fputs ("@TPOFF", file
);
10648 case UNSPEC_NTPOFF
:
10650 fputs ("@TPOFF", file
);
10652 fputs ("@NTPOFF", file
);
10654 case UNSPEC_DTPOFF
:
10655 fputs ("@DTPOFF", file
);
10657 case UNSPEC_GOTNTPOFF
:
10659 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
10660 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file
);
10662 fputs ("@GOTNTPOFF", file
);
10664 case UNSPEC_INDNTPOFF
:
10665 fputs ("@INDNTPOFF", file
);
10668 case UNSPEC_MACHOPIC_OFFSET
:
10670 machopic_output_function_base_name (file
);
10674 output_operand_lossage ("invalid UNSPEC as operand");
10680 output_operand_lossage ("invalid expression as operand");
10684 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10685 We need to emit DTP-relative relocations. */
10687 static void ATTRIBUTE_UNUSED
10688 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
10690 fputs (ASM_LONG
, file
);
10691 output_addr_const (file
, x
);
10692 fputs ("@DTPOFF", file
);
10698 fputs (", 0", file
);
10701 gcc_unreachable ();
10705 /* Return true if X is a representation of the PIC register. This copes
10706 with calls from ix86_find_base_term, where the register might have
10707 been replaced by a cselib value. */
10710 ix86_pic_register_p (rtx x
)
10712 if (GET_CODE (x
) == VALUE
)
10713 return (pic_offset_table_rtx
10714 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
10716 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
10719 /* In the name of slightly smaller debug output, and to cater to
10720 general assembler lossage, recognize PIC+GOTOFF and turn it back
10721 into a direct symbol reference.
10723 On Darwin, this is necessary to avoid a crash, because Darwin
10724 has a different PIC label for each routine but the DWARF debugging
10725 information is not associated with any particular routine, so it's
10726 necessary to remove references to the PIC label from RTL stored by
10727 the DWARF output code. */
10730 ix86_delegitimize_address (rtx orig_x
)
10733 /* reg_addend is NULL or a multiple of some register. */
10734 rtx reg_addend
= NULL_RTX
;
10735 /* const_addend is NULL or a const_int. */
10736 rtx const_addend
= NULL_RTX
;
10737 /* This is the result, or NULL. */
10738 rtx result
= NULL_RTX
;
10745 if (GET_CODE (x
) != CONST
10746 || GET_CODE (XEXP (x
, 0)) != UNSPEC
10747 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
10748 || !MEM_P (orig_x
))
10750 return XVECEXP (XEXP (x
, 0), 0, 0);
10753 if (GET_CODE (x
) != PLUS
10754 || GET_CODE (XEXP (x
, 1)) != CONST
)
10757 if (ix86_pic_register_p (XEXP (x
, 0)))
10758 /* %ebx + GOT/GOTOFF */
10760 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
10762 /* %ebx + %reg * scale + GOT/GOTOFF */
10763 reg_addend
= XEXP (x
, 0);
10764 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
10765 reg_addend
= XEXP (reg_addend
, 1);
10766 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
10767 reg_addend
= XEXP (reg_addend
, 0);
10770 if (!REG_P (reg_addend
)
10771 && GET_CODE (reg_addend
) != MULT
10772 && GET_CODE (reg_addend
) != ASHIFT
)
10778 x
= XEXP (XEXP (x
, 1), 0);
10779 if (GET_CODE (x
) == PLUS
10780 && CONST_INT_P (XEXP (x
, 1)))
10782 const_addend
= XEXP (x
, 1);
10786 if (GET_CODE (x
) == UNSPEC
10787 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
10788 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
10789 result
= XVECEXP (x
, 0, 0);
10791 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
10792 && !MEM_P (orig_x
))
10793 result
= XVECEXP (x
, 0, 0);
10799 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
10801 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
10805 /* If X is a machine specific address (i.e. a symbol or label being
10806 referenced as a displacement from the GOT implemented using an
10807 UNSPEC), then return the base term. Otherwise return X. */
10810 ix86_find_base_term (rtx x
)
10816 if (GET_CODE (x
) != CONST
)
10818 term
= XEXP (x
, 0);
10819 if (GET_CODE (term
) == PLUS
10820 && (CONST_INT_P (XEXP (term
, 1))
10821 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
10822 term
= XEXP (term
, 0);
10823 if (GET_CODE (term
) != UNSPEC
10824 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
10827 return XVECEXP (term
, 0, 0);
10830 return ix86_delegitimize_address (x
);
10834 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
10835 int fp
, FILE *file
)
10837 const char *suffix
;
10839 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
10841 code
= ix86_fp_compare_code_to_integer (code
);
10845 code
= reverse_condition (code
);
10896 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
10900 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10901 Those same assemblers have the same but opposite lossage on cmov. */
10902 if (mode
== CCmode
)
10903 suffix
= fp
? "nbe" : "a";
10904 else if (mode
== CCCmode
)
10907 gcc_unreachable ();
10923 gcc_unreachable ();
10927 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
10944 gcc_unreachable ();
10948 /* ??? As above. */
10949 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
10950 suffix
= fp
? "nb" : "ae";
10953 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
10957 /* ??? As above. */
10958 if (mode
== CCmode
)
10960 else if (mode
== CCCmode
)
10961 suffix
= fp
? "nb" : "ae";
10963 gcc_unreachable ();
10966 suffix
= fp
? "u" : "p";
10969 suffix
= fp
? "nu" : "np";
10972 gcc_unreachable ();
10974 fputs (suffix
, file
);
10977 /* Print the name of register X to FILE based on its machine mode and number.
10978 If CODE is 'w', pretend the mode is HImode.
10979 If CODE is 'b', pretend the mode is QImode.
10980 If CODE is 'k', pretend the mode is SImode.
10981 If CODE is 'q', pretend the mode is DImode.
10982 If CODE is 'x', pretend the mode is V4SFmode.
10983 If CODE is 't', pretend the mode is V8SFmode.
10984 If CODE is 'h', pretend the reg is the 'high' byte register.
10985 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10986 If CODE is 'd', duplicate the operand for AVX instruction.
10990 print_reg (rtx x
, int code
, FILE *file
)
10993 bool duplicated
= code
== 'd' && TARGET_AVX
;
10995 gcc_assert (x
== pc_rtx
10996 || (REGNO (x
) != ARG_POINTER_REGNUM
10997 && REGNO (x
) != FRAME_POINTER_REGNUM
10998 && REGNO (x
) != FLAGS_REG
10999 && REGNO (x
) != FPSR_REG
11000 && REGNO (x
) != FPCR_REG
));
11002 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11007 gcc_assert (TARGET_64BIT
);
11008 fputs ("rip", file
);
11012 if (code
== 'w' || MMX_REG_P (x
))
11014 else if (code
== 'b')
11016 else if (code
== 'k')
11018 else if (code
== 'q')
11020 else if (code
== 'y')
11022 else if (code
== 'h')
11024 else if (code
== 'x')
11026 else if (code
== 't')
11029 code
= GET_MODE_SIZE (GET_MODE (x
));
11031 /* Irritatingly, AMD extended registers use different naming convention
11032 from the normal registers. */
11033 if (REX_INT_REG_P (x
))
11035 gcc_assert (TARGET_64BIT
);
11039 error ("extended registers have no high halves");
11042 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
11045 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
11048 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
11051 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
11054 error ("unsupported operand size for extended register");
11064 if (STACK_TOP_P (x
))
11073 if (! ANY_FP_REG_P (x
))
11074 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
11079 reg
= hi_reg_name
[REGNO (x
)];
11082 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
11084 reg
= qi_reg_name
[REGNO (x
)];
11087 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
11089 reg
= qi_high_reg_name
[REGNO (x
)];
11094 gcc_assert (!duplicated
);
11096 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
11101 gcc_unreachable ();
11107 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11108 fprintf (file
, ", %%%s", reg
);
11110 fprintf (file
, ", %s", reg
);
11114 /* Locate some local-dynamic symbol still in use by this function
11115 so that we can print its name in some tls_local_dynamic_base
11119 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
11123 if (GET_CODE (x
) == SYMBOL_REF
11124 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
11126 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
11133 static const char *
11134 get_some_local_dynamic_name (void)
11138 if (cfun
->machine
->some_ld_name
)
11139 return cfun
->machine
->some_ld_name
;
11141 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
11143 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
11144 return cfun
->machine
->some_ld_name
;
11146 gcc_unreachable ();
11149 /* Meaning of CODE:
11150 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11151 C -- print opcode suffix for set/cmov insn.
11152 c -- like C, but print reversed condition
11153 E,e -- likewise, but for compare-and-branch fused insn.
11154 F,f -- likewise, but for floating-point.
11155 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11157 R -- print the prefix for register names.
11158 z -- print the opcode suffix for the size of the current operand.
11159 Z -- likewise, with special suffixes for x87 instructions.
11160 * -- print a star (in certain assembler syntax)
11161 A -- print an absolute memory reference.
11162 w -- print the operand as if it's a "word" (HImode) even if it isn't.
11163 s -- print a shift double count, followed by the assemblers argument
11165 b -- print the QImode name of the register for the indicated operand.
11166 %b0 would print %al if operands[0] is reg 0.
11167 w -- likewise, print the HImode name of the register.
11168 k -- likewise, print the SImode name of the register.
11169 q -- likewise, print the DImode name of the register.
11170 x -- likewise, print the V4SFmode name of the register.
11171 t -- likewise, print the V8SFmode name of the register.
11172 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11173 y -- print "st(0)" instead of "st" as a register.
11174 d -- print duplicated register operand for AVX instruction.
11175 D -- print condition for SSE cmp instruction.
11176 P -- if PIC, print an @PLT suffix.
11177 X -- don't print any sort of PIC '@' suffix for a symbol.
11178 & -- print some in-use local-dynamic symbol name.
11179 H -- print a memory address offset by 8; used for sse high-parts
11180 Y -- print condition for SSE5 com* instruction.
11181 + -- print a branch hint as 'cs' or 'ds' prefix
11182 ; -- print a semicolon (after prefixes due to bug in older gas).
11186 print_operand (FILE *file
, rtx x
, int code
)
11193 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11198 assemble_name (file
, get_some_local_dynamic_name ());
11202 switch (ASSEMBLER_DIALECT
)
11209 /* Intel syntax. For absolute addresses, registers should not
11210 be surrounded by braces. */
11214 PRINT_OPERAND (file
, x
, 0);
11221 gcc_unreachable ();
11224 PRINT_OPERAND (file
, x
, 0);
11229 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11234 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11239 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11244 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11249 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11254 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11259 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
11261 /* Opcodes don't get size suffixes if using Intel opcodes. */
11262 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
11265 switch (GET_MODE_SIZE (GET_MODE (x
)))
11284 output_operand_lossage
11285 ("invalid operand size for operand code '%c'", code
);
11290 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
11292 (0, "non-integer operand used with operand code '%c'", code
);
11296 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
11297 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
11300 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
11302 switch (GET_MODE_SIZE (GET_MODE (x
)))
11305 #ifdef HAVE_AS_IX86_FILDS
11315 #ifdef HAVE_AS_IX86_FILDQ
11318 fputs ("ll", file
);
11326 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
11328 /* 387 opcodes don't get size suffixes
11329 if the operands are registers. */
11330 if (STACK_REG_P (x
))
11333 switch (GET_MODE_SIZE (GET_MODE (x
)))
11354 output_operand_lossage
11355 ("invalid operand type used with operand code '%c'", code
);
11359 output_operand_lossage
11360 ("invalid operand size for operand code '%c'", code
);
11377 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
11379 PRINT_OPERAND (file
, x
, 0);
11380 fputs (", ", file
);
11385 /* Little bit of braindamage here. The SSE compare instructions
11386 does use completely different names for the comparisons that the
11387 fp conditional moves. */
11390 switch (GET_CODE (x
))
11393 fputs ("eq", file
);
11396 fputs ("eq_us", file
);
11399 fputs ("lt", file
);
11402 fputs ("nge", file
);
11405 fputs ("le", file
);
11408 fputs ("ngt", file
);
11411 fputs ("unord", file
);
11414 fputs ("neq", file
);
11417 fputs ("neq_oq", file
);
11420 fputs ("ge", file
);
11423 fputs ("nlt", file
);
11426 fputs ("gt", file
);
11429 fputs ("nle", file
);
11432 fputs ("ord", file
);
11435 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11441 switch (GET_CODE (x
))
11445 fputs ("eq", file
);
11449 fputs ("lt", file
);
11453 fputs ("le", file
);
11456 fputs ("unord", file
);
11460 fputs ("neq", file
);
11464 fputs ("nlt", file
);
11468 fputs ("nle", file
);
11471 fputs ("ord", file
);
11474 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11480 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11481 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11483 switch (GET_MODE (x
))
11485 case HImode
: putc ('w', file
); break;
11487 case SFmode
: putc ('l', file
); break;
11489 case DFmode
: putc ('q', file
); break;
11490 default: gcc_unreachable ();
11497 if (!COMPARISON_P (x
))
11499 output_operand_lossage ("operand is neither a constant nor a "
11500 "condition code, invalid operand code "
11504 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
11507 if (!COMPARISON_P (x
))
11509 output_operand_lossage ("operand is neither a constant nor a "
11510 "condition code, invalid operand code "
11514 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11515 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11518 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
11521 /* Like above, but reverse condition */
11523 /* Check to see if argument to %c is really a constant
11524 and not a condition code which needs to be reversed. */
11525 if (!COMPARISON_P (x
))
11527 output_operand_lossage ("operand is neither a constant nor a "
11528 "condition code, invalid operand "
11532 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
11535 if (!COMPARISON_P (x
))
11537 output_operand_lossage ("operand is neither a constant nor a "
11538 "condition code, invalid operand "
11542 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11543 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11546 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
11550 put_condition_code (GET_CODE (x
), CCmode
, 0, 0, file
);
11554 put_condition_code (GET_CODE (x
), CCmode
, 1, 0, file
);
11558 /* It doesn't actually matter what mode we use here, as we're
11559 only going to use this for printing. */
11560 x
= adjust_address_nv (x
, DImode
, 8);
11568 || optimize_function_for_size_p (cfun
) || !TARGET_BRANCH_PREDICTION_HINTS
)
11571 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
11574 int pred_val
= INTVAL (XEXP (x
, 0));
11576 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
11577 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
11579 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
11580 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
11582 /* Emit hints only in the case default branch prediction
11583 heuristics would fail. */
11584 if (taken
!= cputaken
)
11586 /* We use 3e (DS) prefix for taken branches and
11587 2e (CS) prefix for not taken branches. */
11589 fputs ("ds ; ", file
);
11591 fputs ("cs ; ", file
);
11599 switch (GET_CODE (x
))
11602 fputs ("neq", file
);
11605 fputs ("eq", file
);
11609 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
11613 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
11617 fputs ("le", file
);
11621 fputs ("lt", file
);
11624 fputs ("unord", file
);
11627 fputs ("ord", file
);
11630 fputs ("ueq", file
);
11633 fputs ("nlt", file
);
11636 fputs ("nle", file
);
11639 fputs ("ule", file
);
11642 fputs ("ult", file
);
11645 fputs ("une", file
);
11648 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11655 fputs (" ; ", file
);
11662 output_operand_lossage ("invalid operand code '%c'", code
);
11667 print_reg (x
, code
, file
);
11669 else if (MEM_P (x
))
11671 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11672 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
11673 && GET_MODE (x
) != BLKmode
)
11676 switch (GET_MODE_SIZE (GET_MODE (x
)))
11678 case 1: size
= "BYTE"; break;
11679 case 2: size
= "WORD"; break;
11680 case 4: size
= "DWORD"; break;
11681 case 8: size
= "QWORD"; break;
11682 case 12: size
= "XWORD"; break;
11684 if (GET_MODE (x
) == XFmode
)
11690 gcc_unreachable ();
11693 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11696 else if (code
== 'w')
11698 else if (code
== 'k')
11701 fputs (size
, file
);
11702 fputs (" PTR ", file
);
11706 /* Avoid (%rip) for call operands. */
11707 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
11708 && !CONST_INT_P (x
))
11709 output_addr_const (file
, x
);
11710 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
11711 output_operand_lossage ("invalid constraints for operand");
11713 output_address (x
);
11716 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
11721 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11722 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
11724 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11726 fprintf (file
, "0x%08lx", (long unsigned int) l
);
11729 /* These float cases don't actually occur as immediate operands. */
11730 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
11734 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
11735 fprintf (file
, "%s", dstr
);
11738 else if (GET_CODE (x
) == CONST_DOUBLE
11739 && GET_MODE (x
) == XFmode
)
11743 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
11744 fprintf (file
, "%s", dstr
);
11749 /* We have patterns that allow zero sets of memory, for instance.
11750 In 64-bit mode, we should probably support all 8-byte vectors,
11751 since we can in fact encode that into an immediate. */
11752 if (GET_CODE (x
) == CONST_VECTOR
)
11754 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
11760 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
11762 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11765 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
11766 || GET_CODE (x
) == LABEL_REF
)
11768 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11771 fputs ("OFFSET FLAT:", file
);
11774 if (CONST_INT_P (x
))
11775 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
11777 output_pic_addr_const (file
, x
, code
);
11779 output_addr_const (file
, x
);
11783 /* Print a memory operand whose address is ADDR. */
11786 print_operand_address (FILE *file
, rtx addr
)
11788 struct ix86_address parts
;
11789 rtx base
, index
, disp
;
11791 int ok
= ix86_decompose_address (addr
, &parts
);
11796 index
= parts
.index
;
11798 scale
= parts
.scale
;
11806 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11808 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
11811 gcc_unreachable ();
11814 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11815 if (TARGET_64BIT
&& !base
&& !index
)
11819 if (GET_CODE (disp
) == CONST
11820 && GET_CODE (XEXP (disp
, 0)) == PLUS
11821 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
11822 symbol
= XEXP (XEXP (disp
, 0), 0);
11824 if (GET_CODE (symbol
) == LABEL_REF
11825 || (GET_CODE (symbol
) == SYMBOL_REF
11826 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
11829 if (!base
&& !index
)
11831 /* Displacement only requires special attention. */
11833 if (CONST_INT_P (disp
))
11835 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
11836 fputs ("ds:", file
);
11837 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
11840 output_pic_addr_const (file
, disp
, 0);
11842 output_addr_const (file
, disp
);
11846 if (ASSEMBLER_DIALECT
== ASM_ATT
)
11851 output_pic_addr_const (file
, disp
, 0);
11852 else if (GET_CODE (disp
) == LABEL_REF
)
11853 output_asm_label (disp
);
11855 output_addr_const (file
, disp
);
11860 print_reg (base
, 0, file
);
11864 print_reg (index
, 0, file
);
11866 fprintf (file
, ",%d", scale
);
11872 rtx offset
= NULL_RTX
;
11876 /* Pull out the offset of a symbol; print any symbol itself. */
11877 if (GET_CODE (disp
) == CONST
11878 && GET_CODE (XEXP (disp
, 0)) == PLUS
11879 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
11881 offset
= XEXP (XEXP (disp
, 0), 1);
11882 disp
= gen_rtx_CONST (VOIDmode
,
11883 XEXP (XEXP (disp
, 0), 0));
11887 output_pic_addr_const (file
, disp
, 0);
11888 else if (GET_CODE (disp
) == LABEL_REF
)
11889 output_asm_label (disp
);
11890 else if (CONST_INT_P (disp
))
11893 output_addr_const (file
, disp
);
11899 print_reg (base
, 0, file
);
11902 if (INTVAL (offset
) >= 0)
11904 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
11908 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
11915 print_reg (index
, 0, file
);
11917 fprintf (file
, "*%d", scale
);
11925 output_addr_const_extra (FILE *file
, rtx x
)
11929 if (GET_CODE (x
) != UNSPEC
)
11932 op
= XVECEXP (x
, 0, 0);
11933 switch (XINT (x
, 1))
11935 case UNSPEC_GOTTPOFF
:
11936 output_addr_const (file
, op
);
11937 /* FIXME: This might be @TPOFF in Sun ld. */
11938 fputs ("@GOTTPOFF", file
);
11941 output_addr_const (file
, op
);
11942 fputs ("@TPOFF", file
);
11944 case UNSPEC_NTPOFF
:
11945 output_addr_const (file
, op
);
11947 fputs ("@TPOFF", file
);
11949 fputs ("@NTPOFF", file
);
11951 case UNSPEC_DTPOFF
:
11952 output_addr_const (file
, op
);
11953 fputs ("@DTPOFF", file
);
11955 case UNSPEC_GOTNTPOFF
:
11956 output_addr_const (file
, op
);
11958 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
11959 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file
);
11961 fputs ("@GOTNTPOFF", file
);
11963 case UNSPEC_INDNTPOFF
:
11964 output_addr_const (file
, op
);
11965 fputs ("@INDNTPOFF", file
);
11968 case UNSPEC_MACHOPIC_OFFSET
:
11969 output_addr_const (file
, op
);
11971 machopic_output_function_base_name (file
);
11982 /* Split one or more DImode RTL references into pairs of SImode
11983 references. The RTL can be REG, offsettable MEM, integer constant, or
11984 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11985 split and "num" is its length. lo_half and hi_half are output arrays
11986 that parallel "operands". */
11989 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
11993 rtx op
= operands
[num
];
11995 /* simplify_subreg refuse to split volatile memory addresses,
11996 but we still have to handle it. */
11999 lo_half
[num
] = adjust_address (op
, SImode
, 0);
12000 hi_half
[num
] = adjust_address (op
, SImode
, 4);
12004 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
12005 GET_MODE (op
) == VOIDmode
12006 ? DImode
: GET_MODE (op
), 0);
12007 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
12008 GET_MODE (op
) == VOIDmode
12009 ? DImode
: GET_MODE (op
), 4);
12013 /* Split one or more TImode RTL references into pairs of DImode
12014 references. The RTL can be REG, offsettable MEM, integer constant, or
12015 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12016 split and "num" is its length. lo_half and hi_half are output arrays
12017 that parallel "operands". */
12020 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
12024 rtx op
= operands
[num
];
12026 /* simplify_subreg refuse to split volatile memory addresses, but we
12027 still have to handle it. */
12030 lo_half
[num
] = adjust_address (op
, DImode
, 0);
12031 hi_half
[num
] = adjust_address (op
, DImode
, 8);
12035 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
12036 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
12041 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12042 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
12043 is the expression of the binary operation. The output may either be
12044 emitted here, or returned to the caller, like all output_* functions.
12046 There is no guarantee that the operands are the same mode, as they
12047 might be within FLOAT or FLOAT_EXTEND expressions. */
12049 #ifndef SYSV386_COMPAT
12050 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
12051 wants to fix the assemblers because that causes incompatibility
12052 with gcc. No-one wants to fix gcc because that causes
12053 incompatibility with assemblers... You can use the option of
12054 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
12055 #define SYSV386_COMPAT 1
12059 output_387_binary_op (rtx insn
, rtx
*operands
)
12061 static char buf
[40];
12064 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
12066 #ifdef ENABLE_CHECKING
12067 /* Even if we do not want to check the inputs, this documents input
12068 constraints. Which helps in understanding the following code. */
12069 if (STACK_REG_P (operands
[0])
12070 && ((REG_P (operands
[1])
12071 && REGNO (operands
[0]) == REGNO (operands
[1])
12072 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
12073 || (REG_P (operands
[2])
12074 && REGNO (operands
[0]) == REGNO (operands
[2])
12075 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
12076 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
12079 gcc_assert (is_sse
);
12082 switch (GET_CODE (operands
[3]))
12085 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
12086 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
12094 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
12095 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
12103 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
12104 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
12112 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
12113 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
12121 gcc_unreachable ();
12128 strcpy (buf
, ssep
);
12129 if (GET_MODE (operands
[0]) == SFmode
)
12130 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
12132 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
12136 strcpy (buf
, ssep
+ 1);
12137 if (GET_MODE (operands
[0]) == SFmode
)
12138 strcat (buf
, "ss\t{%2, %0|%0, %2}");
12140 strcat (buf
, "sd\t{%2, %0|%0, %2}");
12146 switch (GET_CODE (operands
[3]))
12150 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
12152 rtx temp
= operands
[2];
12153 operands
[2] = operands
[1];
12154 operands
[1] = temp
;
12157 /* know operands[0] == operands[1]. */
12159 if (MEM_P (operands
[2]))
12165 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
12167 if (STACK_TOP_P (operands
[0]))
12168 /* How is it that we are storing to a dead operand[2]?
12169 Well, presumably operands[1] is dead too. We can't
12170 store the result to st(0) as st(0) gets popped on this
12171 instruction. Instead store to operands[2] (which I
12172 think has to be st(1)). st(1) will be popped later.
12173 gcc <= 2.8.1 didn't have this check and generated
12174 assembly code that the Unixware assembler rejected. */
12175 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12177 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12181 if (STACK_TOP_P (operands
[0]))
12182 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12184 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12189 if (MEM_P (operands
[1]))
12195 if (MEM_P (operands
[2]))
12201 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
12204 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12205 derived assemblers, confusingly reverse the direction of
12206 the operation for fsub{r} and fdiv{r} when the
12207 destination register is not st(0). The Intel assembler
12208 doesn't have this brain damage. Read !SYSV386_COMPAT to
12209 figure out what the hardware really does. */
12210 if (STACK_TOP_P (operands
[0]))
12211 p
= "{p\t%0, %2|rp\t%2, %0}";
12213 p
= "{rp\t%2, %0|p\t%0, %2}";
12215 if (STACK_TOP_P (operands
[0]))
12216 /* As above for fmul/fadd, we can't store to st(0). */
12217 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12219 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12224 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
12227 if (STACK_TOP_P (operands
[0]))
12228 p
= "{rp\t%0, %1|p\t%1, %0}";
12230 p
= "{p\t%1, %0|rp\t%0, %1}";
12232 if (STACK_TOP_P (operands
[0]))
12233 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
12235 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
12240 if (STACK_TOP_P (operands
[0]))
12242 if (STACK_TOP_P (operands
[1]))
12243 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12245 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
12248 else if (STACK_TOP_P (operands
[1]))
12251 p
= "{\t%1, %0|r\t%0, %1}";
12253 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
12259 p
= "{r\t%2, %0|\t%0, %2}";
12261 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12267 gcc_unreachable ();
12274 /* Return needed mode for entity in optimize_mode_switching pass. */
12277 ix86_mode_needed (int entity
, rtx insn
)
12279 enum attr_i387_cw mode
;
12281 /* The mode UNINITIALIZED is used to store control word after a
12282 function call or ASM pattern. The mode ANY specify that function
12283 has no requirements on the control word and make no changes in the
12284 bits we are interested in. */
12287 || (NONJUMP_INSN_P (insn
)
12288 && (asm_noperands (PATTERN (insn
)) >= 0
12289 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
12290 return I387_CW_UNINITIALIZED
;
12292 if (recog_memoized (insn
) < 0)
12293 return I387_CW_ANY
;
12295 mode
= get_attr_i387_cw (insn
);
12300 if (mode
== I387_CW_TRUNC
)
12305 if (mode
== I387_CW_FLOOR
)
12310 if (mode
== I387_CW_CEIL
)
12315 if (mode
== I387_CW_MASK_PM
)
12320 gcc_unreachable ();
12323 return I387_CW_ANY
;
12326 /* Output code to initialize control word copies used by trunc?f?i and
12327 rounding patterns. CURRENT_MODE is set to current control word,
12328 while NEW_MODE is set to new control word. */
12331 emit_i387_cw_initialization (int mode
)
12333 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
12336 enum ix86_stack_slot slot
;
12338 rtx reg
= gen_reg_rtx (HImode
);
12340 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
12341 emit_move_insn (reg
, copy_rtx (stored_mode
));
12343 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
12344 || optimize_function_for_size_p (cfun
))
12348 case I387_CW_TRUNC
:
12349 /* round toward zero (truncate) */
12350 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
12351 slot
= SLOT_CW_TRUNC
;
12354 case I387_CW_FLOOR
:
12355 /* round down toward -oo */
12356 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
12357 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
12358 slot
= SLOT_CW_FLOOR
;
12362 /* round up toward +oo */
12363 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
12364 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
12365 slot
= SLOT_CW_CEIL
;
12368 case I387_CW_MASK_PM
:
12369 /* mask precision exception for nearbyint() */
12370 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
12371 slot
= SLOT_CW_MASK_PM
;
12375 gcc_unreachable ();
12382 case I387_CW_TRUNC
:
12383 /* round toward zero (truncate) */
12384 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
12385 slot
= SLOT_CW_TRUNC
;
12388 case I387_CW_FLOOR
:
12389 /* round down toward -oo */
12390 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
12391 slot
= SLOT_CW_FLOOR
;
12395 /* round up toward +oo */
12396 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
12397 slot
= SLOT_CW_CEIL
;
12400 case I387_CW_MASK_PM
:
12401 /* mask precision exception for nearbyint() */
12402 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
12403 slot
= SLOT_CW_MASK_PM
;
12407 gcc_unreachable ();
12411 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
12413 new_mode
= assign_386_stack_local (HImode
, slot
);
12414 emit_move_insn (new_mode
, reg
);
12417 /* Output code for INSN to convert a float to a signed int. OPERANDS
12418 are the insn operands. The output may be [HSD]Imode and the input
12419 operand may be [SDX]Fmode. */
12422 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
12424 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
12425 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
12426 int round_mode
= get_attr_i387_cw (insn
);
12428 /* Jump through a hoop or two for DImode, since the hardware has no
12429 non-popping instruction. We used to do this a different way, but
12430 that was somewhat fragile and broke with post-reload splitters. */
12431 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
12432 output_asm_insn ("fld\t%y1", operands
);
12434 gcc_assert (STACK_TOP_P (operands
[1]));
12435 gcc_assert (MEM_P (operands
[0]));
12436 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
12439 output_asm_insn ("fisttp%Z0\t%0", operands
);
12442 if (round_mode
!= I387_CW_ANY
)
12443 output_asm_insn ("fldcw\t%3", operands
);
12444 if (stack_top_dies
|| dimode_p
)
12445 output_asm_insn ("fistp%Z0\t%0", operands
);
12447 output_asm_insn ("fist%Z0\t%0", operands
);
12448 if (round_mode
!= I387_CW_ANY
)
12449 output_asm_insn ("fldcw\t%2", operands
);
12455 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12456 have the values zero or one, indicates the ffreep insn's operand
12457 from the OPERANDS array. */
12459 static const char *
12460 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
12462 if (TARGET_USE_FFREEP
)
12463 #if HAVE_AS_IX86_FFREEP
12464 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
12467 static char retval
[] = ".word\t0xc_df";
12468 int regno
= REGNO (operands
[opno
]);
12470 gcc_assert (FP_REGNO_P (regno
));
12472 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
12477 return opno
? "fstp\t%y1" : "fstp\t%y0";
12481 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12482 should be used. UNORDERED_P is true when fucom should be used. */
12485 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
12487 int stack_top_dies
;
12488 rtx cmp_op0
, cmp_op1
;
12489 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
12493 cmp_op0
= operands
[0];
12494 cmp_op1
= operands
[1];
12498 cmp_op0
= operands
[1];
12499 cmp_op1
= operands
[2];
12504 static const char ucomiss
[] = "vucomiss\t{%1, %0|%0, %1}";
12505 static const char ucomisd
[] = "vucomisd\t{%1, %0|%0, %1}";
12506 static const char comiss
[] = "vcomiss\t{%1, %0|%0, %1}";
12507 static const char comisd
[] = "vcomisd\t{%1, %0|%0, %1}";
12509 if (GET_MODE (operands
[0]) == SFmode
)
12511 return &ucomiss
[TARGET_AVX
? 0 : 1];
12513 return &comiss
[TARGET_AVX
? 0 : 1];
12516 return &ucomisd
[TARGET_AVX
? 0 : 1];
12518 return &comisd
[TARGET_AVX
? 0 : 1];
12521 gcc_assert (STACK_TOP_P (cmp_op0
));
12523 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
12525 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
12527 if (stack_top_dies
)
12529 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
12530 return output_387_ffreep (operands
, 1);
12533 return "ftst\n\tfnstsw\t%0";
12536 if (STACK_REG_P (cmp_op1
)
12538 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
12539 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
12541 /* If both the top of the 387 stack dies, and the other operand
12542 is also a stack register that dies, then this must be a
12543 `fcompp' float compare */
12547 /* There is no double popping fcomi variant. Fortunately,
12548 eflags is immune from the fstp's cc clobbering. */
12550 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
12552 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
12553 return output_387_ffreep (operands
, 0);
12558 return "fucompp\n\tfnstsw\t%0";
12560 return "fcompp\n\tfnstsw\t%0";
12565 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12567 static const char * const alt
[16] =
12569 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12570 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12571 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12572 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12574 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12575 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12579 "fcomi\t{%y1, %0|%0, %y1}",
12580 "fcomip\t{%y1, %0|%0, %y1}",
12581 "fucomi\t{%y1, %0|%0, %y1}",
12582 "fucomip\t{%y1, %0|%0, %y1}",
12593 mask
= eflags_p
<< 3;
12594 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
12595 mask
|= unordered_p
<< 1;
12596 mask
|= stack_top_dies
;
12598 gcc_assert (mask
< 16);
12607 ix86_output_addr_vec_elt (FILE *file
, int value
)
12609 const char *directive
= ASM_LONG
;
12613 directive
= ASM_QUAD
;
12615 gcc_assert (!TARGET_64BIT
);
12618 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
12622 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
12624 const char *directive
= ASM_LONG
;
12627 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
12628 directive
= ASM_QUAD
;
12630 gcc_assert (!TARGET_64BIT
);
12632 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12633 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
12634 fprintf (file
, "%s%s%d-%s%d\n",
12635 directive
, LPREFIX
, value
, LPREFIX
, rel
);
12636 else if (HAVE_AS_GOTOFF_IN_DATA
)
12637 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
12639 else if (TARGET_MACHO
)
12641 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
12642 machopic_output_function_base_name (file
);
12643 fprintf(file
, "\n");
12647 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
12648 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
12651 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12655 ix86_expand_clear (rtx dest
)
12659 /* We play register width games, which are only valid after reload. */
12660 gcc_assert (reload_completed
);
12662 /* Avoid HImode and its attendant prefix byte. */
12663 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
12664 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
12665 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
12667 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12668 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ()))
12670 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
12671 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
12677 /* X is an unchanging MEM. If it is a constant pool reference, return
12678 the constant pool rtx, else NULL. */
12681 maybe_get_pool_constant (rtx x
)
12683 x
= ix86_delegitimize_address (XEXP (x
, 0));
12685 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
12686 return get_pool_constant (x
);
12692 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
12695 enum tls_model model
;
12700 if (GET_CODE (op1
) == SYMBOL_REF
)
12702 model
= SYMBOL_REF_TLS_MODEL (op1
);
12705 op1
= legitimize_tls_address (op1
, model
, true);
12706 op1
= force_operand (op1
, op0
);
12710 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12711 && SYMBOL_REF_DLLIMPORT_P (op1
))
12712 op1
= legitimize_dllimport_symbol (op1
, false);
12714 else if (GET_CODE (op1
) == CONST
12715 && GET_CODE (XEXP (op1
, 0)) == PLUS
12716 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
12718 rtx addend
= XEXP (XEXP (op1
, 0), 1);
12719 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
12722 model
= SYMBOL_REF_TLS_MODEL (symbol
);
12724 tmp
= legitimize_tls_address (symbol
, model
, true);
12725 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12726 && SYMBOL_REF_DLLIMPORT_P (symbol
))
12727 tmp
= legitimize_dllimport_symbol (symbol
, true);
12731 tmp
= force_operand (tmp
, NULL
);
12732 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
12733 op0
, 1, OPTAB_DIRECT
);
12739 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
12741 if (TARGET_MACHO
&& !TARGET_64BIT
)
12746 rtx temp
= ((reload_in_progress
12747 || ((op0
&& REG_P (op0
))
12749 ? op0
: gen_reg_rtx (Pmode
));
12750 op1
= machopic_indirect_data_reference (op1
, temp
);
12751 op1
= machopic_legitimize_pic_address (op1
, mode
,
12752 temp
== op1
? 0 : temp
);
12754 else if (MACHOPIC_INDIRECT
)
12755 op1
= machopic_indirect_data_reference (op1
, 0);
12763 op1
= force_reg (Pmode
, op1
);
12764 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
12766 rtx reg
= !can_create_pseudo_p () ? op0
: NULL_RTX
;
12767 op1
= legitimize_pic_address (op1
, reg
);
12776 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
12777 || !push_operand (op0
, mode
))
12779 op1
= force_reg (mode
, op1
);
12781 if (push_operand (op0
, mode
)
12782 && ! general_no_elim_operand (op1
, mode
))
12783 op1
= copy_to_mode_reg (mode
, op1
);
12785 /* Force large constants in 64bit compilation into register
12786 to get them CSEed. */
12787 if (can_create_pseudo_p ()
12788 && (mode
== DImode
) && TARGET_64BIT
12789 && immediate_operand (op1
, mode
)
12790 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
12791 && !register_operand (op0
, mode
)
12793 op1
= copy_to_mode_reg (mode
, op1
);
12795 if (can_create_pseudo_p ()
12796 && FLOAT_MODE_P (mode
)
12797 && GET_CODE (op1
) == CONST_DOUBLE
)
12799 /* If we are loading a floating point constant to a register,
12800 force the value to memory now, since we'll get better code
12801 out the back end. */
12803 op1
= validize_mem (force_const_mem (mode
, op1
));
12804 if (!register_operand (op0
, mode
))
12806 rtx temp
= gen_reg_rtx (mode
);
12807 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
12808 emit_move_insn (op0
, temp
);
12814 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
12818 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
12820 rtx op0
= operands
[0], op1
= operands
[1];
12821 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
12823 /* Force constants other than zero into memory. We do not know how
12824 the instructions used to build constants modify the upper 64 bits
12825 of the register, once we have that information we may be able
12826 to handle some of them more efficiently. */
12827 if (can_create_pseudo_p ()
12828 && register_operand (op0
, mode
)
12829 && (CONSTANT_P (op1
)
12830 || (GET_CODE (op1
) == SUBREG
12831 && CONSTANT_P (SUBREG_REG (op1
))))
12832 && standard_sse_constant_p (op1
) <= 0)
12833 op1
= validize_mem (force_const_mem (mode
, op1
));
12835 /* We need to check memory alignment for SSE mode since attribute
12836 can make operands unaligned. */
12837 if (can_create_pseudo_p ()
12838 && SSE_REG_MODE_P (mode
)
12839 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
12840 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
12844 /* ix86_expand_vector_move_misalign() does not like constants ... */
12845 if (CONSTANT_P (op1
)
12846 || (GET_CODE (op1
) == SUBREG
12847 && CONSTANT_P (SUBREG_REG (op1
))))
12848 op1
= validize_mem (force_const_mem (mode
, op1
));
12850 /* ... nor both arguments in memory. */
12851 if (!register_operand (op0
, mode
)
12852 && !register_operand (op1
, mode
))
12853 op1
= force_reg (mode
, op1
);
12855 tmp
[0] = op0
; tmp
[1] = op1
;
12856 ix86_expand_vector_move_misalign (mode
, tmp
);
12860 /* Make operand1 a register if it isn't already. */
12861 if (can_create_pseudo_p ()
12862 && !register_operand (op0
, mode
)
12863 && !register_operand (op1
, mode
))
12865 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
12869 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
12872 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12873 straight to ix86_expand_vector_move. */
12874 /* Code generation for scalar reg-reg moves of single and double precision data:
12875 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12879 if (x86_sse_partial_reg_dependency == true)
12884 Code generation for scalar loads of double precision data:
12885 if (x86_sse_split_regs == true)
12886 movlpd mem, reg (gas syntax)
12890 Code generation for unaligned packed loads of single precision data
12891 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12892 if (x86_sse_unaligned_move_optimal)
12895 if (x86_sse_partial_reg_dependency == true)
12907 Code generation for unaligned packed loads of double precision data
12908 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12909 if (x86_sse_unaligned_move_optimal)
12912 if (x86_sse_split_regs == true)
12925 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
12934 switch (GET_MODE_CLASS (mode
))
12936 case MODE_VECTOR_INT
:
12938 switch (GET_MODE_SIZE (mode
))
12941 op0
= gen_lowpart (V16QImode
, op0
);
12942 op1
= gen_lowpart (V16QImode
, op1
);
12943 emit_insn (gen_avx_movdqu (op0
, op1
));
12946 op0
= gen_lowpart (V32QImode
, op0
);
12947 op1
= gen_lowpart (V32QImode
, op1
);
12948 emit_insn (gen_avx_movdqu256 (op0
, op1
));
12951 gcc_unreachable ();
12954 case MODE_VECTOR_FLOAT
:
12955 op0
= gen_lowpart (mode
, op0
);
12956 op1
= gen_lowpart (mode
, op1
);
12961 emit_insn (gen_avx_movups (op0
, op1
));
12964 emit_insn (gen_avx_movups256 (op0
, op1
));
12967 emit_insn (gen_avx_movupd (op0
, op1
));
12970 emit_insn (gen_avx_movupd256 (op0
, op1
));
12973 gcc_unreachable ();
12978 gcc_unreachable ();
12986 /* If we're optimizing for size, movups is the smallest. */
12987 if (optimize_insn_for_size_p ())
12989 op0
= gen_lowpart (V4SFmode
, op0
);
12990 op1
= gen_lowpart (V4SFmode
, op1
);
12991 emit_insn (gen_sse_movups (op0
, op1
));
12995 /* ??? If we have typed data, then it would appear that using
12996 movdqu is the only way to get unaligned data loaded with
12998 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
13000 op0
= gen_lowpart (V16QImode
, op0
);
13001 op1
= gen_lowpart (V16QImode
, op1
);
13002 emit_insn (gen_sse2_movdqu (op0
, op1
));
13006 if (TARGET_SSE2
&& mode
== V2DFmode
)
13010 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
13012 op0
= gen_lowpart (V2DFmode
, op0
);
13013 op1
= gen_lowpart (V2DFmode
, op1
);
13014 emit_insn (gen_sse2_movupd (op0
, op1
));
13018 /* When SSE registers are split into halves, we can avoid
13019 writing to the top half twice. */
13020 if (TARGET_SSE_SPLIT_REGS
)
13022 emit_clobber (op0
);
13027 /* ??? Not sure about the best option for the Intel chips.
13028 The following would seem to satisfy; the register is
13029 entirely cleared, breaking the dependency chain. We
13030 then store to the upper half, with a dependency depth
13031 of one. A rumor has it that Intel recommends two movsd
13032 followed by an unpacklpd, but this is unconfirmed. And
13033 given that the dependency depth of the unpacklpd would
13034 still be one, I'm not sure why this would be better. */
13035 zero
= CONST0_RTX (V2DFmode
);
13038 m
= adjust_address (op1
, DFmode
, 0);
13039 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
13040 m
= adjust_address (op1
, DFmode
, 8);
13041 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
13045 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
13047 op0
= gen_lowpart (V4SFmode
, op0
);
13048 op1
= gen_lowpart (V4SFmode
, op1
);
13049 emit_insn (gen_sse_movups (op0
, op1
));
13053 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
13054 emit_move_insn (op0
, CONST0_RTX (mode
));
13056 emit_clobber (op0
);
13058 if (mode
!= V4SFmode
)
13059 op0
= gen_lowpart (V4SFmode
, op0
);
13060 m
= adjust_address (op1
, V2SFmode
, 0);
13061 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
13062 m
= adjust_address (op1
, V2SFmode
, 8);
13063 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
13066 else if (MEM_P (op0
))
13068 /* If we're optimizing for size, movups is the smallest. */
13069 if (optimize_insn_for_size_p ())
13071 op0
= gen_lowpart (V4SFmode
, op0
);
13072 op1
= gen_lowpart (V4SFmode
, op1
);
13073 emit_insn (gen_sse_movups (op0
, op1
));
13077 /* ??? Similar to above, only less clear because of quote
13078 typeless stores unquote. */
13079 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
13080 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
13082 op0
= gen_lowpart (V16QImode
, op0
);
13083 op1
= gen_lowpart (V16QImode
, op1
);
13084 emit_insn (gen_sse2_movdqu (op0
, op1
));
13088 if (TARGET_SSE2
&& mode
== V2DFmode
)
13090 m
= adjust_address (op0
, DFmode
, 0);
13091 emit_insn (gen_sse2_storelpd (m
, op1
));
13092 m
= adjust_address (op0
, DFmode
, 8);
13093 emit_insn (gen_sse2_storehpd (m
, op1
));
13097 if (mode
!= V4SFmode
)
13098 op1
= gen_lowpart (V4SFmode
, op1
);
13099 m
= adjust_address (op0
, V2SFmode
, 0);
13100 emit_insn (gen_sse_storelps (m
, op1
));
13101 m
= adjust_address (op0
, V2SFmode
, 8);
13102 emit_insn (gen_sse_storehps (m
, op1
));
13106 gcc_unreachable ();
13109 /* Expand a push in MODE. This is some mode for which we do not support
13110 proper push instructions, at least from the registers that we expect
13111 the value to live in. */
13114 ix86_expand_push (enum machine_mode mode
, rtx x
)
13118 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
13119 GEN_INT (-GET_MODE_SIZE (mode
)),
13120 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
13121 if (tmp
!= stack_pointer_rtx
)
13122 emit_move_insn (stack_pointer_rtx
, tmp
);
13124 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13126 /* When we push an operand onto stack, it has to be aligned at least
13127 at the function argument boundary. However since we don't have
13128 the argument type, we can't determine the actual argument
13130 emit_move_insn (tmp
, x
);
13133 /* Helper function of ix86_fixup_binary_operands to canonicalize
13134 operand order. Returns true if the operands should be swapped. */
13137 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
13140 rtx dst
= operands
[0];
13141 rtx src1
= operands
[1];
13142 rtx src2
= operands
[2];
13144 /* If the operation is not commutative, we can't do anything. */
13145 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
13148 /* Highest priority is that src1 should match dst. */
13149 if (rtx_equal_p (dst
, src1
))
13151 if (rtx_equal_p (dst
, src2
))
13154 /* Next highest priority is that immediate constants come second. */
13155 if (immediate_operand (src2
, mode
))
13157 if (immediate_operand (src1
, mode
))
13160 /* Lowest priority is that memory references should come second. */
13170 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
13171 destination to use for the operation. If different from the true
13172 destination in operands[0], a copy operation will be required. */
13175 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
13178 rtx dst
= operands
[0];
13179 rtx src1
= operands
[1];
13180 rtx src2
= operands
[2];
13182 /* Canonicalize operand order. */
13183 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
13187 /* It is invalid to swap operands of different modes. */
13188 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
13195 /* Both source operands cannot be in memory. */
13196 if (MEM_P (src1
) && MEM_P (src2
))
13198 /* Optimization: Only read from memory once. */
13199 if (rtx_equal_p (src1
, src2
))
13201 src2
= force_reg (mode
, src2
);
13205 src2
= force_reg (mode
, src2
);
13208 /* If the destination is memory, and we do not have matching source
13209 operands, do things in registers. */
13210 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
13211 dst
= gen_reg_rtx (mode
);
13213 /* Source 1 cannot be a constant. */
13214 if (CONSTANT_P (src1
))
13215 src1
= force_reg (mode
, src1
);
13217 /* Source 1 cannot be a non-matching memory. */
13218 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
13219 src1
= force_reg (mode
, src1
);
13221 operands
[1] = src1
;
13222 operands
[2] = src2
;
13226 /* Similarly, but assume that the destination has already been
13227 set up properly. */
13230 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
13231 enum machine_mode mode
, rtx operands
[])
13233 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
13234 gcc_assert (dst
== operands
[0]);
13237 /* Attempt to expand a binary operator. Make the expansion closer to the
13238 actual machine, then just general_operand, which will allow 3 separate
13239 memory references (one output, two input) in a single insn. */
13242 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
13245 rtx src1
, src2
, dst
, op
, clob
;
13247 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
13248 src1
= operands
[1];
13249 src2
= operands
[2];
13251 /* Emit the instruction. */
13253 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
13254 if (reload_in_progress
)
13256 /* Reload doesn't know about the flags register, and doesn't know that
13257 it doesn't want to clobber it. We can only do this with PLUS. */
13258 gcc_assert (code
== PLUS
);
13263 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
13264 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
13267 /* Fix up the destination if needed. */
13268 if (dst
!= operands
[0])
13269 emit_move_insn (operands
[0], dst
);
13272 /* Return TRUE or FALSE depending on whether the binary operator meets the
13273 appropriate constraints. */
13276 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
13279 rtx dst
= operands
[0];
13280 rtx src1
= operands
[1];
13281 rtx src2
= operands
[2];
13283 /* Both source operands cannot be in memory. */
13284 if (MEM_P (src1
) && MEM_P (src2
))
13287 /* Canonicalize operand order for commutative operators. */
13288 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
13295 /* If the destination is memory, we must have a matching source operand. */
13296 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
13299 /* Source 1 cannot be a constant. */
13300 if (CONSTANT_P (src1
))
13303 /* Source 1 cannot be a non-matching memory. */
13304 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
13310 /* Attempt to expand a unary operator. Make the expansion closer to the
13311 actual machine, then just general_operand, which will allow 2 separate
13312 memory references (one output, one input) in a single insn. */
13315 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
13318 int matching_memory
;
13319 rtx src
, dst
, op
, clob
;
13324 /* If the destination is memory, and we do not have matching source
13325 operands, do things in registers. */
13326 matching_memory
= 0;
13329 if (rtx_equal_p (dst
, src
))
13330 matching_memory
= 1;
13332 dst
= gen_reg_rtx (mode
);
13335 /* When source operand is memory, destination must match. */
13336 if (MEM_P (src
) && !matching_memory
)
13337 src
= force_reg (mode
, src
);
13339 /* Emit the instruction. */
13341 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
13342 if (reload_in_progress
|| code
== NOT
)
13344 /* Reload doesn't know about the flags register, and doesn't know that
13345 it doesn't want to clobber it. */
13346 gcc_assert (code
== NOT
);
13351 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
13352 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
13355 /* Fix up the destination if needed. */
13356 if (dst
!= operands
[0])
13357 emit_move_insn (operands
[0], dst
);
13360 #define LEA_SEARCH_THRESHOLD 12
13362 /* Search backward for non-agu definition of register number REGNO1
13363 or register number REGNO2 in INSN's basic block until
13364 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13365 2. Reach BB boundary, or
13366 3. Reach agu definition.
13367 Returns the distance between the non-agu definition point and INSN.
13368 If no definition point, returns -1. */
13371 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
13374 basic_block bb
= BLOCK_FOR_INSN (insn
);
13377 enum attr_type insn_type
;
13379 if (insn
!= BB_HEAD (bb
))
13381 rtx prev
= PREV_INSN (insn
);
13382 while (prev
&& distance
< LEA_SEARCH_THRESHOLD
)
13387 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
13388 if (DF_REF_TYPE (*def_rec
) == DF_REF_REG_DEF
13389 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
13390 && (regno1
== DF_REF_REGNO (*def_rec
)
13391 || regno2
== DF_REF_REGNO (*def_rec
)))
13393 insn_type
= get_attr_type (prev
);
13394 if (insn_type
!= TYPE_LEA
)
13398 if (prev
== BB_HEAD (bb
))
13400 prev
= PREV_INSN (prev
);
13404 if (distance
< LEA_SEARCH_THRESHOLD
)
13408 bool simple_loop
= false;
13410 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
13413 simple_loop
= true;
13419 rtx prev
= BB_END (bb
);
13422 && distance
< LEA_SEARCH_THRESHOLD
)
13427 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
13428 if (DF_REF_TYPE (*def_rec
) == DF_REF_REG_DEF
13429 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
13430 && (regno1
== DF_REF_REGNO (*def_rec
)
13431 || regno2
== DF_REF_REGNO (*def_rec
)))
13433 insn_type
= get_attr_type (prev
);
13434 if (insn_type
!= TYPE_LEA
)
13438 prev
= PREV_INSN (prev
);
13446 /* get_attr_type may modify recog data. We want to make sure
13447 that recog data is valid for instruction INSN, on which
13448 distance_non_agu_define is called. INSN is unchanged here. */
13449 extract_insn_cached (insn
);
13453 /* Return the distance between INSN and the next insn that uses
13454 register number REGNO0 in memory address. Return -1 if no such
13455 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13458 distance_agu_use (unsigned int regno0
, rtx insn
)
13460 basic_block bb
= BLOCK_FOR_INSN (insn
);
13465 if (insn
!= BB_END (bb
))
13467 rtx next
= NEXT_INSN (insn
);
13468 while (next
&& distance
< LEA_SEARCH_THRESHOLD
)
13474 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
13475 if ((DF_REF_TYPE (*use_rec
) == DF_REF_REG_MEM_LOAD
13476 || DF_REF_TYPE (*use_rec
) == DF_REF_REG_MEM_STORE
)
13477 && regno0
== DF_REF_REGNO (*use_rec
))
13479 /* Return DISTANCE if OP0 is used in memory
13480 address in NEXT. */
13484 for (def_rec
= DF_INSN_DEFS (next
); *def_rec
; def_rec
++)
13485 if (DF_REF_TYPE (*def_rec
) == DF_REF_REG_DEF
13486 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
13487 && regno0
== DF_REF_REGNO (*def_rec
))
13489 /* Return -1 if OP0 is set in NEXT. */
13493 if (next
== BB_END (bb
))
13495 next
= NEXT_INSN (next
);
13499 if (distance
< LEA_SEARCH_THRESHOLD
)
13503 bool simple_loop
= false;
13505 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
13508 simple_loop
= true;
13514 rtx next
= BB_HEAD (bb
);
13517 && distance
< LEA_SEARCH_THRESHOLD
)
13523 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
13524 if ((DF_REF_TYPE (*use_rec
) == DF_REF_REG_MEM_LOAD
13525 || DF_REF_TYPE (*use_rec
) == DF_REF_REG_MEM_STORE
)
13526 && regno0
== DF_REF_REGNO (*use_rec
))
13528 /* Return DISTANCE if OP0 is used in memory
13529 address in NEXT. */
13533 for (def_rec
= DF_INSN_DEFS (next
); *def_rec
; def_rec
++)
13534 if (DF_REF_TYPE (*def_rec
) == DF_REF_REG_DEF
13535 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
13536 && regno0
== DF_REF_REGNO (*def_rec
))
13538 /* Return -1 if OP0 is set in NEXT. */
13543 next
= NEXT_INSN (next
);
13551 /* Define this macro to tune LEA priority vs ADD, it take effect when
13552 there is a dilemma of choicing LEA or ADD
13553 Negative value: ADD is more preferred than LEA
13555 Positive value: LEA is more preferred than ADD*/
13556 #define IX86_LEA_PRIORITY 2
13558 /* Return true if it is ok to optimize an ADD operation to LEA
13559 operation to avoid flag register consumation. For the processors
13560 like ATOM, if the destination register of LEA holds an actual
13561 address which will be used soon, LEA is better and otherwise ADD
13565 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
13566 rtx insn
, rtx operands
[])
13568 unsigned int regno0
= true_regnum (operands
[0]);
13569 unsigned int regno1
= true_regnum (operands
[1]);
13570 unsigned int regno2
;
13572 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
13573 return regno0
!= regno1
;
13575 regno2
= true_regnum (operands
[2]);
13577 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13578 if (regno0
!= regno1
&& regno0
!= regno2
)
13582 int dist_define
, dist_use
;
13583 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
13584 if (dist_define
<= 0)
13587 /* If this insn has both backward non-agu dependence and forward
13588 agu dependence, the one with short distance take effect. */
13589 dist_use
= distance_agu_use (regno0
, insn
);
13591 || (dist_define
+ IX86_LEA_PRIORITY
) < dist_use
)
13598 /* Return true if destination reg of SET_BODY is shift count of
13602 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
13608 /* Retrieve destination of SET_BODY. */
13609 switch (GET_CODE (set_body
))
13612 set_dest
= SET_DEST (set_body
);
13613 if (!set_dest
|| !REG_P (set_dest
))
13617 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
13618 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
13626 /* Retrieve shift count of USE_BODY. */
13627 switch (GET_CODE (use_body
))
13630 shift_rtx
= XEXP (use_body
, 1);
13633 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
13634 if (ix86_dep_by_shift_count_body (set_body
,
13635 XVECEXP (use_body
, 0, i
)))
13643 && (GET_CODE (shift_rtx
) == ASHIFT
13644 || GET_CODE (shift_rtx
) == LSHIFTRT
13645 || GET_CODE (shift_rtx
) == ASHIFTRT
13646 || GET_CODE (shift_rtx
) == ROTATE
13647 || GET_CODE (shift_rtx
) == ROTATERT
))
13649 rtx shift_count
= XEXP (shift_rtx
, 1);
13651 /* Return true if shift count is dest of SET_BODY. */
13652 if (REG_P (shift_count
)
13653 && true_regnum (set_dest
) == true_regnum (shift_count
))
13660 /* Return true if destination reg of SET_INSN is shift count of
13664 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
13666 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
13667 PATTERN (use_insn
));
13670 /* Return TRUE or FALSE depending on whether the unary operator meets the
13671 appropriate constraints. */
13674 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
13675 enum machine_mode mode ATTRIBUTE_UNUSED
,
13676 rtx operands
[2] ATTRIBUTE_UNUSED
)
13678 /* If one of operands is memory, source and destination must match. */
13679 if ((MEM_P (operands
[0])
13680 || MEM_P (operands
[1]))
13681 && ! rtx_equal_p (operands
[0], operands
[1]))
13686 /* Post-reload splitter for converting an SF or DFmode value in an
13687 SSE register into an unsigned SImode. */
13690 ix86_split_convert_uns_si_sse (rtx operands
[])
13692 enum machine_mode vecmode
;
13693 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
13695 large
= operands
[1];
13696 zero_or_two31
= operands
[2];
13697 input
= operands
[3];
13698 two31
= operands
[4];
13699 vecmode
= GET_MODE (large
);
13700 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
13702 /* Load up the value into the low element. We must ensure that the other
13703 elements are valid floats -- zero is the easiest such value. */
13706 if (vecmode
== V4SFmode
)
13707 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
13709 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
13713 input
= gen_rtx_REG (vecmode
, REGNO (input
));
13714 emit_move_insn (value
, CONST0_RTX (vecmode
));
13715 if (vecmode
== V4SFmode
)
13716 emit_insn (gen_sse_movss (value
, value
, input
));
13718 emit_insn (gen_sse2_movsd (value
, value
, input
));
13721 emit_move_insn (large
, two31
);
13722 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
13724 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
13725 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
13727 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
13728 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
13730 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
13731 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
13733 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
13734 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
13736 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
13737 if (vecmode
== V4SFmode
)
13738 emit_insn (gen_sse2_cvttps2dq (x
, value
));
13740 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
13743 emit_insn (gen_xorv4si3 (value
, value
, large
));
13746 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13747 Expects the 64-bit DImode to be supplied in a pair of integral
13748 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13749 -mfpmath=sse, !optimize_size only. */
13752 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
13754 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
13755 rtx int_xmm
, fp_xmm
;
13756 rtx biases
, exponents
;
13759 int_xmm
= gen_reg_rtx (V4SImode
);
13760 if (TARGET_INTER_UNIT_MOVES
)
13761 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
13762 else if (TARGET_SSE_SPLIT_REGS
)
13764 emit_clobber (int_xmm
);
13765 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
13769 x
= gen_reg_rtx (V2DImode
);
13770 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
13771 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
13774 x
= gen_rtx_CONST_VECTOR (V4SImode
,
13775 gen_rtvec (4, GEN_INT (0x43300000UL
),
13776 GEN_INT (0x45300000UL
),
13777 const0_rtx
, const0_rtx
));
13778 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
13780 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13781 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
13783 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13784 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13785 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13786 (0x1.0p84 + double(fp_value_hi_xmm)).
13787 Note these exponents differ by 32. */
13789 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
13791 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13792 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13793 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
13794 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
13795 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
13796 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
13797 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
13798 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
13799 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
13801 /* Add the upper and lower DFmode values together. */
13803 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
13806 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
13807 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
13808 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
13811 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
13814 /* Not used, but eases macroization of patterns. */
13816 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
13817 rtx input ATTRIBUTE_UNUSED
)
13819 gcc_unreachable ();
13822 /* Convert an unsigned SImode value into a DFmode. Only currently used
13823 for SSE, but applicable anywhere. */
13826 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
13828 REAL_VALUE_TYPE TWO31r
;
13831 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
13832 NULL
, 1, OPTAB_DIRECT
);
13834 fp
= gen_reg_rtx (DFmode
);
13835 emit_insn (gen_floatsidf2 (fp
, x
));
13837 real_ldexp (&TWO31r
, &dconst1
, 31);
13838 x
= const_double_from_real_value (TWO31r
, DFmode
);
13840 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
13842 emit_move_insn (target
, x
);
13845 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13846 32-bit mode; otherwise we have a direct convert instruction. */
13849 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
13851 REAL_VALUE_TYPE TWO32r
;
13852 rtx fp_lo
, fp_hi
, x
;
13854 fp_lo
= gen_reg_rtx (DFmode
);
13855 fp_hi
= gen_reg_rtx (DFmode
);
13857 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
13859 real_ldexp (&TWO32r
, &dconst1
, 32);
13860 x
= const_double_from_real_value (TWO32r
, DFmode
);
13861 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
13863 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
13865 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
13868 emit_move_insn (target
, x
);
13871 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13872 For x86_32, -mfpmath=sse, !optimize_size only. */
13874 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
13876 REAL_VALUE_TYPE ONE16r
;
13877 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
13879 real_ldexp (&ONE16r
, &dconst1
, 16);
13880 x
= const_double_from_real_value (ONE16r
, SFmode
);
13881 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
13882 NULL
, 0, OPTAB_DIRECT
);
13883 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
13884 NULL
, 0, OPTAB_DIRECT
);
13885 fp_hi
= gen_reg_rtx (SFmode
);
13886 fp_lo
= gen_reg_rtx (SFmode
);
13887 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
13888 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
13889 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
13891 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
13893 if (!rtx_equal_p (target
, fp_hi
))
13894 emit_move_insn (target
, fp_hi
);
13897 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13898 then replicate the value for all elements of the vector
13902 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
13909 v
= gen_rtvec (4, value
, value
, value
, value
);
13910 return gen_rtx_CONST_VECTOR (V4SImode
, v
);
13914 v
= gen_rtvec (2, value
, value
);
13915 return gen_rtx_CONST_VECTOR (V2DImode
, v
);
13919 v
= gen_rtvec (4, value
, value
, value
, value
);
13921 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
13922 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
13923 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
13927 v
= gen_rtvec (2, value
, value
);
13929 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
13930 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
13933 gcc_unreachable ();
13937 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13938 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13939 for an SSE register. If VECT is true, then replicate the mask for
13940 all elements of the vector register. If INVERT is true, then create
13941 a mask excluding the sign bit. */
13944 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
13946 enum machine_mode vec_mode
, imode
;
13947 HOST_WIDE_INT hi
, lo
;
13952 /* Find the sign bit, sign extended to 2*HWI. */
13958 vec_mode
= (mode
== SImode
) ? V4SImode
: V4SFmode
;
13959 lo
= 0x80000000, hi
= lo
< 0;
13965 vec_mode
= (mode
== DImode
) ? V2DImode
: V2DFmode
;
13966 if (HOST_BITS_PER_WIDE_INT
>= 64)
13967 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
13969 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
13974 vec_mode
= VOIDmode
;
13975 if (HOST_BITS_PER_WIDE_INT
>= 64)
13978 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
13985 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
13989 lo
= ~lo
, hi
= ~hi
;
13995 mask
= immed_double_const (lo
, hi
, imode
);
13997 vec
= gen_rtvec (2, v
, mask
);
13998 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
13999 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
14006 gcc_unreachable ();
14010 lo
= ~lo
, hi
= ~hi
;
14012 /* Force this value into the low part of a fp vector constant. */
14013 mask
= immed_double_const (lo
, hi
, imode
);
14014 mask
= gen_lowpart (mode
, mask
);
14016 if (vec_mode
== VOIDmode
)
14017 return force_reg (mode
, mask
);
14019 v
= ix86_build_const_vector (mode
, vect
, mask
);
14020 return force_reg (vec_mode
, v
);
14023 /* Generate code for floating point ABS or NEG. */
14026 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
14029 rtx mask
, set
, use
, clob
, dst
, src
;
14030 bool use_sse
= false;
14031 bool vector_mode
= VECTOR_MODE_P (mode
);
14032 enum machine_mode elt_mode
= mode
;
14036 elt_mode
= GET_MODE_INNER (mode
);
14039 else if (mode
== TFmode
)
14041 else if (TARGET_SSE_MATH
)
14042 use_sse
= SSE_FLOAT_MODE_P (mode
);
14044 /* NEG and ABS performed with SSE use bitwise mask operations.
14045 Create the appropriate mask now. */
14047 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
14056 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
14057 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
14062 set
= gen_rtx_fmt_e (code
, mode
, src
);
14063 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
14066 use
= gen_rtx_USE (VOIDmode
, mask
);
14067 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
14068 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
14069 gen_rtvec (3, set
, use
, clob
)));
14076 /* Expand a copysign operation. Special case operand 0 being a constant. */
14079 ix86_expand_copysign (rtx operands
[])
14081 enum machine_mode mode
;
14082 rtx dest
, op0
, op1
, mask
, nmask
;
14084 dest
= operands
[0];
14088 mode
= GET_MODE (dest
);
14090 if (GET_CODE (op0
) == CONST_DOUBLE
)
14092 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
14094 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
14095 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
14097 if (mode
== SFmode
|| mode
== DFmode
)
14099 enum machine_mode vmode
;
14101 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
14103 if (op0
== CONST0_RTX (mode
))
14104 op0
= CONST0_RTX (vmode
);
14109 if (mode
== SFmode
)
14110 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
14111 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
14113 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
14115 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
14118 else if (op0
!= CONST0_RTX (mode
))
14119 op0
= force_reg (mode
, op0
);
14121 mask
= ix86_build_signbit_mask (mode
, 0, 0);
14123 if (mode
== SFmode
)
14124 copysign_insn
= gen_copysignsf3_const
;
14125 else if (mode
== DFmode
)
14126 copysign_insn
= gen_copysigndf3_const
;
14128 copysign_insn
= gen_copysigntf3_const
;
14130 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
14134 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
14136 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
14137 mask
= ix86_build_signbit_mask (mode
, 0, 0);
14139 if (mode
== SFmode
)
14140 copysign_insn
= gen_copysignsf3_var
;
14141 else if (mode
== DFmode
)
14142 copysign_insn
= gen_copysigndf3_var
;
14144 copysign_insn
= gen_copysigntf3_var
;
14146 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
14150 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14151 be a constant, and so has already been expanded into a vector constant. */
14154 ix86_split_copysign_const (rtx operands
[])
14156 enum machine_mode mode
, vmode
;
14157 rtx dest
, op0
, op1
, mask
, x
;
14159 dest
= operands
[0];
14162 mask
= operands
[3];
14164 mode
= GET_MODE (dest
);
14165 vmode
= GET_MODE (mask
);
14167 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
14168 x
= gen_rtx_AND (vmode
, dest
, mask
);
14169 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
14171 if (op0
!= CONST0_RTX (vmode
))
14173 x
= gen_rtx_IOR (vmode
, dest
, op0
);
14174 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
14178 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
14179 so we have to do two masks. */
14182 ix86_split_copysign_var (rtx operands
[])
14184 enum machine_mode mode
, vmode
;
14185 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
14187 dest
= operands
[0];
14188 scratch
= operands
[1];
14191 nmask
= operands
[4];
14192 mask
= operands
[5];
14194 mode
= GET_MODE (dest
);
14195 vmode
= GET_MODE (mask
);
14197 if (rtx_equal_p (op0
, op1
))
14199 /* Shouldn't happen often (it's useless, obviously), but when it does
14200 we'd generate incorrect code if we continue below. */
14201 emit_move_insn (dest
, op0
);
14205 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
14207 gcc_assert (REGNO (op1
) == REGNO (scratch
));
14209 x
= gen_rtx_AND (vmode
, scratch
, mask
);
14210 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
14213 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
14214 x
= gen_rtx_NOT (vmode
, dest
);
14215 x
= gen_rtx_AND (vmode
, x
, op0
);
14216 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
14220 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
14222 x
= gen_rtx_AND (vmode
, scratch
, mask
);
14224 else /* alternative 2,4 */
14226 gcc_assert (REGNO (mask
) == REGNO (scratch
));
14227 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
14228 x
= gen_rtx_AND (vmode
, scratch
, op1
);
14230 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
14232 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
14234 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
14235 x
= gen_rtx_AND (vmode
, dest
, nmask
);
14237 else /* alternative 3,4 */
14239 gcc_assert (REGNO (nmask
) == REGNO (dest
));
14241 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
14242 x
= gen_rtx_AND (vmode
, dest
, op0
);
14244 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
14247 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
14248 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
14251 /* Return TRUE or FALSE depending on whether the first SET in INSN
14252 has source and destination with matching CC modes, and that the
14253 CC mode is at least as constrained as REQ_MODE. */
14256 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
14259 enum machine_mode set_mode
;
14261 set
= PATTERN (insn
);
14262 if (GET_CODE (set
) == PARALLEL
)
14263 set
= XVECEXP (set
, 0, 0);
14264 gcc_assert (GET_CODE (set
) == SET
);
14265 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
14267 set_mode
= GET_MODE (SET_DEST (set
));
14271 if (req_mode
!= CCNOmode
14272 && (req_mode
!= CCmode
14273 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
14277 if (req_mode
== CCGCmode
)
14281 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
14285 if (req_mode
== CCZmode
)
14296 gcc_unreachable ();
14299 return (GET_MODE (SET_SRC (set
)) == set_mode
);
14302 /* Generate insn patterns to do an integer compare of OPERANDS. */
14305 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
14307 enum machine_mode cmpmode
;
14310 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
14311 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
14313 /* This is very simple, but making the interface the same as in the
14314 FP case makes the rest of the code easier. */
14315 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
14316 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
14318 /* Return the test that should be put into the flags user, i.e.
14319 the bcc, scc, or cmov instruction. */
14320 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
14323 /* Figure out whether to use ordered or unordered fp comparisons.
14324 Return the appropriate mode to use. */
14327 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
14329 /* ??? In order to make all comparisons reversible, we do all comparisons
14330 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14331 all forms trapping and nontrapping comparisons, we can make inequality
14332 comparisons trapping again, since it results in better code when using
14333 FCOM based compares. */
14334 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
14338 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
14340 enum machine_mode mode
= GET_MODE (op0
);
14342 if (SCALAR_FLOAT_MODE_P (mode
))
14344 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
14345 return ix86_fp_compare_mode (code
);
14350 /* Only zero flag is needed. */
14351 case EQ
: /* ZF=0 */
14352 case NE
: /* ZF!=0 */
14354 /* Codes needing carry flag. */
14355 case GEU
: /* CF=0 */
14356 case LTU
: /* CF=1 */
14357 /* Detect overflow checks. They need just the carry flag. */
14358 if (GET_CODE (op0
) == PLUS
14359 && rtx_equal_p (op1
, XEXP (op0
, 0)))
14363 case GTU
: /* CF=0 & ZF=0 */
14364 case LEU
: /* CF=1 | ZF=1 */
14365 /* Detect overflow checks. They need just the carry flag. */
14366 if (GET_CODE (op0
) == MINUS
14367 && rtx_equal_p (op1
, XEXP (op0
, 0)))
14371 /* Codes possibly doable only with sign flag when
14372 comparing against zero. */
14373 case GE
: /* SF=OF or SF=0 */
14374 case LT
: /* SF<>OF or SF=1 */
14375 if (op1
== const0_rtx
)
14378 /* For other cases Carry flag is not required. */
14380 /* Codes doable only with sign flag when comparing
14381 against zero, but we miss jump instruction for it
14382 so we need to use relational tests against overflow
14383 that thus needs to be zero. */
14384 case GT
: /* ZF=0 & SF=OF */
14385 case LE
: /* ZF=1 | SF<>OF */
14386 if (op1
== const0_rtx
)
14390 /* strcmp pattern do (use flags) and combine may ask us for proper
14395 gcc_unreachable ();
14399 /* Return the fixed registers used for condition codes. */
14402 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
14409 /* If two condition code modes are compatible, return a condition code
14410 mode which is compatible with both. Otherwise, return
14413 static enum machine_mode
14414 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
14419 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
14422 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
14423 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
14429 gcc_unreachable ();
14459 /* These are only compatible with themselves, which we already
14466 /* Return a comparison we can do and that it is equivalent to
14467 swap_condition (code) apart possibly from orderedness.
14468 But, never change orderedness if TARGET_IEEE_FP, returning
14469 UNKNOWN in that case if necessary. */
14471 static enum rtx_code
14472 ix86_fp_swap_condition (enum rtx_code code
)
14476 case GT
: /* GTU - CF=0 & ZF=0 */
14477 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
14478 case GE
: /* GEU - CF=0 */
14479 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
14480 case UNLT
: /* LTU - CF=1 */
14481 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
14482 case UNLE
: /* LEU - CF=1 | ZF=1 */
14483 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
14485 return swap_condition (code
);
14489 /* Return cost of comparison CODE using the best strategy for performance.
14490 All following functions do use number of instructions as a cost metrics.
14491 In future this should be tweaked to compute bytes for optimize_size and
14492 take into account performance of various instructions on various CPUs. */
14495 ix86_fp_comparison_cost (enum rtx_code code
)
14499 /* The cost of code using bit-twiddling on %ah. */
14516 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
14520 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
14523 gcc_unreachable ();
14526 switch (ix86_fp_comparison_strategy (code
))
14528 case IX86_FPCMP_COMI
:
14529 return arith_cost
> 4 ? 3 : 2;
14530 case IX86_FPCMP_SAHF
:
14531 return arith_cost
> 4 ? 4 : 3;
14537 /* Return strategy to use for floating-point. We assume that fcomi is always
14538 preferrable where available, since that is also true when looking at size
14539 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
14541 enum ix86_fpcmp_strategy
14542 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
14544 /* Do fcomi/sahf based test when profitable. */
14547 return IX86_FPCMP_COMI
;
14549 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
14550 return IX86_FPCMP_SAHF
;
14552 return IX86_FPCMP_ARITH
;
14555 /* Swap, force into registers, or otherwise massage the two operands
14556 to a fp comparison. The operands are updated in place; the new
14557 comparison code is returned. */
14559 static enum rtx_code
14560 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
14562 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
14563 rtx op0
= *pop0
, op1
= *pop1
;
14564 enum machine_mode op_mode
= GET_MODE (op0
);
14565 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
14567 /* All of the unordered compare instructions only work on registers.
14568 The same is true of the fcomi compare instructions. The XFmode
14569 compare instructions require registers except when comparing
14570 against zero or when converting operand 1 from fixed point to
14574 && (fpcmp_mode
== CCFPUmode
14575 || (op_mode
== XFmode
14576 && ! (standard_80387_constant_p (op0
) == 1
14577 || standard_80387_constant_p (op1
) == 1)
14578 && GET_CODE (op1
) != FLOAT
)
14579 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
14581 op0
= force_reg (op_mode
, op0
);
14582 op1
= force_reg (op_mode
, op1
);
14586 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14587 things around if they appear profitable, otherwise force op0
14588 into a register. */
14590 if (standard_80387_constant_p (op0
) == 0
14592 && ! (standard_80387_constant_p (op1
) == 0
14595 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
14596 if (new_code
!= UNKNOWN
)
14599 tmp
= op0
, op0
= op1
, op1
= tmp
;
14605 op0
= force_reg (op_mode
, op0
);
14607 if (CONSTANT_P (op1
))
14609 int tmp
= standard_80387_constant_p (op1
);
14611 op1
= validize_mem (force_const_mem (op_mode
, op1
));
14615 op1
= force_reg (op_mode
, op1
);
14618 op1
= force_reg (op_mode
, op1
);
14622 /* Try to rearrange the comparison to make it cheaper. */
14623 if (ix86_fp_comparison_cost (code
)
14624 > ix86_fp_comparison_cost (swap_condition (code
))
14625 && (REG_P (op1
) || can_create_pseudo_p ()))
14628 tmp
= op0
, op0
= op1
, op1
= tmp
;
14629 code
= swap_condition (code
);
14631 op0
= force_reg (op_mode
, op0
);
14639 /* Convert comparison codes we use to represent FP comparison to integer
14640 code that will result in proper branch. Return UNKNOWN if no such code
14644 ix86_fp_compare_code_to_integer (enum rtx_code code
)
14673 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14676 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
14678 enum machine_mode fpcmp_mode
, intcmp_mode
;
14681 fpcmp_mode
= ix86_fp_compare_mode (code
);
14682 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
14684 /* Do fcomi/sahf based test when profitable. */
14685 switch (ix86_fp_comparison_strategy (code
))
14687 case IX86_FPCMP_COMI
:
14688 intcmp_mode
= fpcmp_mode
;
14689 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
14690 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
14695 case IX86_FPCMP_SAHF
:
14696 intcmp_mode
= fpcmp_mode
;
14697 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
14698 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
14702 scratch
= gen_reg_rtx (HImode
);
14703 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14704 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
14707 case IX86_FPCMP_ARITH
:
14708 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14709 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
14710 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
14712 scratch
= gen_reg_rtx (HImode
);
14713 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
14715 /* In the unordered case, we have to check C2 for NaN's, which
14716 doesn't happen to work out to anything nice combination-wise.
14717 So do some bit twiddling on the value we've got in AH to come
14718 up with an appropriate set of condition codes. */
14720 intcmp_mode
= CCNOmode
;
14725 if (code
== GT
|| !TARGET_IEEE_FP
)
14727 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
14732 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
14733 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
14734 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
14735 intcmp_mode
= CCmode
;
14741 if (code
== LT
&& TARGET_IEEE_FP
)
14743 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
14744 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
14745 intcmp_mode
= CCmode
;
14750 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
14756 if (code
== GE
|| !TARGET_IEEE_FP
)
14758 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
14763 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
14764 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
14771 if (code
== LE
&& TARGET_IEEE_FP
)
14773 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
14774 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
14775 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
14776 intcmp_mode
= CCmode
;
14781 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
14787 if (code
== EQ
&& TARGET_IEEE_FP
)
14789 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
14790 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
14791 intcmp_mode
= CCmode
;
14796 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
14803 if (code
== NE
&& TARGET_IEEE_FP
)
14805 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
14806 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
14812 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
14818 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
14822 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
14827 gcc_unreachable ();
14835 /* Return the test that should be put into the flags user, i.e.
14836 the bcc, scc, or cmov instruction. */
14837 return gen_rtx_fmt_ee (code
, VOIDmode
,
14838 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
14843 ix86_expand_compare (enum rtx_code code
)
14846 op0
= ix86_compare_op0
;
14847 op1
= ix86_compare_op1
;
14849 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_CC
)
14850 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_op0
, ix86_compare_op1
);
14852 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
14854 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
14855 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
14858 ret
= ix86_expand_int_compare (code
, op0
, op1
);
14864 ix86_expand_branch (enum rtx_code code
, rtx label
)
14868 switch (GET_MODE (ix86_compare_op0
))
14877 tmp
= ix86_expand_compare (code
);
14878 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
14879 gen_rtx_LABEL_REF (VOIDmode
, label
),
14881 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
14888 /* Expand DImode branch into multiple compare+branch. */
14890 rtx lo
[2], hi
[2], label2
;
14891 enum rtx_code code1
, code2
, code3
;
14892 enum machine_mode submode
;
14894 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
14896 tmp
= ix86_compare_op0
;
14897 ix86_compare_op0
= ix86_compare_op1
;
14898 ix86_compare_op1
= tmp
;
14899 code
= swap_condition (code
);
14901 if (GET_MODE (ix86_compare_op0
) == DImode
)
14903 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
14904 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
14909 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
14910 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
14914 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14915 avoid two branches. This costs one extra insn, so disable when
14916 optimizing for size. */
14918 if ((code
== EQ
|| code
== NE
)
14919 && (!optimize_insn_for_size_p ()
14920 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
14925 if (hi
[1] != const0_rtx
)
14926 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
14927 NULL_RTX
, 0, OPTAB_WIDEN
);
14930 if (lo
[1] != const0_rtx
)
14931 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
14932 NULL_RTX
, 0, OPTAB_WIDEN
);
14934 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
14935 NULL_RTX
, 0, OPTAB_WIDEN
);
14937 ix86_compare_op0
= tmp
;
14938 ix86_compare_op1
= const0_rtx
;
14939 ix86_expand_branch (code
, label
);
14943 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14944 op1 is a constant and the low word is zero, then we can just
14945 examine the high word. Similarly for low word -1 and
14946 less-or-equal-than or greater-than. */
14948 if (CONST_INT_P (hi
[1]))
14951 case LT
: case LTU
: case GE
: case GEU
:
14952 if (lo
[1] == const0_rtx
)
14954 ix86_compare_op0
= hi
[0];
14955 ix86_compare_op1
= hi
[1];
14956 ix86_expand_branch (code
, label
);
14960 case LE
: case LEU
: case GT
: case GTU
:
14961 if (lo
[1] == constm1_rtx
)
14963 ix86_compare_op0
= hi
[0];
14964 ix86_compare_op1
= hi
[1];
14965 ix86_expand_branch (code
, label
);
14973 /* Otherwise, we need two or three jumps. */
14975 label2
= gen_label_rtx ();
14978 code2
= swap_condition (code
);
14979 code3
= unsigned_condition (code
);
14983 case LT
: case GT
: case LTU
: case GTU
:
14986 case LE
: code1
= LT
; code2
= GT
; break;
14987 case GE
: code1
= GT
; code2
= LT
; break;
14988 case LEU
: code1
= LTU
; code2
= GTU
; break;
14989 case GEU
: code1
= GTU
; code2
= LTU
; break;
14991 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
14992 case NE
: code2
= UNKNOWN
; break;
14995 gcc_unreachable ();
15000 * if (hi(a) < hi(b)) goto true;
15001 * if (hi(a) > hi(b)) goto false;
15002 * if (lo(a) < lo(b)) goto true;
15006 ix86_compare_op0
= hi
[0];
15007 ix86_compare_op1
= hi
[1];
15009 if (code1
!= UNKNOWN
)
15010 ix86_expand_branch (code1
, label
);
15011 if (code2
!= UNKNOWN
)
15012 ix86_expand_branch (code2
, label2
);
15014 ix86_compare_op0
= lo
[0];
15015 ix86_compare_op1
= lo
[1];
15016 ix86_expand_branch (code3
, label
);
15018 if (code2
!= UNKNOWN
)
15019 emit_label (label2
);
15024 /* If we have already emitted a compare insn, go straight to simple.
15025 ix86_expand_compare won't emit anything if ix86_compare_emitted
15027 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_CC
);
15032 /* Split branch based on floating point condition. */
15034 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
15035 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
15040 if (target2
!= pc_rtx
)
15043 code
= reverse_condition_maybe_unordered (code
);
15048 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
15051 /* Remove pushed operand from stack. */
15053 ix86_free_from_memory (GET_MODE (pushed
));
15055 i
= emit_jump_insn (gen_rtx_SET
15057 gen_rtx_IF_THEN_ELSE (VOIDmode
,
15058 condition
, target1
, target2
)));
15059 if (split_branch_probability
>= 0)
15060 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
15064 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
15068 gcc_assert (GET_MODE (dest
) == QImode
);
15070 ret
= ix86_expand_compare (code
);
15071 PUT_MODE (ret
, QImode
);
15072 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
15075 /* Expand comparison setting or clearing carry flag. Return true when
15076 successful and set pop for the operation. */
15078 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
15080 enum machine_mode mode
=
15081 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
15083 /* Do not handle DImode compares that go through special path. */
15084 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
15087 if (SCALAR_FLOAT_MODE_P (mode
))
15089 rtx compare_op
, compare_seq
;
15091 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
15093 /* Shortcut: following common codes never translate
15094 into carry flag compares. */
15095 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
15096 || code
== ORDERED
|| code
== UNORDERED
)
15099 /* These comparisons require zero flag; swap operands so they won't. */
15100 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
15101 && !TARGET_IEEE_FP
)
15106 code
= swap_condition (code
);
15109 /* Try to expand the comparison and verify that we end up with
15110 carry flag based comparison. This fails to be true only when
15111 we decide to expand comparison using arithmetic that is not
15112 too common scenario. */
15114 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
15115 compare_seq
= get_insns ();
15118 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
15119 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
15120 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
15122 code
= GET_CODE (compare_op
);
15124 if (code
!= LTU
&& code
!= GEU
)
15127 emit_insn (compare_seq
);
15132 if (!INTEGRAL_MODE_P (mode
))
15141 /* Convert a==0 into (unsigned)a<1. */
15144 if (op1
!= const0_rtx
)
15147 code
= (code
== EQ
? LTU
: GEU
);
15150 /* Convert a>b into b<a or a>=b-1. */
15153 if (CONST_INT_P (op1
))
15155 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
15156 /* Bail out on overflow. We still can swap operands but that
15157 would force loading of the constant into register. */
15158 if (op1
== const0_rtx
15159 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
15161 code
= (code
== GTU
? GEU
: LTU
);
15168 code
= (code
== GTU
? LTU
: GEU
);
15172 /* Convert a>=0 into (unsigned)a<0x80000000. */
15175 if (mode
== DImode
|| op1
!= const0_rtx
)
15177 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
15178 code
= (code
== LT
? GEU
: LTU
);
15182 if (mode
== DImode
|| op1
!= constm1_rtx
)
15184 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
15185 code
= (code
== LE
? GEU
: LTU
);
15191 /* Swapping operands may cause constant to appear as first operand. */
15192 if (!nonimmediate_operand (op0
, VOIDmode
))
15194 if (!can_create_pseudo_p ())
15196 op0
= force_reg (mode
, op0
);
15198 ix86_compare_op0
= op0
;
15199 ix86_compare_op1
= op1
;
15200 *pop
= ix86_expand_compare (code
);
15201 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
15206 ix86_expand_int_movcc (rtx operands
[])
15208 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
15209 rtx compare_seq
, compare_op
;
15210 enum machine_mode mode
= GET_MODE (operands
[0]);
15211 bool sign_bit_compare_p
= false;;
15214 ix86_compare_op0
= XEXP (operands
[1], 0);
15215 ix86_compare_op1
= XEXP (operands
[1], 1);
15216 compare_op
= ix86_expand_compare (code
);
15217 compare_seq
= get_insns ();
15220 compare_code
= GET_CODE (compare_op
);
15222 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
15223 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
15224 sign_bit_compare_p
= true;
15226 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15227 HImode insns, we'd be swallowed in word prefix ops. */
15229 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
15230 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
15231 && CONST_INT_P (operands
[2])
15232 && CONST_INT_P (operands
[3]))
15234 rtx out
= operands
[0];
15235 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
15236 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
15237 HOST_WIDE_INT diff
;
15240 /* Sign bit compares are better done using shifts than we do by using
15242 if (sign_bit_compare_p
15243 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
15244 ix86_compare_op1
, &compare_op
))
15246 /* Detect overlap between destination and compare sources. */
15249 if (!sign_bit_compare_p
)
15251 bool fpcmp
= false;
15253 compare_code
= GET_CODE (compare_op
);
15255 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
15256 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
15259 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
15262 /* To simplify rest of code, restrict to the GEU case. */
15263 if (compare_code
== LTU
)
15265 HOST_WIDE_INT tmp
= ct
;
15268 compare_code
= reverse_condition (compare_code
);
15269 code
= reverse_condition (code
);
15274 PUT_CODE (compare_op
,
15275 reverse_condition_maybe_unordered
15276 (GET_CODE (compare_op
)));
15278 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
15282 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
15283 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
15284 tmp
= gen_reg_rtx (mode
);
15286 if (mode
== DImode
)
15287 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
15289 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
15293 if (code
== GT
|| code
== GE
)
15294 code
= reverse_condition (code
);
15297 HOST_WIDE_INT tmp
= ct
;
15302 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
15303 ix86_compare_op1
, VOIDmode
, 0, -1);
15316 tmp
= expand_simple_binop (mode
, PLUS
,
15318 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
15329 tmp
= expand_simple_binop (mode
, IOR
,
15331 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
15333 else if (diff
== -1 && ct
)
15343 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
15345 tmp
= expand_simple_binop (mode
, PLUS
,
15346 copy_rtx (tmp
), GEN_INT (cf
),
15347 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
15355 * andl cf - ct, dest
15365 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
15368 tmp
= expand_simple_binop (mode
, AND
,
15370 gen_int_mode (cf
- ct
, mode
),
15371 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
15373 tmp
= expand_simple_binop (mode
, PLUS
,
15374 copy_rtx (tmp
), GEN_INT (ct
),
15375 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
15378 if (!rtx_equal_p (tmp
, out
))
15379 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
15381 return 1; /* DONE */
15386 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
15389 tmp
= ct
, ct
= cf
, cf
= tmp
;
15392 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
15394 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
15396 /* We may be reversing unordered compare to normal compare, that
15397 is not valid in general (we may convert non-trapping condition
15398 to trapping one), however on i386 we currently emit all
15399 comparisons unordered. */
15400 compare_code
= reverse_condition_maybe_unordered (compare_code
);
15401 code
= reverse_condition_maybe_unordered (code
);
15405 compare_code
= reverse_condition (compare_code
);
15406 code
= reverse_condition (code
);
15410 compare_code
= UNKNOWN
;
15411 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
15412 && CONST_INT_P (ix86_compare_op1
))
15414 if (ix86_compare_op1
== const0_rtx
15415 && (code
== LT
|| code
== GE
))
15416 compare_code
= code
;
15417 else if (ix86_compare_op1
== constm1_rtx
)
15421 else if (code
== GT
)
15426 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15427 if (compare_code
!= UNKNOWN
15428 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
15429 && (cf
== -1 || ct
== -1))
15431 /* If lea code below could be used, only optimize
15432 if it results in a 2 insn sequence. */
15434 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
15435 || diff
== 3 || diff
== 5 || diff
== 9)
15436 || (compare_code
== LT
&& ct
== -1)
15437 || (compare_code
== GE
&& cf
== -1))
15440 * notl op1 (if necessary)
15448 code
= reverse_condition (code
);
15451 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
15452 ix86_compare_op1
, VOIDmode
, 0, -1);
15454 out
= expand_simple_binop (mode
, IOR
,
15456 out
, 1, OPTAB_DIRECT
);
15457 if (out
!= operands
[0])
15458 emit_move_insn (operands
[0], out
);
15460 return 1; /* DONE */
15465 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
15466 || diff
== 3 || diff
== 5 || diff
== 9)
15467 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
15469 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
15475 * lea cf(dest*(ct-cf)),dest
15479 * This also catches the degenerate setcc-only case.
15485 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
15486 ix86_compare_op1
, VOIDmode
, 0, 1);
15489 /* On x86_64 the lea instruction operates on Pmode, so we need
15490 to get arithmetics done in proper mode to match. */
15492 tmp
= copy_rtx (out
);
15496 out1
= copy_rtx (out
);
15497 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
15501 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
15507 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
15510 if (!rtx_equal_p (tmp
, out
))
15513 out
= force_operand (tmp
, copy_rtx (out
));
15515 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
15517 if (!rtx_equal_p (out
, operands
[0]))
15518 emit_move_insn (operands
[0], copy_rtx (out
));
15520 return 1; /* DONE */
15524 * General case: Jumpful:
15525 * xorl dest,dest cmpl op1, op2
15526 * cmpl op1, op2 movl ct, dest
15527 * setcc dest jcc 1f
15528 * decl dest movl cf, dest
15529 * andl (cf-ct),dest 1:
15532 * Size 20. Size 14.
15534 * This is reasonably steep, but branch mispredict costs are
15535 * high on modern cpus, so consider failing only if optimizing
15539 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
15540 && BRANCH_COST (optimize_insn_for_speed_p (),
15545 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
15550 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
15552 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
15554 /* We may be reversing unordered compare to normal compare,
15555 that is not valid in general (we may convert non-trapping
15556 condition to trapping one), however on i386 we currently
15557 emit all comparisons unordered. */
15558 code
= reverse_condition_maybe_unordered (code
);
15562 code
= reverse_condition (code
);
15563 if (compare_code
!= UNKNOWN
)
15564 compare_code
= reverse_condition (compare_code
);
15568 if (compare_code
!= UNKNOWN
)
15570 /* notl op1 (if needed)
15575 For x < 0 (resp. x <= -1) there will be no notl,
15576 so if possible swap the constants to get rid of the
15578 True/false will be -1/0 while code below (store flag
15579 followed by decrement) is 0/-1, so the constants need
15580 to be exchanged once more. */
15582 if (compare_code
== GE
|| !cf
)
15584 code
= reverse_condition (code
);
15589 HOST_WIDE_INT tmp
= cf
;
15594 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
15595 ix86_compare_op1
, VOIDmode
, 0, -1);
15599 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
15600 ix86_compare_op1
, VOIDmode
, 0, 1);
15602 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
15603 copy_rtx (out
), 1, OPTAB_DIRECT
);
15606 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
15607 gen_int_mode (cf
- ct
, mode
),
15608 copy_rtx (out
), 1, OPTAB_DIRECT
);
15610 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
15611 copy_rtx (out
), 1, OPTAB_DIRECT
);
15612 if (!rtx_equal_p (out
, operands
[0]))
15613 emit_move_insn (operands
[0], copy_rtx (out
));
15615 return 1; /* DONE */
15619 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
15621 /* Try a few things more with specific constants and a variable. */
15624 rtx var
, orig_out
, out
, tmp
;
15626 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15627 return 0; /* FAIL */
15629 /* If one of the two operands is an interesting constant, load a
15630 constant with the above and mask it in with a logical operation. */
15632 if (CONST_INT_P (operands
[2]))
15635 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
15636 operands
[3] = constm1_rtx
, op
= and_optab
;
15637 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
15638 operands
[3] = const0_rtx
, op
= ior_optab
;
15640 return 0; /* FAIL */
15642 else if (CONST_INT_P (operands
[3]))
15645 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
15646 operands
[2] = constm1_rtx
, op
= and_optab
;
15647 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
15648 operands
[2] = const0_rtx
, op
= ior_optab
;
15650 return 0; /* FAIL */
15653 return 0; /* FAIL */
15655 orig_out
= operands
[0];
15656 tmp
= gen_reg_rtx (mode
);
15659 /* Recurse to get the constant loaded. */
15660 if (ix86_expand_int_movcc (operands
) == 0)
15661 return 0; /* FAIL */
15663 /* Mask in the interesting variable. */
15664 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
15666 if (!rtx_equal_p (out
, orig_out
))
15667 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
15669 return 1; /* DONE */
15673 * For comparison with above,
15683 if (! nonimmediate_operand (operands
[2], mode
))
15684 operands
[2] = force_reg (mode
, operands
[2]);
15685 if (! nonimmediate_operand (operands
[3], mode
))
15686 operands
[3] = force_reg (mode
, operands
[3]);
15688 if (! register_operand (operands
[2], VOIDmode
)
15690 || ! register_operand (operands
[3], VOIDmode
)))
15691 operands
[2] = force_reg (mode
, operands
[2]);
15694 && ! register_operand (operands
[3], VOIDmode
))
15695 operands
[3] = force_reg (mode
, operands
[3]);
15697 emit_insn (compare_seq
);
15698 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
15699 gen_rtx_IF_THEN_ELSE (mode
,
15700 compare_op
, operands
[2],
15703 return 1; /* DONE */
15706 /* Swap, force into registers, or otherwise massage the two operands
15707 to an sse comparison with a mask result. Thus we differ a bit from
15708 ix86_prepare_fp_compare_args which expects to produce a flags result.
15710 The DEST operand exists to help determine whether to commute commutative
15711 operators. The POP0/POP1 operands are updated in place. The new
15712 comparison code is returned, or UNKNOWN if not implementable. */
15714 static enum rtx_code
15715 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
15716 rtx
*pop0
, rtx
*pop1
)
15724 /* We have no LTGT as an operator. We could implement it with
15725 NE & ORDERED, but this requires an extra temporary. It's
15726 not clear that it's worth it. */
15733 /* These are supported directly. */
15740 /* For commutative operators, try to canonicalize the destination
15741 operand to be first in the comparison - this helps reload to
15742 avoid extra moves. */
15743 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
15751 /* These are not supported directly. Swap the comparison operands
15752 to transform into something that is supported. */
15756 code
= swap_condition (code
);
15760 gcc_unreachable ();
15766 /* Detect conditional moves that exactly match min/max operational
15767 semantics. Note that this is IEEE safe, as long as we don't
15768 interchange the operands.
15770 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15771 and TRUE if the operation is successful and instructions are emitted. */
15774 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
15775 rtx cmp_op1
, rtx if_true
, rtx if_false
)
15777 enum machine_mode mode
;
15783 else if (code
== UNGE
)
15786 if_true
= if_false
;
15792 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
15794 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
15799 mode
= GET_MODE (dest
);
15801 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15802 but MODE may be a vector mode and thus not appropriate. */
15803 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
15805 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
15808 if_true
= force_reg (mode
, if_true
);
15809 v
= gen_rtvec (2, if_true
, if_false
);
15810 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
15814 code
= is_min
? SMIN
: SMAX
;
15815 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
15818 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
15822 /* Expand an sse vector comparison. Return the register with the result. */
15825 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
15826 rtx op_true
, rtx op_false
)
15828 enum machine_mode mode
= GET_MODE (dest
);
15831 cmp_op0
= force_reg (mode
, cmp_op0
);
15832 if (!nonimmediate_operand (cmp_op1
, mode
))
15833 cmp_op1
= force_reg (mode
, cmp_op1
);
15836 || reg_overlap_mentioned_p (dest
, op_true
)
15837 || reg_overlap_mentioned_p (dest
, op_false
))
15838 dest
= gen_reg_rtx (mode
);
15840 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
15841 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
15846 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15847 operations. This is used for both scalar and vector conditional moves. */
15850 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
15852 enum machine_mode mode
= GET_MODE (dest
);
15855 if (op_false
== CONST0_RTX (mode
))
15857 op_true
= force_reg (mode
, op_true
);
15858 x
= gen_rtx_AND (mode
, cmp
, op_true
);
15859 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
15861 else if (op_true
== CONST0_RTX (mode
))
15863 op_false
= force_reg (mode
, op_false
);
15864 x
= gen_rtx_NOT (mode
, cmp
);
15865 x
= gen_rtx_AND (mode
, x
, op_false
);
15866 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
15868 else if (TARGET_SSE5
)
15870 rtx pcmov
= gen_rtx_SET (mode
, dest
,
15871 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
15878 op_true
= force_reg (mode
, op_true
);
15879 op_false
= force_reg (mode
, op_false
);
15881 t2
= gen_reg_rtx (mode
);
15883 t3
= gen_reg_rtx (mode
);
15887 x
= gen_rtx_AND (mode
, op_true
, cmp
);
15888 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
15890 x
= gen_rtx_NOT (mode
, cmp
);
15891 x
= gen_rtx_AND (mode
, x
, op_false
);
15892 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
15894 x
= gen_rtx_IOR (mode
, t3
, t2
);
15895 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
15899 /* Expand a floating-point conditional move. Return true if successful. */
15902 ix86_expand_fp_movcc (rtx operands
[])
15904 enum machine_mode mode
= GET_MODE (operands
[0]);
15905 enum rtx_code code
= GET_CODE (operands
[1]);
15906 rtx tmp
, compare_op
;
15908 ix86_compare_op0
= XEXP (operands
[1], 0);
15909 ix86_compare_op1
= XEXP (operands
[1], 1);
15910 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
15912 enum machine_mode cmode
;
15914 /* Since we've no cmove for sse registers, don't force bad register
15915 allocation just to gain access to it. Deny movcc when the
15916 comparison mode doesn't match the move mode. */
15917 cmode
= GET_MODE (ix86_compare_op0
);
15918 if (cmode
== VOIDmode
)
15919 cmode
= GET_MODE (ix86_compare_op1
);
15923 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
15925 &ix86_compare_op1
);
15926 if (code
== UNKNOWN
)
15929 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
15930 ix86_compare_op1
, operands
[2],
15934 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
15935 ix86_compare_op1
, operands
[2], operands
[3]);
15936 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
15940 /* The floating point conditional move instructions don't directly
15941 support conditions resulting from a signed integer comparison. */
15943 compare_op
= ix86_expand_compare (code
);
15944 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
15946 tmp
= gen_reg_rtx (QImode
);
15947 ix86_expand_setcc (code
, tmp
);
15949 ix86_compare_op0
= tmp
;
15950 ix86_compare_op1
= const0_rtx
;
15951 compare_op
= ix86_expand_compare (code
);
15954 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
15955 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
15956 operands
[2], operands
[3])));
15961 /* Expand a floating-point vector conditional move; a vcond operation
15962 rather than a movcc operation. */
15965 ix86_expand_fp_vcond (rtx operands
[])
15967 enum rtx_code code
= GET_CODE (operands
[3]);
15970 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
15971 &operands
[4], &operands
[5]);
15972 if (code
== UNKNOWN
)
15975 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
15976 operands
[5], operands
[1], operands
[2]))
15979 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
15980 operands
[1], operands
[2]);
15981 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
15985 /* Expand a signed/unsigned integral vector conditional move. */
15988 ix86_expand_int_vcond (rtx operands
[])
15990 enum machine_mode mode
= GET_MODE (operands
[0]);
15991 enum rtx_code code
= GET_CODE (operands
[3]);
15992 bool negate
= false;
15995 cop0
= operands
[4];
15996 cop1
= operands
[5];
15998 /* SSE5 supports all of the comparisons on all vector int types. */
16001 /* Canonicalize the comparison to EQ, GT, GTU. */
16012 code
= reverse_condition (code
);
16018 code
= reverse_condition (code
);
16024 code
= swap_condition (code
);
16025 x
= cop0
, cop0
= cop1
, cop1
= x
;
16029 gcc_unreachable ();
16032 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16033 if (mode
== V2DImode
)
16038 /* SSE4.1 supports EQ. */
16039 if (!TARGET_SSE4_1
)
16045 /* SSE4.2 supports GT/GTU. */
16046 if (!TARGET_SSE4_2
)
16051 gcc_unreachable ();
16055 /* Unsigned parallel compare is not supported by the hardware. Play some
16056 tricks to turn this into a signed comparison against 0. */
16059 cop0
= force_reg (mode
, cop0
);
16068 /* Perform a parallel modulo subtraction. */
16069 t1
= gen_reg_rtx (mode
);
16070 emit_insn ((mode
== V4SImode
16072 : gen_subv2di3
) (t1
, cop0
, cop1
));
16074 /* Extract the original sign bit of op0. */
16075 mask
= ix86_build_signbit_mask (GET_MODE_INNER (mode
),
16077 t2
= gen_reg_rtx (mode
);
16078 emit_insn ((mode
== V4SImode
16080 : gen_andv2di3
) (t2
, cop0
, mask
));
16082 /* XOR it back into the result of the subtraction. This results
16083 in the sign bit set iff we saw unsigned underflow. */
16084 x
= gen_reg_rtx (mode
);
16085 emit_insn ((mode
== V4SImode
16087 : gen_xorv2di3
) (x
, t1
, t2
));
16095 /* Perform a parallel unsigned saturating subtraction. */
16096 x
= gen_reg_rtx (mode
);
16097 emit_insn (gen_rtx_SET (VOIDmode
, x
,
16098 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
16105 gcc_unreachable ();
16109 cop1
= CONST0_RTX (mode
);
16113 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
16114 operands
[1+negate
], operands
[2-negate
]);
16116 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
16117 operands
[2-negate
]);
16121 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16122 true if we should do zero extension, else sign extension. HIGH_P is
16123 true if we want the N/2 high elements, else the low elements. */
16126 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
16128 enum machine_mode imode
= GET_MODE (operands
[1]);
16129 rtx (*unpack
)(rtx
, rtx
, rtx
);
16136 unpack
= gen_vec_interleave_highv16qi
;
16138 unpack
= gen_vec_interleave_lowv16qi
;
16142 unpack
= gen_vec_interleave_highv8hi
;
16144 unpack
= gen_vec_interleave_lowv8hi
;
16148 unpack
= gen_vec_interleave_highv4si
;
16150 unpack
= gen_vec_interleave_lowv4si
;
16153 gcc_unreachable ();
16156 dest
= gen_lowpart (imode
, operands
[0]);
16159 se
= force_reg (imode
, CONST0_RTX (imode
));
16161 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
16162 operands
[1], pc_rtx
, pc_rtx
);
16164 emit_insn (unpack (dest
, operands
[1], se
));
16167 /* This function performs the same task as ix86_expand_sse_unpack,
16168 but with SSE4.1 instructions. */
16171 ix86_expand_sse4_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
16173 enum machine_mode imode
= GET_MODE (operands
[1]);
16174 rtx (*unpack
)(rtx
, rtx
);
16181 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
16183 unpack
= gen_sse4_1_extendv8qiv8hi2
;
16187 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
16189 unpack
= gen_sse4_1_extendv4hiv4si2
;
16193 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
16195 unpack
= gen_sse4_1_extendv2siv2di2
;
16198 gcc_unreachable ();
16201 dest
= operands
[0];
16204 /* Shift higher 8 bytes to lower 8 bytes. */
16205 src
= gen_reg_rtx (imode
);
16206 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode
, src
),
16207 gen_lowpart (TImode
, operands
[1]),
16213 emit_insn (unpack (dest
, src
));
16216 /* This function performs the same task as ix86_expand_sse_unpack,
16217 but with sse5 instructions. */
16220 ix86_expand_sse5_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
16222 enum machine_mode imode
= GET_MODE (operands
[1]);
16223 int pperm_bytes
[16];
16225 int h
= (high_p
) ? 8 : 0;
16228 rtvec v
= rtvec_alloc (16);
16231 rtx op0
= operands
[0], op1
= operands
[1];
16236 vs
= rtvec_alloc (8);
16237 h2
= (high_p
) ? 8 : 0;
16238 for (i
= 0; i
< 8; i
++)
16240 pperm_bytes
[2*i
+0] = PPERM_SRC
| PPERM_SRC2
| i
| h
;
16241 pperm_bytes
[2*i
+1] = ((unsigned_p
)
16243 : PPERM_SIGN
| PPERM_SRC2
| i
| h
);
16246 for (i
= 0; i
< 16; i
++)
16247 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
16249 for (i
= 0; i
< 8; i
++)
16250 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
16252 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
16253 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
16255 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0
, op1
, p
, x
));
16257 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0
, op1
, p
, x
));
16261 vs
= rtvec_alloc (4);
16262 h2
= (high_p
) ? 4 : 0;
16263 for (i
= 0; i
< 4; i
++)
16265 sign_extend
= ((unsigned_p
)
16267 : PPERM_SIGN
| PPERM_SRC2
| ((2*i
) + 1 + h
));
16268 pperm_bytes
[4*i
+0] = PPERM_SRC
| PPERM_SRC2
| ((2*i
) + 0 + h
);
16269 pperm_bytes
[4*i
+1] = PPERM_SRC
| PPERM_SRC2
| ((2*i
) + 1 + h
);
16270 pperm_bytes
[4*i
+2] = sign_extend
;
16271 pperm_bytes
[4*i
+3] = sign_extend
;
16274 for (i
= 0; i
< 16; i
++)
16275 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
16277 for (i
= 0; i
< 4; i
++)
16278 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
16280 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
16281 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
16283 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0
, op1
, p
, x
));
16285 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0
, op1
, p
, x
));
16289 vs
= rtvec_alloc (2);
16290 h2
= (high_p
) ? 2 : 0;
16291 for (i
= 0; i
< 2; i
++)
16293 sign_extend
= ((unsigned_p
)
16295 : PPERM_SIGN
| PPERM_SRC2
| ((4*i
) + 3 + h
));
16296 pperm_bytes
[8*i
+0] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 0 + h
);
16297 pperm_bytes
[8*i
+1] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 1 + h
);
16298 pperm_bytes
[8*i
+2] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 2 + h
);
16299 pperm_bytes
[8*i
+3] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 3 + h
);
16300 pperm_bytes
[8*i
+4] = sign_extend
;
16301 pperm_bytes
[8*i
+5] = sign_extend
;
16302 pperm_bytes
[8*i
+6] = sign_extend
;
16303 pperm_bytes
[8*i
+7] = sign_extend
;
16306 for (i
= 0; i
< 16; i
++)
16307 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
16309 for (i
= 0; i
< 2; i
++)
16310 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
16312 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
16313 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
16315 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0
, op1
, p
, x
));
16317 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0
, op1
, p
, x
));
16321 gcc_unreachable ();
16327 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16328 next narrower integer vector type */
16330 ix86_expand_sse5_pack (rtx operands
[3])
16332 enum machine_mode imode
= GET_MODE (operands
[0]);
16333 int pperm_bytes
[16];
16335 rtvec v
= rtvec_alloc (16);
16337 rtx op0
= operands
[0];
16338 rtx op1
= operands
[1];
16339 rtx op2
= operands
[2];
16344 for (i
= 0; i
< 8; i
++)
16346 pperm_bytes
[i
+0] = PPERM_SRC
| PPERM_SRC1
| (i
*2);
16347 pperm_bytes
[i
+8] = PPERM_SRC
| PPERM_SRC2
| (i
*2);
16350 for (i
= 0; i
< 16; i
++)
16351 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
16353 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
16354 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0
, op1
, op2
, x
));
16358 for (i
= 0; i
< 4; i
++)
16360 pperm_bytes
[(2*i
)+0] = PPERM_SRC
| PPERM_SRC1
| ((i
*4) + 0);
16361 pperm_bytes
[(2*i
)+1] = PPERM_SRC
| PPERM_SRC1
| ((i
*4) + 1);
16362 pperm_bytes
[(2*i
)+8] = PPERM_SRC
| PPERM_SRC2
| ((i
*4) + 0);
16363 pperm_bytes
[(2*i
)+9] = PPERM_SRC
| PPERM_SRC2
| ((i
*4) + 1);
16366 for (i
= 0; i
< 16; i
++)
16367 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
16369 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
16370 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0
, op1
, op2
, x
));
16374 for (i
= 0; i
< 2; i
++)
16376 pperm_bytes
[(4*i
)+0] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 0);
16377 pperm_bytes
[(4*i
)+1] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 1);
16378 pperm_bytes
[(4*i
)+2] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 2);
16379 pperm_bytes
[(4*i
)+3] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 3);
16380 pperm_bytes
[(4*i
)+8] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 0);
16381 pperm_bytes
[(4*i
)+9] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 1);
16382 pperm_bytes
[(4*i
)+10] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 2);
16383 pperm_bytes
[(4*i
)+11] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 3);
16386 for (i
= 0; i
< 16; i
++)
16387 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
16389 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
16390 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0
, op1
, op2
, x
));
16394 gcc_unreachable ();
16400 /* Expand conditional increment or decrement using adb/sbb instructions.
16401 The default case using setcc followed by the conditional move can be
16402 done by generic code. */
16404 ix86_expand_int_addcc (rtx operands
[])
16406 enum rtx_code code
= GET_CODE (operands
[1]);
16408 rtx val
= const0_rtx
;
16409 bool fpcmp
= false;
16410 enum machine_mode mode
= GET_MODE (operands
[0]);
16412 ix86_compare_op0
= XEXP (operands
[1], 0);
16413 ix86_compare_op1
= XEXP (operands
[1], 1);
16414 if (operands
[3] != const1_rtx
16415 && operands
[3] != constm1_rtx
)
16417 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
16418 ix86_compare_op1
, &compare_op
))
16420 code
= GET_CODE (compare_op
);
16422 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
16423 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
16426 code
= ix86_fp_compare_code_to_integer (code
);
16433 PUT_CODE (compare_op
,
16434 reverse_condition_maybe_unordered
16435 (GET_CODE (compare_op
)));
16437 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
16439 PUT_MODE (compare_op
, mode
);
16441 /* Construct either adc or sbb insn. */
16442 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
16444 switch (GET_MODE (operands
[0]))
16447 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
16450 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
16453 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
16456 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
16459 gcc_unreachable ();
16464 switch (GET_MODE (operands
[0]))
16467 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
16470 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
16473 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
16476 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
16479 gcc_unreachable ();
16482 return 1; /* DONE */
16486 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16487 works for floating pointer parameters and nonoffsetable memories.
16488 For pushes, it returns just stack offsets; the values will be saved
16489 in the right order. Maximally three parts are generated. */
16492 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
16497 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
16499 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
16501 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
16502 gcc_assert (size
>= 2 && size
<= 4);
16504 /* Optimize constant pool reference to immediates. This is used by fp
16505 moves, that force all constants to memory to allow combining. */
16506 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
16508 rtx tmp
= maybe_get_pool_constant (operand
);
16513 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
16515 /* The only non-offsetable memories we handle are pushes. */
16516 int ok
= push_operand (operand
, VOIDmode
);
16520 operand
= copy_rtx (operand
);
16521 PUT_MODE (operand
, Pmode
);
16522 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
16526 if (GET_CODE (operand
) == CONST_VECTOR
)
16528 enum machine_mode imode
= int_mode_for_mode (mode
);
16529 /* Caution: if we looked through a constant pool memory above,
16530 the operand may actually have a different mode now. That's
16531 ok, since we want to pun this all the way back to an integer. */
16532 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
16533 gcc_assert (operand
!= NULL
);
16539 if (mode
== DImode
)
16540 split_di (&operand
, 1, &parts
[0], &parts
[1]);
16545 if (REG_P (operand
))
16547 gcc_assert (reload_completed
);
16548 for (i
= 0; i
< size
; i
++)
16549 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
16551 else if (offsettable_memref_p (operand
))
16553 operand
= adjust_address (operand
, SImode
, 0);
16554 parts
[0] = operand
;
16555 for (i
= 1; i
< size
; i
++)
16556 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
16558 else if (GET_CODE (operand
) == CONST_DOUBLE
)
16563 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
16567 real_to_target (l
, &r
, mode
);
16568 parts
[3] = gen_int_mode (l
[3], SImode
);
16569 parts
[2] = gen_int_mode (l
[2], SImode
);
16572 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
16573 parts
[2] = gen_int_mode (l
[2], SImode
);
16576 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
16579 gcc_unreachable ();
16581 parts
[1] = gen_int_mode (l
[1], SImode
);
16582 parts
[0] = gen_int_mode (l
[0], SImode
);
16585 gcc_unreachable ();
16590 if (mode
== TImode
)
16591 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
16592 if (mode
== XFmode
|| mode
== TFmode
)
16594 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
16595 if (REG_P (operand
))
16597 gcc_assert (reload_completed
);
16598 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
16599 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
16601 else if (offsettable_memref_p (operand
))
16603 operand
= adjust_address (operand
, DImode
, 0);
16604 parts
[0] = operand
;
16605 parts
[1] = adjust_address (operand
, upper_mode
, 8);
16607 else if (GET_CODE (operand
) == CONST_DOUBLE
)
16612 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
16613 real_to_target (l
, &r
, mode
);
16615 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16616 if (HOST_BITS_PER_WIDE_INT
>= 64)
16619 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
16620 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
16623 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
16625 if (upper_mode
== SImode
)
16626 parts
[1] = gen_int_mode (l
[2], SImode
);
16627 else if (HOST_BITS_PER_WIDE_INT
>= 64)
16630 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
16631 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
16634 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
16637 gcc_unreachable ();
16644 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16645 Return false when normal moves are needed; true when all required
16646 insns have been emitted. Operands 2-4 contain the input values
16647 int the correct order; operands 5-7 contain the output values. */
16650 ix86_split_long_move (rtx operands
[])
16655 int collisions
= 0;
16656 enum machine_mode mode
= GET_MODE (operands
[0]);
16657 bool collisionparts
[4];
16659 /* The DFmode expanders may ask us to move double.
16660 For 64bit target this is single move. By hiding the fact
16661 here we simplify i386.md splitters. */
16662 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
16664 /* Optimize constant pool reference to immediates. This is used by
16665 fp moves, that force all constants to memory to allow combining. */
16667 if (MEM_P (operands
[1])
16668 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
16669 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
16670 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
16671 if (push_operand (operands
[0], VOIDmode
))
16673 operands
[0] = copy_rtx (operands
[0]);
16674 PUT_MODE (operands
[0], Pmode
);
16677 operands
[0] = gen_lowpart (DImode
, operands
[0]);
16678 operands
[1] = gen_lowpart (DImode
, operands
[1]);
16679 emit_move_insn (operands
[0], operands
[1]);
16683 /* The only non-offsettable memory we handle is push. */
16684 if (push_operand (operands
[0], VOIDmode
))
16687 gcc_assert (!MEM_P (operands
[0])
16688 || offsettable_memref_p (operands
[0]));
16690 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
16691 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
16693 /* When emitting push, take care for source operands on the stack. */
16694 if (push
&& MEM_P (operands
[1])
16695 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
16696 for (i
= 0; i
< nparts
- 1; i
++)
16697 part
[1][i
] = change_address (part
[1][i
],
16698 GET_MODE (part
[1][i
]),
16699 XEXP (part
[1][i
+ 1], 0));
16701 /* We need to do copy in the right order in case an address register
16702 of the source overlaps the destination. */
16703 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
16707 for (i
= 0; i
< nparts
; i
++)
16710 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
16711 if (collisionparts
[i
])
16715 /* Collision in the middle part can be handled by reordering. */
16716 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
16718 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
16719 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
16721 else if (collisions
== 1
16723 && (collisionparts
[1] || collisionparts
[2]))
16725 if (collisionparts
[1])
16727 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
16728 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
16732 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
16733 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
16737 /* If there are more collisions, we can't handle it by reordering.
16738 Do an lea to the last part and use only one colliding move. */
16739 else if (collisions
> 1)
16745 base
= part
[0][nparts
- 1];
16747 /* Handle the case when the last part isn't valid for lea.
16748 Happens in 64-bit mode storing the 12-byte XFmode. */
16749 if (GET_MODE (base
) != Pmode
)
16750 base
= gen_rtx_REG (Pmode
, REGNO (base
));
16752 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
16753 part
[1][0] = replace_equiv_address (part
[1][0], base
);
16754 for (i
= 1; i
< nparts
; i
++)
16756 tmp
= plus_constant (base
, UNITS_PER_WORD
* i
);
16757 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
16768 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
16769 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
16770 emit_move_insn (part
[0][2], part
[1][2]);
16772 else if (nparts
== 4)
16774 emit_move_insn (part
[0][3], part
[1][3]);
16775 emit_move_insn (part
[0][2], part
[1][2]);
16780 /* In 64bit mode we don't have 32bit push available. In case this is
16781 register, it is OK - we will just use larger counterpart. We also
16782 retype memory - these comes from attempt to avoid REX prefix on
16783 moving of second half of TFmode value. */
16784 if (GET_MODE (part
[1][1]) == SImode
)
16786 switch (GET_CODE (part
[1][1]))
16789 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
16793 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
16797 gcc_unreachable ();
16800 if (GET_MODE (part
[1][0]) == SImode
)
16801 part
[1][0] = part
[1][1];
16804 emit_move_insn (part
[0][1], part
[1][1]);
16805 emit_move_insn (part
[0][0], part
[1][0]);
16809 /* Choose correct order to not overwrite the source before it is copied. */
16810 if ((REG_P (part
[0][0])
16811 && REG_P (part
[1][1])
16812 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
16814 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
16816 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
16818 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
16820 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
16822 operands
[2 + i
] = part
[0][j
];
16823 operands
[6 + i
] = part
[1][j
];
16828 for (i
= 0; i
< nparts
; i
++)
16830 operands
[2 + i
] = part
[0][i
];
16831 operands
[6 + i
] = part
[1][i
];
16835 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16836 if (optimize_insn_for_size_p ())
16838 for (j
= 0; j
< nparts
- 1; j
++)
16839 if (CONST_INT_P (operands
[6 + j
])
16840 && operands
[6 + j
] != const0_rtx
16841 && REG_P (operands
[2 + j
]))
16842 for (i
= j
; i
< nparts
- 1; i
++)
16843 if (CONST_INT_P (operands
[7 + i
])
16844 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
16845 operands
[7 + i
] = operands
[2 + j
];
16848 for (i
= 0; i
< nparts
; i
++)
16849 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
16854 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16855 left shift by a constant, either using a single shift or
16856 a sequence of add instructions. */
16859 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
16863 emit_insn ((mode
== DImode
16865 : gen_adddi3
) (operand
, operand
, operand
));
16867 else if (!optimize_insn_for_size_p ()
16868 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
16871 for (i
=0; i
<count
; i
++)
16873 emit_insn ((mode
== DImode
16875 : gen_adddi3
) (operand
, operand
, operand
));
16879 emit_insn ((mode
== DImode
16881 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
16885 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
16887 rtx low
[2], high
[2];
16889 const int single_width
= mode
== DImode
? 32 : 64;
16891 if (CONST_INT_P (operands
[2]))
16893 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
16894 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
16896 if (count
>= single_width
)
16898 emit_move_insn (high
[0], low
[1]);
16899 emit_move_insn (low
[0], const0_rtx
);
16901 if (count
> single_width
)
16902 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
16906 if (!rtx_equal_p (operands
[0], operands
[1]))
16907 emit_move_insn (operands
[0], operands
[1]);
16908 emit_insn ((mode
== DImode
16910 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
16911 ix86_expand_ashl_const (low
[0], count
, mode
);
16916 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
16918 if (operands
[1] == const1_rtx
)
16920 /* Assuming we've chosen a QImode capable registers, then 1 << N
16921 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16922 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
16924 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
16926 ix86_expand_clear (low
[0]);
16927 ix86_expand_clear (high
[0]);
16928 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
16930 d
= gen_lowpart (QImode
, low
[0]);
16931 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
16932 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
16933 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
16935 d
= gen_lowpart (QImode
, high
[0]);
16936 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
16937 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
16938 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
16941 /* Otherwise, we can get the same results by manually performing
16942 a bit extract operation on bit 5/6, and then performing the two
16943 shifts. The two methods of getting 0/1 into low/high are exactly
16944 the same size. Avoiding the shift in the bit extract case helps
16945 pentium4 a bit; no one else seems to care much either way. */
16950 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
16951 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
16953 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
16954 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
16956 emit_insn ((mode
== DImode
16958 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
16959 emit_insn ((mode
== DImode
16961 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
16962 emit_move_insn (low
[0], high
[0]);
16963 emit_insn ((mode
== DImode
16965 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
16968 emit_insn ((mode
== DImode
16970 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
16971 emit_insn ((mode
== DImode
16973 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
16977 if (operands
[1] == constm1_rtx
)
16979 /* For -1 << N, we can avoid the shld instruction, because we
16980 know that we're shifting 0...31/63 ones into a -1. */
16981 emit_move_insn (low
[0], constm1_rtx
);
16982 if (optimize_insn_for_size_p ())
16983 emit_move_insn (high
[0], low
[0]);
16985 emit_move_insn (high
[0], constm1_rtx
);
16989 if (!rtx_equal_p (operands
[0], operands
[1]))
16990 emit_move_insn (operands
[0], operands
[1]);
16992 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
16993 emit_insn ((mode
== DImode
16995 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
16998 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
17000 if (TARGET_CMOVE
&& scratch
)
17002 ix86_expand_clear (scratch
);
17003 emit_insn ((mode
== DImode
17004 ? gen_x86_shift_adj_1
17005 : gen_x86_64_shift_adj_1
) (high
[0], low
[0], operands
[2],
17009 emit_insn ((mode
== DImode
17010 ? gen_x86_shift_adj_2
17011 : gen_x86_64_shift_adj_2
) (high
[0], low
[0], operands
[2]));
17015 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
17017 rtx low
[2], high
[2];
17019 const int single_width
= mode
== DImode
? 32 : 64;
17021 if (CONST_INT_P (operands
[2]))
17023 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
17024 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
17026 if (count
== single_width
* 2 - 1)
17028 emit_move_insn (high
[0], high
[1]);
17029 emit_insn ((mode
== DImode
17031 : gen_ashrdi3
) (high
[0], high
[0],
17032 GEN_INT (single_width
- 1)));
17033 emit_move_insn (low
[0], high
[0]);
17036 else if (count
>= single_width
)
17038 emit_move_insn (low
[0], high
[1]);
17039 emit_move_insn (high
[0], low
[0]);
17040 emit_insn ((mode
== DImode
17042 : gen_ashrdi3
) (high
[0], high
[0],
17043 GEN_INT (single_width
- 1)));
17044 if (count
> single_width
)
17045 emit_insn ((mode
== DImode
17047 : gen_ashrdi3
) (low
[0], low
[0],
17048 GEN_INT (count
- single_width
)));
17052 if (!rtx_equal_p (operands
[0], operands
[1]))
17053 emit_move_insn (operands
[0], operands
[1]);
17054 emit_insn ((mode
== DImode
17056 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
17057 emit_insn ((mode
== DImode
17059 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
17064 if (!rtx_equal_p (operands
[0], operands
[1]))
17065 emit_move_insn (operands
[0], operands
[1]);
17067 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
17069 emit_insn ((mode
== DImode
17071 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
17072 emit_insn ((mode
== DImode
17074 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
17076 if (TARGET_CMOVE
&& scratch
)
17078 emit_move_insn (scratch
, high
[0]);
17079 emit_insn ((mode
== DImode
17081 : gen_ashrdi3
) (scratch
, scratch
,
17082 GEN_INT (single_width
- 1)));
17083 emit_insn ((mode
== DImode
17084 ? gen_x86_shift_adj_1
17085 : gen_x86_64_shift_adj_1
) (low
[0], high
[0], operands
[2],
17089 emit_insn ((mode
== DImode
17090 ? gen_x86_shift_adj_3
17091 : gen_x86_64_shift_adj_3
) (low
[0], high
[0], operands
[2]));
17096 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
17098 rtx low
[2], high
[2];
17100 const int single_width
= mode
== DImode
? 32 : 64;
17102 if (CONST_INT_P (operands
[2]))
17104 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
17105 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
17107 if (count
>= single_width
)
17109 emit_move_insn (low
[0], high
[1]);
17110 ix86_expand_clear (high
[0]);
17112 if (count
> single_width
)
17113 emit_insn ((mode
== DImode
17115 : gen_lshrdi3
) (low
[0], low
[0],
17116 GEN_INT (count
- single_width
)));
17120 if (!rtx_equal_p (operands
[0], operands
[1]))
17121 emit_move_insn (operands
[0], operands
[1]);
17122 emit_insn ((mode
== DImode
17124 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
17125 emit_insn ((mode
== DImode
17127 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
17132 if (!rtx_equal_p (operands
[0], operands
[1]))
17133 emit_move_insn (operands
[0], operands
[1]);
17135 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
17137 emit_insn ((mode
== DImode
17139 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
17140 emit_insn ((mode
== DImode
17142 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
17144 /* Heh. By reversing the arguments, we can reuse this pattern. */
17145 if (TARGET_CMOVE
&& scratch
)
17147 ix86_expand_clear (scratch
);
17148 emit_insn ((mode
== DImode
17149 ? gen_x86_shift_adj_1
17150 : gen_x86_64_shift_adj_1
) (low
[0], high
[0], operands
[2],
17154 emit_insn ((mode
== DImode
17155 ? gen_x86_shift_adj_2
17156 : gen_x86_64_shift_adj_2
) (low
[0], high
[0], operands
[2]));
17160 /* Predict just emitted jump instruction to be taken with probability PROB. */
17162 predict_jump (int prob
)
17164 rtx insn
= get_last_insn ();
17165 gcc_assert (JUMP_P (insn
));
17166 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
17169 /* Helper function for the string operations below. Dest VARIABLE whether
17170 it is aligned to VALUE bytes. If true, jump to the label. */
17172 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
17174 rtx label
= gen_label_rtx ();
17175 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
17176 if (GET_MODE (variable
) == DImode
)
17177 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
17179 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
17180 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
17183 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
17185 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
17189 /* Adjust COUNTER by the VALUE. */
17191 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
17193 if (GET_MODE (countreg
) == DImode
)
17194 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
17196 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
17199 /* Zero extend possibly SImode EXP to Pmode register. */
17201 ix86_zero_extend_to_Pmode (rtx exp
)
17204 if (GET_MODE (exp
) == VOIDmode
)
17205 return force_reg (Pmode
, exp
);
17206 if (GET_MODE (exp
) == Pmode
)
17207 return copy_to_mode_reg (Pmode
, exp
);
17208 r
= gen_reg_rtx (Pmode
);
17209 emit_insn (gen_zero_extendsidi2 (r
, exp
));
17213 /* Divide COUNTREG by SCALE. */
17215 scale_counter (rtx countreg
, int scale
)
17218 rtx piece_size_mask
;
17222 if (CONST_INT_P (countreg
))
17223 return GEN_INT (INTVAL (countreg
) / scale
);
17224 gcc_assert (REG_P (countreg
));
17226 piece_size_mask
= GEN_INT (scale
- 1);
17227 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
17228 GEN_INT (exact_log2 (scale
)),
17229 NULL
, 1, OPTAB_DIRECT
);
17233 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17234 DImode for constant loop counts. */
17236 static enum machine_mode
17237 counter_mode (rtx count_exp
)
17239 if (GET_MODE (count_exp
) != VOIDmode
)
17240 return GET_MODE (count_exp
);
17241 if (!CONST_INT_P (count_exp
))
17243 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
17248 /* When SRCPTR is non-NULL, output simple loop to move memory
17249 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17250 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17251 equivalent loop to set memory by VALUE (supposed to be in MODE).
17253 The size is rounded down to whole number of chunk size moved at once.
17254 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
17258 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
17259 rtx destptr
, rtx srcptr
, rtx value
,
17260 rtx count
, enum machine_mode mode
, int unroll
,
17263 rtx out_label
, top_label
, iter
, tmp
;
17264 enum machine_mode iter_mode
= counter_mode (count
);
17265 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
17266 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
17272 top_label
= gen_label_rtx ();
17273 out_label
= gen_label_rtx ();
17274 iter
= gen_reg_rtx (iter_mode
);
17276 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
17277 NULL
, 1, OPTAB_DIRECT
);
17278 /* Those two should combine. */
17279 if (piece_size
== const1_rtx
)
17281 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
17283 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
17285 emit_move_insn (iter
, const0_rtx
);
17287 emit_label (top_label
);
17289 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
17290 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
17291 destmem
= change_address (destmem
, mode
, x_addr
);
17295 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
17296 srcmem
= change_address (srcmem
, mode
, y_addr
);
17298 /* When unrolling for chips that reorder memory reads and writes,
17299 we can save registers by using single temporary.
17300 Also using 4 temporaries is overkill in 32bit mode. */
17301 if (!TARGET_64BIT
&& 0)
17303 for (i
= 0; i
< unroll
; i
++)
17308 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
17310 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
17312 emit_move_insn (destmem
, srcmem
);
17318 gcc_assert (unroll
<= 4);
17319 for (i
= 0; i
< unroll
; i
++)
17321 tmpreg
[i
] = gen_reg_rtx (mode
);
17325 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
17327 emit_move_insn (tmpreg
[i
], srcmem
);
17329 for (i
= 0; i
< unroll
; i
++)
17334 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
17336 emit_move_insn (destmem
, tmpreg
[i
]);
17341 for (i
= 0; i
< unroll
; i
++)
17345 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
17346 emit_move_insn (destmem
, value
);
17349 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
17350 true, OPTAB_LIB_WIDEN
);
17352 emit_move_insn (iter
, tmp
);
17354 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
17356 if (expected_size
!= -1)
17358 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
17359 if (expected_size
== 0)
17361 else if (expected_size
> REG_BR_PROB_BASE
)
17362 predict_jump (REG_BR_PROB_BASE
- 1);
17364 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
17367 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
17368 iter
= ix86_zero_extend_to_Pmode (iter
);
17369 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
17370 true, OPTAB_LIB_WIDEN
);
17371 if (tmp
!= destptr
)
17372 emit_move_insn (destptr
, tmp
);
17375 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
17376 true, OPTAB_LIB_WIDEN
);
17378 emit_move_insn (srcptr
, tmp
);
17380 emit_label (out_label
);
17383 /* Output "rep; mov" instruction.
17384 Arguments have same meaning as for previous function */
17386 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
17387 rtx destptr
, rtx srcptr
,
17389 enum machine_mode mode
)
17395 /* If the size is known, it is shorter to use rep movs. */
17396 if (mode
== QImode
&& CONST_INT_P (count
)
17397 && !(INTVAL (count
) & 3))
17400 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
17401 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
17402 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
17403 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
17404 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
17405 if (mode
!= QImode
)
17407 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
17408 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
17409 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
17410 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
17411 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
17412 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
17416 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
17417 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
17419 if (CONST_INT_P (count
))
17421 count
= GEN_INT (INTVAL (count
)
17422 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
17423 destmem
= shallow_copy_rtx (destmem
);
17424 srcmem
= shallow_copy_rtx (srcmem
);
17425 set_mem_size (destmem
, count
);
17426 set_mem_size (srcmem
, count
);
17430 if (MEM_SIZE (destmem
))
17431 set_mem_size (destmem
, NULL_RTX
);
17432 if (MEM_SIZE (srcmem
))
17433 set_mem_size (srcmem
, NULL_RTX
);
17435 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
17439 /* Output "rep; stos" instruction.
17440 Arguments have same meaning as for previous function */
17442 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
17443 rtx count
, enum machine_mode mode
,
17449 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
17450 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
17451 value
= force_reg (mode
, gen_lowpart (mode
, value
));
17452 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
17453 if (mode
!= QImode
)
17455 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
17456 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
17457 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
17460 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
17461 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
17463 count
= GEN_INT (INTVAL (count
)
17464 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
17465 destmem
= shallow_copy_rtx (destmem
);
17466 set_mem_size (destmem
, count
);
17468 else if (MEM_SIZE (destmem
))
17469 set_mem_size (destmem
, NULL_RTX
);
17470 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
17474 emit_strmov (rtx destmem
, rtx srcmem
,
17475 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
17477 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
17478 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
17479 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
17482 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17484 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
17485 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
17488 if (CONST_INT_P (count
))
17490 HOST_WIDE_INT countval
= INTVAL (count
);
17493 if ((countval
& 0x10) && max_size
> 16)
17497 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
17498 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
17501 gcc_unreachable ();
17504 if ((countval
& 0x08) && max_size
> 8)
17507 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
17510 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
17511 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
17515 if ((countval
& 0x04) && max_size
> 4)
17517 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
17520 if ((countval
& 0x02) && max_size
> 2)
17522 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
17525 if ((countval
& 0x01) && max_size
> 1)
17527 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
17534 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
17535 count
, 1, OPTAB_DIRECT
);
17536 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
17537 count
, QImode
, 1, 4);
17541 /* When there are stringops, we can cheaply increase dest and src pointers.
17542 Otherwise we save code size by maintaining offset (zero is readily
17543 available from preceding rep operation) and using x86 addressing modes.
17545 if (TARGET_SINGLE_STRINGOP
)
17549 rtx label
= ix86_expand_aligntest (count
, 4, true);
17550 src
= change_address (srcmem
, SImode
, srcptr
);
17551 dest
= change_address (destmem
, SImode
, destptr
);
17552 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
17553 emit_label (label
);
17554 LABEL_NUSES (label
) = 1;
17558 rtx label
= ix86_expand_aligntest (count
, 2, true);
17559 src
= change_address (srcmem
, HImode
, srcptr
);
17560 dest
= change_address (destmem
, HImode
, destptr
);
17561 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
17562 emit_label (label
);
17563 LABEL_NUSES (label
) = 1;
17567 rtx label
= ix86_expand_aligntest (count
, 1, true);
17568 src
= change_address (srcmem
, QImode
, srcptr
);
17569 dest
= change_address (destmem
, QImode
, destptr
);
17570 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
17571 emit_label (label
);
17572 LABEL_NUSES (label
) = 1;
17577 rtx offset
= force_reg (Pmode
, const0_rtx
);
17582 rtx label
= ix86_expand_aligntest (count
, 4, true);
17583 src
= change_address (srcmem
, SImode
, srcptr
);
17584 dest
= change_address (destmem
, SImode
, destptr
);
17585 emit_move_insn (dest
, src
);
17586 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
17587 true, OPTAB_LIB_WIDEN
);
17589 emit_move_insn (offset
, tmp
);
17590 emit_label (label
);
17591 LABEL_NUSES (label
) = 1;
17595 rtx label
= ix86_expand_aligntest (count
, 2, true);
17596 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
17597 src
= change_address (srcmem
, HImode
, tmp
);
17598 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
17599 dest
= change_address (destmem
, HImode
, tmp
);
17600 emit_move_insn (dest
, src
);
17601 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
17602 true, OPTAB_LIB_WIDEN
);
17604 emit_move_insn (offset
, tmp
);
17605 emit_label (label
);
17606 LABEL_NUSES (label
) = 1;
17610 rtx label
= ix86_expand_aligntest (count
, 1, true);
17611 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
17612 src
= change_address (srcmem
, QImode
, tmp
);
17613 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
17614 dest
= change_address (destmem
, QImode
, tmp
);
17615 emit_move_insn (dest
, src
);
17616 emit_label (label
);
17617 LABEL_NUSES (label
) = 1;
17622 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17624 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
17625 rtx count
, int max_size
)
17628 expand_simple_binop (counter_mode (count
), AND
, count
,
17629 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
17630 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
17631 gen_lowpart (QImode
, value
), count
, QImode
,
17635 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17637 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
17641 if (CONST_INT_P (count
))
17643 HOST_WIDE_INT countval
= INTVAL (count
);
17646 if ((countval
& 0x10) && max_size
> 16)
17650 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
17651 emit_insn (gen_strset (destptr
, dest
, value
));
17652 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
17653 emit_insn (gen_strset (destptr
, dest
, value
));
17656 gcc_unreachable ();
17659 if ((countval
& 0x08) && max_size
> 8)
17663 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
17664 emit_insn (gen_strset (destptr
, dest
, value
));
17668 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
17669 emit_insn (gen_strset (destptr
, dest
, value
));
17670 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
17671 emit_insn (gen_strset (destptr
, dest
, value
));
17675 if ((countval
& 0x04) && max_size
> 4)
17677 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
17678 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
17681 if ((countval
& 0x02) && max_size
> 2)
17683 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
17684 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
17687 if ((countval
& 0x01) && max_size
> 1)
17689 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
17690 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
17697 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
17702 rtx label
= ix86_expand_aligntest (count
, 16, true);
17705 dest
= change_address (destmem
, DImode
, destptr
);
17706 emit_insn (gen_strset (destptr
, dest
, value
));
17707 emit_insn (gen_strset (destptr
, dest
, value
));
17711 dest
= change_address (destmem
, SImode
, destptr
);
17712 emit_insn (gen_strset (destptr
, dest
, value
));
17713 emit_insn (gen_strset (destptr
, dest
, value
));
17714 emit_insn (gen_strset (destptr
, dest
, value
));
17715 emit_insn (gen_strset (destptr
, dest
, value
));
17717 emit_label (label
);
17718 LABEL_NUSES (label
) = 1;
17722 rtx label
= ix86_expand_aligntest (count
, 8, true);
17725 dest
= change_address (destmem
, DImode
, destptr
);
17726 emit_insn (gen_strset (destptr
, dest
, value
));
17730 dest
= change_address (destmem
, SImode
, destptr
);
17731 emit_insn (gen_strset (destptr
, dest
, value
));
17732 emit_insn (gen_strset (destptr
, dest
, value
));
17734 emit_label (label
);
17735 LABEL_NUSES (label
) = 1;
17739 rtx label
= ix86_expand_aligntest (count
, 4, true);
17740 dest
= change_address (destmem
, SImode
, destptr
);
17741 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
17742 emit_label (label
);
17743 LABEL_NUSES (label
) = 1;
17747 rtx label
= ix86_expand_aligntest (count
, 2, true);
17748 dest
= change_address (destmem
, HImode
, destptr
);
17749 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
17750 emit_label (label
);
17751 LABEL_NUSES (label
) = 1;
17755 rtx label
= ix86_expand_aligntest (count
, 1, true);
17756 dest
= change_address (destmem
, QImode
, destptr
);
17757 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
17758 emit_label (label
);
17759 LABEL_NUSES (label
) = 1;
17763 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17764 DESIRED_ALIGNMENT. */
17766 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
17767 rtx destptr
, rtx srcptr
, rtx count
,
17768 int align
, int desired_alignment
)
17770 if (align
<= 1 && desired_alignment
> 1)
17772 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
17773 srcmem
= change_address (srcmem
, QImode
, srcptr
);
17774 destmem
= change_address (destmem
, QImode
, destptr
);
17775 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
17776 ix86_adjust_counter (count
, 1);
17777 emit_label (label
);
17778 LABEL_NUSES (label
) = 1;
17780 if (align
<= 2 && desired_alignment
> 2)
17782 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
17783 srcmem
= change_address (srcmem
, HImode
, srcptr
);
17784 destmem
= change_address (destmem
, HImode
, destptr
);
17785 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
17786 ix86_adjust_counter (count
, 2);
17787 emit_label (label
);
17788 LABEL_NUSES (label
) = 1;
17790 if (align
<= 4 && desired_alignment
> 4)
17792 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
17793 srcmem
= change_address (srcmem
, SImode
, srcptr
);
17794 destmem
= change_address (destmem
, SImode
, destptr
);
17795 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
17796 ix86_adjust_counter (count
, 4);
17797 emit_label (label
);
17798 LABEL_NUSES (label
) = 1;
17800 gcc_assert (desired_alignment
<= 8);
17803 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17804 ALIGN_BYTES is how many bytes need to be copied. */
17806 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
17807 int desired_align
, int align_bytes
)
17810 rtx src_size
, dst_size
;
17812 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
17813 if (src_align_bytes
>= 0)
17814 src_align_bytes
= desired_align
- src_align_bytes
;
17815 src_size
= MEM_SIZE (src
);
17816 dst_size
= MEM_SIZE (dst
);
17817 if (align_bytes
& 1)
17819 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
17820 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
17822 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
17824 if (align_bytes
& 2)
17826 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
17827 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
17828 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
17829 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
17830 if (src_align_bytes
>= 0
17831 && (src_align_bytes
& 1) == (align_bytes
& 1)
17832 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
17833 set_mem_align (src
, 2 * BITS_PER_UNIT
);
17835 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
17837 if (align_bytes
& 4)
17839 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
17840 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
17841 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
17842 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
17843 if (src_align_bytes
>= 0)
17845 unsigned int src_align
= 0;
17846 if ((src_align_bytes
& 3) == (align_bytes
& 3))
17848 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
17850 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
17851 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
17854 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
17856 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
17857 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
17858 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
17859 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
17860 if (src_align_bytes
>= 0)
17862 unsigned int src_align
= 0;
17863 if ((src_align_bytes
& 7) == (align_bytes
& 7))
17865 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
17867 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
17869 if (src_align
> (unsigned int) desired_align
)
17870 src_align
= desired_align
;
17871 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
17872 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
17875 set_mem_size (dst
, GEN_INT (INTVAL (dst_size
) - align_bytes
));
17877 set_mem_size (dst
, GEN_INT (INTVAL (src_size
) - align_bytes
));
17882 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
17883 DESIRED_ALIGNMENT. */
17885 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
17886 int align
, int desired_alignment
)
17888 if (align
<= 1 && desired_alignment
> 1)
17890 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
17891 destmem
= change_address (destmem
, QImode
, destptr
);
17892 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
17893 ix86_adjust_counter (count
, 1);
17894 emit_label (label
);
17895 LABEL_NUSES (label
) = 1;
17897 if (align
<= 2 && desired_alignment
> 2)
17899 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
17900 destmem
= change_address (destmem
, HImode
, destptr
);
17901 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
17902 ix86_adjust_counter (count
, 2);
17903 emit_label (label
);
17904 LABEL_NUSES (label
) = 1;
17906 if (align
<= 4 && desired_alignment
> 4)
17908 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
17909 destmem
= change_address (destmem
, SImode
, destptr
);
17910 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
17911 ix86_adjust_counter (count
, 4);
17912 emit_label (label
);
17913 LABEL_NUSES (label
) = 1;
17915 gcc_assert (desired_alignment
<= 8);
17918 /* Set enough from DST to align DST known to by aligned by ALIGN to
17919 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
17921 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
17922 int desired_align
, int align_bytes
)
17925 rtx dst_size
= MEM_SIZE (dst
);
17926 if (align_bytes
& 1)
17928 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
17930 emit_insn (gen_strset (destreg
, dst
,
17931 gen_lowpart (QImode
, value
)));
17933 if (align_bytes
& 2)
17935 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
17936 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
17937 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
17939 emit_insn (gen_strset (destreg
, dst
,
17940 gen_lowpart (HImode
, value
)));
17942 if (align_bytes
& 4)
17944 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
17945 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
17946 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
17948 emit_insn (gen_strset (destreg
, dst
,
17949 gen_lowpart (SImode
, value
)));
17951 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
17952 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
17953 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
17955 set_mem_size (dst
, GEN_INT (INTVAL (dst_size
) - align_bytes
));
17959 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17960 static enum stringop_alg
17961 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
17962 int *dynamic_check
)
17964 const struct stringop_algs
* algs
;
17965 bool optimize_for_speed
;
17966 /* Algorithms using the rep prefix want at least edi and ecx;
17967 additionally, memset wants eax and memcpy wants esi. Don't
17968 consider such algorithms if the user has appropriated those
17969 registers for their own purposes. */
17970 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
17972 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
17974 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17975 || (alg != rep_prefix_1_byte \
17976 && alg != rep_prefix_4_byte \
17977 && alg != rep_prefix_8_byte))
17978 const struct processor_costs
*cost
;
17980 /* Even if the string operation call is cold, we still might spend a lot
17981 of time processing large blocks. */
17982 if (optimize_function_for_size_p (cfun
)
17983 || (optimize_insn_for_size_p ()
17984 && expected_size
!= -1 && expected_size
< 256))
17985 optimize_for_speed
= false;
17987 optimize_for_speed
= true;
17989 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
17991 *dynamic_check
= -1;
17993 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
17995 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
17996 if (stringop_alg
!= no_stringop
&& ALG_USABLE_P (stringop_alg
))
17997 return stringop_alg
;
17998 /* rep; movq or rep; movl is the smallest variant. */
17999 else if (!optimize_for_speed
)
18001 if (!count
|| (count
& 3))
18002 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
18004 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
18006 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18008 else if (expected_size
!= -1 && expected_size
< 4)
18009 return loop_1_byte
;
18010 else if (expected_size
!= -1)
18013 enum stringop_alg alg
= libcall
;
18014 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
18016 /* We get here if the algorithms that were not libcall-based
18017 were rep-prefix based and we are unable to use rep prefixes
18018 based on global register usage. Break out of the loop and
18019 use the heuristic below. */
18020 if (algs
->size
[i
].max
== 0)
18022 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
18024 enum stringop_alg candidate
= algs
->size
[i
].alg
;
18026 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
18028 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18029 last non-libcall inline algorithm. */
18030 if (TARGET_INLINE_ALL_STRINGOPS
)
18032 /* When the current size is best to be copied by a libcall,
18033 but we are still forced to inline, run the heuristic below
18034 that will pick code for medium sized blocks. */
18035 if (alg
!= libcall
)
18039 else if (ALG_USABLE_P (candidate
))
18043 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
18045 /* When asked to inline the call anyway, try to pick meaningful choice.
18046 We look for maximal size of block that is faster to copy by hand and
18047 take blocks of at most of that size guessing that average size will
18048 be roughly half of the block.
18050 If this turns out to be bad, we might simply specify the preferred
18051 choice in ix86_costs. */
18052 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
18053 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
18056 enum stringop_alg alg
;
18058 bool any_alg_usable_p
= true;
18060 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
18062 enum stringop_alg candidate
= algs
->size
[i
].alg
;
18063 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
18065 if (candidate
!= libcall
&& candidate
18066 && ALG_USABLE_P (candidate
))
18067 max
= algs
->size
[i
].max
;
18069 /* If there aren't any usable algorithms, then recursing on
18070 smaller sizes isn't going to find anything. Just return the
18071 simple byte-at-a-time copy loop. */
18072 if (!any_alg_usable_p
)
18074 /* Pick something reasonable. */
18075 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
18076 *dynamic_check
= 128;
18077 return loop_1_byte
;
18081 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
18082 gcc_assert (*dynamic_check
== -1);
18083 gcc_assert (alg
!= libcall
);
18084 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
18085 *dynamic_check
= max
;
18088 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
18089 #undef ALG_USABLE_P
18092 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18093 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18095 decide_alignment (int align
,
18096 enum stringop_alg alg
,
18099 int desired_align
= 0;
18103 gcc_unreachable ();
18105 case unrolled_loop
:
18106 desired_align
= GET_MODE_SIZE (Pmode
);
18108 case rep_prefix_8_byte
:
18111 case rep_prefix_4_byte
:
18112 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18113 copying whole cacheline at once. */
18114 if (TARGET_PENTIUMPRO
)
18119 case rep_prefix_1_byte
:
18120 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18121 copying whole cacheline at once. */
18122 if (TARGET_PENTIUMPRO
)
18136 if (desired_align
< align
)
18137 desired_align
= align
;
18138 if (expected_size
!= -1 && expected_size
< 4)
18139 desired_align
= align
;
18140 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  /* Double until we strictly exceed VAL; for VAL < 1 this returns 1.  */
  while (ret <= val)
    ret <<= 1;
  return ret;
}
18153 /* Expand string move (memcpy) operation. Use i386 string operations when
18154 profitable. expand_setmem contains similar code. The code depends upon
18155 architecture, block size and alignment, but always has the same
18158 1) Prologue guard: Conditional that jumps up to epilogues for small
18159 blocks that can be handled by epilogue alone. This is faster but
18160 also needed for correctness, since prologue assume the block is larger
18161 than the desired alignment.
18163 Optional dynamic check for size and libcall for large
18164 blocks is emitted here too, with -minline-stringops-dynamically.
18166 2) Prologue: copy first few bytes in order to get destination aligned
18167 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18168 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18169 We emit either a jump tree on power of two sized blocks, or a byte loop.
18171 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18172 with specified algorithm.
18174 4) Epilogue: code copying tail of the block that is too small to be
18175 handled by main body (or up to size guarded by prologue guard). */
18178 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
18179 rtx expected_align_exp
, rtx expected_size_exp
)
18185 rtx jump_around_label
= NULL
;
18186 HOST_WIDE_INT align
= 1;
18187 unsigned HOST_WIDE_INT count
= 0;
18188 HOST_WIDE_INT expected_size
= -1;
18189 int size_needed
= 0, epilogue_size_needed
;
18190 int desired_align
= 0, align_bytes
= 0;
18191 enum stringop_alg alg
;
18193 bool need_zero_guard
= false;
18195 if (CONST_INT_P (align_exp
))
18196 align
= INTVAL (align_exp
);
18197 /* i386 can do misaligned access on reasonably increased cost. */
18198 if (CONST_INT_P (expected_align_exp
)
18199 && INTVAL (expected_align_exp
) > align
)
18200 align
= INTVAL (expected_align_exp
);
18201 /* ALIGN is the minimum of destination and source alignment, but we care here
18202 just about destination alignment. */
18203 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
18204 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
18206 if (CONST_INT_P (count_exp
))
18207 count
= expected_size
= INTVAL (count_exp
);
18208 if (CONST_INT_P (expected_size_exp
) && count
== 0)
18209 expected_size
= INTVAL (expected_size_exp
);
18211 /* Make sure we don't need to care about overflow later on. */
18212 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
18215 /* Step 0: Decide on preferred algorithm, desired alignment and
18216 size of chunks to be copied by main loop. */
18218 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
18219 desired_align
= decide_alignment (align
, alg
, expected_size
);
18221 if (!TARGET_ALIGN_STRINGOPS
)
18222 align
= desired_align
;
18224 if (alg
== libcall
)
18226 gcc_assert (alg
!= no_stringop
);
18228 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
18229 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
18230 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
18235 gcc_unreachable ();
18237 need_zero_guard
= true;
18238 size_needed
= GET_MODE_SIZE (Pmode
);
18240 case unrolled_loop
:
18241 need_zero_guard
= true;
18242 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
18244 case rep_prefix_8_byte
:
18247 case rep_prefix_4_byte
:
18250 case rep_prefix_1_byte
:
18254 need_zero_guard
= true;
18259 epilogue_size_needed
= size_needed
;
18261 /* Step 1: Prologue guard. */
18263 /* Alignment code needs count to be in register. */
18264 if (CONST_INT_P (count_exp
) && desired_align
> align
)
18266 if (INTVAL (count_exp
) > desired_align
18267 && INTVAL (count_exp
) > size_needed
)
18270 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
18271 if (align_bytes
<= 0)
18274 align_bytes
= desired_align
- align_bytes
;
18276 if (align_bytes
== 0)
18277 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
18279 gcc_assert (desired_align
>= 1 && align
>= 1);
18281 /* Ensure that alignment prologue won't copy past end of block. */
18282 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
18284 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
18285 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18286 Make sure it is power of 2. */
18287 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
18291 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
18293 /* If main algorithm works on QImode, no epilogue is needed.
18294 For small sizes just don't align anything. */
18295 if (size_needed
== 1)
18296 desired_align
= align
;
18303 label
= gen_label_rtx ();
18304 emit_cmp_and_jump_insns (count_exp
,
18305 GEN_INT (epilogue_size_needed
),
18306 LTU
, 0, counter_mode (count_exp
), 1, label
);
18307 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
18308 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
18310 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
18314 /* Emit code to decide on runtime whether library call or inline should be
18316 if (dynamic_check
!= -1)
18318 if (CONST_INT_P (count_exp
))
18320 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
18322 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
18323 count_exp
= const0_rtx
;
18329 rtx hot_label
= gen_label_rtx ();
18330 jump_around_label
= gen_label_rtx ();
18331 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
18332 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
18333 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
18334 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
18335 emit_jump (jump_around_label
);
18336 emit_label (hot_label
);
18340 /* Step 2: Alignment prologue. */
18342 if (desired_align
> align
)
18344 if (align_bytes
== 0)
18346 /* Except for the first move in epilogue, we no longer know
18347 constant offset in aliasing info. It don't seems to worth
18348 the pain to maintain it for the first move, so throw away
18350 src
= change_address (src
, BLKmode
, srcreg
);
18351 dst
= change_address (dst
, BLKmode
, destreg
);
18352 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
18357 /* If we know how many bytes need to be stored before dst is
18358 sufficiently aligned, maintain aliasing info accurately. */
18359 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
18360 desired_align
, align_bytes
);
18361 count_exp
= plus_constant (count_exp
, -align_bytes
);
18362 count
-= align_bytes
;
18364 if (need_zero_guard
18365 && (count
< (unsigned HOST_WIDE_INT
) size_needed
18366 || (align_bytes
== 0
18367 && count
< ((unsigned HOST_WIDE_INT
) size_needed
18368 + desired_align
- align
))))
18370 /* It is possible that we copied enough so the main loop will not
18372 gcc_assert (size_needed
> 1);
18373 if (label
== NULL_RTX
)
18374 label
= gen_label_rtx ();
18375 emit_cmp_and_jump_insns (count_exp
,
18376 GEN_INT (size_needed
),
18377 LTU
, 0, counter_mode (count_exp
), 1, label
);
18378 if (expected_size
== -1
18379 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
18380 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
18382 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
18385 if (label
&& size_needed
== 1)
18387 emit_label (label
);
18388 LABEL_NUSES (label
) = 1;
18390 epilogue_size_needed
= 1;
18392 else if (label
== NULL_RTX
)
18393 epilogue_size_needed
= size_needed
;
18395 /* Step 3: Main loop. */
18401 gcc_unreachable ();
18403 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
18404 count_exp
, QImode
, 1, expected_size
);
18407 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
18408 count_exp
, Pmode
, 1, expected_size
);
18410 case unrolled_loop
:
18411 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18412 registers for 4 temporaries anyway. */
18413 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
18414 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
18417 case rep_prefix_8_byte
:
18418 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
18421 case rep_prefix_4_byte
:
18422 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
18425 case rep_prefix_1_byte
:
18426 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
18430 /* Adjust properly the offset of src and dest memory for aliasing. */
18431 if (CONST_INT_P (count_exp
))
18433 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
18434 (count
/ size_needed
) * size_needed
);
18435 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
18436 (count
/ size_needed
) * size_needed
);
18440 src
= change_address (src
, BLKmode
, srcreg
);
18441 dst
= change_address (dst
, BLKmode
, destreg
);
18444 /* Step 4: Epilogue to copy the remaining bytes. */
18448 /* When the main loop is done, COUNT_EXP might hold original count,
18449 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18450 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18451 bytes. Compensate if needed. */
18453 if (size_needed
< epilogue_size_needed
)
18456 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
18457 GEN_INT (size_needed
- 1), count_exp
, 1,
18459 if (tmp
!= count_exp
)
18460 emit_move_insn (count_exp
, tmp
);
18462 emit_label (label
);
18463 LABEL_NUSES (label
) = 1;
18466 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
18467 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
18468 epilogue_size_needed
);
18469 if (jump_around_label
)
18470 emit_label (jump_around_label
);
18474 /* Helper function for memcpy. For QImode value 0xXY produce
18475 0xXYXYXYXY of wide specified by MODE. This is essentially
18476 a * 0x10101010, but we can do slightly better than
18477 synth_mult by unwinding the sequence by hand on CPUs with
18480 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
18482 enum machine_mode valmode
= GET_MODE (val
);
18484 int nops
= mode
== DImode
? 3 : 2;
18486 gcc_assert (mode
== SImode
|| mode
== DImode
);
18487 if (val
== const0_rtx
)
18488 return copy_to_mode_reg (mode
, const0_rtx
);
18489 if (CONST_INT_P (val
))
18491 HOST_WIDE_INT v
= INTVAL (val
) & 255;
18495 if (mode
== DImode
)
18496 v
|= (v
<< 16) << 16;
18497 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
18500 if (valmode
== VOIDmode
)
18502 if (valmode
!= QImode
)
18503 val
= gen_lowpart (QImode
, val
);
18504 if (mode
== QImode
)
18506 if (!TARGET_PARTIAL_REG_STALL
)
18508 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
18509 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
18510 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
18511 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
18513 rtx reg
= convert_modes (mode
, QImode
, val
, true);
18514 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
18515 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
18520 rtx reg
= convert_modes (mode
, QImode
, val
, true);
18522 if (!TARGET_PARTIAL_REG_STALL
)
18523 if (mode
== SImode
)
18524 emit_insn (gen_movsi_insv_1 (reg
, reg
));
18526 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
18529 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
18530 NULL
, 1, OPTAB_DIRECT
);
18532 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
18534 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
18535 NULL
, 1, OPTAB_DIRECT
);
18536 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
18537 if (mode
== SImode
)
18539 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
18540 NULL
, 1, OPTAB_DIRECT
);
18541 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
18546 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18547 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18548 alignment from ALIGN to DESIRED_ALIGN. */
18550 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
18555 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
18556 promoted_val
= promote_duplicated_reg (DImode
, val
);
18557 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
18558 promoted_val
= promote_duplicated_reg (SImode
, val
);
18559 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
18560 promoted_val
= promote_duplicated_reg (HImode
, val
);
18562 promoted_val
= val
;
18564 return promoted_val
;
18567 /* Expand string clear operation (bzero). Use i386 string operations when
18568 profitable. See expand_movmem comment for explanation of individual
18569 steps performed. */
18571 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
18572 rtx expected_align_exp
, rtx expected_size_exp
)
18577 rtx jump_around_label
= NULL
;
18578 HOST_WIDE_INT align
= 1;
18579 unsigned HOST_WIDE_INT count
= 0;
18580 HOST_WIDE_INT expected_size
= -1;
18581 int size_needed
= 0, epilogue_size_needed
;
18582 int desired_align
= 0, align_bytes
= 0;
18583 enum stringop_alg alg
;
18584 rtx promoted_val
= NULL
;
18585 bool force_loopy_epilogue
= false;
18587 bool need_zero_guard
= false;
18589 if (CONST_INT_P (align_exp
))
18590 align
= INTVAL (align_exp
);
18591 /* i386 can do misaligned access on reasonably increased cost. */
18592 if (CONST_INT_P (expected_align_exp
)
18593 && INTVAL (expected_align_exp
) > align
)
18594 align
= INTVAL (expected_align_exp
);
18595 if (CONST_INT_P (count_exp
))
18596 count
= expected_size
= INTVAL (count_exp
);
18597 if (CONST_INT_P (expected_size_exp
) && count
== 0)
18598 expected_size
= INTVAL (expected_size_exp
);
18600 /* Make sure we don't need to care about overflow later on. */
18601 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
18604 /* Step 0: Decide on preferred algorithm, desired alignment and
18605 size of chunks to be copied by main loop. */
18607 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
18608 desired_align
= decide_alignment (align
, alg
, expected_size
);
18610 if (!TARGET_ALIGN_STRINGOPS
)
18611 align
= desired_align
;
18613 if (alg
== libcall
)
18615 gcc_assert (alg
!= no_stringop
);
18617 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
18618 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
18623 gcc_unreachable ();
18625 need_zero_guard
= true;
18626 size_needed
= GET_MODE_SIZE (Pmode
);
18628 case unrolled_loop
:
18629 need_zero_guard
= true;
18630 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
18632 case rep_prefix_8_byte
:
18635 case rep_prefix_4_byte
:
18638 case rep_prefix_1_byte
:
18642 need_zero_guard
= true;
18646 epilogue_size_needed
= size_needed
;
18648 /* Step 1: Prologue guard. */
18650 /* Alignment code needs count to be in register. */
18651 if (CONST_INT_P (count_exp
) && desired_align
> align
)
18653 if (INTVAL (count_exp
) > desired_align
18654 && INTVAL (count_exp
) > size_needed
)
18657 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
18658 if (align_bytes
<= 0)
18661 align_bytes
= desired_align
- align_bytes
;
18663 if (align_bytes
== 0)
18665 enum machine_mode mode
= SImode
;
18666 if (TARGET_64BIT
&& (count
& ~0xffffffff))
18668 count_exp
= force_reg (mode
, count_exp
);
18671 /* Do the cheap promotion to allow better CSE across the
18672 main loop and epilogue (ie one load of the big constant in the
18673 front of all code. */
18674 if (CONST_INT_P (val_exp
))
18675 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
18676 desired_align
, align
);
18677 /* Ensure that alignment prologue won't copy past end of block. */
18678 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
18680 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
18681 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18682 Make sure it is power of 2. */
18683 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
18685 /* To improve performance of small blocks, we jump around the VAL
18686 promoting mode. This mean that if the promoted VAL is not constant,
18687 we might not use it in the epilogue and have to use byte
18689 if (epilogue_size_needed
> 2 && !promoted_val
)
18690 force_loopy_epilogue
= true;
18693 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
18695 /* If main algorithm works on QImode, no epilogue is needed.
18696 For small sizes just don't align anything. */
18697 if (size_needed
== 1)
18698 desired_align
= align
;
18705 label
= gen_label_rtx ();
18706 emit_cmp_and_jump_insns (count_exp
,
18707 GEN_INT (epilogue_size_needed
),
18708 LTU
, 0, counter_mode (count_exp
), 1, label
);
18709 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
18710 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
18712 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
18715 if (dynamic_check
!= -1)
18717 rtx hot_label
= gen_label_rtx ();
18718 jump_around_label
= gen_label_rtx ();
18719 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
18720 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
18721 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
18722 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
18723 emit_jump (jump_around_label
);
18724 emit_label (hot_label
);
18727 /* Step 2: Alignment prologue. */
18729 /* Do the expensive promotion once we branched off the small blocks. */
18731 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
18732 desired_align
, align
);
18733 gcc_assert (desired_align
>= 1 && align
>= 1);
18735 if (desired_align
> align
)
18737 if (align_bytes
== 0)
18739 /* Except for the first move in epilogue, we no longer know
18740 constant offset in aliasing info. It don't seems to worth
18741 the pain to maintain it for the first move, so throw away
18743 dst
= change_address (dst
, BLKmode
, destreg
);
18744 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
18749 /* If we know how many bytes need to be stored before dst is
18750 sufficiently aligned, maintain aliasing info accurately. */
18751 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
18752 desired_align
, align_bytes
);
18753 count_exp
= plus_constant (count_exp
, -align_bytes
);
18754 count
-= align_bytes
;
18756 if (need_zero_guard
18757 && (count
< (unsigned HOST_WIDE_INT
) size_needed
18758 || (align_bytes
== 0
18759 && count
< ((unsigned HOST_WIDE_INT
) size_needed
18760 + desired_align
- align
))))
18762 /* It is possible that we copied enough so the main loop will not
18764 gcc_assert (size_needed
> 1);
18765 if (label
== NULL_RTX
)
18766 label
= gen_label_rtx ();
18767 emit_cmp_and_jump_insns (count_exp
,
18768 GEN_INT (size_needed
),
18769 LTU
, 0, counter_mode (count_exp
), 1, label
);
18770 if (expected_size
== -1
18771 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
18772 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
18774 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
18777 if (label
&& size_needed
== 1)
18779 emit_label (label
);
18780 LABEL_NUSES (label
) = 1;
18782 promoted_val
= val_exp
;
18783 epilogue_size_needed
= 1;
18785 else if (label
== NULL_RTX
)
18786 epilogue_size_needed
= size_needed
;
18788 /* Step 3: Main loop. */
18794 gcc_unreachable ();
18796 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
18797 count_exp
, QImode
, 1, expected_size
);
18800 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
18801 count_exp
, Pmode
, 1, expected_size
);
18803 case unrolled_loop
:
18804 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
18805 count_exp
, Pmode
, 4, expected_size
);
18807 case rep_prefix_8_byte
:
18808 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
18811 case rep_prefix_4_byte
:
18812 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
18815 case rep_prefix_1_byte
:
18816 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
18820 /* Adjust properly the offset of src and dest memory for aliasing. */
18821 if (CONST_INT_P (count_exp
))
18822 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
18823 (count
/ size_needed
) * size_needed
);
18825 dst
= change_address (dst
, BLKmode
, destreg
);
18827 /* Step 4: Epilogue to copy the remaining bytes. */
18831 /* When the main loop is done, COUNT_EXP might hold original count,
18832 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18833 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18834 bytes. Compensate if needed. */
18836 if (size_needed
< epilogue_size_needed
)
18839 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
18840 GEN_INT (size_needed
- 1), count_exp
, 1,
18842 if (tmp
!= count_exp
)
18843 emit_move_insn (count_exp
, tmp
);
18845 emit_label (label
);
18846 LABEL_NUSES (label
) = 1;
18849 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
18851 if (force_loopy_epilogue
)
18852 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
18853 epilogue_size_needed
);
18855 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
18856 epilogue_size_needed
);
18858 if (jump_around_label
)
18859 emit_label (jump_around_label
);
18863 /* Expand the appropriate insns for doing strlen if not just doing
18866 out = result, initialized with the start address
18867 align_rtx = alignment of the address.
18868 scratch = scratch register, initialized with the startaddress when
18869 not aligned, otherwise undefined
18871 This is just the body. It needs the initializations mentioned above and
18872 some address computing at the end. These things are done in i386.md. */
18875 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
18879 rtx align_2_label
= NULL_RTX
;
18880 rtx align_3_label
= NULL_RTX
;
18881 rtx align_4_label
= gen_label_rtx ();
18882 rtx end_0_label
= gen_label_rtx ();
18884 rtx tmpreg
= gen_reg_rtx (SImode
);
18885 rtx scratch
= gen_reg_rtx (SImode
);
18889 if (CONST_INT_P (align_rtx
))
18890 align
= INTVAL (align_rtx
);
18892 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18894 /* Is there a known alignment and is it less than 4? */
18897 rtx scratch1
= gen_reg_rtx (Pmode
);
18898 emit_move_insn (scratch1
, out
);
18899 /* Is there a known alignment and is it not 2? */
18902 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
18903 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
18905 /* Leave just the 3 lower bits. */
18906 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
18907 NULL_RTX
, 0, OPTAB_WIDEN
);
18909 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
18910 Pmode
, 1, align_4_label
);
18911 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
18912 Pmode
, 1, align_2_label
);
18913 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
18914 Pmode
, 1, align_3_label
);
18918 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18919 check if is aligned to 4 - byte. */
18921 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
18922 NULL_RTX
, 0, OPTAB_WIDEN
);
18924 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
18925 Pmode
, 1, align_4_label
);
18928 mem
= change_address (src
, QImode
, out
);
18930 /* Now compare the bytes. */
18932 /* Compare the first n unaligned byte on a byte per byte basis. */
18933 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
18934 QImode
, 1, end_0_label
);
18936 /* Increment the address. */
18937 emit_insn ((*ix86_gen_add3
) (out
, out
, const1_rtx
));
18939 /* Not needed with an alignment of 2 */
18942 emit_label (align_2_label
);
18944 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
18947 emit_insn ((*ix86_gen_add3
) (out
, out
, const1_rtx
));
18949 emit_label (align_3_label
);
18952 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
18955 emit_insn ((*ix86_gen_add3
) (out
, out
, const1_rtx
));
18958 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18959 align this loop. It gives only huge programs, but does not help to
18961 emit_label (align_4_label
);
18963 mem
= change_address (src
, SImode
, out
);
18964 emit_move_insn (scratch
, mem
);
18965 emit_insn ((*ix86_gen_add3
) (out
, out
, GEN_INT (4)));
18967 /* This formula yields a nonzero result iff one of the bytes is zero.
18968 This saves three branches inside loop and many cycles. */
18970 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
18971 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
18972 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
18973 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
18974 gen_int_mode (0x80808080, SImode
)));
18975 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
18980 rtx reg
= gen_reg_rtx (SImode
);
18981 rtx reg2
= gen_reg_rtx (Pmode
);
18982 emit_move_insn (reg
, tmpreg
);
18983 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
18985 /* If zero is not in the first two bytes, move two bytes forward. */
18986 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
18987 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
18988 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
18989 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
18990 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
18993 /* Emit lea manually to avoid clobbering of flags. */
18994 emit_insn (gen_rtx_SET (SImode
, reg2
,
18995 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
18997 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
18998 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
18999 emit_insn (gen_rtx_SET (VOIDmode
, out
,
19000 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
19007 rtx end_2_label
= gen_label_rtx ();
19008 /* Is zero in the first two bytes? */
19010 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
19011 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
19012 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
19013 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19014 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
19016 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19017 JUMP_LABEL (tmp
) = end_2_label
;
19019 /* Not in the first two. Move two bytes forward. */
19020 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
19021 emit_insn ((*ix86_gen_add3
) (out
, out
, const2_rtx
));
19023 emit_label (end_2_label
);
19027 /* Avoid branch in fixing the byte. */
19028 tmpreg
= gen_lowpart (QImode
, tmpreg
);
19029 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
19030 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, FLAGS_REG
), const0_rtx
);
19031 emit_insn ((*ix86_gen_sub3_carry
) (out
, out
, GEN_INT (3), cmp
));
19033 emit_label (end_0_label
);
19036 /* Expand strlen. */
19039 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
19041 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
19043 /* The generic case of strlen expander is long. Avoid it's
19044 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
19046 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
19047 && !TARGET_INLINE_ALL_STRINGOPS
19048 && !optimize_insn_for_size_p ()
19049 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
19052 addr
= force_reg (Pmode
, XEXP (src
, 0));
19053 scratch1
= gen_reg_rtx (Pmode
);
19055 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
19056 && !optimize_insn_for_size_p ())
19058 /* Well it seems that some optimizer does not combine a call like
19059 foo(strlen(bar), strlen(bar));
19060 when the move and the subtraction is done here. It does calculate
19061 the length just once when these instructions are done inside of
19062 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19063 often used and I use one fewer register for the lifetime of
19064 output_strlen_unroll() this is better. */
19066 emit_move_insn (out
, addr
);
19068 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
19070 /* strlensi_unroll_1 returns the address of the zero at the end of
19071 the string, like memchr(), so compute the length by subtracting
19072 the start address. */
19073 emit_insn ((*ix86_gen_sub3
) (out
, out
, addr
));
19079 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19080 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
19083 scratch2
= gen_reg_rtx (Pmode
);
19084 scratch3
= gen_reg_rtx (Pmode
);
19085 scratch4
= force_reg (Pmode
, constm1_rtx
);
19087 emit_move_insn (scratch3
, addr
);
19088 eoschar
= force_reg (QImode
, eoschar
);
19090 src
= replace_equiv_address_nv (src
, scratch3
);
19092 /* If .md starts supporting :P, this can be done in .md. */
19093 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
19094 scratch4
), UNSPEC_SCAS
);
19095 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
19096 emit_insn ((*ix86_gen_one_cmpl2
) (scratch2
, scratch1
));
19097 emit_insn ((*ix86_gen_add3
) (out
, scratch2
, constm1_rtx
));
19102 /* For given symbol (function) construct code to compute address of it's PLT
19103 entry in large x86-64 PIC model. */
19105 construct_plt_address (rtx symbol
)
19107 rtx tmp
= gen_reg_rtx (Pmode
);
19108 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
19110 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
19111 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
19113 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
19114 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
19119 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
19121 rtx pop
, int sibcall
)
19123 rtx use
= NULL
, call
;
19125 if (pop
== const0_rtx
)
19127 gcc_assert (!TARGET_64BIT
|| !pop
);
19129 if (TARGET_MACHO
&& !TARGET_64BIT
)
19132 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
19133 fnaddr
= machopic_indirect_call_target (fnaddr
);
19138 /* Static functions and indirect calls don't need the pic register. */
19139 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
19140 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
19141 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
19142 use_reg (&use
, pic_offset_table_rtx
);
19145 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
19147 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
19148 emit_move_insn (al
, callarg2
);
19149 use_reg (&use
, al
);
19152 if (ix86_cmodel
== CM_LARGE_PIC
19154 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
19155 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
19156 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
19157 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
19159 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
19160 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
19162 if (sibcall
&& TARGET_64BIT
19163 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
19166 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
19167 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
19168 emit_move_insn (fnaddr
, addr
);
19169 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
19172 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
19174 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
19177 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
19178 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
19179 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
19182 && ix86_cfun_abi () == MS_ABI
19183 && (!callarg2
|| INTVAL (callarg2
) != -2))
19185 /* We need to represent that SI and DI registers are clobbered
19187 static int clobbered_registers
[] = {
19188 XMM6_REG
, XMM7_REG
, XMM8_REG
,
19189 XMM9_REG
, XMM10_REG
, XMM11_REG
,
19190 XMM12_REG
, XMM13_REG
, XMM14_REG
,
19191 XMM15_REG
, SI_REG
, DI_REG
19194 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 2];
19195 rtx unspec
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
19196 UNSPEC_MS_TO_SYSV_CALL
);
19200 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
19201 vec
[i
+ 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers
[i
])
19204 (SSE_REGNO_P (clobbered_registers
[i
])
19206 clobbered_registers
[i
]));
19208 call
= gen_rtx_PARALLEL (VOIDmode
,
19209 gen_rtvec_v (ARRAY_SIZE (clobbered_registers
)
19213 call
= emit_call_insn (call
);
19215 CALL_INSN_FUNCTION_USAGE (call
) = use
;
19219 /* Clear stack slot assignments remembered from previous functions.
19220 This is called from INIT_EXPANDERS once before RTL is emitted for each
19223 static struct machine_function
*
19224 ix86_init_machine_status (void)
19226 struct machine_function
*f
;
19228 f
= GGC_CNEW (struct machine_function
);
19229 f
->use_fast_prologue_epilogue_nregs
= -1;
19230 f
->tls_descriptor_call_expanded_p
= 0;
19231 f
->call_abi
= ix86_abi
;
19236 /* Return a MEM corresponding to a stack slot with mode MODE.
19237 Allocate a new slot if necessary.
19239 The RTL for a function can have several slots available: N is
19240 which slot to use. */
19243 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
19245 struct stack_local_entry
*s
;
19247 gcc_assert (n
< MAX_386_STACK_LOCALS
);
19249 /* Virtual slot is valid only before vregs are instantiated. */
19250 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
19252 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
19253 if (s
->mode
== mode
&& s
->n
== n
)
19254 return copy_rtx (s
->rtl
);
19256 s
= (struct stack_local_entry
*)
19257 ggc_alloc (sizeof (struct stack_local_entry
));
19260 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
19262 s
->next
= ix86_stack_locals
;
19263 ix86_stack_locals
= s
;
19267 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19269 static GTY(()) rtx ix86_tls_symbol
;
19271 ix86_tls_get_addr (void)
19274 if (!ix86_tls_symbol
)
19276 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
19277 (TARGET_ANY_GNU_TLS
19279 ? "___tls_get_addr"
19280 : "__tls_get_addr");
19283 return ix86_tls_symbol
;
19286 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19288 static GTY(()) rtx ix86_tls_module_base_symbol
;
19290 ix86_tls_module_base (void)
19293 if (!ix86_tls_module_base_symbol
)
19295 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
19296 "_TLS_MODULE_BASE_");
19297 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
19298 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
19301 return ix86_tls_module_base_symbol
;
19304 /* Calculate the length of the memory address in the instruction
19305 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19308 memory_address_length (rtx addr
)
19310 struct ix86_address parts
;
19311 rtx base
, index
, disp
;
19315 if (GET_CODE (addr
) == PRE_DEC
19316 || GET_CODE (addr
) == POST_INC
19317 || GET_CODE (addr
) == PRE_MODIFY
19318 || GET_CODE (addr
) == POST_MODIFY
)
19321 ok
= ix86_decompose_address (addr
, &parts
);
19324 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
19325 parts
.base
= SUBREG_REG (parts
.base
);
19326 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
19327 parts
.index
= SUBREG_REG (parts
.index
);
19330 index
= parts
.index
;
19335 - esp as the base always wants an index,
19336 - ebp as the base always wants a displacement,
19337 - r12 as the base always wants an index,
19338 - r13 as the base always wants a displacement. */
19340 /* Register Indirect. */
19341 if (base
&& !index
&& !disp
)
19343 /* esp (for its index) and ebp (for its displacement) need
19344 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
19347 && (addr
== arg_pointer_rtx
19348 || addr
== frame_pointer_rtx
19349 || REGNO (addr
) == SP_REG
19350 || REGNO (addr
) == BP_REG
19351 || REGNO (addr
) == R12_REG
19352 || REGNO (addr
) == R13_REG
))
19356 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
19357 is not disp32, but disp32(%rip), so for disp32
19358 SIB byte is needed, unless print_operand_address
19359 optimizes it into disp32(%rip) or (%rip) is implied
19361 else if (disp
&& !base
&& !index
)
19368 if (GET_CODE (disp
) == CONST
)
19369 symbol
= XEXP (disp
, 0);
19370 if (GET_CODE (symbol
) == PLUS
19371 && CONST_INT_P (XEXP (symbol
, 1)))
19372 symbol
= XEXP (symbol
, 0);
19374 if (GET_CODE (symbol
) != LABEL_REF
19375 && (GET_CODE (symbol
) != SYMBOL_REF
19376 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
19377 && (GET_CODE (symbol
) != UNSPEC
19378 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
19379 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
19386 /* Find the length of the displacement constant. */
19389 if (base
&& satisfies_constraint_K (disp
))
19394 /* ebp always wants a displacement. Similarly r13. */
19395 else if (REG_P (base
)
19396 && (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
19399 /* An index requires the two-byte modrm form.... */
19401 /* ...like esp (or r12), which always wants an index. */
19402 || base
== arg_pointer_rtx
19403 || base
== frame_pointer_rtx
19405 && (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
19422 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19423 is set, expect that insn have 8bit immediate alternative. */
19425 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
19429 extract_insn_cached (insn
);
19430 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
19431 if (CONSTANT_P (recog_data
.operand
[i
]))
19433 enum attr_mode mode
= get_attr_mode (insn
);
19436 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
19438 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
19445 ival
= trunc_int_for_mode (ival
, HImode
);
19448 ival
= trunc_int_for_mode (ival
, SImode
);
19453 if (IN_RANGE (ival
, -128, 127))
19470 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19475 fatal_insn ("unknown insn mode", insn
);
19480 /* Compute default value for "length_address" attribute. */
19482 ix86_attr_length_address_default (rtx insn
)
19486 if (get_attr_type (insn
) == TYPE_LEA
)
19488 rtx set
= PATTERN (insn
), addr
;
19490 if (GET_CODE (set
) == PARALLEL
)
19491 set
= XVECEXP (set
, 0, 0);
19493 gcc_assert (GET_CODE (set
) == SET
);
19495 addr
= SET_SRC (set
);
19496 if (TARGET_64BIT
&& get_attr_mode (insn
) == MODE_SI
)
19498 if (GET_CODE (addr
) == ZERO_EXTEND
)
19499 addr
= XEXP (addr
, 0);
19500 if (GET_CODE (addr
) == SUBREG
)
19501 addr
= SUBREG_REG (addr
);
19504 return memory_address_length (addr
);
19507 extract_insn_cached (insn
);
19508 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
19509 if (MEM_P (recog_data
.operand
[i
]))
19511 constrain_operands_cached (reload_completed
);
19512 if (which_alternative
!= -1)
19514 const char *constraints
= recog_data
.constraints
[i
];
19515 int alt
= which_alternative
;
19517 while (*constraints
== '=' || *constraints
== '+')
19520 while (*constraints
++ != ',')
19522 /* Skip ignored operands. */
19523 if (*constraints
== 'X')
19526 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
19531 /* Compute default value for "length_vex" attribute. It includes
19532 2 or 3 byte VEX prefix and 1 opcode byte. */
19535 ix86_attr_length_vex_default (rtx insn
, int has_0f_opcode
,
19540 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19541 byte VEX prefix. */
19542 if (!has_0f_opcode
|| has_vex_w
)
19545 /* We can always use 2 byte VEX prefix in 32bit. */
19549 extract_insn_cached (insn
);
19551 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
19552 if (REG_P (recog_data
.operand
[i
]))
19554 /* REX.W bit uses 3 byte VEX prefix. */
19555 if (GET_MODE (recog_data
.operand
[i
]) == DImode
19556 && GENERAL_REG_P (recog_data
.operand
[i
]))
19561 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19562 if (MEM_P (recog_data
.operand
[i
])
19563 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
19570 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): extraction artifact — the switch skeleton (switch (ix86_tune),
   braces, return statements and the default case) was dropped; only the case
   labels survived.  The grouping below presumably maps each group of
   processors to one issue-rate return value — recover the exact values from
   the original file before editing.  */
19573 ix86_issue_rate (void)
19577 case PROCESSOR_PENTIUM
:
19578 case PROCESSOR_ATOM
:
/* NOTE(review): a return for the group above was dropped here.  */
19582 case PROCESSOR_PENTIUMPRO
:
19583 case PROCESSOR_PENTIUM4
:
19584 case PROCESSOR_ATHLON
:
19586 case PROCESSOR_AMDFAM10
:
19587 case PROCESSOR_NOCONA
:
19588 case PROCESSOR_GENERIC32
:
19589 case PROCESSOR_GENERIC64
:
/* NOTE(review): a return for the group above was dropped here.  */
19592 case PROCESSOR_CORE2
:
19600 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19601 by DEP_INSN and nothing set by DEP_INSN. */
19604 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
19608 /* Simplify the test for uninteresting insns. */
19609 if (insn_type
!= TYPE_SETCC
19610 && insn_type
!= TYPE_ICMOV
19611 && insn_type
!= TYPE_FCMOV
19612 && insn_type
!= TYPE_IBR
)
19615 if ((set
= single_set (dep_insn
)) != 0)
19617 set
= SET_DEST (set
);
19620 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
19621 && XVECLEN (PATTERN (dep_insn
), 0) == 2
19622 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
19623 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
19625 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
19626 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
19631 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
19634 /* This test is true if the dependent insn reads the flags but
19635 not any other potentially set register. */
19636 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
19639 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
19645 /* Return true iff USE_INSN has a memory address with operands set by
19649 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
19652 extract_insn_cached (use_insn
);
19653 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
19654 if (MEM_P (recog_data
.operand
[i
]))
19656 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
19657 return modified_in_p (addr
, set_insn
) != 0;
/* NOTE(review): extraction artifact — statements are split across lines and
   many original lines were dropped (the switch (ix86_tune) header, brace
   structure, cost adjustments and return statements).  This is the scheduler
   cost hook: it adjusts the latency COST of the dependence LINK between
   DEP_INSN and INSN per tuned processor.  Recover the dropped lines from the
   original file before editing.  */
19663 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
19665 enum attr_type insn_type
, dep_insn_type
;
19666 enum attr_memory memory
;
19668 int dep_insn_code_number
;
19670 /* Anti and output dependencies have zero cost on all CPUs. */
19671 if (REG_NOTE_KIND (link
) != 0)
19674 dep_insn_code_number
= recog_memoized (dep_insn
);
19676 /* If we can't recognize the insns, we can't really do anything. */
19677 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
19680 insn_type
= get_attr_type (insn
);
19681 dep_insn_type
= get_attr_type (dep_insn
);
/* NOTE(review): the switch (ix86_tune) header was dropped before the case
   labels below.  */
19685 case PROCESSOR_PENTIUM
:
19686 /* Address Generation Interlock adds a cycle of latency. */
19687 if (insn_type
== TYPE_LEA
)
19689 rtx addr
= PATTERN (insn
);
19691 if (GET_CODE (addr
) == PARALLEL
)
19692 addr
= XVECEXP (addr
, 0, 0);
19694 gcc_assert (GET_CODE (addr
) == SET
);
19696 addr
= SET_SRC (addr
);
19697 if (modified_in_p (addr
, dep_insn
))
19700 else if (ix86_agi_dependent (dep_insn
, insn
))
19703 /* ??? Compares pair with jump/setcc. */
19704 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
19707 /* Floating point stores require value to be ready one cycle earlier. */
19708 if (insn_type
== TYPE_FMOV
19709 && get_attr_memory (insn
) == MEMORY_STORE
19710 && !ix86_agi_dependent (dep_insn
, insn
))
19714 case PROCESSOR_PENTIUMPRO
:
19715 memory
= get_attr_memory (insn
);
19717 /* INT->FP conversion is expensive. */
19718 if (get_attr_fp_int_src (dep_insn
))
19721 /* There is one cycle extra latency between an FP op and a store. */
19722 if (insn_type
== TYPE_FMOV
19723 && (set
= single_set (dep_insn
)) != NULL_RTX
19724 && (set2
= single_set (insn
)) != NULL_RTX
19725 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
19726 && MEM_P (SET_DEST (set2
)))
19729 /* Show ability of reorder buffer to hide latency of load by executing
19730 in parallel with previous instruction in case
19731 previous instruction is not needed to compute the address. */
19732 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
19733 && !ix86_agi_dependent (dep_insn
, insn
))
19735 /* Claim moves to take one cycle, as core can issue one load
19736 at time and the next load can start cycle later. */
19737 if (dep_insn_type
== TYPE_IMOV
19738 || dep_insn_type
== TYPE_FMOV
)
/* NOTE(review): one or more case labels (presumably K6-class processors)
   were dropped before the statement below.  */
19746 memory
= get_attr_memory (insn
);
19748 /* The esp dependency is resolved before the instruction is really
19750 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
19751 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
19754 /* INT->FP conversion is expensive. */
19755 if (get_attr_fp_int_src (dep_insn
))
19758 /* Show ability of reorder buffer to hide latency of load by executing
19759 in parallel with previous instruction in case
19760 previous instruction is not needed to compute the address. */
19761 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
19762 && !ix86_agi_dependent (dep_insn
, insn
))
19764 /* Claim moves to take one cycle, as core can issue one load
19765 at time and the next load can start cycle later. */
19766 if (dep_insn_type
== TYPE_IMOV
19767 || dep_insn_type
== TYPE_FMOV
)
19776 case PROCESSOR_ATHLON
:
19778 case PROCESSOR_AMDFAM10
:
19779 case PROCESSOR_ATOM
:
19780 case PROCESSOR_GENERIC32
:
19781 case PROCESSOR_GENERIC64
:
19782 memory
= get_attr_memory (insn
);
19784 /* Show ability of reorder buffer to hide latency of load by executing
19785 in parallel with previous instruction in case
19786 previous instruction is not needed to compute the address. */
19787 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
19788 && !ix86_agi_dependent (dep_insn
, insn
))
19790 enum attr_unit unit
= get_attr_unit (insn
);
19793 /* Because of the difference between the length of integer and
19794 floating unit pipeline preparation stages, the memory operands
19795 for floating point are cheaper.
19797 ??? For Athlon it the difference is most probably 2. */
19798 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
19801 loadcost
= TARGET_ATHLON
? 2 : 0;
19803 if (cost
>= loadcost
)
19816 /* How many alternative schedules to try. This should be as wide as the
19817 scheduling freedom in the DFA, but no wider. Making this value too
19818 large results extra work for the scheduler. */
/* NOTE(review): extraction artifact — the switch (ix86_tune) skeleton and
   the return values for each case (and the default) were dropped; only the
   case labels survived.  */
19821 ia32_multipass_dfa_lookahead (void)
19825 case PROCESSOR_PENTIUM
:
19828 case PROCESSOR_PENTIUMPRO
:
19838 /* Compute the alignment given to a constant that is being placed in memory.
19839 EXP is the constant and ALIGN is the alignment that the object would
19841 The value of this function is used instead of that alignment to align
19845 ix86_constant_alignment (tree exp
, int align
)
19847 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
19848 || TREE_CODE (exp
) == INTEGER_CST
)
19850 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
19852 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
19855 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
19856 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
19857 return BITS_PER_WORD
;
19862 /* Compute the alignment for a static variable.
19863 TYPE is the data type, and ALIGN is the alignment that
19864 the object would ordinarily have. The value of this function is used
19865 instead of that alignment to align the object. */
19868 ix86_data_alignment (tree type
, int align
)
19870 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
19872 if (AGGREGATE_TYPE_P (type
)
19873 && TYPE_SIZE (type
)
19874 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
19875 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
19876 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
19877 && align
< max_align
)
19880 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19881 to 16byte boundary. */
19884 if (AGGREGATE_TYPE_P (type
)
19885 && TYPE_SIZE (type
)
19886 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
19887 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
19888 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
19892 if (TREE_CODE (type
) == ARRAY_TYPE
)
19894 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
19896 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
19899 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
19902 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
19904 if ((TYPE_MODE (type
) == XCmode
19905 || TYPE_MODE (type
) == TCmode
) && align
< 128)
19908 else if ((TREE_CODE (type
) == RECORD_TYPE
19909 || TREE_CODE (type
) == UNION_TYPE
19910 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
19911 && TYPE_FIELDS (type
))
19913 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
19915 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
19918 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
19919 || TREE_CODE (type
) == INTEGER_TYPE
)
19921 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
19923 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
19930 /* Compute the alignment for a local variable or a stack slot. EXP is
19931 the data type or decl itself, MODE is the widest mode available and
19932 ALIGN is the alignment that the object would ordinarily have. The
19933 value of this macro is used instead of that alignment to align the
/* NOTE(review): extraction artifact — statements are split across lines and
   several original lines were dropped (the else-branch that sets type/decl
   when EXP is a bare type, the leading conditions of the
   -mpreferred-stack-boundary guard, the TARGET_64BIT guard, and the return
   statements).  Recover them from the original file before editing.  */
19937 ix86_local_alignment (tree exp
, enum machine_mode mode
,
19938 unsigned int align
)
19942 if (exp
&& DECL_P (exp
))
19944 type
= TREE_TYPE (exp
);
19953 /* Don't do dynamic stack realignment for long long objects with
19954 -mpreferred-stack-boundary=2. */
19957 && ix86_preferred_stack_boundary
< 64
19958 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
19959 && (!type
|| !TYPE_USER_ALIGN (type
))
19960 && (!decl
|| !DECL_USER_ALIGN (decl
)))
19963 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19964 register in MODE. We will return the largest alignment of XF
19968 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
19969 align
= GET_MODE_ALIGNMENT (DFmode
);
19973 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19974 to 16byte boundary. */
19977 if (AGGREGATE_TYPE_P (type
)
19978 && TYPE_SIZE (type
)
19979 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
19980 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
19981 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
19984 if (TREE_CODE (type
) == ARRAY_TYPE
)
19986 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
19988 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
19991 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
19993 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
19995 if ((TYPE_MODE (type
) == XCmode
19996 || TYPE_MODE (type
) == TCmode
) && align
< 128)
19999 else if ((TREE_CODE (type
) == RECORD_TYPE
20000 || TREE_CODE (type
) == UNION_TYPE
20001 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
20002 && TYPE_FIELDS (type
))
20004 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
20006 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
20009 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
20010 || TREE_CODE (type
) == INTEGER_TYPE
)
20013 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
20015 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
20021 /* Emit RTL insns to initialize the variable parts of a trampoline.
20022 FNADDR is an RTX for the address of the function's pure code.
20023 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): extraction artifact — the declaration/initialization of
   `offset`, the if (!TARGET_64BIT)/else branch structure, the offset
   increments between the stores, and the second operands of some stores were
   dropped.  The 32-bit path emits "movl $cxt,%ecx; jmp fnaddr" (opcodes 0xb9
   and 0xe9 below); the 64-bit path builds mov-to-r11/r10 plus jmp *%r11
   (0xbb41/0xbb49, 0xba49, 0xff49/0xe3).  Recover the dropped lines from the
   original file before editing.  */
20025 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
20029 /* Compute offset from the end of the jmp to the target function. */
20030 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
20031 plus_constant (tramp
, 10),
20032 NULL_RTX
, 1, OPTAB_DIRECT
);
20033 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
20034 gen_int_mode (0xb9, QImode
));
20035 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
20036 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
20037 gen_int_mode (0xe9, QImode
));
20038 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
20043 /* Try to load address using shorter movl instead of movabs.
20044 We may want to support movq for kernel mode, but kernel does not use
20045 trampolines at the moment. */
20046 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
20048 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
20049 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
20050 gen_int_mode (0xbb41, HImode
));
20051 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
20052 gen_lowpart (SImode
, fnaddr
));
20057 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
20058 gen_int_mode (0xbb49, HImode
));
20059 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
20063 /* Load static chain using movabs to r10. */
20064 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
20065 gen_int_mode (0xba49, HImode
));
20066 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
20069 /* Jump to the r11 */
20070 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
20071 gen_int_mode (0xff49, HImode
));
20072 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
20073 gen_int_mode (0xe3, QImode
));
20075 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
20078 #ifdef ENABLE_EXECUTE_STACK
20079 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
20080 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
20084 /* Codes for all the SSE/MMX builtins. */
20087 IX86_BUILTIN_ADDPS
,
20088 IX86_BUILTIN_ADDSS
,
20089 IX86_BUILTIN_DIVPS
,
20090 IX86_BUILTIN_DIVSS
,
20091 IX86_BUILTIN_MULPS
,
20092 IX86_BUILTIN_MULSS
,
20093 IX86_BUILTIN_SUBPS
,
20094 IX86_BUILTIN_SUBSS
,
20096 IX86_BUILTIN_CMPEQPS
,
20097 IX86_BUILTIN_CMPLTPS
,
20098 IX86_BUILTIN_CMPLEPS
,
20099 IX86_BUILTIN_CMPGTPS
,
20100 IX86_BUILTIN_CMPGEPS
,
20101 IX86_BUILTIN_CMPNEQPS
,
20102 IX86_BUILTIN_CMPNLTPS
,
20103 IX86_BUILTIN_CMPNLEPS
,
20104 IX86_BUILTIN_CMPNGTPS
,
20105 IX86_BUILTIN_CMPNGEPS
,
20106 IX86_BUILTIN_CMPORDPS
,
20107 IX86_BUILTIN_CMPUNORDPS
,
20108 IX86_BUILTIN_CMPEQSS
,
20109 IX86_BUILTIN_CMPLTSS
,
20110 IX86_BUILTIN_CMPLESS
,
20111 IX86_BUILTIN_CMPNEQSS
,
20112 IX86_BUILTIN_CMPNLTSS
,
20113 IX86_BUILTIN_CMPNLESS
,
20114 IX86_BUILTIN_CMPNGTSS
,
20115 IX86_BUILTIN_CMPNGESS
,
20116 IX86_BUILTIN_CMPORDSS
,
20117 IX86_BUILTIN_CMPUNORDSS
,
20119 IX86_BUILTIN_COMIEQSS
,
20120 IX86_BUILTIN_COMILTSS
,
20121 IX86_BUILTIN_COMILESS
,
20122 IX86_BUILTIN_COMIGTSS
,
20123 IX86_BUILTIN_COMIGESS
,
20124 IX86_BUILTIN_COMINEQSS
,
20125 IX86_BUILTIN_UCOMIEQSS
,
20126 IX86_BUILTIN_UCOMILTSS
,
20127 IX86_BUILTIN_UCOMILESS
,
20128 IX86_BUILTIN_UCOMIGTSS
,
20129 IX86_BUILTIN_UCOMIGESS
,
20130 IX86_BUILTIN_UCOMINEQSS
,
20132 IX86_BUILTIN_CVTPI2PS
,
20133 IX86_BUILTIN_CVTPS2PI
,
20134 IX86_BUILTIN_CVTSI2SS
,
20135 IX86_BUILTIN_CVTSI642SS
,
20136 IX86_BUILTIN_CVTSS2SI
,
20137 IX86_BUILTIN_CVTSS2SI64
,
20138 IX86_BUILTIN_CVTTPS2PI
,
20139 IX86_BUILTIN_CVTTSS2SI
,
20140 IX86_BUILTIN_CVTTSS2SI64
,
20142 IX86_BUILTIN_MAXPS
,
20143 IX86_BUILTIN_MAXSS
,
20144 IX86_BUILTIN_MINPS
,
20145 IX86_BUILTIN_MINSS
,
20147 IX86_BUILTIN_LOADUPS
,
20148 IX86_BUILTIN_STOREUPS
,
20149 IX86_BUILTIN_MOVSS
,
20151 IX86_BUILTIN_MOVHLPS
,
20152 IX86_BUILTIN_MOVLHPS
,
20153 IX86_BUILTIN_LOADHPS
,
20154 IX86_BUILTIN_LOADLPS
,
20155 IX86_BUILTIN_STOREHPS
,
20156 IX86_BUILTIN_STORELPS
,
20158 IX86_BUILTIN_MASKMOVQ
,
20159 IX86_BUILTIN_MOVMSKPS
,
20160 IX86_BUILTIN_PMOVMSKB
,
20162 IX86_BUILTIN_MOVNTPS
,
20163 IX86_BUILTIN_MOVNTQ
,
20165 IX86_BUILTIN_LOADDQU
,
20166 IX86_BUILTIN_STOREDQU
,
20168 IX86_BUILTIN_PACKSSWB
,
20169 IX86_BUILTIN_PACKSSDW
,
20170 IX86_BUILTIN_PACKUSWB
,
20172 IX86_BUILTIN_PADDB
,
20173 IX86_BUILTIN_PADDW
,
20174 IX86_BUILTIN_PADDD
,
20175 IX86_BUILTIN_PADDQ
,
20176 IX86_BUILTIN_PADDSB
,
20177 IX86_BUILTIN_PADDSW
,
20178 IX86_BUILTIN_PADDUSB
,
20179 IX86_BUILTIN_PADDUSW
,
20180 IX86_BUILTIN_PSUBB
,
20181 IX86_BUILTIN_PSUBW
,
20182 IX86_BUILTIN_PSUBD
,
20183 IX86_BUILTIN_PSUBQ
,
20184 IX86_BUILTIN_PSUBSB
,
20185 IX86_BUILTIN_PSUBSW
,
20186 IX86_BUILTIN_PSUBUSB
,
20187 IX86_BUILTIN_PSUBUSW
,
20190 IX86_BUILTIN_PANDN
,
20194 IX86_BUILTIN_PAVGB
,
20195 IX86_BUILTIN_PAVGW
,
20197 IX86_BUILTIN_PCMPEQB
,
20198 IX86_BUILTIN_PCMPEQW
,
20199 IX86_BUILTIN_PCMPEQD
,
20200 IX86_BUILTIN_PCMPGTB
,
20201 IX86_BUILTIN_PCMPGTW
,
20202 IX86_BUILTIN_PCMPGTD
,
20204 IX86_BUILTIN_PMADDWD
,
20206 IX86_BUILTIN_PMAXSW
,
20207 IX86_BUILTIN_PMAXUB
,
20208 IX86_BUILTIN_PMINSW
,
20209 IX86_BUILTIN_PMINUB
,
20211 IX86_BUILTIN_PMULHUW
,
20212 IX86_BUILTIN_PMULHW
,
20213 IX86_BUILTIN_PMULLW
,
20215 IX86_BUILTIN_PSADBW
,
20216 IX86_BUILTIN_PSHUFW
,
20218 IX86_BUILTIN_PSLLW
,
20219 IX86_BUILTIN_PSLLD
,
20220 IX86_BUILTIN_PSLLQ
,
20221 IX86_BUILTIN_PSRAW
,
20222 IX86_BUILTIN_PSRAD
,
20223 IX86_BUILTIN_PSRLW
,
20224 IX86_BUILTIN_PSRLD
,
20225 IX86_BUILTIN_PSRLQ
,
20226 IX86_BUILTIN_PSLLWI
,
20227 IX86_BUILTIN_PSLLDI
,
20228 IX86_BUILTIN_PSLLQI
,
20229 IX86_BUILTIN_PSRAWI
,
20230 IX86_BUILTIN_PSRADI
,
20231 IX86_BUILTIN_PSRLWI
,
20232 IX86_BUILTIN_PSRLDI
,
20233 IX86_BUILTIN_PSRLQI
,
20235 IX86_BUILTIN_PUNPCKHBW
,
20236 IX86_BUILTIN_PUNPCKHWD
,
20237 IX86_BUILTIN_PUNPCKHDQ
,
20238 IX86_BUILTIN_PUNPCKLBW
,
20239 IX86_BUILTIN_PUNPCKLWD
,
20240 IX86_BUILTIN_PUNPCKLDQ
,
20242 IX86_BUILTIN_SHUFPS
,
20244 IX86_BUILTIN_RCPPS
,
20245 IX86_BUILTIN_RCPSS
,
20246 IX86_BUILTIN_RSQRTPS
,
20247 IX86_BUILTIN_RSQRTPS_NR
,
20248 IX86_BUILTIN_RSQRTSS
,
20249 IX86_BUILTIN_RSQRTF
,
20250 IX86_BUILTIN_SQRTPS
,
20251 IX86_BUILTIN_SQRTPS_NR
,
20252 IX86_BUILTIN_SQRTSS
,
20254 IX86_BUILTIN_UNPCKHPS
,
20255 IX86_BUILTIN_UNPCKLPS
,
20257 IX86_BUILTIN_ANDPS
,
20258 IX86_BUILTIN_ANDNPS
,
20260 IX86_BUILTIN_XORPS
,
20263 IX86_BUILTIN_LDMXCSR
,
20264 IX86_BUILTIN_STMXCSR
,
20265 IX86_BUILTIN_SFENCE
,
20267 /* 3DNow! Original */
20268 IX86_BUILTIN_FEMMS
,
20269 IX86_BUILTIN_PAVGUSB
,
20270 IX86_BUILTIN_PF2ID
,
20271 IX86_BUILTIN_PFACC
,
20272 IX86_BUILTIN_PFADD
,
20273 IX86_BUILTIN_PFCMPEQ
,
20274 IX86_BUILTIN_PFCMPGE
,
20275 IX86_BUILTIN_PFCMPGT
,
20276 IX86_BUILTIN_PFMAX
,
20277 IX86_BUILTIN_PFMIN
,
20278 IX86_BUILTIN_PFMUL
,
20279 IX86_BUILTIN_PFRCP
,
20280 IX86_BUILTIN_PFRCPIT1
,
20281 IX86_BUILTIN_PFRCPIT2
,
20282 IX86_BUILTIN_PFRSQIT1
,
20283 IX86_BUILTIN_PFRSQRT
,
20284 IX86_BUILTIN_PFSUB
,
20285 IX86_BUILTIN_PFSUBR
,
20286 IX86_BUILTIN_PI2FD
,
20287 IX86_BUILTIN_PMULHRW
,
20289 /* 3DNow! Athlon Extensions */
20290 IX86_BUILTIN_PF2IW
,
20291 IX86_BUILTIN_PFNACC
,
20292 IX86_BUILTIN_PFPNACC
,
20293 IX86_BUILTIN_PI2FW
,
20294 IX86_BUILTIN_PSWAPDSI
,
20295 IX86_BUILTIN_PSWAPDSF
,
20298 IX86_BUILTIN_ADDPD
,
20299 IX86_BUILTIN_ADDSD
,
20300 IX86_BUILTIN_DIVPD
,
20301 IX86_BUILTIN_DIVSD
,
20302 IX86_BUILTIN_MULPD
,
20303 IX86_BUILTIN_MULSD
,
20304 IX86_BUILTIN_SUBPD
,
20305 IX86_BUILTIN_SUBSD
,
20307 IX86_BUILTIN_CMPEQPD
,
20308 IX86_BUILTIN_CMPLTPD
,
20309 IX86_BUILTIN_CMPLEPD
,
20310 IX86_BUILTIN_CMPGTPD
,
20311 IX86_BUILTIN_CMPGEPD
,
20312 IX86_BUILTIN_CMPNEQPD
,
20313 IX86_BUILTIN_CMPNLTPD
,
20314 IX86_BUILTIN_CMPNLEPD
,
20315 IX86_BUILTIN_CMPNGTPD
,
20316 IX86_BUILTIN_CMPNGEPD
,
20317 IX86_BUILTIN_CMPORDPD
,
20318 IX86_BUILTIN_CMPUNORDPD
,
20319 IX86_BUILTIN_CMPEQSD
,
20320 IX86_BUILTIN_CMPLTSD
,
20321 IX86_BUILTIN_CMPLESD
,
20322 IX86_BUILTIN_CMPNEQSD
,
20323 IX86_BUILTIN_CMPNLTSD
,
20324 IX86_BUILTIN_CMPNLESD
,
20325 IX86_BUILTIN_CMPORDSD
,
20326 IX86_BUILTIN_CMPUNORDSD
,
20328 IX86_BUILTIN_COMIEQSD
,
20329 IX86_BUILTIN_COMILTSD
,
20330 IX86_BUILTIN_COMILESD
,
20331 IX86_BUILTIN_COMIGTSD
,
20332 IX86_BUILTIN_COMIGESD
,
20333 IX86_BUILTIN_COMINEQSD
,
20334 IX86_BUILTIN_UCOMIEQSD
,
20335 IX86_BUILTIN_UCOMILTSD
,
20336 IX86_BUILTIN_UCOMILESD
,
20337 IX86_BUILTIN_UCOMIGTSD
,
20338 IX86_BUILTIN_UCOMIGESD
,
20339 IX86_BUILTIN_UCOMINEQSD
,
20341 IX86_BUILTIN_MAXPD
,
20342 IX86_BUILTIN_MAXSD
,
20343 IX86_BUILTIN_MINPD
,
20344 IX86_BUILTIN_MINSD
,
20346 IX86_BUILTIN_ANDPD
,
20347 IX86_BUILTIN_ANDNPD
,
20349 IX86_BUILTIN_XORPD
,
20351 IX86_BUILTIN_SQRTPD
,
20352 IX86_BUILTIN_SQRTSD
,
20354 IX86_BUILTIN_UNPCKHPD
,
20355 IX86_BUILTIN_UNPCKLPD
,
20357 IX86_BUILTIN_SHUFPD
,
20359 IX86_BUILTIN_LOADUPD
,
20360 IX86_BUILTIN_STOREUPD
,
20361 IX86_BUILTIN_MOVSD
,
20363 IX86_BUILTIN_LOADHPD
,
20364 IX86_BUILTIN_LOADLPD
,
20366 IX86_BUILTIN_CVTDQ2PD
,
20367 IX86_BUILTIN_CVTDQ2PS
,
20369 IX86_BUILTIN_CVTPD2DQ
,
20370 IX86_BUILTIN_CVTPD2PI
,
20371 IX86_BUILTIN_CVTPD2PS
,
20372 IX86_BUILTIN_CVTTPD2DQ
,
20373 IX86_BUILTIN_CVTTPD2PI
,
20375 IX86_BUILTIN_CVTPI2PD
,
20376 IX86_BUILTIN_CVTSI2SD
,
20377 IX86_BUILTIN_CVTSI642SD
,
20379 IX86_BUILTIN_CVTSD2SI
,
20380 IX86_BUILTIN_CVTSD2SI64
,
20381 IX86_BUILTIN_CVTSD2SS
,
20382 IX86_BUILTIN_CVTSS2SD
,
20383 IX86_BUILTIN_CVTTSD2SI
,
20384 IX86_BUILTIN_CVTTSD2SI64
,
20386 IX86_BUILTIN_CVTPS2DQ
,
20387 IX86_BUILTIN_CVTPS2PD
,
20388 IX86_BUILTIN_CVTTPS2DQ
,
20390 IX86_BUILTIN_MOVNTI
,
20391 IX86_BUILTIN_MOVNTPD
,
20392 IX86_BUILTIN_MOVNTDQ
,
20394 IX86_BUILTIN_MOVQ128
,
20397 IX86_BUILTIN_MASKMOVDQU
,
20398 IX86_BUILTIN_MOVMSKPD
,
20399 IX86_BUILTIN_PMOVMSKB128
,
20401 IX86_BUILTIN_PACKSSWB128
,
20402 IX86_BUILTIN_PACKSSDW128
,
20403 IX86_BUILTIN_PACKUSWB128
,
20405 IX86_BUILTIN_PADDB128
,
20406 IX86_BUILTIN_PADDW128
,
20407 IX86_BUILTIN_PADDD128
,
20408 IX86_BUILTIN_PADDQ128
,
20409 IX86_BUILTIN_PADDSB128
,
20410 IX86_BUILTIN_PADDSW128
,
20411 IX86_BUILTIN_PADDUSB128
,
20412 IX86_BUILTIN_PADDUSW128
,
20413 IX86_BUILTIN_PSUBB128
,
20414 IX86_BUILTIN_PSUBW128
,
20415 IX86_BUILTIN_PSUBD128
,
20416 IX86_BUILTIN_PSUBQ128
,
20417 IX86_BUILTIN_PSUBSB128
,
20418 IX86_BUILTIN_PSUBSW128
,
20419 IX86_BUILTIN_PSUBUSB128
,
20420 IX86_BUILTIN_PSUBUSW128
,
20422 IX86_BUILTIN_PAND128
,
20423 IX86_BUILTIN_PANDN128
,
20424 IX86_BUILTIN_POR128
,
20425 IX86_BUILTIN_PXOR128
,
20427 IX86_BUILTIN_PAVGB128
,
20428 IX86_BUILTIN_PAVGW128
,
20430 IX86_BUILTIN_PCMPEQB128
,
20431 IX86_BUILTIN_PCMPEQW128
,
20432 IX86_BUILTIN_PCMPEQD128
,
20433 IX86_BUILTIN_PCMPGTB128
,
20434 IX86_BUILTIN_PCMPGTW128
,
20435 IX86_BUILTIN_PCMPGTD128
,
20437 IX86_BUILTIN_PMADDWD128
,
20439 IX86_BUILTIN_PMAXSW128
,
20440 IX86_BUILTIN_PMAXUB128
,
20441 IX86_BUILTIN_PMINSW128
,
20442 IX86_BUILTIN_PMINUB128
,
20444 IX86_BUILTIN_PMULUDQ
,
20445 IX86_BUILTIN_PMULUDQ128
,
20446 IX86_BUILTIN_PMULHUW128
,
20447 IX86_BUILTIN_PMULHW128
,
20448 IX86_BUILTIN_PMULLW128
,
20450 IX86_BUILTIN_PSADBW128
,
20451 IX86_BUILTIN_PSHUFHW
,
20452 IX86_BUILTIN_PSHUFLW
,
20453 IX86_BUILTIN_PSHUFD
,
20455 IX86_BUILTIN_PSLLDQI128
,
20456 IX86_BUILTIN_PSLLWI128
,
20457 IX86_BUILTIN_PSLLDI128
,
20458 IX86_BUILTIN_PSLLQI128
,
20459 IX86_BUILTIN_PSRAWI128
,
20460 IX86_BUILTIN_PSRADI128
,
20461 IX86_BUILTIN_PSRLDQI128
,
20462 IX86_BUILTIN_PSRLWI128
,
20463 IX86_BUILTIN_PSRLDI128
,
20464 IX86_BUILTIN_PSRLQI128
,
20466 IX86_BUILTIN_PSLLDQ128
,
20467 IX86_BUILTIN_PSLLW128
,
20468 IX86_BUILTIN_PSLLD128
,
20469 IX86_BUILTIN_PSLLQ128
,
20470 IX86_BUILTIN_PSRAW128
,
20471 IX86_BUILTIN_PSRAD128
,
20472 IX86_BUILTIN_PSRLW128
,
20473 IX86_BUILTIN_PSRLD128
,
20474 IX86_BUILTIN_PSRLQ128
,
20476 IX86_BUILTIN_PUNPCKHBW128
,
20477 IX86_BUILTIN_PUNPCKHWD128
,
20478 IX86_BUILTIN_PUNPCKHDQ128
,
20479 IX86_BUILTIN_PUNPCKHQDQ128
,
20480 IX86_BUILTIN_PUNPCKLBW128
,
20481 IX86_BUILTIN_PUNPCKLWD128
,
20482 IX86_BUILTIN_PUNPCKLDQ128
,
20483 IX86_BUILTIN_PUNPCKLQDQ128
,
20485 IX86_BUILTIN_CLFLUSH
,
20486 IX86_BUILTIN_MFENCE
,
20487 IX86_BUILTIN_LFENCE
,
20489 IX86_BUILTIN_BSRSI
,
20490 IX86_BUILTIN_BSRDI
,
20491 IX86_BUILTIN_RDPMC
,
20492 IX86_BUILTIN_RDTSC
,
20493 IX86_BUILTIN_RDTSCP
,
20494 IX86_BUILTIN_ROLQI
,
20495 IX86_BUILTIN_ROLHI
,
20496 IX86_BUILTIN_RORQI
,
20497 IX86_BUILTIN_RORHI
,
20500 IX86_BUILTIN_ADDSUBPS
,
20501 IX86_BUILTIN_HADDPS
,
20502 IX86_BUILTIN_HSUBPS
,
20503 IX86_BUILTIN_MOVSHDUP
,
20504 IX86_BUILTIN_MOVSLDUP
,
20505 IX86_BUILTIN_ADDSUBPD
,
20506 IX86_BUILTIN_HADDPD
,
20507 IX86_BUILTIN_HSUBPD
,
20508 IX86_BUILTIN_LDDQU
,
20510 IX86_BUILTIN_MONITOR
,
20511 IX86_BUILTIN_MWAIT
,
20514 IX86_BUILTIN_PHADDW
,
20515 IX86_BUILTIN_PHADDD
,
20516 IX86_BUILTIN_PHADDSW
,
20517 IX86_BUILTIN_PHSUBW
,
20518 IX86_BUILTIN_PHSUBD
,
20519 IX86_BUILTIN_PHSUBSW
,
20520 IX86_BUILTIN_PMADDUBSW
,
20521 IX86_BUILTIN_PMULHRSW
,
20522 IX86_BUILTIN_PSHUFB
,
20523 IX86_BUILTIN_PSIGNB
,
20524 IX86_BUILTIN_PSIGNW
,
20525 IX86_BUILTIN_PSIGND
,
20526 IX86_BUILTIN_PALIGNR
,
20527 IX86_BUILTIN_PABSB
,
20528 IX86_BUILTIN_PABSW
,
20529 IX86_BUILTIN_PABSD
,
20531 IX86_BUILTIN_PHADDW128
,
20532 IX86_BUILTIN_PHADDD128
,
20533 IX86_BUILTIN_PHADDSW128
,
20534 IX86_BUILTIN_PHSUBW128
,
20535 IX86_BUILTIN_PHSUBD128
,
20536 IX86_BUILTIN_PHSUBSW128
,
20537 IX86_BUILTIN_PMADDUBSW128
,
20538 IX86_BUILTIN_PMULHRSW128
,
20539 IX86_BUILTIN_PSHUFB128
,
20540 IX86_BUILTIN_PSIGNB128
,
20541 IX86_BUILTIN_PSIGNW128
,
20542 IX86_BUILTIN_PSIGND128
,
20543 IX86_BUILTIN_PALIGNR128
,
20544 IX86_BUILTIN_PABSB128
,
20545 IX86_BUILTIN_PABSW128
,
20546 IX86_BUILTIN_PABSD128
,
20548 /* AMDFAM10 - SSE4A New Instructions. */
20549 IX86_BUILTIN_MOVNTSD
,
20550 IX86_BUILTIN_MOVNTSS
,
20551 IX86_BUILTIN_EXTRQI
,
20552 IX86_BUILTIN_EXTRQ
,
20553 IX86_BUILTIN_INSERTQI
,
20554 IX86_BUILTIN_INSERTQ
,
20557 IX86_BUILTIN_BLENDPD
,
20558 IX86_BUILTIN_BLENDPS
,
20559 IX86_BUILTIN_BLENDVPD
,
20560 IX86_BUILTIN_BLENDVPS
,
20561 IX86_BUILTIN_PBLENDVB128
,
20562 IX86_BUILTIN_PBLENDW128
,
20567 IX86_BUILTIN_INSERTPS128
,
20569 IX86_BUILTIN_MOVNTDQA
,
20570 IX86_BUILTIN_MPSADBW128
,
20571 IX86_BUILTIN_PACKUSDW128
,
20572 IX86_BUILTIN_PCMPEQQ
,
20573 IX86_BUILTIN_PHMINPOSUW128
,
20575 IX86_BUILTIN_PMAXSB128
,
20576 IX86_BUILTIN_PMAXSD128
,
20577 IX86_BUILTIN_PMAXUD128
,
20578 IX86_BUILTIN_PMAXUW128
,
20580 IX86_BUILTIN_PMINSB128
,
20581 IX86_BUILTIN_PMINSD128
,
20582 IX86_BUILTIN_PMINUD128
,
20583 IX86_BUILTIN_PMINUW128
,
20585 IX86_BUILTIN_PMOVSXBW128
,
20586 IX86_BUILTIN_PMOVSXBD128
,
20587 IX86_BUILTIN_PMOVSXBQ128
,
20588 IX86_BUILTIN_PMOVSXWD128
,
20589 IX86_BUILTIN_PMOVSXWQ128
,
20590 IX86_BUILTIN_PMOVSXDQ128
,
20592 IX86_BUILTIN_PMOVZXBW128
,
20593 IX86_BUILTIN_PMOVZXBD128
,
20594 IX86_BUILTIN_PMOVZXBQ128
,
20595 IX86_BUILTIN_PMOVZXWD128
,
20596 IX86_BUILTIN_PMOVZXWQ128
,
20597 IX86_BUILTIN_PMOVZXDQ128
,
20599 IX86_BUILTIN_PMULDQ128
,
20600 IX86_BUILTIN_PMULLD128
,
20602 IX86_BUILTIN_ROUNDPD
,
20603 IX86_BUILTIN_ROUNDPS
,
20604 IX86_BUILTIN_ROUNDSD
,
20605 IX86_BUILTIN_ROUNDSS
,
20607 IX86_BUILTIN_PTESTZ
,
20608 IX86_BUILTIN_PTESTC
,
20609 IX86_BUILTIN_PTESTNZC
,
20611 IX86_BUILTIN_VEC_INIT_V2SI
,
20612 IX86_BUILTIN_VEC_INIT_V4HI
,
20613 IX86_BUILTIN_VEC_INIT_V8QI
,
20614 IX86_BUILTIN_VEC_EXT_V2DF
,
20615 IX86_BUILTIN_VEC_EXT_V2DI
,
20616 IX86_BUILTIN_VEC_EXT_V4SF
,
20617 IX86_BUILTIN_VEC_EXT_V4SI
,
20618 IX86_BUILTIN_VEC_EXT_V8HI
,
20619 IX86_BUILTIN_VEC_EXT_V2SI
,
20620 IX86_BUILTIN_VEC_EXT_V4HI
,
20621 IX86_BUILTIN_VEC_EXT_V16QI
,
20622 IX86_BUILTIN_VEC_SET_V2DI
,
20623 IX86_BUILTIN_VEC_SET_V4SF
,
20624 IX86_BUILTIN_VEC_SET_V4SI
,
20625 IX86_BUILTIN_VEC_SET_V8HI
,
20626 IX86_BUILTIN_VEC_SET_V4HI
,
20627 IX86_BUILTIN_VEC_SET_V16QI
,
20629 IX86_BUILTIN_VEC_PACK_SFIX
,
20632 IX86_BUILTIN_CRC32QI
,
20633 IX86_BUILTIN_CRC32HI
,
20634 IX86_BUILTIN_CRC32SI
,
20635 IX86_BUILTIN_CRC32DI
,
20637 IX86_BUILTIN_PCMPESTRI128
,
20638 IX86_BUILTIN_PCMPESTRM128
,
20639 IX86_BUILTIN_PCMPESTRA128
,
20640 IX86_BUILTIN_PCMPESTRC128
,
20641 IX86_BUILTIN_PCMPESTRO128
,
20642 IX86_BUILTIN_PCMPESTRS128
,
20643 IX86_BUILTIN_PCMPESTRZ128
,
20644 IX86_BUILTIN_PCMPISTRI128
,
20645 IX86_BUILTIN_PCMPISTRM128
,
20646 IX86_BUILTIN_PCMPISTRA128
,
20647 IX86_BUILTIN_PCMPISTRC128
,
20648 IX86_BUILTIN_PCMPISTRO128
,
20649 IX86_BUILTIN_PCMPISTRS128
,
20650 IX86_BUILTIN_PCMPISTRZ128
,
20652 IX86_BUILTIN_PCMPGTQ
,
20654 /* AES instructions */
20655 IX86_BUILTIN_AESENC128
,
20656 IX86_BUILTIN_AESENCLAST128
,
20657 IX86_BUILTIN_AESDEC128
,
20658 IX86_BUILTIN_AESDECLAST128
,
20659 IX86_BUILTIN_AESIMC128
,
20660 IX86_BUILTIN_AESKEYGENASSIST128
,
20662 /* PCLMUL instruction */
20663 IX86_BUILTIN_PCLMULQDQ128
,
20666 IX86_BUILTIN_ADDPD256
,
20667 IX86_BUILTIN_ADDPS256
,
20668 IX86_BUILTIN_ADDSUBPD256
,
20669 IX86_BUILTIN_ADDSUBPS256
,
20670 IX86_BUILTIN_ANDPD256
,
20671 IX86_BUILTIN_ANDPS256
,
20672 IX86_BUILTIN_ANDNPD256
,
20673 IX86_BUILTIN_ANDNPS256
,
20674 IX86_BUILTIN_BLENDPD256
,
20675 IX86_BUILTIN_BLENDPS256
,
20676 IX86_BUILTIN_BLENDVPD256
,
20677 IX86_BUILTIN_BLENDVPS256
,
20678 IX86_BUILTIN_DIVPD256
,
20679 IX86_BUILTIN_DIVPS256
,
20680 IX86_BUILTIN_DPPS256
,
20681 IX86_BUILTIN_HADDPD256
,
20682 IX86_BUILTIN_HADDPS256
,
20683 IX86_BUILTIN_HSUBPD256
,
20684 IX86_BUILTIN_HSUBPS256
,
20685 IX86_BUILTIN_MAXPD256
,
20686 IX86_BUILTIN_MAXPS256
,
20687 IX86_BUILTIN_MINPD256
,
20688 IX86_BUILTIN_MINPS256
,
20689 IX86_BUILTIN_MULPD256
,
20690 IX86_BUILTIN_MULPS256
,
20691 IX86_BUILTIN_ORPD256
,
20692 IX86_BUILTIN_ORPS256
,
20693 IX86_BUILTIN_SHUFPD256
,
20694 IX86_BUILTIN_SHUFPS256
,
20695 IX86_BUILTIN_SUBPD256
,
20696 IX86_BUILTIN_SUBPS256
,
20697 IX86_BUILTIN_XORPD256
,
20698 IX86_BUILTIN_XORPS256
,
20699 IX86_BUILTIN_CMPSD
,
20700 IX86_BUILTIN_CMPSS
,
20701 IX86_BUILTIN_CMPPD
,
20702 IX86_BUILTIN_CMPPS
,
20703 IX86_BUILTIN_CMPPD256
,
20704 IX86_BUILTIN_CMPPS256
,
20705 IX86_BUILTIN_CVTDQ2PD256
,
20706 IX86_BUILTIN_CVTDQ2PS256
,
20707 IX86_BUILTIN_CVTPD2PS256
,
20708 IX86_BUILTIN_CVTPS2DQ256
,
20709 IX86_BUILTIN_CVTPS2PD256
,
20710 IX86_BUILTIN_CVTTPD2DQ256
,
20711 IX86_BUILTIN_CVTPD2DQ256
,
20712 IX86_BUILTIN_CVTTPS2DQ256
,
20713 IX86_BUILTIN_EXTRACTF128PD256
,
20714 IX86_BUILTIN_EXTRACTF128PS256
,
20715 IX86_BUILTIN_EXTRACTF128SI256
,
20716 IX86_BUILTIN_VZEROALL
,
20717 IX86_BUILTIN_VZEROUPPER
,
20718 IX86_BUILTIN_VZEROUPPER_REX64
,
20719 IX86_BUILTIN_VPERMILVARPD
,
20720 IX86_BUILTIN_VPERMILVARPS
,
20721 IX86_BUILTIN_VPERMILVARPD256
,
20722 IX86_BUILTIN_VPERMILVARPS256
,
20723 IX86_BUILTIN_VPERMILPD
,
20724 IX86_BUILTIN_VPERMILPS
,
20725 IX86_BUILTIN_VPERMILPD256
,
20726 IX86_BUILTIN_VPERMILPS256
,
20727 IX86_BUILTIN_VPERM2F128PD256
,
20728 IX86_BUILTIN_VPERM2F128PS256
,
20729 IX86_BUILTIN_VPERM2F128SI256
,
20730 IX86_BUILTIN_VBROADCASTSS
,
20731 IX86_BUILTIN_VBROADCASTSD256
,
20732 IX86_BUILTIN_VBROADCASTSS256
,
20733 IX86_BUILTIN_VBROADCASTPD256
,
20734 IX86_BUILTIN_VBROADCASTPS256
,
20735 IX86_BUILTIN_VINSERTF128PD256
,
20736 IX86_BUILTIN_VINSERTF128PS256
,
20737 IX86_BUILTIN_VINSERTF128SI256
,
20738 IX86_BUILTIN_LOADUPD256
,
20739 IX86_BUILTIN_LOADUPS256
,
20740 IX86_BUILTIN_STOREUPD256
,
20741 IX86_BUILTIN_STOREUPS256
,
20742 IX86_BUILTIN_LDDQU256
,
20743 IX86_BUILTIN_MOVNTDQ256
,
20744 IX86_BUILTIN_MOVNTPD256
,
20745 IX86_BUILTIN_MOVNTPS256
,
20746 IX86_BUILTIN_LOADDQU256
,
20747 IX86_BUILTIN_STOREDQU256
,
20748 IX86_BUILTIN_MASKLOADPD
,
20749 IX86_BUILTIN_MASKLOADPS
,
20750 IX86_BUILTIN_MASKSTOREPD
,
20751 IX86_BUILTIN_MASKSTOREPS
,
20752 IX86_BUILTIN_MASKLOADPD256
,
20753 IX86_BUILTIN_MASKLOADPS256
,
20754 IX86_BUILTIN_MASKSTOREPD256
,
20755 IX86_BUILTIN_MASKSTOREPS256
,
20756 IX86_BUILTIN_MOVSHDUP256
,
20757 IX86_BUILTIN_MOVSLDUP256
,
20758 IX86_BUILTIN_MOVDDUP256
,
20760 IX86_BUILTIN_SQRTPD256
,
20761 IX86_BUILTIN_SQRTPS256
,
20762 IX86_BUILTIN_SQRTPS_NR256
,
20763 IX86_BUILTIN_RSQRTPS256
,
20764 IX86_BUILTIN_RSQRTPS_NR256
,
20766 IX86_BUILTIN_RCPPS256
,
20768 IX86_BUILTIN_ROUNDPD256
,
20769 IX86_BUILTIN_ROUNDPS256
,
20771 IX86_BUILTIN_UNPCKHPD256
,
20772 IX86_BUILTIN_UNPCKLPD256
,
20773 IX86_BUILTIN_UNPCKHPS256
,
20774 IX86_BUILTIN_UNPCKLPS256
,
20776 IX86_BUILTIN_SI256_SI
,
20777 IX86_BUILTIN_PS256_PS
,
20778 IX86_BUILTIN_PD256_PD
,
20779 IX86_BUILTIN_SI_SI256
,
20780 IX86_BUILTIN_PS_PS256
,
20781 IX86_BUILTIN_PD_PD256
,
20783 IX86_BUILTIN_VTESTZPD
,
20784 IX86_BUILTIN_VTESTCPD
,
20785 IX86_BUILTIN_VTESTNZCPD
,
20786 IX86_BUILTIN_VTESTZPS
,
20787 IX86_BUILTIN_VTESTCPS
,
20788 IX86_BUILTIN_VTESTNZCPS
,
20789 IX86_BUILTIN_VTESTZPD256
,
20790 IX86_BUILTIN_VTESTCPD256
,
20791 IX86_BUILTIN_VTESTNZCPD256
,
20792 IX86_BUILTIN_VTESTZPS256
,
20793 IX86_BUILTIN_VTESTCPS256
,
20794 IX86_BUILTIN_VTESTNZCPS256
,
20795 IX86_BUILTIN_PTESTZ256
,
20796 IX86_BUILTIN_PTESTC256
,
20797 IX86_BUILTIN_PTESTNZC256
,
20799 IX86_BUILTIN_MOVMSKPD256
,
20800 IX86_BUILTIN_MOVMSKPS256
,
20802 /* TFmode support builtins. */
20804 IX86_BUILTIN_HUGE_VALQ
,
20805 IX86_BUILTIN_FABSQ
,
20806 IX86_BUILTIN_COPYSIGNQ
,
20808 /* SSE5 instructions */
20809 IX86_BUILTIN_FMADDSS
,
20810 IX86_BUILTIN_FMADDSD
,
20811 IX86_BUILTIN_FMADDPS
,
20812 IX86_BUILTIN_FMADDPD
,
20813 IX86_BUILTIN_FMSUBSS
,
20814 IX86_BUILTIN_FMSUBSD
,
20815 IX86_BUILTIN_FMSUBPS
,
20816 IX86_BUILTIN_FMSUBPD
,
20817 IX86_BUILTIN_FNMADDSS
,
20818 IX86_BUILTIN_FNMADDSD
,
20819 IX86_BUILTIN_FNMADDPS
,
20820 IX86_BUILTIN_FNMADDPD
,
20821 IX86_BUILTIN_FNMSUBSS
,
20822 IX86_BUILTIN_FNMSUBSD
,
20823 IX86_BUILTIN_FNMSUBPS
,
20824 IX86_BUILTIN_FNMSUBPD
,
20825 IX86_BUILTIN_PCMOV
,
20826 IX86_BUILTIN_PCMOV_V2DI
,
20827 IX86_BUILTIN_PCMOV_V4SI
,
20828 IX86_BUILTIN_PCMOV_V8HI
,
20829 IX86_BUILTIN_PCMOV_V16QI
,
20830 IX86_BUILTIN_PCMOV_V4SF
,
20831 IX86_BUILTIN_PCMOV_V2DF
,
20832 IX86_BUILTIN_PPERM
,
20833 IX86_BUILTIN_PERMPS
,
20834 IX86_BUILTIN_PERMPD
,
20835 IX86_BUILTIN_PMACSSWW
,
20836 IX86_BUILTIN_PMACSWW
,
20837 IX86_BUILTIN_PMACSSWD
,
20838 IX86_BUILTIN_PMACSWD
,
20839 IX86_BUILTIN_PMACSSDD
,
20840 IX86_BUILTIN_PMACSDD
,
20841 IX86_BUILTIN_PMACSSDQL
,
20842 IX86_BUILTIN_PMACSSDQH
,
20843 IX86_BUILTIN_PMACSDQL
,
20844 IX86_BUILTIN_PMACSDQH
,
20845 IX86_BUILTIN_PMADCSSWD
,
20846 IX86_BUILTIN_PMADCSWD
,
20847 IX86_BUILTIN_PHADDBW
,
20848 IX86_BUILTIN_PHADDBD
,
20849 IX86_BUILTIN_PHADDBQ
,
20850 IX86_BUILTIN_PHADDWD
,
20851 IX86_BUILTIN_PHADDWQ
,
20852 IX86_BUILTIN_PHADDDQ
,
20853 IX86_BUILTIN_PHADDUBW
,
20854 IX86_BUILTIN_PHADDUBD
,
20855 IX86_BUILTIN_PHADDUBQ
,
20856 IX86_BUILTIN_PHADDUWD
,
20857 IX86_BUILTIN_PHADDUWQ
,
20858 IX86_BUILTIN_PHADDUDQ
,
20859 IX86_BUILTIN_PHSUBBW
,
20860 IX86_BUILTIN_PHSUBWD
,
20861 IX86_BUILTIN_PHSUBDQ
,
20862 IX86_BUILTIN_PROTB
,
20863 IX86_BUILTIN_PROTW
,
20864 IX86_BUILTIN_PROTD
,
20865 IX86_BUILTIN_PROTQ
,
20866 IX86_BUILTIN_PROTB_IMM
,
20867 IX86_BUILTIN_PROTW_IMM
,
20868 IX86_BUILTIN_PROTD_IMM
,
20869 IX86_BUILTIN_PROTQ_IMM
,
20870 IX86_BUILTIN_PSHLB
,
20871 IX86_BUILTIN_PSHLW
,
20872 IX86_BUILTIN_PSHLD
,
20873 IX86_BUILTIN_PSHLQ
,
20874 IX86_BUILTIN_PSHAB
,
20875 IX86_BUILTIN_PSHAW
,
20876 IX86_BUILTIN_PSHAD
,
20877 IX86_BUILTIN_PSHAQ
,
20878 IX86_BUILTIN_FRCZSS
,
20879 IX86_BUILTIN_FRCZSD
,
20880 IX86_BUILTIN_FRCZPS
,
20881 IX86_BUILTIN_FRCZPD
,
20882 IX86_BUILTIN_CVTPH2PS
,
20883 IX86_BUILTIN_CVTPS2PH
,
20885 IX86_BUILTIN_COMEQSS
,
20886 IX86_BUILTIN_COMNESS
,
20887 IX86_BUILTIN_COMLTSS
,
20888 IX86_BUILTIN_COMLESS
,
20889 IX86_BUILTIN_COMGTSS
,
20890 IX86_BUILTIN_COMGESS
,
20891 IX86_BUILTIN_COMUEQSS
,
20892 IX86_BUILTIN_COMUNESS
,
20893 IX86_BUILTIN_COMULTSS
,
20894 IX86_BUILTIN_COMULESS
,
20895 IX86_BUILTIN_COMUGTSS
,
20896 IX86_BUILTIN_COMUGESS
,
20897 IX86_BUILTIN_COMORDSS
,
20898 IX86_BUILTIN_COMUNORDSS
,
20899 IX86_BUILTIN_COMFALSESS
,
20900 IX86_BUILTIN_COMTRUESS
,
20902 IX86_BUILTIN_COMEQSD
,
20903 IX86_BUILTIN_COMNESD
,
20904 IX86_BUILTIN_COMLTSD
,
20905 IX86_BUILTIN_COMLESD
,
20906 IX86_BUILTIN_COMGTSD
,
20907 IX86_BUILTIN_COMGESD
,
20908 IX86_BUILTIN_COMUEQSD
,
20909 IX86_BUILTIN_COMUNESD
,
20910 IX86_BUILTIN_COMULTSD
,
20911 IX86_BUILTIN_COMULESD
,
20912 IX86_BUILTIN_COMUGTSD
,
20913 IX86_BUILTIN_COMUGESD
,
20914 IX86_BUILTIN_COMORDSD
,
20915 IX86_BUILTIN_COMUNORDSD
,
20916 IX86_BUILTIN_COMFALSESD
,
20917 IX86_BUILTIN_COMTRUESD
,
20919 IX86_BUILTIN_COMEQPS
,
20920 IX86_BUILTIN_COMNEPS
,
20921 IX86_BUILTIN_COMLTPS
,
20922 IX86_BUILTIN_COMLEPS
,
20923 IX86_BUILTIN_COMGTPS
,
20924 IX86_BUILTIN_COMGEPS
,
20925 IX86_BUILTIN_COMUEQPS
,
20926 IX86_BUILTIN_COMUNEPS
,
20927 IX86_BUILTIN_COMULTPS
,
20928 IX86_BUILTIN_COMULEPS
,
20929 IX86_BUILTIN_COMUGTPS
,
20930 IX86_BUILTIN_COMUGEPS
,
20931 IX86_BUILTIN_COMORDPS
,
20932 IX86_BUILTIN_COMUNORDPS
,
20933 IX86_BUILTIN_COMFALSEPS
,
20934 IX86_BUILTIN_COMTRUEPS
,
20936 IX86_BUILTIN_COMEQPD
,
20937 IX86_BUILTIN_COMNEPD
,
20938 IX86_BUILTIN_COMLTPD
,
20939 IX86_BUILTIN_COMLEPD
,
20940 IX86_BUILTIN_COMGTPD
,
20941 IX86_BUILTIN_COMGEPD
,
20942 IX86_BUILTIN_COMUEQPD
,
20943 IX86_BUILTIN_COMUNEPD
,
20944 IX86_BUILTIN_COMULTPD
,
20945 IX86_BUILTIN_COMULEPD
,
20946 IX86_BUILTIN_COMUGTPD
,
20947 IX86_BUILTIN_COMUGEPD
,
20948 IX86_BUILTIN_COMORDPD
,
20949 IX86_BUILTIN_COMUNORDPD
,
20950 IX86_BUILTIN_COMFALSEPD
,
20951 IX86_BUILTIN_COMTRUEPD
,
20953 IX86_BUILTIN_PCOMEQUB
,
20954 IX86_BUILTIN_PCOMNEUB
,
20955 IX86_BUILTIN_PCOMLTUB
,
20956 IX86_BUILTIN_PCOMLEUB
,
20957 IX86_BUILTIN_PCOMGTUB
,
20958 IX86_BUILTIN_PCOMGEUB
,
20959 IX86_BUILTIN_PCOMFALSEUB
,
20960 IX86_BUILTIN_PCOMTRUEUB
,
20961 IX86_BUILTIN_PCOMEQUW
,
20962 IX86_BUILTIN_PCOMNEUW
,
20963 IX86_BUILTIN_PCOMLTUW
,
20964 IX86_BUILTIN_PCOMLEUW
,
20965 IX86_BUILTIN_PCOMGTUW
,
20966 IX86_BUILTIN_PCOMGEUW
,
20967 IX86_BUILTIN_PCOMFALSEUW
,
20968 IX86_BUILTIN_PCOMTRUEUW
,
20969 IX86_BUILTIN_PCOMEQUD
,
20970 IX86_BUILTIN_PCOMNEUD
,
20971 IX86_BUILTIN_PCOMLTUD
,
20972 IX86_BUILTIN_PCOMLEUD
,
20973 IX86_BUILTIN_PCOMGTUD
,
20974 IX86_BUILTIN_PCOMGEUD
,
20975 IX86_BUILTIN_PCOMFALSEUD
,
20976 IX86_BUILTIN_PCOMTRUEUD
,
20977 IX86_BUILTIN_PCOMEQUQ
,
20978 IX86_BUILTIN_PCOMNEUQ
,
20979 IX86_BUILTIN_PCOMLTUQ
,
20980 IX86_BUILTIN_PCOMLEUQ
,
20981 IX86_BUILTIN_PCOMGTUQ
,
20982 IX86_BUILTIN_PCOMGEUQ
,
20983 IX86_BUILTIN_PCOMFALSEUQ
,
20984 IX86_BUILTIN_PCOMTRUEUQ
,
20986 IX86_BUILTIN_PCOMEQB
,
20987 IX86_BUILTIN_PCOMNEB
,
20988 IX86_BUILTIN_PCOMLTB
,
20989 IX86_BUILTIN_PCOMLEB
,
20990 IX86_BUILTIN_PCOMGTB
,
20991 IX86_BUILTIN_PCOMGEB
,
20992 IX86_BUILTIN_PCOMFALSEB
,
20993 IX86_BUILTIN_PCOMTRUEB
,
20994 IX86_BUILTIN_PCOMEQW
,
20995 IX86_BUILTIN_PCOMNEW
,
20996 IX86_BUILTIN_PCOMLTW
,
20997 IX86_BUILTIN_PCOMLEW
,
20998 IX86_BUILTIN_PCOMGTW
,
20999 IX86_BUILTIN_PCOMGEW
,
21000 IX86_BUILTIN_PCOMFALSEW
,
21001 IX86_BUILTIN_PCOMTRUEW
,
21002 IX86_BUILTIN_PCOMEQD
,
21003 IX86_BUILTIN_PCOMNED
,
21004 IX86_BUILTIN_PCOMLTD
,
21005 IX86_BUILTIN_PCOMLED
,
21006 IX86_BUILTIN_PCOMGTD
,
21007 IX86_BUILTIN_PCOMGED
,
21008 IX86_BUILTIN_PCOMFALSED
,
21009 IX86_BUILTIN_PCOMTRUED
,
21010 IX86_BUILTIN_PCOMEQQ
,
21011 IX86_BUILTIN_PCOMNEQ
,
21012 IX86_BUILTIN_PCOMLTQ
,
21013 IX86_BUILTIN_PCOMLEQ
,
21014 IX86_BUILTIN_PCOMGTQ
,
21015 IX86_BUILTIN_PCOMGEQ
,
21016 IX86_BUILTIN_PCOMFALSEQ
,
21017 IX86_BUILTIN_PCOMTRUEQ
,
21022 /* Table for the ix86 builtin decls. */
21023 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
21025 /* Table of all of the builtin functions that are possible with different ISA's
21026 but are waiting to be built until a function is declared to use that
21028 struct GTY(()) builtin_isa
{
21029 tree type
; /* builtin type to use in the declaration */
21030 const char *name
; /* function name */
21031 int isa
; /* isa_flags this builtin is defined for */
21032 bool const_p
; /* true if the declaration is constant */
21035 static GTY(()) struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
21038 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21039 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
21040 * function decl in the ix86_builtins array. Returns the function decl or
21041 * NULL_TREE, if the builtin was not added.
21043 * If the front end has a special hook for builtin functions, delay adding
21044 * builtin functions that aren't in the current ISA until the ISA is changed
21045 * with function specific optimization. Doing so, can save about 300K for the
21046 * default compiler. When the builtin is expanded, check at that time whether
21049 * If the front end doesn't have a special hook, record all builtins, even if
21050 * it isn't an instruction set in the current ISA in case the user uses
21051 * function specific options for a different ISA, so that we don't get scope
21052 * errors if a builtin is added in the middle of a function scope. */
21055 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
21057 tree decl
= NULL_TREE
;
21059 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
21061 ix86_builtins_isa
[(int) code
].isa
= mask
;
21063 if ((mask
& ix86_isa_flags
) != 0
21064 || (lang_hooks
.builtin_function
21065 == lang_hooks
.builtin_function_ext_scope
))
21068 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
, NULL
,
21070 ix86_builtins
[(int) code
] = decl
;
21071 ix86_builtins_isa
[(int) code
].type
= NULL_TREE
;
21075 ix86_builtins
[(int) code
] = NULL_TREE
;
21076 ix86_builtins_isa
[(int) code
].const_p
= false;
21077 ix86_builtins_isa
[(int) code
].type
= type
;
21078 ix86_builtins_isa
[(int) code
].name
= name
;
21085 /* Like def_builtin, but also marks the function decl "const". */
21088 def_builtin_const (int mask
, const char *name
, tree type
,
21089 enum ix86_builtins code
)
21091 tree decl
= def_builtin (mask
, name
, type
, code
);
21093 TREE_READONLY (decl
) = 1;
21095 ix86_builtins_isa
[(int) code
].const_p
= true;
21100 /* Add any new builtin functions for a given ISA that may not have been
21101 declared. This saves a bit of space compared to adding all of the
21102 declarations to the tree, even if we didn't use them. */
21105 ix86_add_new_builtins (int isa
)
21110 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
21112 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
21113 && ix86_builtins_isa
[i
].type
!= NULL_TREE
)
21115 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
21116 ix86_builtins_isa
[i
].type
,
21117 i
, BUILT_IN_MD
, NULL
,
21120 ix86_builtins
[i
] = decl
;
21121 ix86_builtins_isa
[i
].type
= NULL_TREE
;
21122 if (ix86_builtins_isa
[i
].const_p
)
21123 TREE_READONLY (decl
) = 1;
21128 /* Bits for builtin_description.flag. */
21130 /* Set when we don't support the comparison natively, and should
21131 swap_comparison in order to support it. */
21132 #define BUILTIN_DESC_SWAP_OPERANDS 1
21134 struct builtin_description
21136 const unsigned int mask
;
21137 const enum insn_code icode
;
21138 const char *const name
;
21139 const enum ix86_builtins code
;
21140 const enum rtx_code comparison
;
21144 static const struct builtin_description bdesc_comi
[] =
21146 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
21147 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
21148 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
21149 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
21150 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
21151 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
21152 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
21153 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
21154 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
21155 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
21156 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
21157 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
21158 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
21159 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
21160 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
21161 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
21162 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
21163 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
21164 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
21165 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
21166 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
21167 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
21168 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
21169 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
21172 static const struct builtin_description bdesc_pcmpestr
[] =
21175 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
21176 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
21177 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
21178 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
21179 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
21180 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
21181 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
21184 static const struct builtin_description bdesc_pcmpistr
[] =
21187 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
21188 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
21189 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
21190 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
21191 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
21192 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
21193 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
/* Special builtin types (builtins taking pointer arguments or having
   side effects).  NOTE(review): a handful of enumerator lines were lost
   when this file was extracted; the ones marked below are reconstructed
   from cross-references in bdesc_special_args -- verify against the
   upstream revision.  Enumerator order is not semantically significant.  */
enum ix86_special_builtin_type
{
  SPECIAL_FTYPE_UNKNOWN,
  VOID_FTYPE_VOID,		/* reconstructed: used by emms/sfence/etc. */
  UINT64_FTYPE_VOID,		/* reconstructed: used by rdtsc */
  UINT64_FTYPE_PUNSIGNED,
  V32QI_FTYPE_PCCHAR,
  V16QI_FTYPE_PCCHAR,
  V8SF_FTYPE_PCV4SF,		/* reconstructed: used by vbroadcastf128_ps256 */
  V8SF_FTYPE_PCFLOAT,
  V4DF_FTYPE_PCV2DF,		/* reconstructed: used by vbroadcastf128_pd256 */
  V4DF_FTYPE_PCDOUBLE,
  V4SF_FTYPE_PCFLOAT,
  V2DF_FTYPE_PCDOUBLE,
  V8SF_FTYPE_PCV8SF_V8SF,
  V4DF_FTYPE_PCV4DF_V4DF,
  V4SF_FTYPE_V4SF_PCV2SF,
  V4SF_FTYPE_PCV4SF_V4SF,
  V2DF_FTYPE_V2DF_PCDOUBLE,
  V2DF_FTYPE_PCV2DF_V2DF,
  V2DI_FTYPE_PV2DI,		/* reconstructed: used by movntdqa */
  VOID_FTYPE_PV2SF_V4SF,
  VOID_FTYPE_PV4DI_V4DI,
  VOID_FTYPE_PV2DI_V2DI,
  VOID_FTYPE_PCHAR_V32QI,
  VOID_FTYPE_PCHAR_V16QI,
  VOID_FTYPE_PFLOAT_V8SF,
  VOID_FTYPE_PFLOAT_V4SF,
  VOID_FTYPE_PDOUBLE_V4DF,
  VOID_FTYPE_PDOUBLE_V2DF,
  VOID_FTYPE_PDI_DI,		/* reconstructed: used by movntq */
  VOID_FTYPE_PINT_INT,
  VOID_FTYPE_PV8SF_V8SF_V8SF,
  VOID_FTYPE_PV4DF_V4DF_V4DF,
  VOID_FTYPE_PV4SF_V4SF_V4SF,
  VOID_FTYPE_PV2DF_V2DF_V2DF
};
/* Builtin types.  NOTE(review): several spans of enumerators (mostly the
   unary, one-operand FTYPE codes) were lost when this file was extracted.
   The marked spans below are reconstructed from upstream GCC and must be
   verified against the original revision; enumerator order does not matter
   semantically, but every code referenced elsewhere in the file must exist.  */
enum ix86_builtin_type
{
  FLOAT128_FTYPE_FLOAT128,
  FLOAT_FTYPE_FLOAT,		/* reconstructed (dropped line) */
  FLOAT128_FTYPE_FLOAT128_FLOAT128,
  INT_FTYPE_V8SF_V8SF_PTEST,
  INT_FTYPE_V4DI_V4DI_PTEST,
  INT_FTYPE_V4DF_V4DF_PTEST,
  INT_FTYPE_V4SF_V4SF_PTEST,
  INT_FTYPE_V2DI_V2DI_PTEST,
  INT_FTYPE_V2DF_V2DF_PTEST,
  /* NOTE(review): begin reconstructed span (unary types dropped by
     extraction) -- verify against upstream.  */
  INT_FTYPE_INT,		/* used by bsrsi */
  UINT64_FTYPE_INT,		/* used by rdpmc */
  INT64_FTYPE_INT64,		/* used by bsrdi */
  INT64_FTYPE_V4SF,
  INT64_FTYPE_V2DF,
  INT_FTYPE_V16QI,
  INT_FTYPE_V8QI,
  INT_FTYPE_V8SF,
  INT_FTYPE_V4DF,
  INT_FTYPE_V4SF,
  INT_FTYPE_V2DF,
  V16QI_FTYPE_V16QI,
  V8SI_FTYPE_V8SF,
  V8SI_FTYPE_V4SI,
  V8HI_FTYPE_V8HI,
  V8HI_FTYPE_V16QI,
  V8QI_FTYPE_V8QI,
  V8SF_FTYPE_V8SF,
  V8SF_FTYPE_V8SI,
  V8SF_FTYPE_V4SF,
  V4SI_FTYPE_V4SI,
  V4SI_FTYPE_V16QI,
  V4SI_FTYPE_V4SF,
  V4SI_FTYPE_V8SI,
  V4SI_FTYPE_V8HI,
  V4SI_FTYPE_V4DF,
  V4SI_FTYPE_V2DF,
  V4HI_FTYPE_V4HI,
  V4HI_FTYPE_V16QI,
  V4DF_FTYPE_V4DF,
  V4DF_FTYPE_V4SI,
  V4DF_FTYPE_V4SF,
  V4DF_FTYPE_V2DF,
  V4SF_FTYPE_V4SF,
  /* end reconstructed span */
  V4SF_FTYPE_V4SF_VEC_MERGE,
  /* NOTE(review): begin reconstructed span (dropped by extraction).  */
  V4SF_FTYPE_V8SF,
  V4SF_FTYPE_V4SI,
  V4SF_FTYPE_V4DF,
  V4SF_FTYPE_V2DF,
  V2DI_FTYPE_V2DI,
  V2DI_FTYPE_V16QI,
  V2DI_FTYPE_V8HI,
  V2DI_FTYPE_V4SI,
  V2DF_FTYPE_V2DF,
  /* end reconstructed span */
  V2DF_FTYPE_V2DF_VEC_MERGE,
  /* NOTE(review): begin reconstructed span (dropped by extraction).  */
  V2DF_FTYPE_V4SF,
  V2DF_FTYPE_V4DF,
  V2DF_FTYPE_V4SI,
  V2DF_FTYPE_V2SI,
  V2SI_FTYPE_V2SI,
  V2SI_FTYPE_V4SF,
  V2SI_FTYPE_V2SF,
  V2SI_FTYPE_V2DF,
  V2SF_FTYPE_V2SF,
  V2SF_FTYPE_V2SI,
  /* end reconstructed span */
  V16QI_FTYPE_V16QI_V16QI,
  V16QI_FTYPE_V8HI_V8HI,
  V8QI_FTYPE_V8QI_V8QI,
  V8QI_FTYPE_V4HI_V4HI,
  V8HI_FTYPE_V8HI_V8HI,
  V8HI_FTYPE_V8HI_V8HI_COUNT,
  V8HI_FTYPE_V16QI_V16QI,
  V8HI_FTYPE_V4SI_V4SI,
  V8HI_FTYPE_V8HI_SI_COUNT,
  V8SF_FTYPE_V8SF_V8SF,
  V8SF_FTYPE_V8SF_V8SI,
  V4SI_FTYPE_V4SI_V4SI,
  V4SI_FTYPE_V4SI_V4SI_COUNT,
  V4SI_FTYPE_V8HI_V8HI,
  V4SI_FTYPE_V4SF_V4SF,
  V4SI_FTYPE_V2DF_V2DF,
  V4SI_FTYPE_V4SI_SI_COUNT,
  V4HI_FTYPE_V4HI_V4HI,
  V4HI_FTYPE_V4HI_V4HI_COUNT,
  V4HI_FTYPE_V8QI_V8QI,
  V4HI_FTYPE_V2SI_V2SI,
  V4HI_FTYPE_V4HI_SI_COUNT,
  V4DF_FTYPE_V4DF_V4DF,
  V4DF_FTYPE_V4DF_V4DI,
  V4SF_FTYPE_V4SF_V4SF,
  V4SF_FTYPE_V4SF_V4SF_SWAP,
  V4SF_FTYPE_V4SF_V4SI,
  V4SF_FTYPE_V4SF_V2SI,
  V4SF_FTYPE_V4SF_V2DF,
  V4SF_FTYPE_V4SF_DI,
  V4SF_FTYPE_V4SF_SI,
  V2DI_FTYPE_V2DI_V2DI,
  V2DI_FTYPE_V2DI_V2DI_COUNT,
  V2DI_FTYPE_V16QI_V16QI,
  V2DI_FTYPE_V4SI_V4SI,
  V2DI_FTYPE_V2DI_V16QI,
  V2DI_FTYPE_V2DF_V2DF,
  V2DI_FTYPE_V2DI_SI_COUNT,
  V2SI_FTYPE_V2SI_V2SI,
  V2SI_FTYPE_V2SI_V2SI_COUNT,
  V2SI_FTYPE_V4HI_V4HI,
  V2SI_FTYPE_V2SF_V2SF,
  V2SI_FTYPE_V2SI_SI_COUNT,
  V2DF_FTYPE_V2DF_V2DF,
  V2DF_FTYPE_V2DF_V2DF_SWAP,
  V2DF_FTYPE_V2DF_V4SF,
  V2DF_FTYPE_V2DF_V2DI,
  V2DF_FTYPE_V2DF_DI,
  V2DF_FTYPE_V2DF_SI,
  V2SF_FTYPE_V2SF_V2SF,
  V1DI_FTYPE_V1DI_V1DI,
  V1DI_FTYPE_V1DI_V1DI_COUNT,
  V1DI_FTYPE_V8QI_V8QI,
  V1DI_FTYPE_V2SI_V2SI,
  V1DI_FTYPE_V1DI_SI_COUNT,
  UINT64_FTYPE_UINT64_UINT64,
  UINT_FTYPE_UINT_UINT,
  UINT_FTYPE_UINT_USHORT,
  UINT_FTYPE_UINT_UCHAR,
  UINT16_FTYPE_UINT16_INT,
  UINT8_FTYPE_UINT8_INT,
  V8HI_FTYPE_V8HI_INT,
  V4SI_FTYPE_V4SI_INT,
  V4HI_FTYPE_V4HI_INT,
  V8SF_FTYPE_V8SF_INT,
  V4SI_FTYPE_V8SI_INT,
  V4SF_FTYPE_V8SF_INT,
  V2DF_FTYPE_V4DF_INT,
  V4DF_FTYPE_V4DF_INT,
  V4SF_FTYPE_V4SF_INT,
  V2DI_FTYPE_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_INT,
  V2DF_FTYPE_V2DF_INT,
  V16QI_FTYPE_V16QI_V16QI_V16QI,
  V8SF_FTYPE_V8SF_V8SF_V8SF,
  V4DF_FTYPE_V4DF_V4DF_V4DF,
  V4SF_FTYPE_V4SF_V4SF_V4SF,
  V2DF_FTYPE_V2DF_V2DF_V2DF,
  V16QI_FTYPE_V16QI_V16QI_INT,
  V8SI_FTYPE_V8SI_V8SI_INT,
  V8SI_FTYPE_V8SI_V4SI_INT,
  V8HI_FTYPE_V8HI_V8HI_INT,
  V8SF_FTYPE_V8SF_V8SF_INT,
  V8SF_FTYPE_V8SF_V4SF_INT,
  V4SI_FTYPE_V4SI_V4SI_INT,
  V4DF_FTYPE_V4DF_V4DF_INT,
  V4DF_FTYPE_V4DF_V2DF_INT,
  V4SF_FTYPE_V4SF_V4SF_INT,
  V2DI_FTYPE_V2DI_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_V2DI_INT,
  V1DI2DI_FTYPE_V1DI_V1DI_INT,
  V2DF_FTYPE_V2DF_V2DF_INT,
  V2DI_FTYPE_V2DI_UINT_UINT,
  V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
21398 /* Special builtins with variable number of arguments. */
21399 static const struct builtin_description bdesc_special_args
[] =
21401 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtsc
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
21402 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtscp
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
21405 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
21408 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
21411 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
21412 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
21413 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
21415 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
21416 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
21417 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
21418 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
21420 /* SSE or 3DNow!A */
21421 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
21422 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntdi
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PDI_DI
},
21425 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
21426 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
21427 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
21428 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
21429 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
21430 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
21431 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntsi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
21432 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
21433 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
21435 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
21436 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
21439 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
21442 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
21445 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
21446 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
21449 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
21450 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, 0, IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
21451 { OPTION_MASK_ISA_AVX
| OPTION_MASK_ISA_64BIT
, CODE_FOR_avx_vzeroupper_rex64
, 0, IX86_BUILTIN_VZEROUPPER_REX64
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
21453 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastss
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
21454 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastsd256
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
21455 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastss256
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
21456 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_pd256
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
21457 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_ps256
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
21459 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
21460 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
21461 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
21462 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
21463 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
21464 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
21465 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
21467 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
21468 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
21469 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
21471 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DF
},
21472 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SF
},
21473 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DF
},
21474 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SF
},
21475 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DF_V2DF
},
21476 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SF_V4SF
},
21477 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DF_V4DF
},
21478 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SF_V8SF
},
21481 /* Builtins with variable number of arguments. */
21482 static const struct builtin_description bdesc_args
[] =
21484 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
21485 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
21486 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
21487 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
21488 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
21489 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
21490 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
21493 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21494 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21495 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21496 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21497 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21498 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21500 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21501 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21502 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21503 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21504 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21505 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21506 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21507 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21509 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21510 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21512 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21513 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21514 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21515 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21517 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21518 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21519 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21520 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21521 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21522 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21524 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21525 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21526 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21527 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21528 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21529 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21531 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
21532 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
21533 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
21535 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
21537 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
21538 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
21539 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
21540 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
21541 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
21542 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
21544 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
21545 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
21546 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
21547 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
21548 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
21549 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
21551 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
21552 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
21553 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
21554 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
21557 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
21558 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
21559 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
21560 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
21562 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21563 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21564 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21565 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
21566 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
21567 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
21568 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21569 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21570 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21571 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21572 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21573 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21574 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21575 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21576 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21579 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
21580 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
21581 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
21582 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
21583 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21584 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
21587 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
21588 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
21589 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
21590 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
21591 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
21592 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
21593 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
21594 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
21595 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
21596 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
21597 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
21598 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
21600 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
21602 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21603 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21604 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21605 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21606 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21607 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21608 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21609 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21611 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
21612 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
21613 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
21614 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
21615 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
21616 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
21617 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
21618 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
21619 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
21620 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
21621 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
21622 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
21623 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
21624 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
21625 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
21626 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
21627 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
21628 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
21629 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
21630 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
21631 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
21632 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
21634 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21635 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21636 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21637 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21639 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21640 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21641 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21642 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21644 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21645 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21646 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21647 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21648 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21650 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
21651 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
21652 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
21654 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
21656 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
21657 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
21658 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
21660 /* SSE MMX or 3Dnow!A */
21661 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21662 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21663 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21665 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21666 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21667 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21668 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21670 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
21671 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
21673 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
21676 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
21678 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
21679 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
21680 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
21681 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
21682 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2ps
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
21684 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
21685 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
21686 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
21687 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
21688 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
21690 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
21692 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
21693 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
21694 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
21695 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
21697 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
21698 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
21699 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttps2dq
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
21701 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21702 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21703 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21704 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21705 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21706 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21707 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21708 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21710 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
21711 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
21712 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
21713 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
21714 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
21715 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
21716 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
21717 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
21718 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
21719 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
21720 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
21721 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
21722 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
21723 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
21724 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
21725 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
21726 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
21727 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
21728 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
21729 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
21731 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21732 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21733 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21734 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21736 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21737 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21738 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21739 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21741 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21742 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpckhpd_exp
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21743 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpcklpd_exp
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21745 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
21747 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21748 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21749 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21750 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21751 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21752 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21753 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21754 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21756 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21757 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21758 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21759 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21760 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21761 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21762 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21763 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21765 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21766 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
21768 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21769 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21770 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21771 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21773 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21774 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21776 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21777 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21778 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21779 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21780 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21781 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21783 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21784 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21785 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21786 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21788 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21789 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21790 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21791 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21792 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21793 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21794 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21795 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21797 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
21798 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
21799 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
21801 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21802 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
21804 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
21805 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
21807 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
21809 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
21810 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
21811 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
21812 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
21814 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI2TI_FTYPE_V2DI_INT
},
21815 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
21816 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
21817 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
21818 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
21819 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
21820 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
21822 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI2TI_FTYPE_V2DI_INT
},
21823 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
21824 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
21825 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
21826 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
21827 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
21828 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
21830 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
21831 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
21832 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
21833 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
21835 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
21836 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
21837 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
21839 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
21841 { OPTION_MASK_ISA_SSE2
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
21842 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
21844 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
21847 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
21848 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
21851 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
21852 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
21854 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21855 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21856 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21857 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21858 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
21859 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
21862 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
21863 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
21864 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
21865 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
21866 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
21867 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
21869 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21870 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21871 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21872 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21873 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21874 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21875 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21876 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21877 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21878 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21879 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21880 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21881 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
21882 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
21883 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21884 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21885 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21886 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21887 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21888 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
21889 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21890 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
21891 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21892 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
21895 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT
},
21896 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT
},
21899 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
21900 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
21901 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
21902 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
21903 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
21904 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
21905 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
21906 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
21907 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
21908 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
21910 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
21911 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
21912 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
21913 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
21914 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
21915 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
21916 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
21917 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
21918 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
21919 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
21920 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
21921 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
21922 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
21924 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
21925 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21926 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21927 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21928 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21929 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21930 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
21931 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21932 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21933 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
21934 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
21935 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
21937 /* SSE4.1 and SSE5 */
21938 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
21939 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
21940 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
21941 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
21943 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
21944 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
21945 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
21948 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21949 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
21950 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
21951 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
21952 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
21955 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
21956 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
21957 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
21958 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21961 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
21962 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
21964 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21965 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21966 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21967 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
21970 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
21973 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21974 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21975 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21976 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21977 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21978 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21979 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21980 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21981 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21982 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21983 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21984 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21985 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21986 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21987 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21988 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21989 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21990 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21991 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21992 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21993 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21994 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21995 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21996 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
21997 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
21998 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
22000 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
22001 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
22002 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
22003 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
22005 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
22006 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
22007 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
22008 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
22009 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
22010 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
22011 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
22012 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpsdv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
22013 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpssv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
22014 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmppdv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
22015 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmppsv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
22016 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmppdv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
22017 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmppsv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
22018 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
22019 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
22020 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
22021 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtdq2pd256
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
22022 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtdq2ps256
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
22023 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
22024 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
22025 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
22026 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvttpd2dq256
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
22027 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
22028 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvttps2dq256
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
22029 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
22030 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
22031 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
22032 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
22033 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
22034 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
22035 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
22036 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
22037 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
22038 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
22040 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
22041 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
22042 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
22044 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
22045 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
22046 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
22047 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
22048 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
22050 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
22052 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
22053 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
22055 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
22056 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
22057 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
22058 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
22060 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
22061 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
22062 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
22063 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si_si256
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
22064 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps_ps256
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
22065 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd_pd256
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
22067 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
22068 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
22069 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
22070 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
22071 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
22072 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
22073 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
22074 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
22075 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
22076 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
22077 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
22078 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
22079 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
22080 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
22081 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
22083 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
22084 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
22088 enum multi_arg_type
{
22098 MULTI_ARG_3_PERMPS
,
22099 MULTI_ARG_3_PERMPD
,
22106 MULTI_ARG_2_DI_IMM
,
22107 MULTI_ARG_2_SI_IMM
,
22108 MULTI_ARG_2_HI_IMM
,
22109 MULTI_ARG_2_QI_IMM
,
22110 MULTI_ARG_2_SF_CMP
,
22111 MULTI_ARG_2_DF_CMP
,
22112 MULTI_ARG_2_DI_CMP
,
22113 MULTI_ARG_2_SI_CMP
,
22114 MULTI_ARG_2_HI_CMP
,
22115 MULTI_ARG_2_QI_CMP
,
22138 static const struct builtin_description bdesc_multi_arg
[] =
22140 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmaddv4sf4
, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
22141 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmaddv2df4
, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
22142 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmaddv4sf4
, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
22143 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmaddv2df4
, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
22144 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmsubv4sf4
, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
22145 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmsubv2df4
, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
22146 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmsubv4sf4
, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
22147 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmsubv2df4
, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
22148 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmaddv4sf4
, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
22149 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmaddv2df4
, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
22150 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmaddv4sf4
, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
22151 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmaddv2df4
, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
22152 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmsubv4sf4
, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
22153 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmsubv2df4
, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
22154 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmsubv4sf4
, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
22155 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmsubv2df4
, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
22156 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2di
, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
22157 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2di
, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
22158 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v4si
, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
22159 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v8hi
, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
22160 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v16qi
, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
22161 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2df
, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
22162 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v4sf
, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
22163 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pperm
, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
22164 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_permv4sf
, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS
, UNKNOWN
, (int)MULTI_ARG_3_PERMPS
},
22165 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_permv2df
, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD
, UNKNOWN
, (int)MULTI_ARG_3_PERMPD
},
22166 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssww
, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
22167 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsww
, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
22168 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsswd
, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
22169 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacswd
, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
22170 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssdd
, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
22171 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsdd
, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
22172 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssdql
, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
22173 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssdqh
, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
22174 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsdql
, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
22175 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsdqh
, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
22176 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmadcsswd
, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
22177 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmadcswd
, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
22178 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vrotlv2di3
, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
22179 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vrotlv4si3
, "__builtin_ia32_protd", IX86_BUILTIN_PROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
22180 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vrotlv8hi3
, "__builtin_ia32_protw", IX86_BUILTIN_PROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
22181 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vrotlv16qi3
, "__builtin_ia32_protb", IX86_BUILTIN_PROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
22182 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv2di3
, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
22183 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv4si3
, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
22184 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv8hi3
, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
22185 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv16qi3
, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
22186 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv2di3
, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
22187 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv4si3
, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
22188 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv8hi3
, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
22189 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv16qi3
, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
22190 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv2di3
, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
22191 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv4si3
, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
22192 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv8hi3
, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
22193 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv16qi3
, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
22194 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmfrczv4sf2
, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
22195 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmfrczv2df2
, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
22196 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_frczv4sf2
, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
22197 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_frczv2df2
, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
22198 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_cvtph2ps
, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int)MULTI_ARG_1_PH2PS
},
22199 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_cvtps2ph
, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int)MULTI_ARG_1_PS2PH
},
22200 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddbw
, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
22201 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddbd
, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
22202 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddbq
, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
22203 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddwd
, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
22204 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddwq
, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
22205 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phadddq
, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
22206 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddubw
, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
22207 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddubd
, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
22208 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddubq
, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
22209 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phadduwd
, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
22210 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phadduwq
, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
22211 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddudq
, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
22212 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phsubbw
, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
22213 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phsubwd
, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
22214 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phsubdq
, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
22216 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS
, EQ
, (int)MULTI_ARG_2_SF_CMP
},
22217 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
22218 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
22219 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS
, LT
, (int)MULTI_ARG_2_SF_CMP
},
22220 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS
, LE
, (int)MULTI_ARG_2_SF_CMP
},
22221 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS
, GT
, (int)MULTI_ARG_2_SF_CMP
},
22222 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS
, GE
, (int)MULTI_ARG_2_SF_CMP
},
22223 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS
, UNEQ
, (int)MULTI_ARG_2_SF_CMP
},
22224 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
22225 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
22226 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS
, UNLT
, (int)MULTI_ARG_2_SF_CMP
},
22227 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS
, UNLE
, (int)MULTI_ARG_2_SF_CMP
},
22228 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS
, UNGT
, (int)MULTI_ARG_2_SF_CMP
},
22229 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS
, UNGE
, (int)MULTI_ARG_2_SF_CMP
},
22230 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS
, ORDERED
, (int)MULTI_ARG_2_SF_CMP
},
22231 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS
, UNORDERED
, (int)MULTI_ARG_2_SF_CMP
},
22233 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD
, EQ
, (int)MULTI_ARG_2_DF_CMP
},
22234 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
22235 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
22236 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD
, LT
, (int)MULTI_ARG_2_DF_CMP
},
22237 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD
, LE
, (int)MULTI_ARG_2_DF_CMP
},
22238 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD
, GT
, (int)MULTI_ARG_2_DF_CMP
},
22239 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD
, GE
, (int)MULTI_ARG_2_DF_CMP
},
22240 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD
, UNEQ
, (int)MULTI_ARG_2_DF_CMP
},
22241 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
22242 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
22243 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD
, UNLT
, (int)MULTI_ARG_2_DF_CMP
},
22244 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD
, UNLE
, (int)MULTI_ARG_2_DF_CMP
},
22245 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD
, UNGT
, (int)MULTI_ARG_2_DF_CMP
},
22246 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD
, UNGE
, (int)MULTI_ARG_2_DF_CMP
},
22247 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD
, ORDERED
, (int)MULTI_ARG_2_DF_CMP
},
22248 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD
, UNORDERED
, (int)MULTI_ARG_2_DF_CMP
},
22250 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS
, EQ
, (int)MULTI_ARG_2_SF_CMP
},
22251 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
22252 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
22253 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS
, LT
, (int)MULTI_ARG_2_SF_CMP
},
22254 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS
, LE
, (int)MULTI_ARG_2_SF_CMP
},
22255 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS
, GT
, (int)MULTI_ARG_2_SF_CMP
},
22256 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS
, GE
, (int)MULTI_ARG_2_SF_CMP
},
22257 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS
, UNEQ
, (int)MULTI_ARG_2_SF_CMP
},
22258 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
22259 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
22260 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS
, UNLT
, (int)MULTI_ARG_2_SF_CMP
},
22261 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS
, UNLE
, (int)MULTI_ARG_2_SF_CMP
},
22262 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS
, UNGT
, (int)MULTI_ARG_2_SF_CMP
},
22263 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS
, UNGE
, (int)MULTI_ARG_2_SF_CMP
},
22264 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS
, ORDERED
, (int)MULTI_ARG_2_SF_CMP
},
22265 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS
, UNORDERED
, (int)MULTI_ARG_2_SF_CMP
},
22267 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD
, EQ
, (int)MULTI_ARG_2_DF_CMP
},
22268 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
22269 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
22270 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD
, LT
, (int)MULTI_ARG_2_DF_CMP
},
22271 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD
, LE
, (int)MULTI_ARG_2_DF_CMP
},
22272 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD
, GT
, (int)MULTI_ARG_2_DF_CMP
},
22273 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD
, GE
, (int)MULTI_ARG_2_DF_CMP
},
22274 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD
, UNEQ
, (int)MULTI_ARG_2_DF_CMP
},
22275 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
22276 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
22277 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD
, UNLT
, (int)MULTI_ARG_2_DF_CMP
},
22278 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD
, UNLE
, (int)MULTI_ARG_2_DF_CMP
},
22279 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD
, UNGT
, (int)MULTI_ARG_2_DF_CMP
},
22280 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD
, UNGE
, (int)MULTI_ARG_2_DF_CMP
},
22281 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD
, ORDERED
, (int)MULTI_ARG_2_DF_CMP
},
22282 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD
, UNORDERED
, (int)MULTI_ARG_2_DF_CMP
},
22284 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
22285 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
22286 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
22287 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
22288 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
22289 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
22290 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
22292 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
22293 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
22294 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
22295 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
22296 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
22297 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
22298 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
22300 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
22301 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
22302 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
22303 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
22304 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
22305 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
22306 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
22308 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
22309 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
22310 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
22311 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
22312 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
22313 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
22314 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
22316 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v16qi3
,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
22317 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v16qi3
,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
22318 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v16qi3
,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
22319 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
22320 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
22321 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
22322 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
22324 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v8hi3
, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
22325 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v8hi3
, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
22326 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v8hi3
, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
22327 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
22328 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
22329 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
22330 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
22332 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v4si3
, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
22333 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v4si3
, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
22334 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v4si3
, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
22335 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
22336 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
22337 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
22338 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
22340 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v2di3
, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
22341 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v2di3
, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
22342 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v2di3
, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
22343 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
22344 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
22345 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
22346 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
22348 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS
, (enum rtx_code
) COM_FALSE_S
, (int)MULTI_ARG_2_SF_TF
},
22349 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS
, (enum rtx_code
) COM_TRUE_S
, (int)MULTI_ARG_2_SF_TF
},
22350 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS
, (enum rtx_code
) COM_FALSE_P
, (int)MULTI_ARG_2_SF_TF
},
22351 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS
, (enum rtx_code
) COM_TRUE_P
, (int)MULTI_ARG_2_SF_TF
},
22352 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD
, (enum rtx_code
) COM_FALSE_S
, (int)MULTI_ARG_2_DF_TF
},
22353 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD
, (enum rtx_code
) COM_TRUE_S
, (int)MULTI_ARG_2_DF_TF
},
22354 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD
, (enum rtx_code
) COM_FALSE_P
, (int)MULTI_ARG_2_DF_TF
},
22355 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD
, (enum rtx_code
) COM_TRUE_P
, (int)MULTI_ARG_2_DF_TF
},
22357 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
22358 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
22359 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
22360 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
22361 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
22362 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
22363 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
22364 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
22366 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
22367 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
22368 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
22369 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
22370 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
22371 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
22372 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
22373 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
22376 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22377 in the current target ISA to allow the user to compile particular modules
22378 with different target specific options that differ from the command line
22381 ix86_init_mmx_sse_builtins (void)
22383 const struct builtin_description
* d
;
22386 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
22387 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
22388 tree V1DI_type_node
22389 = build_vector_type_for_mode (long_long_integer_type_node
, V1DImode
);
22390 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
22391 tree V2DI_type_node
22392 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
22393 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
22394 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
22395 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
22396 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
22397 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
22398 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
22400 tree pchar_type_node
= build_pointer_type (char_type_node
);
22401 tree pcchar_type_node
22402 = build_pointer_type (build_type_variant (char_type_node
, 1, 0));
22403 tree pfloat_type_node
= build_pointer_type (float_type_node
);
22404 tree pcfloat_type_node
22405 = build_pointer_type (build_type_variant (float_type_node
, 1, 0));
22406 tree pv2sf_type_node
= build_pointer_type (V2SF_type_node
);
22407 tree pcv2sf_type_node
22408 = build_pointer_type (build_type_variant (V2SF_type_node
, 1, 0));
22409 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
22410 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
22413 tree int_ftype_v4sf_v4sf
22414 = build_function_type_list (integer_type_node
,
22415 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
22416 tree v4si_ftype_v4sf_v4sf
22417 = build_function_type_list (V4SI_type_node
,
22418 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
22419 /* MMX/SSE/integer conversions. */
22420 tree int_ftype_v4sf
22421 = build_function_type_list (integer_type_node
,
22422 V4SF_type_node
, NULL_TREE
);
22423 tree int64_ftype_v4sf
22424 = build_function_type_list (long_long_integer_type_node
,
22425 V4SF_type_node
, NULL_TREE
);
22426 tree int_ftype_v8qi
22427 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
22428 tree v4sf_ftype_v4sf_int
22429 = build_function_type_list (V4SF_type_node
,
22430 V4SF_type_node
, integer_type_node
, NULL_TREE
);
22431 tree v4sf_ftype_v4sf_int64
22432 = build_function_type_list (V4SF_type_node
,
22433 V4SF_type_node
, long_long_integer_type_node
,
22435 tree v4sf_ftype_v4sf_v2si
22436 = build_function_type_list (V4SF_type_node
,
22437 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
22439 /* Miscellaneous. */
22440 tree v8qi_ftype_v4hi_v4hi
22441 = build_function_type_list (V8QI_type_node
,
22442 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
22443 tree v4hi_ftype_v2si_v2si
22444 = build_function_type_list (V4HI_type_node
,
22445 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
22446 tree v4sf_ftype_v4sf_v4sf_int
22447 = build_function_type_list (V4SF_type_node
,
22448 V4SF_type_node
, V4SF_type_node
,
22449 integer_type_node
, NULL_TREE
);
22450 tree v2si_ftype_v4hi_v4hi
22451 = build_function_type_list (V2SI_type_node
,
22452 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
22453 tree v4hi_ftype_v4hi_int
22454 = build_function_type_list (V4HI_type_node
,
22455 V4HI_type_node
, integer_type_node
, NULL_TREE
);
22456 tree v2si_ftype_v2si_int
22457 = build_function_type_list (V2SI_type_node
,
22458 V2SI_type_node
, integer_type_node
, NULL_TREE
);
22459 tree v1di_ftype_v1di_int
22460 = build_function_type_list (V1DI_type_node
,
22461 V1DI_type_node
, integer_type_node
, NULL_TREE
);
22463 tree void_ftype_void
22464 = build_function_type (void_type_node
, void_list_node
);
22465 tree void_ftype_unsigned
22466 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
22467 tree void_ftype_unsigned_unsigned
22468 = build_function_type_list (void_type_node
, unsigned_type_node
,
22469 unsigned_type_node
, NULL_TREE
);
22470 tree void_ftype_pcvoid_unsigned_unsigned
22471 = build_function_type_list (void_type_node
, const_ptr_type_node
,
22472 unsigned_type_node
, unsigned_type_node
,
22474 tree unsigned_ftype_void
22475 = build_function_type (unsigned_type_node
, void_list_node
);
22476 tree v2si_ftype_v4sf
22477 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
22478 /* Loads/stores. */
22479 tree void_ftype_v8qi_v8qi_pchar
22480 = build_function_type_list (void_type_node
,
22481 V8QI_type_node
, V8QI_type_node
,
22482 pchar_type_node
, NULL_TREE
);
22483 tree v4sf_ftype_pcfloat
22484 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
22485 tree v4sf_ftype_v4sf_pcv2sf
22486 = build_function_type_list (V4SF_type_node
,
22487 V4SF_type_node
, pcv2sf_type_node
, NULL_TREE
);
22488 tree void_ftype_pv2sf_v4sf
22489 = build_function_type_list (void_type_node
,
22490 pv2sf_type_node
, V4SF_type_node
, NULL_TREE
);
22491 tree void_ftype_pfloat_v4sf
22492 = build_function_type_list (void_type_node
,
22493 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
22494 tree void_ftype_pdi_di
22495 = build_function_type_list (void_type_node
,
22496 pdi_type_node
, long_long_unsigned_type_node
,
22498 tree void_ftype_pv2di_v2di
22499 = build_function_type_list (void_type_node
,
22500 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
22501 /* Normal vector unops. */
22502 tree v4sf_ftype_v4sf
22503 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
22504 tree v16qi_ftype_v16qi
22505 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
22506 tree v8hi_ftype_v8hi
22507 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
22508 tree v4si_ftype_v4si
22509 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
22510 tree v8qi_ftype_v8qi
22511 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
22512 tree v4hi_ftype_v4hi
22513 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
22515 /* Normal vector binops. */
22516 tree v4sf_ftype_v4sf_v4sf
22517 = build_function_type_list (V4SF_type_node
,
22518 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
22519 tree v8qi_ftype_v8qi_v8qi
22520 = build_function_type_list (V8QI_type_node
,
22521 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
22522 tree v4hi_ftype_v4hi_v4hi
22523 = build_function_type_list (V4HI_type_node
,
22524 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
22525 tree v2si_ftype_v2si_v2si
22526 = build_function_type_list (V2SI_type_node
,
22527 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
22528 tree v1di_ftype_v1di_v1di
22529 = build_function_type_list (V1DI_type_node
,
22530 V1DI_type_node
, V1DI_type_node
, NULL_TREE
);
22531 tree v1di_ftype_v1di_v1di_int
22532 = build_function_type_list (V1DI_type_node
,
22533 V1DI_type_node
, V1DI_type_node
,
22534 integer_type_node
, NULL_TREE
);
22535 tree v2si_ftype_v2sf
22536 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
22537 tree v2sf_ftype_v2si
22538 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
22539 tree v2si_ftype_v2si
22540 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
22541 tree v2sf_ftype_v2sf
22542 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
22543 tree v2sf_ftype_v2sf_v2sf
22544 = build_function_type_list (V2SF_type_node
,
22545 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
22546 tree v2si_ftype_v2sf_v2sf
22547 = build_function_type_list (V2SI_type_node
,
22548 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
22549 tree pint_type_node
= build_pointer_type (integer_type_node
);
22550 tree pdouble_type_node
= build_pointer_type (double_type_node
);
22551 tree pcdouble_type_node
= build_pointer_type (
22552 build_type_variant (double_type_node
, 1, 0));
22553 tree int_ftype_v2df_v2df
22554 = build_function_type_list (integer_type_node
,
22555 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
22557 tree void_ftype_pcvoid
22558 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
22559 tree v4sf_ftype_v4si
22560 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
22561 tree v4si_ftype_v4sf
22562 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
22563 tree v2df_ftype_v4si
22564 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
22565 tree v4si_ftype_v2df
22566 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
22567 tree v4si_ftype_v2df_v2df
22568 = build_function_type_list (V4SI_type_node
,
22569 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
22570 tree v2si_ftype_v2df
22571 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
22572 tree v4sf_ftype_v2df
22573 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
22574 tree v2df_ftype_v2si
22575 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
22576 tree v2df_ftype_v4sf
22577 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
22578 tree int_ftype_v2df
22579 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
22580 tree int64_ftype_v2df
22581 = build_function_type_list (long_long_integer_type_node
,
22582 V2DF_type_node
, NULL_TREE
);
22583 tree v2df_ftype_v2df_int
22584 = build_function_type_list (V2DF_type_node
,
22585 V2DF_type_node
, integer_type_node
, NULL_TREE
);
22586 tree v2df_ftype_v2df_int64
22587 = build_function_type_list (V2DF_type_node
,
22588 V2DF_type_node
, long_long_integer_type_node
,
22590 tree v4sf_ftype_v4sf_v2df
22591 = build_function_type_list (V4SF_type_node
,
22592 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
22593 tree v2df_ftype_v2df_v4sf
22594 = build_function_type_list (V2DF_type_node
,
22595 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
22596 tree v2df_ftype_v2df_v2df_int
22597 = build_function_type_list (V2DF_type_node
,
22598 V2DF_type_node
, V2DF_type_node
,
22601 tree v2df_ftype_v2df_pcdouble
22602 = build_function_type_list (V2DF_type_node
,
22603 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
22604 tree void_ftype_pdouble_v2df
22605 = build_function_type_list (void_type_node
,
22606 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
22607 tree void_ftype_pint_int
22608 = build_function_type_list (void_type_node
,
22609 pint_type_node
, integer_type_node
, NULL_TREE
);
22610 tree void_ftype_v16qi_v16qi_pchar
22611 = build_function_type_list (void_type_node
,
22612 V16QI_type_node
, V16QI_type_node
,
22613 pchar_type_node
, NULL_TREE
);
22614 tree v2df_ftype_pcdouble
22615 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
22616 tree v2df_ftype_v2df_v2df
22617 = build_function_type_list (V2DF_type_node
,
22618 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
22619 tree v16qi_ftype_v16qi_v16qi
22620 = build_function_type_list (V16QI_type_node
,
22621 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
22622 tree v8hi_ftype_v8hi_v8hi
22623 = build_function_type_list (V8HI_type_node
,
22624 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
22625 tree v4si_ftype_v4si_v4si
22626 = build_function_type_list (V4SI_type_node
,
22627 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
22628 tree v2di_ftype_v2di_v2di
22629 = build_function_type_list (V2DI_type_node
,
22630 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
22631 tree v2di_ftype_v2df_v2df
22632 = build_function_type_list (V2DI_type_node
,
22633 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
22634 tree v2df_ftype_v2df
22635 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
22636 tree v2di_ftype_v2di_int
22637 = build_function_type_list (V2DI_type_node
,
22638 V2DI_type_node
, integer_type_node
, NULL_TREE
);
22639 tree v2di_ftype_v2di_v2di_int
22640 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
22641 V2DI_type_node
, integer_type_node
, NULL_TREE
);
22642 tree v4si_ftype_v4si_int
22643 = build_function_type_list (V4SI_type_node
,
22644 V4SI_type_node
, integer_type_node
, NULL_TREE
);
22645 tree v8hi_ftype_v8hi_int
22646 = build_function_type_list (V8HI_type_node
,
22647 V8HI_type_node
, integer_type_node
, NULL_TREE
);
22648 tree v4si_ftype_v8hi_v8hi
22649 = build_function_type_list (V4SI_type_node
,
22650 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
22651 tree v1di_ftype_v8qi_v8qi
22652 = build_function_type_list (V1DI_type_node
,
22653 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
22654 tree v1di_ftype_v2si_v2si
22655 = build_function_type_list (V1DI_type_node
,
22656 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
22657 tree v2di_ftype_v16qi_v16qi
22658 = build_function_type_list (V2DI_type_node
,
22659 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
22660 tree v2di_ftype_v4si_v4si
22661 = build_function_type_list (V2DI_type_node
,
22662 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
22663 tree int_ftype_v16qi
22664 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
22665 tree v16qi_ftype_pcchar
22666 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
22667 tree void_ftype_pchar_v16qi
22668 = build_function_type_list (void_type_node
,
22669 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
22671 tree v2di_ftype_v2di_unsigned_unsigned
22672 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
22673 unsigned_type_node
, unsigned_type_node
,
22675 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22676 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
22677 unsigned_type_node
, unsigned_type_node
,
22679 tree v2di_ftype_v2di_v16qi
22680 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
22682 tree v2df_ftype_v2df_v2df_v2df
22683 = build_function_type_list (V2DF_type_node
,
22684 V2DF_type_node
, V2DF_type_node
,
22685 V2DF_type_node
, NULL_TREE
);
22686 tree v4sf_ftype_v4sf_v4sf_v4sf
22687 = build_function_type_list (V4SF_type_node
,
22688 V4SF_type_node
, V4SF_type_node
,
22689 V4SF_type_node
, NULL_TREE
);
22690 tree v8hi_ftype_v16qi
22691 = build_function_type_list (V8HI_type_node
, V16QI_type_node
,
22693 tree v4si_ftype_v16qi
22694 = build_function_type_list (V4SI_type_node
, V16QI_type_node
,
22696 tree v2di_ftype_v16qi
22697 = build_function_type_list (V2DI_type_node
, V16QI_type_node
,
22699 tree v4si_ftype_v8hi
22700 = build_function_type_list (V4SI_type_node
, V8HI_type_node
,
22702 tree v2di_ftype_v8hi
22703 = build_function_type_list (V2DI_type_node
, V8HI_type_node
,
22705 tree v2di_ftype_v4si
22706 = build_function_type_list (V2DI_type_node
, V4SI_type_node
,
22708 tree v2di_ftype_pv2di
22709 = build_function_type_list (V2DI_type_node
, pv2di_type_node
,
22711 tree v16qi_ftype_v16qi_v16qi_int
22712 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
22713 V16QI_type_node
, integer_type_node
,
22715 tree v16qi_ftype_v16qi_v16qi_v16qi
22716 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
22717 V16QI_type_node
, V16QI_type_node
,
22719 tree v8hi_ftype_v8hi_v8hi_int
22720 = build_function_type_list (V8HI_type_node
, V8HI_type_node
,
22721 V8HI_type_node
, integer_type_node
,
22723 tree v4si_ftype_v4si_v4si_int
22724 = build_function_type_list (V4SI_type_node
, V4SI_type_node
,
22725 V4SI_type_node
, integer_type_node
,
22727 tree int_ftype_v2di_v2di
22728 = build_function_type_list (integer_type_node
,
22729 V2DI_type_node
, V2DI_type_node
,
22731 tree int_ftype_v16qi_int_v16qi_int_int
22732 = build_function_type_list (integer_type_node
,
22739 tree v16qi_ftype_v16qi_int_v16qi_int_int
22740 = build_function_type_list (V16QI_type_node
,
22747 tree int_ftype_v16qi_v16qi_int
22748 = build_function_type_list (integer_type_node
,
22754 /* SSE5 instructions */
22755 tree v2di_ftype_v2di_v2di_v2di
22756 = build_function_type_list (V2DI_type_node
,
22762 tree v4si_ftype_v4si_v4si_v4si
22763 = build_function_type_list (V4SI_type_node
,
22769 tree v4si_ftype_v4si_v4si_v2di
22770 = build_function_type_list (V4SI_type_node
,
22776 tree v8hi_ftype_v8hi_v8hi_v8hi
22777 = build_function_type_list (V8HI_type_node
,
22783 tree v8hi_ftype_v8hi_v8hi_v4si
22784 = build_function_type_list (V8HI_type_node
,
22790 tree v2df_ftype_v2df_v2df_v16qi
22791 = build_function_type_list (V2DF_type_node
,
22797 tree v4sf_ftype_v4sf_v4sf_v16qi
22798 = build_function_type_list (V4SF_type_node
,
22804 tree v2di_ftype_v2di_si
22805 = build_function_type_list (V2DI_type_node
,
22810 tree v4si_ftype_v4si_si
22811 = build_function_type_list (V4SI_type_node
,
22816 tree v8hi_ftype_v8hi_si
22817 = build_function_type_list (V8HI_type_node
,
22822 tree v16qi_ftype_v16qi_si
22823 = build_function_type_list (V16QI_type_node
,
22827 tree v4sf_ftype_v4hi
22828 = build_function_type_list (V4SF_type_node
,
22832 tree v4hi_ftype_v4sf
22833 = build_function_type_list (V4HI_type_node
,
22837 tree v2di_ftype_v2di
22838 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
22840 tree v16qi_ftype_v8hi_v8hi
22841 = build_function_type_list (V16QI_type_node
,
22842 V8HI_type_node
, V8HI_type_node
,
22844 tree v8hi_ftype_v4si_v4si
22845 = build_function_type_list (V8HI_type_node
,
22846 V4SI_type_node
, V4SI_type_node
,
22848 tree v8hi_ftype_v16qi_v16qi
22849 = build_function_type_list (V8HI_type_node
,
22850 V16QI_type_node
, V16QI_type_node
,
22852 tree v4hi_ftype_v8qi_v8qi
22853 = build_function_type_list (V4HI_type_node
,
22854 V8QI_type_node
, V8QI_type_node
,
22856 tree unsigned_ftype_unsigned_uchar
22857 = build_function_type_list (unsigned_type_node
,
22858 unsigned_type_node
,
22859 unsigned_char_type_node
,
22861 tree unsigned_ftype_unsigned_ushort
22862 = build_function_type_list (unsigned_type_node
,
22863 unsigned_type_node
,
22864 short_unsigned_type_node
,
22866 tree unsigned_ftype_unsigned_unsigned
22867 = build_function_type_list (unsigned_type_node
,
22868 unsigned_type_node
,
22869 unsigned_type_node
,
22871 tree uint64_ftype_uint64_uint64
22872 = build_function_type_list (long_long_unsigned_type_node
,
22873 long_long_unsigned_type_node
,
22874 long_long_unsigned_type_node
,
22876 tree float_ftype_float
22877 = build_function_type_list (float_type_node
,
22882 tree V32QI_type_node
= build_vector_type_for_mode (char_type_node
,
22884 tree V8SI_type_node
= build_vector_type_for_mode (intSI_type_node
,
22886 tree V8SF_type_node
= build_vector_type_for_mode (float_type_node
,
22888 tree V4DI_type_node
= build_vector_type_for_mode (long_long_integer_type_node
,
22890 tree V4DF_type_node
= build_vector_type_for_mode (double_type_node
,
22892 tree v8sf_ftype_v8sf
22893 = build_function_type_list (V8SF_type_node
,
22896 tree v8si_ftype_v8sf
22897 = build_function_type_list (V8SI_type_node
,
22900 tree v8sf_ftype_v8si
22901 = build_function_type_list (V8SF_type_node
,
22904 tree v4si_ftype_v4df
22905 = build_function_type_list (V4SI_type_node
,
22908 tree v4df_ftype_v4df
22909 = build_function_type_list (V4DF_type_node
,
22912 tree v4df_ftype_v4si
22913 = build_function_type_list (V4DF_type_node
,
22916 tree v4df_ftype_v4sf
22917 = build_function_type_list (V4DF_type_node
,
22920 tree v4sf_ftype_v4df
22921 = build_function_type_list (V4SF_type_node
,
22924 tree v8sf_ftype_v8sf_v8sf
22925 = build_function_type_list (V8SF_type_node
,
22926 V8SF_type_node
, V8SF_type_node
,
22928 tree v4df_ftype_v4df_v4df
22929 = build_function_type_list (V4DF_type_node
,
22930 V4DF_type_node
, V4DF_type_node
,
22932 tree v8sf_ftype_v8sf_int
22933 = build_function_type_list (V8SF_type_node
,
22934 V8SF_type_node
, integer_type_node
,
22936 tree v4si_ftype_v8si_int
22937 = build_function_type_list (V4SI_type_node
,
22938 V8SI_type_node
, integer_type_node
,
22940 tree v4df_ftype_v4df_int
22941 = build_function_type_list (V4DF_type_node
,
22942 V4DF_type_node
, integer_type_node
,
22944 tree v4sf_ftype_v8sf_int
22945 = build_function_type_list (V4SF_type_node
,
22946 V8SF_type_node
, integer_type_node
,
22948 tree v2df_ftype_v4df_int
22949 = build_function_type_list (V2DF_type_node
,
22950 V4DF_type_node
, integer_type_node
,
22952 tree v8sf_ftype_v8sf_v8sf_int
22953 = build_function_type_list (V8SF_type_node
,
22954 V8SF_type_node
, V8SF_type_node
,
22957 tree v8sf_ftype_v8sf_v8sf_v8sf
22958 = build_function_type_list (V8SF_type_node
,
22959 V8SF_type_node
, V8SF_type_node
,
22962 tree v4df_ftype_v4df_v4df_v4df
22963 = build_function_type_list (V4DF_type_node
,
22964 V4DF_type_node
, V4DF_type_node
,
22967 tree v8si_ftype_v8si_v8si_int
22968 = build_function_type_list (V8SI_type_node
,
22969 V8SI_type_node
, V8SI_type_node
,
22972 tree v4df_ftype_v4df_v4df_int
22973 = build_function_type_list (V4DF_type_node
,
22974 V4DF_type_node
, V4DF_type_node
,
22977 tree v8sf_ftype_pcfloat
22978 = build_function_type_list (V8SF_type_node
,
22981 tree v4df_ftype_pcdouble
22982 = build_function_type_list (V4DF_type_node
,
22983 pcdouble_type_node
,
22985 tree pcv4sf_type_node
22986 = build_pointer_type (build_type_variant (V4SF_type_node
, 1, 0));
22987 tree pcv2df_type_node
22988 = build_pointer_type (build_type_variant (V2DF_type_node
, 1, 0));
22989 tree v8sf_ftype_pcv4sf
22990 = build_function_type_list (V8SF_type_node
,
22993 tree v4df_ftype_pcv2df
22994 = build_function_type_list (V4DF_type_node
,
22997 tree v32qi_ftype_pcchar
22998 = build_function_type_list (V32QI_type_node
,
23001 tree void_ftype_pchar_v32qi
23002 = build_function_type_list (void_type_node
,
23003 pchar_type_node
, V32QI_type_node
,
23005 tree v8si_ftype_v8si_v4si_int
23006 = build_function_type_list (V8SI_type_node
,
23007 V8SI_type_node
, V4SI_type_node
,
23010 tree pv4di_type_node
= build_pointer_type (V4DI_type_node
);
23011 tree void_ftype_pv4di_v4di
23012 = build_function_type_list (void_type_node
,
23013 pv4di_type_node
, V4DI_type_node
,
23015 tree v8sf_ftype_v8sf_v4sf_int
23016 = build_function_type_list (V8SF_type_node
,
23017 V8SF_type_node
, V4SF_type_node
,
23020 tree v4df_ftype_v4df_v2df_int
23021 = build_function_type_list (V4DF_type_node
,
23022 V4DF_type_node
, V2DF_type_node
,
23025 tree void_ftype_pfloat_v8sf
23026 = build_function_type_list (void_type_node
,
23027 pfloat_type_node
, V8SF_type_node
,
23029 tree void_ftype_pdouble_v4df
23030 = build_function_type_list (void_type_node
,
23031 pdouble_type_node
, V4DF_type_node
,
23033 tree pv8sf_type_node
= build_pointer_type (V8SF_type_node
);
23034 tree pv4sf_type_node
= build_pointer_type (V4SF_type_node
);
23035 tree pv4df_type_node
= build_pointer_type (V4DF_type_node
);
23036 tree pv2df_type_node
= build_pointer_type (V2DF_type_node
);
23037 tree pcv8sf_type_node
23038 = build_pointer_type (build_type_variant (V8SF_type_node
, 1, 0));
23039 tree pcv4df_type_node
23040 = build_pointer_type (build_type_variant (V4DF_type_node
, 1, 0));
23041 tree v8sf_ftype_pcv8sf_v8sf
23042 = build_function_type_list (V8SF_type_node
,
23043 pcv8sf_type_node
, V8SF_type_node
,
23045 tree v4df_ftype_pcv4df_v4df
23046 = build_function_type_list (V4DF_type_node
,
23047 pcv4df_type_node
, V4DF_type_node
,
23049 tree v4sf_ftype_pcv4sf_v4sf
23050 = build_function_type_list (V4SF_type_node
,
23051 pcv4sf_type_node
, V4SF_type_node
,
23053 tree v2df_ftype_pcv2df_v2df
23054 = build_function_type_list (V2DF_type_node
,
23055 pcv2df_type_node
, V2DF_type_node
,
23057 tree void_ftype_pv8sf_v8sf_v8sf
23058 = build_function_type_list (void_type_node
,
23059 pv8sf_type_node
, V8SF_type_node
,
23062 tree void_ftype_pv4df_v4df_v4df
23063 = build_function_type_list (void_type_node
,
23064 pv4df_type_node
, V4DF_type_node
,
23067 tree void_ftype_pv4sf_v4sf_v4sf
23068 = build_function_type_list (void_type_node
,
23069 pv4sf_type_node
, V4SF_type_node
,
23072 tree void_ftype_pv2df_v2df_v2df
23073 = build_function_type_list (void_type_node
,
23074 pv2df_type_node
, V2DF_type_node
,
23077 tree v4df_ftype_v2df
23078 = build_function_type_list (V4DF_type_node
,
23081 tree v8sf_ftype_v4sf
23082 = build_function_type_list (V8SF_type_node
,
23085 tree v8si_ftype_v4si
23086 = build_function_type_list (V8SI_type_node
,
23089 tree v2df_ftype_v4df
23090 = build_function_type_list (V2DF_type_node
,
23093 tree v4sf_ftype_v8sf
23094 = build_function_type_list (V4SF_type_node
,
23097 tree v4si_ftype_v8si
23098 = build_function_type_list (V4SI_type_node
,
23101 tree int_ftype_v4df
23102 = build_function_type_list (integer_type_node
,
23105 tree int_ftype_v8sf
23106 = build_function_type_list (integer_type_node
,
23109 tree int_ftype_v8sf_v8sf
23110 = build_function_type_list (integer_type_node
,
23111 V8SF_type_node
, V8SF_type_node
,
23113 tree int_ftype_v4di_v4di
23114 = build_function_type_list (integer_type_node
,
23115 V4DI_type_node
, V4DI_type_node
,
23117 tree int_ftype_v4df_v4df
23118 = build_function_type_list (integer_type_node
,
23119 V4DF_type_node
, V4DF_type_node
,
23121 tree v8sf_ftype_v8sf_v8si
23122 = build_function_type_list (V8SF_type_node
,
23123 V8SF_type_node
, V8SI_type_node
,
23125 tree v4df_ftype_v4df_v4di
23126 = build_function_type_list (V4DF_type_node
,
23127 V4DF_type_node
, V4DI_type_node
,
23129 tree v4sf_ftype_v4sf_v4si
23130 = build_function_type_list (V4SF_type_node
,
23131 V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
23132 tree v2df_ftype_v2df_v2di
23133 = build_function_type_list (V2DF_type_node
,
23134 V2DF_type_node
, V2DI_type_node
, NULL_TREE
);
23136 /* Integer intrinsics. */
23137 tree uint64_ftype_void
23138 = build_function_type (long_long_unsigned_type_node
,
23141 = build_function_type_list (integer_type_node
,
23142 integer_type_node
, NULL_TREE
);
23143 tree int64_ftype_int64
23144 = build_function_type_list (long_long_integer_type_node
,
23145 long_long_integer_type_node
,
23147 tree uint64_ftype_int
23148 = build_function_type_list (long_long_unsigned_type_node
,
23149 integer_type_node
, NULL_TREE
);
23150 tree punsigned_type_node
= build_pointer_type (unsigned_type_node
);
23151 tree uint64_ftype_punsigned
23152 = build_function_type_list (long_long_unsigned_type_node
,
23153 punsigned_type_node
, NULL_TREE
);
23154 tree ushort_ftype_ushort_int
23155 = build_function_type_list (short_unsigned_type_node
,
23156 short_unsigned_type_node
,
23159 tree uchar_ftype_uchar_int
23160 = build_function_type_list (unsigned_char_type_node
,
23161 unsigned_char_type_node
,
23167 /* Add all special builtins with variable number of operands. */
23168 for (i
= 0, d
= bdesc_special_args
;
23169 i
< ARRAY_SIZE (bdesc_special_args
);
23177 switch ((enum ix86_special_builtin_type
) d
->flag
)
23179 case VOID_FTYPE_VOID
:
23180 type
= void_ftype_void
;
23182 case UINT64_FTYPE_VOID
:
23183 type
= uint64_ftype_void
;
23185 case UINT64_FTYPE_PUNSIGNED
:
23186 type
= uint64_ftype_punsigned
;
23188 case V32QI_FTYPE_PCCHAR
:
23189 type
= v32qi_ftype_pcchar
;
23191 case V16QI_FTYPE_PCCHAR
:
23192 type
= v16qi_ftype_pcchar
;
23194 case V8SF_FTYPE_PCV4SF
:
23195 type
= v8sf_ftype_pcv4sf
;
23197 case V8SF_FTYPE_PCFLOAT
:
23198 type
= v8sf_ftype_pcfloat
;
23200 case V4DF_FTYPE_PCV2DF
:
23201 type
= v4df_ftype_pcv2df
;
23203 case V4DF_FTYPE_PCDOUBLE
:
23204 type
= v4df_ftype_pcdouble
;
23206 case V4SF_FTYPE_PCFLOAT
:
23207 type
= v4sf_ftype_pcfloat
;
23209 case V2DI_FTYPE_PV2DI
:
23210 type
= v2di_ftype_pv2di
;
23212 case V2DF_FTYPE_PCDOUBLE
:
23213 type
= v2df_ftype_pcdouble
;
23215 case V8SF_FTYPE_PCV8SF_V8SF
:
23216 type
= v8sf_ftype_pcv8sf_v8sf
;
23218 case V4DF_FTYPE_PCV4DF_V4DF
:
23219 type
= v4df_ftype_pcv4df_v4df
;
23221 case V4SF_FTYPE_V4SF_PCV2SF
:
23222 type
= v4sf_ftype_v4sf_pcv2sf
;
23224 case V4SF_FTYPE_PCV4SF_V4SF
:
23225 type
= v4sf_ftype_pcv4sf_v4sf
;
23227 case V2DF_FTYPE_V2DF_PCDOUBLE
:
23228 type
= v2df_ftype_v2df_pcdouble
;
23230 case V2DF_FTYPE_PCV2DF_V2DF
:
23231 type
= v2df_ftype_pcv2df_v2df
;
23233 case VOID_FTYPE_PV2SF_V4SF
:
23234 type
= void_ftype_pv2sf_v4sf
;
23236 case VOID_FTYPE_PV4DI_V4DI
:
23237 type
= void_ftype_pv4di_v4di
;
23239 case VOID_FTYPE_PV2DI_V2DI
:
23240 type
= void_ftype_pv2di_v2di
;
23242 case VOID_FTYPE_PCHAR_V32QI
:
23243 type
= void_ftype_pchar_v32qi
;
23245 case VOID_FTYPE_PCHAR_V16QI
:
23246 type
= void_ftype_pchar_v16qi
;
23248 case VOID_FTYPE_PFLOAT_V8SF
:
23249 type
= void_ftype_pfloat_v8sf
;
23251 case VOID_FTYPE_PFLOAT_V4SF
:
23252 type
= void_ftype_pfloat_v4sf
;
23254 case VOID_FTYPE_PDOUBLE_V4DF
:
23255 type
= void_ftype_pdouble_v4df
;
23257 case VOID_FTYPE_PDOUBLE_V2DF
:
23258 type
= void_ftype_pdouble_v2df
;
23260 case VOID_FTYPE_PDI_DI
:
23261 type
= void_ftype_pdi_di
;
23263 case VOID_FTYPE_PINT_INT
:
23264 type
= void_ftype_pint_int
;
23266 case VOID_FTYPE_PV8SF_V8SF_V8SF
:
23267 type
= void_ftype_pv8sf_v8sf_v8sf
;
23269 case VOID_FTYPE_PV4DF_V4DF_V4DF
:
23270 type
= void_ftype_pv4df_v4df_v4df
;
23272 case VOID_FTYPE_PV4SF_V4SF_V4SF
:
23273 type
= void_ftype_pv4sf_v4sf_v4sf
;
23275 case VOID_FTYPE_PV2DF_V2DF_V2DF
:
23276 type
= void_ftype_pv2df_v2df_v2df
;
23279 gcc_unreachable ();
23282 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
23285 /* Add all builtins with variable number of operands. */
23286 for (i
= 0, d
= bdesc_args
;
23287 i
< ARRAY_SIZE (bdesc_args
);
23295 switch ((enum ix86_builtin_type
) d
->flag
)
23297 case FLOAT_FTYPE_FLOAT
:
23298 type
= float_ftype_float
;
23300 case INT_FTYPE_V8SF_V8SF_PTEST
:
23301 type
= int_ftype_v8sf_v8sf
;
23303 case INT_FTYPE_V4DI_V4DI_PTEST
:
23304 type
= int_ftype_v4di_v4di
;
23306 case INT_FTYPE_V4DF_V4DF_PTEST
:
23307 type
= int_ftype_v4df_v4df
;
23309 case INT_FTYPE_V4SF_V4SF_PTEST
:
23310 type
= int_ftype_v4sf_v4sf
;
23312 case INT_FTYPE_V2DI_V2DI_PTEST
:
23313 type
= int_ftype_v2di_v2di
;
23315 case INT_FTYPE_V2DF_V2DF_PTEST
:
23316 type
= int_ftype_v2df_v2df
;
23318 case INT_FTYPE_INT
:
23319 type
= int_ftype_int
;
23321 case UINT64_FTYPE_INT
:
23322 type
= uint64_ftype_int
;
23324 case INT64_FTYPE_INT64
:
23325 type
= int64_ftype_int64
;
23327 case INT64_FTYPE_V4SF
:
23328 type
= int64_ftype_v4sf
;
23330 case INT64_FTYPE_V2DF
:
23331 type
= int64_ftype_v2df
;
23333 case INT_FTYPE_V16QI
:
23334 type
= int_ftype_v16qi
;
23336 case INT_FTYPE_V8QI
:
23337 type
= int_ftype_v8qi
;
23339 case INT_FTYPE_V8SF
:
23340 type
= int_ftype_v8sf
;
23342 case INT_FTYPE_V4DF
:
23343 type
= int_ftype_v4df
;
23345 case INT_FTYPE_V4SF
:
23346 type
= int_ftype_v4sf
;
23348 case INT_FTYPE_V2DF
:
23349 type
= int_ftype_v2df
;
23351 case V16QI_FTYPE_V16QI
:
23352 type
= v16qi_ftype_v16qi
;
23354 case V8SI_FTYPE_V8SF
:
23355 type
= v8si_ftype_v8sf
;
23357 case V8SI_FTYPE_V4SI
:
23358 type
= v8si_ftype_v4si
;
23360 case V8HI_FTYPE_V8HI
:
23361 type
= v8hi_ftype_v8hi
;
23363 case V8HI_FTYPE_V16QI
:
23364 type
= v8hi_ftype_v16qi
;
23366 case V8QI_FTYPE_V8QI
:
23367 type
= v8qi_ftype_v8qi
;
23369 case V8SF_FTYPE_V8SF
:
23370 type
= v8sf_ftype_v8sf
;
23372 case V8SF_FTYPE_V8SI
:
23373 type
= v8sf_ftype_v8si
;
23375 case V8SF_FTYPE_V4SF
:
23376 type
= v8sf_ftype_v4sf
;
23378 case V4SI_FTYPE_V4DF
:
23379 type
= v4si_ftype_v4df
;
23381 case V4SI_FTYPE_V4SI
:
23382 type
= v4si_ftype_v4si
;
23384 case V4SI_FTYPE_V16QI
:
23385 type
= v4si_ftype_v16qi
;
23387 case V4SI_FTYPE_V8SI
:
23388 type
= v4si_ftype_v8si
;
23390 case V4SI_FTYPE_V8HI
:
23391 type
= v4si_ftype_v8hi
;
23393 case V4SI_FTYPE_V4SF
:
23394 type
= v4si_ftype_v4sf
;
23396 case V4SI_FTYPE_V2DF
:
23397 type
= v4si_ftype_v2df
;
23399 case V4HI_FTYPE_V4HI
:
23400 type
= v4hi_ftype_v4hi
;
23402 case V4DF_FTYPE_V4DF
:
23403 type
= v4df_ftype_v4df
;
23405 case V4DF_FTYPE_V4SI
:
23406 type
= v4df_ftype_v4si
;
23408 case V4DF_FTYPE_V4SF
:
23409 type
= v4df_ftype_v4sf
;
23411 case V4DF_FTYPE_V2DF
:
23412 type
= v4df_ftype_v2df
;
23414 case V4SF_FTYPE_V4SF
:
23415 case V4SF_FTYPE_V4SF_VEC_MERGE
:
23416 type
= v4sf_ftype_v4sf
;
23418 case V4SF_FTYPE_V8SF
:
23419 type
= v4sf_ftype_v8sf
;
23421 case V4SF_FTYPE_V4SI
:
23422 type
= v4sf_ftype_v4si
;
23424 case V4SF_FTYPE_V4DF
:
23425 type
= v4sf_ftype_v4df
;
23427 case V4SF_FTYPE_V2DF
:
23428 type
= v4sf_ftype_v2df
;
23430 case V2DI_FTYPE_V2DI
:
23431 type
= v2di_ftype_v2di
;
23433 case V2DI_FTYPE_V16QI
:
23434 type
= v2di_ftype_v16qi
;
23436 case V2DI_FTYPE_V8HI
:
23437 type
= v2di_ftype_v8hi
;
23439 case V2DI_FTYPE_V4SI
:
23440 type
= v2di_ftype_v4si
;
23442 case V2SI_FTYPE_V2SI
:
23443 type
= v2si_ftype_v2si
;
23445 case V2SI_FTYPE_V4SF
:
23446 type
= v2si_ftype_v4sf
;
23448 case V2SI_FTYPE_V2DF
:
23449 type
= v2si_ftype_v2df
;
23451 case V2SI_FTYPE_V2SF
:
23452 type
= v2si_ftype_v2sf
;
23454 case V2DF_FTYPE_V4DF
:
23455 type
= v2df_ftype_v4df
;
23457 case V2DF_FTYPE_V4SF
:
23458 type
= v2df_ftype_v4sf
;
23460 case V2DF_FTYPE_V2DF
:
23461 case V2DF_FTYPE_V2DF_VEC_MERGE
:
23462 type
= v2df_ftype_v2df
;
23464 case V2DF_FTYPE_V2SI
:
23465 type
= v2df_ftype_v2si
;
23467 case V2DF_FTYPE_V4SI
:
23468 type
= v2df_ftype_v4si
;
23470 case V2SF_FTYPE_V2SF
:
23471 type
= v2sf_ftype_v2sf
;
23473 case V2SF_FTYPE_V2SI
:
23474 type
= v2sf_ftype_v2si
;
23476 case V16QI_FTYPE_V16QI_V16QI
:
23477 type
= v16qi_ftype_v16qi_v16qi
;
23479 case V16QI_FTYPE_V8HI_V8HI
:
23480 type
= v16qi_ftype_v8hi_v8hi
;
23482 case V8QI_FTYPE_V8QI_V8QI
:
23483 type
= v8qi_ftype_v8qi_v8qi
;
23485 case V8QI_FTYPE_V4HI_V4HI
:
23486 type
= v8qi_ftype_v4hi_v4hi
;
23488 case V8HI_FTYPE_V8HI_V8HI
:
23489 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
23490 type
= v8hi_ftype_v8hi_v8hi
;
23492 case V8HI_FTYPE_V16QI_V16QI
:
23493 type
= v8hi_ftype_v16qi_v16qi
;
23495 case V8HI_FTYPE_V4SI_V4SI
:
23496 type
= v8hi_ftype_v4si_v4si
;
23498 case V8HI_FTYPE_V8HI_SI_COUNT
:
23499 type
= v8hi_ftype_v8hi_int
;
23501 case V8SF_FTYPE_V8SF_V8SF
:
23502 type
= v8sf_ftype_v8sf_v8sf
;
23504 case V8SF_FTYPE_V8SF_V8SI
:
23505 type
= v8sf_ftype_v8sf_v8si
;
23507 case V4SI_FTYPE_V4SI_V4SI
:
23508 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
23509 type
= v4si_ftype_v4si_v4si
;
23511 case V4SI_FTYPE_V8HI_V8HI
:
23512 type
= v4si_ftype_v8hi_v8hi
;
23514 case V4SI_FTYPE_V4SF_V4SF
:
23515 type
= v4si_ftype_v4sf_v4sf
;
23517 case V4SI_FTYPE_V2DF_V2DF
:
23518 type
= v4si_ftype_v2df_v2df
;
23520 case V4SI_FTYPE_V4SI_SI_COUNT
:
23521 type
= v4si_ftype_v4si_int
;
23523 case V4HI_FTYPE_V4HI_V4HI
:
23524 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
23525 type
= v4hi_ftype_v4hi_v4hi
;
23527 case V4HI_FTYPE_V8QI_V8QI
:
23528 type
= v4hi_ftype_v8qi_v8qi
;
23530 case V4HI_FTYPE_V2SI_V2SI
:
23531 type
= v4hi_ftype_v2si_v2si
;
23533 case V4HI_FTYPE_V4HI_SI_COUNT
:
23534 type
= v4hi_ftype_v4hi_int
;
23536 case V4DF_FTYPE_V4DF_V4DF
:
23537 type
= v4df_ftype_v4df_v4df
;
23539 case V4DF_FTYPE_V4DF_V4DI
:
23540 type
= v4df_ftype_v4df_v4di
;
23542 case V4SF_FTYPE_V4SF_V4SF
:
23543 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
23544 type
= v4sf_ftype_v4sf_v4sf
;
23546 case V4SF_FTYPE_V4SF_V4SI
:
23547 type
= v4sf_ftype_v4sf_v4si
;
23549 case V4SF_FTYPE_V4SF_V2SI
:
23550 type
= v4sf_ftype_v4sf_v2si
;
23552 case V4SF_FTYPE_V4SF_V2DF
:
23553 type
= v4sf_ftype_v4sf_v2df
;
23555 case V4SF_FTYPE_V4SF_DI
:
23556 type
= v4sf_ftype_v4sf_int64
;
23558 case V4SF_FTYPE_V4SF_SI
:
23559 type
= v4sf_ftype_v4sf_int
;
23561 case V2DI_FTYPE_V2DI_V2DI
:
23562 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
23563 type
= v2di_ftype_v2di_v2di
;
23565 case V2DI_FTYPE_V16QI_V16QI
:
23566 type
= v2di_ftype_v16qi_v16qi
;
23568 case V2DI_FTYPE_V4SI_V4SI
:
23569 type
= v2di_ftype_v4si_v4si
;
23571 case V2DI_FTYPE_V2DI_V16QI
:
23572 type
= v2di_ftype_v2di_v16qi
;
23574 case V2DI_FTYPE_V2DF_V2DF
:
23575 type
= v2di_ftype_v2df_v2df
;
23577 case V2DI_FTYPE_V2DI_SI_COUNT
:
23578 type
= v2di_ftype_v2di_int
;
23580 case V2SI_FTYPE_V2SI_V2SI
:
23581 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
23582 type
= v2si_ftype_v2si_v2si
;
23584 case V2SI_FTYPE_V4HI_V4HI
:
23585 type
= v2si_ftype_v4hi_v4hi
;
23587 case V2SI_FTYPE_V2SF_V2SF
:
23588 type
= v2si_ftype_v2sf_v2sf
;
23590 case V2SI_FTYPE_V2SI_SI_COUNT
:
23591 type
= v2si_ftype_v2si_int
;
23593 case V2DF_FTYPE_V2DF_V2DF
:
23594 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
23595 type
= v2df_ftype_v2df_v2df
;
23597 case V2DF_FTYPE_V2DF_V4SF
:
23598 type
= v2df_ftype_v2df_v4sf
;
23600 case V2DF_FTYPE_V2DF_V2DI
:
23601 type
= v2df_ftype_v2df_v2di
;
23603 case V2DF_FTYPE_V2DF_DI
:
23604 type
= v2df_ftype_v2df_int64
;
23606 case V2DF_FTYPE_V2DF_SI
:
23607 type
= v2df_ftype_v2df_int
;
23609 case V2SF_FTYPE_V2SF_V2SF
:
23610 type
= v2sf_ftype_v2sf_v2sf
;
23612 case V1DI_FTYPE_V1DI_V1DI
:
23613 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
23614 type
= v1di_ftype_v1di_v1di
;
23616 case V1DI_FTYPE_V8QI_V8QI
:
23617 type
= v1di_ftype_v8qi_v8qi
;
23619 case V1DI_FTYPE_V2SI_V2SI
:
23620 type
= v1di_ftype_v2si_v2si
;
23622 case V1DI_FTYPE_V1DI_SI_COUNT
:
23623 type
= v1di_ftype_v1di_int
;
23625 case UINT64_FTYPE_UINT64_UINT64
:
23626 type
= uint64_ftype_uint64_uint64
;
23628 case UINT_FTYPE_UINT_UINT
:
23629 type
= unsigned_ftype_unsigned_unsigned
;
23631 case UINT_FTYPE_UINT_USHORT
:
23632 type
= unsigned_ftype_unsigned_ushort
;
23634 case UINT_FTYPE_UINT_UCHAR
:
23635 type
= unsigned_ftype_unsigned_uchar
;
23637 case UINT16_FTYPE_UINT16_INT
:
23638 type
= ushort_ftype_ushort_int
;
23640 case UINT8_FTYPE_UINT8_INT
:
23641 type
= uchar_ftype_uchar_int
;
23643 case V8HI_FTYPE_V8HI_INT
:
23644 type
= v8hi_ftype_v8hi_int
;
23646 case V8SF_FTYPE_V8SF_INT
:
23647 type
= v8sf_ftype_v8sf_int
;
23649 case V4SI_FTYPE_V4SI_INT
:
23650 type
= v4si_ftype_v4si_int
;
23652 case V4SI_FTYPE_V8SI_INT
:
23653 type
= v4si_ftype_v8si_int
;
23655 case V4HI_FTYPE_V4HI_INT
:
23656 type
= v4hi_ftype_v4hi_int
;
23658 case V4DF_FTYPE_V4DF_INT
:
23659 type
= v4df_ftype_v4df_int
;
23661 case V4SF_FTYPE_V4SF_INT
:
23662 type
= v4sf_ftype_v4sf_int
;
23664 case V4SF_FTYPE_V8SF_INT
:
23665 type
= v4sf_ftype_v8sf_int
;
23667 case V2DI_FTYPE_V2DI_INT
:
23668 case V2DI2TI_FTYPE_V2DI_INT
:
23669 type
= v2di_ftype_v2di_int
;
23671 case V2DF_FTYPE_V2DF_INT
:
23672 type
= v2df_ftype_v2df_int
;
23674 case V2DF_FTYPE_V4DF_INT
:
23675 type
= v2df_ftype_v4df_int
;
23677 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
23678 type
= v16qi_ftype_v16qi_v16qi_v16qi
;
23680 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
23681 type
= v8sf_ftype_v8sf_v8sf_v8sf
;
23683 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
23684 type
= v4df_ftype_v4df_v4df_v4df
;
23686 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
23687 type
= v4sf_ftype_v4sf_v4sf_v4sf
;
23689 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
23690 type
= v2df_ftype_v2df_v2df_v2df
;
23692 case V16QI_FTYPE_V16QI_V16QI_INT
:
23693 type
= v16qi_ftype_v16qi_v16qi_int
;
23695 case V8SI_FTYPE_V8SI_V8SI_INT
:
23696 type
= v8si_ftype_v8si_v8si_int
;
23698 case V8SI_FTYPE_V8SI_V4SI_INT
:
23699 type
= v8si_ftype_v8si_v4si_int
;
23701 case V8HI_FTYPE_V8HI_V8HI_INT
:
23702 type
= v8hi_ftype_v8hi_v8hi_int
;
23704 case V8SF_FTYPE_V8SF_V8SF_INT
:
23705 type
= v8sf_ftype_v8sf_v8sf_int
;
23707 case V8SF_FTYPE_V8SF_V4SF_INT
:
23708 type
= v8sf_ftype_v8sf_v4sf_int
;
23710 case V4SI_FTYPE_V4SI_V4SI_INT
:
23711 type
= v4si_ftype_v4si_v4si_int
;
23713 case V4DF_FTYPE_V4DF_V4DF_INT
:
23714 type
= v4df_ftype_v4df_v4df_int
;
23716 case V4DF_FTYPE_V4DF_V2DF_INT
:
23717 type
= v4df_ftype_v4df_v2df_int
;
23719 case V4SF_FTYPE_V4SF_V4SF_INT
:
23720 type
= v4sf_ftype_v4sf_v4sf_int
;
23722 case V2DI_FTYPE_V2DI_V2DI_INT
:
23723 case V2DI2TI_FTYPE_V2DI_V2DI_INT
:
23724 type
= v2di_ftype_v2di_v2di_int
;
23726 case V2DF_FTYPE_V2DF_V2DF_INT
:
23727 type
= v2df_ftype_v2df_v2df_int
;
23729 case V2DI_FTYPE_V2DI_UINT_UINT
:
23730 type
= v2di_ftype_v2di_unsigned_unsigned
;
23732 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
23733 type
= v2di_ftype_v2di_v2di_unsigned_unsigned
;
23735 case V1DI2DI_FTYPE_V1DI_V1DI_INT
:
23736 type
= v1di_ftype_v1di_v1di_int
;
23739 gcc_unreachable ();
23742 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
23745 /* pcmpestr[im] insns. */
23746 for (i
= 0, d
= bdesc_pcmpestr
;
23747 i
< ARRAY_SIZE (bdesc_pcmpestr
);
23750 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
23751 ftype
= v16qi_ftype_v16qi_int_v16qi_int_int
;
23753 ftype
= int_ftype_v16qi_int_v16qi_int_int
;
23754 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
23757 /* pcmpistr[im] insns. */
23758 for (i
= 0, d
= bdesc_pcmpistr
;
23759 i
< ARRAY_SIZE (bdesc_pcmpistr
);
23762 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
23763 ftype
= v16qi_ftype_v16qi_v16qi_int
;
23765 ftype
= int_ftype_v16qi_v16qi_int
;
23766 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
23769 /* comi/ucomi insns. */
23770 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
23771 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
23772 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
23774 def_builtin_const (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
23777 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
23778 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
23780 /* SSE or 3DNow!A */
23781 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
23784 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
23786 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
23787 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
23790 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned
, IX86_BUILTIN_MONITOR
);
23791 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned
, IX86_BUILTIN_MWAIT
);
23794 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_AESENC128
);
23795 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_AESENCLAST128
);
23796 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_AESDEC128
);
23797 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_AESDECLAST128
);
23798 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128", v2di_ftype_v2di
, IX86_BUILTIN_AESIMC128
);
23799 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int
, IX86_BUILTIN_AESKEYGENASSIST128
);
23802 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PCLMULQDQ128
);
23805 def_builtin (OPTION_MASK_ISA_AVX
, "__builtin_ia32_vzeroupper", void_ftype_void
,
23806 TARGET_64BIT
? IX86_BUILTIN_VZEROUPPER_REX64
: IX86_BUILTIN_VZEROUPPER
);
23808 /* Access to the vec_init patterns. */
23809 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
23810 integer_type_node
, NULL_TREE
);
23811 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si", ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
23813 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
23814 short_integer_type_node
,
23815 short_integer_type_node
,
23816 short_integer_type_node
, NULL_TREE
);
23817 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi", ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
23819 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
23820 char_type_node
, char_type_node
,
23821 char_type_node
, char_type_node
,
23822 char_type_node
, char_type_node
,
23823 char_type_node
, NULL_TREE
);
23824 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi", ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
23826 /* Access to the vec_extract patterns. */
23827 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
23828 integer_type_node
, NULL_TREE
);
23829 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df", ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
23831 ftype
= build_function_type_list (long_long_integer_type_node
,
23832 V2DI_type_node
, integer_type_node
,
23834 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di", ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
23836 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
23837 integer_type_node
, NULL_TREE
);
23838 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf", ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
23840 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
23841 integer_type_node
, NULL_TREE
);
23842 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si", ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
23844 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
23845 integer_type_node
, NULL_TREE
);
23846 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi", ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
23848 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
23849 integer_type_node
, NULL_TREE
);
23850 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi", ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
23852 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
23853 integer_type_node
, NULL_TREE
);
23854 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si", ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
23856 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
23857 integer_type_node
, NULL_TREE
);
23858 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi", ftype
, IX86_BUILTIN_VEC_EXT_V16QI
);
23860 /* Access to the vec_set patterns. */
23861 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
23863 integer_type_node
, NULL_TREE
);
23864 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_vec_set_v2di", ftype
, IX86_BUILTIN_VEC_SET_V2DI
);
23866 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
23868 integer_type_node
, NULL_TREE
);
23869 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf", ftype
, IX86_BUILTIN_VEC_SET_V4SF
);
23871 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
23873 integer_type_node
, NULL_TREE
);
23874 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si", ftype
, IX86_BUILTIN_VEC_SET_V4SI
);
23876 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
23878 integer_type_node
, NULL_TREE
);
23879 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi", ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
23881 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
23883 integer_type_node
, NULL_TREE
);
23884 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_set_v4hi", ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
23886 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
23888 integer_type_node
, NULL_TREE
);
23889 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi", ftype
, IX86_BUILTIN_VEC_SET_V16QI
);
23891 /* Add SSE5 multi-arg argument instructions */
23892 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
23894 tree mtype
= NULL_TREE
;
23899 switch ((enum multi_arg_type
)d
->flag
)
23901 case MULTI_ARG_3_SF
: mtype
= v4sf_ftype_v4sf_v4sf_v4sf
; break;
23902 case MULTI_ARG_3_DF
: mtype
= v2df_ftype_v2df_v2df_v2df
; break;
23903 case MULTI_ARG_3_DI
: mtype
= v2di_ftype_v2di_v2di_v2di
; break;
23904 case MULTI_ARG_3_SI
: mtype
= v4si_ftype_v4si_v4si_v4si
; break;
23905 case MULTI_ARG_3_SI_DI
: mtype
= v4si_ftype_v4si_v4si_v2di
; break;
23906 case MULTI_ARG_3_HI
: mtype
= v8hi_ftype_v8hi_v8hi_v8hi
; break;
23907 case MULTI_ARG_3_HI_SI
: mtype
= v8hi_ftype_v8hi_v8hi_v4si
; break;
23908 case MULTI_ARG_3_QI
: mtype
= v16qi_ftype_v16qi_v16qi_v16qi
; break;
23909 case MULTI_ARG_3_PERMPS
: mtype
= v4sf_ftype_v4sf_v4sf_v16qi
; break;
23910 case MULTI_ARG_3_PERMPD
: mtype
= v2df_ftype_v2df_v2df_v16qi
; break;
23911 case MULTI_ARG_2_SF
: mtype
= v4sf_ftype_v4sf_v4sf
; break;
23912 case MULTI_ARG_2_DF
: mtype
= v2df_ftype_v2df_v2df
; break;
23913 case MULTI_ARG_2_DI
: mtype
= v2di_ftype_v2di_v2di
; break;
23914 case MULTI_ARG_2_SI
: mtype
= v4si_ftype_v4si_v4si
; break;
23915 case MULTI_ARG_2_HI
: mtype
= v8hi_ftype_v8hi_v8hi
; break;
23916 case MULTI_ARG_2_QI
: mtype
= v16qi_ftype_v16qi_v16qi
; break;
23917 case MULTI_ARG_2_DI_IMM
: mtype
= v2di_ftype_v2di_si
; break;
23918 case MULTI_ARG_2_SI_IMM
: mtype
= v4si_ftype_v4si_si
; break;
23919 case MULTI_ARG_2_HI_IMM
: mtype
= v8hi_ftype_v8hi_si
; break;
23920 case MULTI_ARG_2_QI_IMM
: mtype
= v16qi_ftype_v16qi_si
; break;
23921 case MULTI_ARG_2_SF_CMP
: mtype
= v4sf_ftype_v4sf_v4sf
; break;
23922 case MULTI_ARG_2_DF_CMP
: mtype
= v2df_ftype_v2df_v2df
; break;
23923 case MULTI_ARG_2_DI_CMP
: mtype
= v2di_ftype_v2di_v2di
; break;
23924 case MULTI_ARG_2_SI_CMP
: mtype
= v4si_ftype_v4si_v4si
; break;
23925 case MULTI_ARG_2_HI_CMP
: mtype
= v8hi_ftype_v8hi_v8hi
; break;
23926 case MULTI_ARG_2_QI_CMP
: mtype
= v16qi_ftype_v16qi_v16qi
; break;
23927 case MULTI_ARG_2_SF_TF
: mtype
= v4sf_ftype_v4sf_v4sf
; break;
23928 case MULTI_ARG_2_DF_TF
: mtype
= v2df_ftype_v2df_v2df
; break;
23929 case MULTI_ARG_2_DI_TF
: mtype
= v2di_ftype_v2di_v2di
; break;
23930 case MULTI_ARG_2_SI_TF
: mtype
= v4si_ftype_v4si_v4si
; break;
23931 case MULTI_ARG_2_HI_TF
: mtype
= v8hi_ftype_v8hi_v8hi
; break;
23932 case MULTI_ARG_2_QI_TF
: mtype
= v16qi_ftype_v16qi_v16qi
; break;
23933 case MULTI_ARG_1_SF
: mtype
= v4sf_ftype_v4sf
; break;
23934 case MULTI_ARG_1_DF
: mtype
= v2df_ftype_v2df
; break;
23935 case MULTI_ARG_1_DI
: mtype
= v2di_ftype_v2di
; break;
23936 case MULTI_ARG_1_SI
: mtype
= v4si_ftype_v4si
; break;
23937 case MULTI_ARG_1_HI
: mtype
= v8hi_ftype_v8hi
; break;
23938 case MULTI_ARG_1_QI
: mtype
= v16qi_ftype_v16qi
; break;
23939 case MULTI_ARG_1_SI_DI
: mtype
= v2di_ftype_v4si
; break;
23940 case MULTI_ARG_1_HI_DI
: mtype
= v2di_ftype_v8hi
; break;
23941 case MULTI_ARG_1_HI_SI
: mtype
= v4si_ftype_v8hi
; break;
23942 case MULTI_ARG_1_QI_DI
: mtype
= v2di_ftype_v16qi
; break;
23943 case MULTI_ARG_1_QI_SI
: mtype
= v4si_ftype_v16qi
; break;
23944 case MULTI_ARG_1_QI_HI
: mtype
= v8hi_ftype_v16qi
; break;
23945 case MULTI_ARG_1_PH2PS
: mtype
= v4sf_ftype_v4hi
; break;
23946 case MULTI_ARG_1_PS2PH
: mtype
= v4hi_ftype_v4sf
; break;
23947 case MULTI_ARG_UNKNOWN
:
23949 gcc_unreachable ();
23953 def_builtin_const (d
->mask
, d
->name
, mtype
, d
->code
);
23957 /* Internal method for ix86_init_builtins. */
23960 ix86_init_builtins_va_builtins_abi (void)
23962 tree ms_va_ref
, sysv_va_ref
;
23963 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
23964 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
23965 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
23966 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
23970 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
23971 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
23972 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
23974 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
23977 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
23978 fnvoid_va_start_ms
=
23979 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
23980 fnvoid_va_end_sysv
=
23981 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
23982 fnvoid_va_start_sysv
=
23983 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
23985 fnvoid_va_copy_ms
=
23986 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
23988 fnvoid_va_copy_sysv
=
23989 build_function_type_list (void_type_node
, sysv_va_ref
,
23990 sysv_va_ref
, NULL_TREE
);
23992 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
23993 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
23994 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
23995 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
23996 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
23997 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
23998 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
23999 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
24000 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
24001 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
24002 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
24003 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
24007 ix86_init_builtins (void)
24009 tree float128_type_node
= make_node (REAL_TYPE
);
24012 /* The __float80 type. */
24013 if (TYPE_MODE (long_double_type_node
) == XFmode
)
24014 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
24018 /* The __float80 type. */
24019 tree float80_type_node
= make_node (REAL_TYPE
);
24021 TYPE_PRECISION (float80_type_node
) = 80;
24022 layout_type (float80_type_node
);
24023 (*lang_hooks
.types
.register_builtin_type
) (float80_type_node
,
24027 /* The __float128 type. */
24028 TYPE_PRECISION (float128_type_node
) = 128;
24029 layout_type (float128_type_node
);
24030 (*lang_hooks
.types
.register_builtin_type
) (float128_type_node
,
24033 /* TFmode support builtins. */
24034 ftype
= build_function_type (float128_type_node
, void_list_node
);
24035 decl
= add_builtin_function ("__builtin_infq", ftype
,
24036 IX86_BUILTIN_INFQ
, BUILT_IN_MD
,
24038 ix86_builtins
[(int) IX86_BUILTIN_INFQ
] = decl
;
24040 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
24041 IX86_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
24043 ix86_builtins
[(int) IX86_BUILTIN_HUGE_VALQ
] = decl
;
24045 /* We will expand them to normal call if SSE2 isn't available since
24046 they are used by libgcc. */
24047 ftype
= build_function_type_list (float128_type_node
,
24048 float128_type_node
,
24050 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
24051 IX86_BUILTIN_FABSQ
, BUILT_IN_MD
,
24052 "__fabstf2", NULL_TREE
);
24053 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = decl
;
24054 TREE_READONLY (decl
) = 1;
24056 ftype
= build_function_type_list (float128_type_node
,
24057 float128_type_node
,
24058 float128_type_node
,
24060 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
24061 IX86_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
24062 "__copysigntf3", NULL_TREE
);
24063 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = decl
;
24064 TREE_READONLY (decl
) = 1;
24066 ix86_init_mmx_sse_builtins ();
24068 ix86_init_builtins_va_builtins_abi ();
24071 /* Errors in the source file can cause expand_expr to return const0_rtx
24072 where we expect a vector. To avoid crashing, use one of the vector
24073 clear instructions. */
24075 safe_vector_operand (rtx x
, enum machine_mode mode
)
24077 if (x
== const0_rtx
)
24078 x
= CONST0_RTX (mode
);
24082 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
24085 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
24088 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24089 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24090 rtx op0
= expand_normal (arg0
);
24091 rtx op1
= expand_normal (arg1
);
24092 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24093 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24094 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24096 if (VECTOR_MODE_P (mode0
))
24097 op0
= safe_vector_operand (op0
, mode0
);
24098 if (VECTOR_MODE_P (mode1
))
24099 op1
= safe_vector_operand (op1
, mode1
);
24101 if (optimize
|| !target
24102 || GET_MODE (target
) != tmode
24103 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24104 target
= gen_reg_rtx (tmode
);
24106 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
24108 rtx x
= gen_reg_rtx (V4SImode
);
24109 emit_insn (gen_sse2_loadd (x
, op1
));
24110 op1
= gen_lowpart (TImode
, x
);
24113 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24114 op0
= copy_to_mode_reg (mode0
, op0
);
24115 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24116 op1
= copy_to_mode_reg (mode1
, op1
);
24118 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24127 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
24130 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
24131 enum multi_arg_type m_type
,
24132 enum rtx_code sub_code
)
24137 bool comparison_p
= false;
24139 bool last_arg_constant
= false;
24140 int num_memory
= 0;
24143 enum machine_mode mode
;
24146 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24150 case MULTI_ARG_3_SF
:
24151 case MULTI_ARG_3_DF
:
24152 case MULTI_ARG_3_DI
:
24153 case MULTI_ARG_3_SI
:
24154 case MULTI_ARG_3_SI_DI
:
24155 case MULTI_ARG_3_HI
:
24156 case MULTI_ARG_3_HI_SI
:
24157 case MULTI_ARG_3_QI
:
24158 case MULTI_ARG_3_PERMPS
:
24159 case MULTI_ARG_3_PERMPD
:
24163 case MULTI_ARG_2_SF
:
24164 case MULTI_ARG_2_DF
:
24165 case MULTI_ARG_2_DI
:
24166 case MULTI_ARG_2_SI
:
24167 case MULTI_ARG_2_HI
:
24168 case MULTI_ARG_2_QI
:
24172 case MULTI_ARG_2_DI_IMM
:
24173 case MULTI_ARG_2_SI_IMM
:
24174 case MULTI_ARG_2_HI_IMM
:
24175 case MULTI_ARG_2_QI_IMM
:
24177 last_arg_constant
= true;
24180 case MULTI_ARG_1_SF
:
24181 case MULTI_ARG_1_DF
:
24182 case MULTI_ARG_1_DI
:
24183 case MULTI_ARG_1_SI
:
24184 case MULTI_ARG_1_HI
:
24185 case MULTI_ARG_1_QI
:
24186 case MULTI_ARG_1_SI_DI
:
24187 case MULTI_ARG_1_HI_DI
:
24188 case MULTI_ARG_1_HI_SI
:
24189 case MULTI_ARG_1_QI_DI
:
24190 case MULTI_ARG_1_QI_SI
:
24191 case MULTI_ARG_1_QI_HI
:
24192 case MULTI_ARG_1_PH2PS
:
24193 case MULTI_ARG_1_PS2PH
:
24197 case MULTI_ARG_2_SF_CMP
:
24198 case MULTI_ARG_2_DF_CMP
:
24199 case MULTI_ARG_2_DI_CMP
:
24200 case MULTI_ARG_2_SI_CMP
:
24201 case MULTI_ARG_2_HI_CMP
:
24202 case MULTI_ARG_2_QI_CMP
:
24204 comparison_p
= true;
24207 case MULTI_ARG_2_SF_TF
:
24208 case MULTI_ARG_2_DF_TF
:
24209 case MULTI_ARG_2_DI_TF
:
24210 case MULTI_ARG_2_SI_TF
:
24211 case MULTI_ARG_2_HI_TF
:
24212 case MULTI_ARG_2_QI_TF
:
24217 case MULTI_ARG_UNKNOWN
:
24219 gcc_unreachable ();
24222 if (optimize
|| !target
24223 || GET_MODE (target
) != tmode
24224 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24225 target
= gen_reg_rtx (tmode
);
24227 gcc_assert (nargs
<= 4);
24229 for (i
= 0; i
< nargs
; i
++)
24231 tree arg
= CALL_EXPR_ARG (exp
, i
);
24232 rtx op
= expand_normal (arg
);
24233 int adjust
= (comparison_p
) ? 1 : 0;
24234 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
24236 if (last_arg_constant
&& i
== nargs
-1)
24238 if (!CONST_INT_P (op
))
24240 error ("last argument must be an immediate");
24241 return gen_reg_rtx (tmode
);
24246 if (VECTOR_MODE_P (mode
))
24247 op
= safe_vector_operand (op
, mode
);
24249 /* If we aren't optimizing, only allow one memory operand to be
24251 if (memory_operand (op
, mode
))
24254 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
24257 || ! (*insn_data
[icode
].operand
[i
+adjust
+1].predicate
) (op
, mode
)
24259 op
= force_reg (mode
, op
);
24263 args
[i
].mode
= mode
;
24269 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
24274 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
24275 GEN_INT ((int)sub_code
));
24276 else if (! comparison_p
)
24277 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
24280 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
24284 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
24289 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
24293 gcc_unreachable ();
24303 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24304 insns with vec_merge. */
24307 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
24311 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24312 rtx op1
, op0
= expand_normal (arg0
);
24313 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24314 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24316 if (optimize
|| !target
24317 || GET_MODE (target
) != tmode
24318 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24319 target
= gen_reg_rtx (tmode
);
24321 if (VECTOR_MODE_P (mode0
))
24322 op0
= safe_vector_operand (op0
, mode0
);
24324 if ((optimize
&& !register_operand (op0
, mode0
))
24325 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24326 op0
= copy_to_mode_reg (mode0
, op0
);
24329 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
24330 op1
= copy_to_mode_reg (mode0
, op1
);
24332 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24339 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24342 ix86_expand_sse_compare (const struct builtin_description
*d
,
24343 tree exp
, rtx target
, bool swap
)
24346 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24347 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24348 rtx op0
= expand_normal (arg0
);
24349 rtx op1
= expand_normal (arg1
);
24351 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
24352 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
24353 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
24354 enum rtx_code comparison
= d
->comparison
;
24356 if (VECTOR_MODE_P (mode0
))
24357 op0
= safe_vector_operand (op0
, mode0
);
24358 if (VECTOR_MODE_P (mode1
))
24359 op1
= safe_vector_operand (op1
, mode1
);
24361 /* Swap operands if we have a comparison that isn't available in
24365 rtx tmp
= gen_reg_rtx (mode1
);
24366 emit_move_insn (tmp
, op1
);
24371 if (optimize
|| !target
24372 || GET_MODE (target
) != tmode
24373 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
24374 target
= gen_reg_rtx (tmode
);
24376 if ((optimize
&& !register_operand (op0
, mode0
))
24377 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
24378 op0
= copy_to_mode_reg (mode0
, op0
);
24379 if ((optimize
&& !register_operand (op1
, mode1
))
24380 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
24381 op1
= copy_to_mode_reg (mode1
, op1
);
24383 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
24384 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
24391 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24394 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
24398 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24399 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24400 rtx op0
= expand_normal (arg0
);
24401 rtx op1
= expand_normal (arg1
);
24402 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
24403 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
24404 enum rtx_code comparison
= d
->comparison
;
24406 if (VECTOR_MODE_P (mode0
))
24407 op0
= safe_vector_operand (op0
, mode0
);
24408 if (VECTOR_MODE_P (mode1
))
24409 op1
= safe_vector_operand (op1
, mode1
);
24411 /* Swap operands if we have a comparison that isn't available in
24413 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
24420 target
= gen_reg_rtx (SImode
);
24421 emit_move_insn (target
, const0_rtx
);
24422 target
= gen_rtx_SUBREG (QImode
, target
, 0);
24424 if ((optimize
&& !register_operand (op0
, mode0
))
24425 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
24426 op0
= copy_to_mode_reg (mode0
, op0
);
24427 if ((optimize
&& !register_operand (op1
, mode1
))
24428 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
24429 op1
= copy_to_mode_reg (mode1
, op1
);
24431 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
24435 emit_insn (gen_rtx_SET (VOIDmode
,
24436 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
24437 gen_rtx_fmt_ee (comparison
, QImode
,
24441 return SUBREG_REG (target
);
24444 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24447 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
24451 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24452 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24453 rtx op0
= expand_normal (arg0
);
24454 rtx op1
= expand_normal (arg1
);
24455 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
24456 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
24457 enum rtx_code comparison
= d
->comparison
;
24459 if (VECTOR_MODE_P (mode0
))
24460 op0
= safe_vector_operand (op0
, mode0
);
24461 if (VECTOR_MODE_P (mode1
))
24462 op1
= safe_vector_operand (op1
, mode1
);
24464 target
= gen_reg_rtx (SImode
);
24465 emit_move_insn (target
, const0_rtx
);
24466 target
= gen_rtx_SUBREG (QImode
, target
, 0);
24468 if ((optimize
&& !register_operand (op0
, mode0
))
24469 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
24470 op0
= copy_to_mode_reg (mode0
, op0
);
24471 if ((optimize
&& !register_operand (op1
, mode1
))
24472 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
24473 op1
= copy_to_mode_reg (mode1
, op1
);
24475 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
24479 emit_insn (gen_rtx_SET (VOIDmode
,
24480 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
24481 gen_rtx_fmt_ee (comparison
, QImode
,
24485 return SUBREG_REG (target
);
24488 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24491 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
24492 tree exp
, rtx target
)
24495 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24496 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24497 tree arg2
= CALL_EXPR_ARG (exp
, 2);
24498 tree arg3
= CALL_EXPR_ARG (exp
, 3);
24499 tree arg4
= CALL_EXPR_ARG (exp
, 4);
24500 rtx scratch0
, scratch1
;
24501 rtx op0
= expand_normal (arg0
);
24502 rtx op1
= expand_normal (arg1
);
24503 rtx op2
= expand_normal (arg2
);
24504 rtx op3
= expand_normal (arg3
);
24505 rtx op4
= expand_normal (arg4
);
24506 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
24508 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
24509 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
24510 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
24511 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
24512 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
24513 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
24514 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
24516 if (VECTOR_MODE_P (modev2
))
24517 op0
= safe_vector_operand (op0
, modev2
);
24518 if (VECTOR_MODE_P (modev4
))
24519 op2
= safe_vector_operand (op2
, modev4
);
24521 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
24522 op0
= copy_to_mode_reg (modev2
, op0
);
24523 if (! (*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modei3
))
24524 op1
= copy_to_mode_reg (modei3
, op1
);
24525 if ((optimize
&& !register_operand (op2
, modev4
))
24526 || !(*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modev4
))
24527 op2
= copy_to_mode_reg (modev4
, op2
);
24528 if (! (*insn_data
[d
->icode
].operand
[5].predicate
) (op3
, modei5
))
24529 op3
= copy_to_mode_reg (modei5
, op3
);
24531 if (! (*insn_data
[d
->icode
].operand
[6].predicate
) (op4
, modeimm
))
24533 error ("the fifth argument must be a 8-bit immediate");
24537 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
24539 if (optimize
|| !target
24540 || GET_MODE (target
) != tmode0
24541 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
24542 target
= gen_reg_rtx (tmode0
);
24544 scratch1
= gen_reg_rtx (tmode1
);
24546 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
24548 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
24550 if (optimize
|| !target
24551 || GET_MODE (target
) != tmode1
24552 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
24553 target
= gen_reg_rtx (tmode1
);
24555 scratch0
= gen_reg_rtx (tmode0
);
24557 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
24561 gcc_assert (d
->flag
);
24563 scratch0
= gen_reg_rtx (tmode0
);
24564 scratch1
= gen_reg_rtx (tmode1
);
24566 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
24576 target
= gen_reg_rtx (SImode
);
24577 emit_move_insn (target
, const0_rtx
);
24578 target
= gen_rtx_SUBREG (QImode
, target
, 0);
24581 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
24582 gen_rtx_fmt_ee (EQ
, QImode
,
24583 gen_rtx_REG ((enum machine_mode
) d
->flag
,
24586 return SUBREG_REG (target
);
24593 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24596 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
24597 tree exp
, rtx target
)
24600 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24601 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24602 tree arg2
= CALL_EXPR_ARG (exp
, 2);
24603 rtx scratch0
, scratch1
;
24604 rtx op0
= expand_normal (arg0
);
24605 rtx op1
= expand_normal (arg1
);
24606 rtx op2
= expand_normal (arg2
);
24607 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
24609 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
24610 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
24611 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
24612 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
24613 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
24615 if (VECTOR_MODE_P (modev2
))
24616 op0
= safe_vector_operand (op0
, modev2
);
24617 if (VECTOR_MODE_P (modev3
))
24618 op1
= safe_vector_operand (op1
, modev3
);
24620 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
24621 op0
= copy_to_mode_reg (modev2
, op0
);
24622 if ((optimize
&& !register_operand (op1
, modev3
))
24623 || !(*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modev3
))
24624 op1
= copy_to_mode_reg (modev3
, op1
);
24626 if (! (*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modeimm
))
24628 error ("the third argument must be a 8-bit immediate");
24632 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
24634 if (optimize
|| !target
24635 || GET_MODE (target
) != tmode0
24636 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
24637 target
= gen_reg_rtx (tmode0
);
24639 scratch1
= gen_reg_rtx (tmode1
);
24641 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
24643 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
24645 if (optimize
|| !target
24646 || GET_MODE (target
) != tmode1
24647 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
24648 target
= gen_reg_rtx (tmode1
);
24650 scratch0
= gen_reg_rtx (tmode0
);
24652 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
24656 gcc_assert (d
->flag
);
24658 scratch0
= gen_reg_rtx (tmode0
);
24659 scratch1
= gen_reg_rtx (tmode1
);
24661 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
24671 target
= gen_reg_rtx (SImode
);
24672 emit_move_insn (target
, const0_rtx
);
24673 target
= gen_rtx_SUBREG (QImode
, target
, 0);
24676 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
24677 gen_rtx_fmt_ee (EQ
, QImode
,
24678 gen_rtx_REG ((enum machine_mode
) d
->flag
,
24681 return SUBREG_REG (target
);
24687 /* Subroutine of ix86_expand_builtin to take care of insns with
24688 variable number of operands. */
24691 ix86_expand_args_builtin (const struct builtin_description
*d
,
24692 tree exp
, rtx target
)
24694 rtx pat
, real_target
;
24695 unsigned int i
, nargs
;
24696 unsigned int nargs_constant
= 0;
24697 int num_memory
= 0;
24701 enum machine_mode mode
;
24703 bool last_arg_count
= false;
24704 enum insn_code icode
= d
->icode
;
24705 const struct insn_data
*insn_p
= &insn_data
[icode
];
24706 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
24707 enum machine_mode rmode
= VOIDmode
;
24709 enum rtx_code comparison
= d
->comparison
;
24711 switch ((enum ix86_builtin_type
) d
->flag
)
24713 case INT_FTYPE_V8SF_V8SF_PTEST
:
24714 case INT_FTYPE_V4DI_V4DI_PTEST
:
24715 case INT_FTYPE_V4DF_V4DF_PTEST
:
24716 case INT_FTYPE_V4SF_V4SF_PTEST
:
24717 case INT_FTYPE_V2DI_V2DI_PTEST
:
24718 case INT_FTYPE_V2DF_V2DF_PTEST
:
24719 return ix86_expand_sse_ptest (d
, exp
, target
);
24720 case FLOAT128_FTYPE_FLOAT128
:
24721 case FLOAT_FTYPE_FLOAT
:
24722 case INT_FTYPE_INT
:
24723 case UINT64_FTYPE_INT
:
24724 case INT64_FTYPE_INT64
:
24725 case INT64_FTYPE_V4SF
:
24726 case INT64_FTYPE_V2DF
:
24727 case INT_FTYPE_V16QI
:
24728 case INT_FTYPE_V8QI
:
24729 case INT_FTYPE_V8SF
:
24730 case INT_FTYPE_V4DF
:
24731 case INT_FTYPE_V4SF
:
24732 case INT_FTYPE_V2DF
:
24733 case V16QI_FTYPE_V16QI
:
24734 case V8SI_FTYPE_V8SF
:
24735 case V8SI_FTYPE_V4SI
:
24736 case V8HI_FTYPE_V8HI
:
24737 case V8HI_FTYPE_V16QI
:
24738 case V8QI_FTYPE_V8QI
:
24739 case V8SF_FTYPE_V8SF
:
24740 case V8SF_FTYPE_V8SI
:
24741 case V8SF_FTYPE_V4SF
:
24742 case V4SI_FTYPE_V4SI
:
24743 case V4SI_FTYPE_V16QI
:
24744 case V4SI_FTYPE_V4SF
:
24745 case V4SI_FTYPE_V8SI
:
24746 case V4SI_FTYPE_V8HI
:
24747 case V4SI_FTYPE_V4DF
:
24748 case V4SI_FTYPE_V2DF
:
24749 case V4HI_FTYPE_V4HI
:
24750 case V4DF_FTYPE_V4DF
:
24751 case V4DF_FTYPE_V4SI
:
24752 case V4DF_FTYPE_V4SF
:
24753 case V4DF_FTYPE_V2DF
:
24754 case V4SF_FTYPE_V4SF
:
24755 case V4SF_FTYPE_V4SI
:
24756 case V4SF_FTYPE_V8SF
:
24757 case V4SF_FTYPE_V4DF
:
24758 case V4SF_FTYPE_V2DF
:
24759 case V2DI_FTYPE_V2DI
:
24760 case V2DI_FTYPE_V16QI
:
24761 case V2DI_FTYPE_V8HI
:
24762 case V2DI_FTYPE_V4SI
:
24763 case V2DF_FTYPE_V2DF
:
24764 case V2DF_FTYPE_V4SI
:
24765 case V2DF_FTYPE_V4DF
:
24766 case V2DF_FTYPE_V4SF
:
24767 case V2DF_FTYPE_V2SI
:
24768 case V2SI_FTYPE_V2SI
:
24769 case V2SI_FTYPE_V4SF
:
24770 case V2SI_FTYPE_V2SF
:
24771 case V2SI_FTYPE_V2DF
:
24772 case V2SF_FTYPE_V2SF
:
24773 case V2SF_FTYPE_V2SI
:
24776 case V4SF_FTYPE_V4SF_VEC_MERGE
:
24777 case V2DF_FTYPE_V2DF_VEC_MERGE
:
24778 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
24779 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
24780 case V16QI_FTYPE_V16QI_V16QI
:
24781 case V16QI_FTYPE_V8HI_V8HI
:
24782 case V8QI_FTYPE_V8QI_V8QI
:
24783 case V8QI_FTYPE_V4HI_V4HI
:
24784 case V8HI_FTYPE_V8HI_V8HI
:
24785 case V8HI_FTYPE_V16QI_V16QI
:
24786 case V8HI_FTYPE_V4SI_V4SI
:
24787 case V8SF_FTYPE_V8SF_V8SF
:
24788 case V8SF_FTYPE_V8SF_V8SI
:
24789 case V4SI_FTYPE_V4SI_V4SI
:
24790 case V4SI_FTYPE_V8HI_V8HI
:
24791 case V4SI_FTYPE_V4SF_V4SF
:
24792 case V4SI_FTYPE_V2DF_V2DF
:
24793 case V4HI_FTYPE_V4HI_V4HI
:
24794 case V4HI_FTYPE_V8QI_V8QI
:
24795 case V4HI_FTYPE_V2SI_V2SI
:
24796 case V4DF_FTYPE_V4DF_V4DF
:
24797 case V4DF_FTYPE_V4DF_V4DI
:
24798 case V4SF_FTYPE_V4SF_V4SF
:
24799 case V4SF_FTYPE_V4SF_V4SI
:
24800 case V4SF_FTYPE_V4SF_V2SI
:
24801 case V4SF_FTYPE_V4SF_V2DF
:
24802 case V4SF_FTYPE_V4SF_DI
:
24803 case V4SF_FTYPE_V4SF_SI
:
24804 case V2DI_FTYPE_V2DI_V2DI
:
24805 case V2DI_FTYPE_V16QI_V16QI
:
24806 case V2DI_FTYPE_V4SI_V4SI
:
24807 case V2DI_FTYPE_V2DI_V16QI
:
24808 case V2DI_FTYPE_V2DF_V2DF
:
24809 case V2SI_FTYPE_V2SI_V2SI
:
24810 case V2SI_FTYPE_V4HI_V4HI
:
24811 case V2SI_FTYPE_V2SF_V2SF
:
24812 case V2DF_FTYPE_V2DF_V2DF
:
24813 case V2DF_FTYPE_V2DF_V4SF
:
24814 case V2DF_FTYPE_V2DF_V2DI
:
24815 case V2DF_FTYPE_V2DF_DI
:
24816 case V2DF_FTYPE_V2DF_SI
:
24817 case V2SF_FTYPE_V2SF_V2SF
:
24818 case V1DI_FTYPE_V1DI_V1DI
:
24819 case V1DI_FTYPE_V8QI_V8QI
:
24820 case V1DI_FTYPE_V2SI_V2SI
:
24821 if (comparison
== UNKNOWN
)
24822 return ix86_expand_binop_builtin (icode
, exp
, target
);
24825 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
24826 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
24827 gcc_assert (comparison
!= UNKNOWN
);
24831 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
24832 case V8HI_FTYPE_V8HI_SI_COUNT
:
24833 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
24834 case V4SI_FTYPE_V4SI_SI_COUNT
:
24835 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
24836 case V4HI_FTYPE_V4HI_SI_COUNT
:
24837 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
24838 case V2DI_FTYPE_V2DI_SI_COUNT
:
24839 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
24840 case V2SI_FTYPE_V2SI_SI_COUNT
:
24841 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
24842 case V1DI_FTYPE_V1DI_SI_COUNT
:
24844 last_arg_count
= true;
24846 case UINT64_FTYPE_UINT64_UINT64
:
24847 case UINT_FTYPE_UINT_UINT
:
24848 case UINT_FTYPE_UINT_USHORT
:
24849 case UINT_FTYPE_UINT_UCHAR
:
24850 case UINT16_FTYPE_UINT16_INT
:
24851 case UINT8_FTYPE_UINT8_INT
:
24854 case V2DI2TI_FTYPE_V2DI_INT
:
24857 nargs_constant
= 1;
24859 case V8HI_FTYPE_V8HI_INT
:
24860 case V8SF_FTYPE_V8SF_INT
:
24861 case V4SI_FTYPE_V4SI_INT
:
24862 case V4SI_FTYPE_V8SI_INT
:
24863 case V4HI_FTYPE_V4HI_INT
:
24864 case V4DF_FTYPE_V4DF_INT
:
24865 case V4SF_FTYPE_V4SF_INT
:
24866 case V4SF_FTYPE_V8SF_INT
:
24867 case V2DI_FTYPE_V2DI_INT
:
24868 case V2DF_FTYPE_V2DF_INT
:
24869 case V2DF_FTYPE_V4DF_INT
:
24871 nargs_constant
= 1;
24873 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
24874 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
24875 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
24876 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
24877 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
24880 case V16QI_FTYPE_V16QI_V16QI_INT
:
24881 case V8HI_FTYPE_V8HI_V8HI_INT
:
24882 case V8SI_FTYPE_V8SI_V8SI_INT
:
24883 case V8SI_FTYPE_V8SI_V4SI_INT
:
24884 case V8SF_FTYPE_V8SF_V8SF_INT
:
24885 case V8SF_FTYPE_V8SF_V4SF_INT
:
24886 case V4SI_FTYPE_V4SI_V4SI_INT
:
24887 case V4DF_FTYPE_V4DF_V4DF_INT
:
24888 case V4DF_FTYPE_V4DF_V2DF_INT
:
24889 case V4SF_FTYPE_V4SF_V4SF_INT
:
24890 case V2DI_FTYPE_V2DI_V2DI_INT
:
24891 case V2DF_FTYPE_V2DF_V2DF_INT
:
24893 nargs_constant
= 1;
24895 case V2DI2TI_FTYPE_V2DI_V2DI_INT
:
24898 nargs_constant
= 1;
24900 case V1DI2DI_FTYPE_V1DI_V1DI_INT
:
24903 nargs_constant
= 1;
24905 case V2DI_FTYPE_V2DI_UINT_UINT
:
24907 nargs_constant
= 2;
24909 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
24911 nargs_constant
= 2;
24914 gcc_unreachable ();
24917 gcc_assert (nargs
<= ARRAY_SIZE (args
));
24919 if (comparison
!= UNKNOWN
)
24921 gcc_assert (nargs
== 2);
24922 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
24925 if (rmode
== VOIDmode
|| rmode
== tmode
)
24929 || GET_MODE (target
) != tmode
24930 || ! (*insn_p
->operand
[0].predicate
) (target
, tmode
))
24931 target
= gen_reg_rtx (tmode
);
24932 real_target
= target
;
24936 target
= gen_reg_rtx (rmode
);
24937 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
24940 for (i
= 0; i
< nargs
; i
++)
24942 tree arg
= CALL_EXPR_ARG (exp
, i
);
24943 rtx op
= expand_normal (arg
);
24944 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
24945 bool match
= (*insn_p
->operand
[i
+ 1].predicate
) (op
, mode
);
24947 if (last_arg_count
&& (i
+ 1) == nargs
)
24949 /* SIMD shift insns take either an 8-bit immediate or
24950 register as count. But builtin functions take int as
24951 count. If count doesn't match, we put it in register. */
24954 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
24955 if (!(*insn_p
->operand
[i
+ 1].predicate
) (op
, mode
))
24956 op
= copy_to_reg (op
);
24959 else if ((nargs
- i
) <= nargs_constant
)
24964 case CODE_FOR_sse4_1_roundpd
:
24965 case CODE_FOR_sse4_1_roundps
:
24966 case CODE_FOR_sse4_1_roundsd
:
24967 case CODE_FOR_sse4_1_roundss
:
24968 case CODE_FOR_sse4_1_blendps
:
24969 case CODE_FOR_avx_blendpd256
:
24970 case CODE_FOR_avx_vpermilv4df
:
24971 case CODE_FOR_avx_roundpd256
:
24972 case CODE_FOR_avx_roundps256
:
24973 error ("the last argument must be a 4-bit immediate");
24976 case CODE_FOR_sse4_1_blendpd
:
24977 case CODE_FOR_avx_vpermilv2df
:
24978 error ("the last argument must be a 2-bit immediate");
24981 case CODE_FOR_avx_vextractf128v4df
:
24982 case CODE_FOR_avx_vextractf128v8sf
:
24983 case CODE_FOR_avx_vextractf128v8si
:
24984 case CODE_FOR_avx_vinsertf128v4df
:
24985 case CODE_FOR_avx_vinsertf128v8sf
:
24986 case CODE_FOR_avx_vinsertf128v8si
:
24987 error ("the last argument must be a 1-bit immediate");
24990 case CODE_FOR_avx_cmpsdv2df3
:
24991 case CODE_FOR_avx_cmpssv4sf3
:
24992 case CODE_FOR_avx_cmppdv2df3
:
24993 case CODE_FOR_avx_cmppsv4sf3
:
24994 case CODE_FOR_avx_cmppdv4df3
:
24995 case CODE_FOR_avx_cmppsv8sf3
:
24996 error ("the last argument must be a 5-bit immediate");
25000 switch (nargs_constant
)
25003 if ((nargs
- i
) == nargs_constant
)
25005 error ("the next to last argument must be an 8-bit immediate");
25009 error ("the last argument must be an 8-bit immediate");
25012 gcc_unreachable ();
25019 if (VECTOR_MODE_P (mode
))
25020 op
= safe_vector_operand (op
, mode
);
25022 /* If we aren't optimizing, only allow one memory operand to
25024 if (memory_operand (op
, mode
))
25027 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
25029 if (optimize
|| !match
|| num_memory
> 1)
25030 op
= copy_to_mode_reg (mode
, op
);
25034 op
= copy_to_reg (op
);
25035 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
25040 args
[i
].mode
= mode
;
25046 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
25049 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
25052 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
25056 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
25057 args
[2].op
, args
[3].op
);
25060 gcc_unreachable ();
25070 /* Subroutine of ix86_expand_builtin to take care of special insns
25071 with variable number of operands. */
25074 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
25075 tree exp
, rtx target
)
25079 unsigned int i
, nargs
, arg_adjust
, memory
;
25083 enum machine_mode mode
;
25085 enum insn_code icode
= d
->icode
;
25086 bool last_arg_constant
= false;
25087 const struct insn_data
*insn_p
= &insn_data
[icode
];
25088 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
25089 enum { load
, store
} klass
;
25091 switch ((enum ix86_special_builtin_type
) d
->flag
)
25093 case VOID_FTYPE_VOID
:
25094 emit_insn (GEN_FCN (icode
) (target
));
25096 case UINT64_FTYPE_VOID
:
25101 case UINT64_FTYPE_PUNSIGNED
:
25102 case V2DI_FTYPE_PV2DI
:
25103 case V32QI_FTYPE_PCCHAR
:
25104 case V16QI_FTYPE_PCCHAR
:
25105 case V8SF_FTYPE_PCV4SF
:
25106 case V8SF_FTYPE_PCFLOAT
:
25107 case V4SF_FTYPE_PCFLOAT
:
25108 case V4DF_FTYPE_PCV2DF
:
25109 case V4DF_FTYPE_PCDOUBLE
:
25110 case V2DF_FTYPE_PCDOUBLE
:
25115 case VOID_FTYPE_PV2SF_V4SF
:
25116 case VOID_FTYPE_PV4DI_V4DI
:
25117 case VOID_FTYPE_PV2DI_V2DI
:
25118 case VOID_FTYPE_PCHAR_V32QI
:
25119 case VOID_FTYPE_PCHAR_V16QI
:
25120 case VOID_FTYPE_PFLOAT_V8SF
:
25121 case VOID_FTYPE_PFLOAT_V4SF
:
25122 case VOID_FTYPE_PDOUBLE_V4DF
:
25123 case VOID_FTYPE_PDOUBLE_V2DF
:
25124 case VOID_FTYPE_PDI_DI
:
25125 case VOID_FTYPE_PINT_INT
:
25128 /* Reserve memory operand for target. */
25129 memory
= ARRAY_SIZE (args
);
25131 case V4SF_FTYPE_V4SF_PCV2SF
:
25132 case V2DF_FTYPE_V2DF_PCDOUBLE
:
25137 case V8SF_FTYPE_PCV8SF_V8SF
:
25138 case V4DF_FTYPE_PCV4DF_V4DF
:
25139 case V4SF_FTYPE_PCV4SF_V4SF
:
25140 case V2DF_FTYPE_PCV2DF_V2DF
:
25145 case VOID_FTYPE_PV8SF_V8SF_V8SF
:
25146 case VOID_FTYPE_PV4DF_V4DF_V4DF
:
25147 case VOID_FTYPE_PV4SF_V4SF_V4SF
:
25148 case VOID_FTYPE_PV2DF_V2DF_V2DF
:
25151 /* Reserve memory operand for target. */
25152 memory
= ARRAY_SIZE (args
);
25155 gcc_unreachable ();
25158 gcc_assert (nargs
<= ARRAY_SIZE (args
));
25160 if (klass
== store
)
25162 arg
= CALL_EXPR_ARG (exp
, 0);
25163 op
= expand_normal (arg
);
25164 gcc_assert (target
== 0);
25165 target
= gen_rtx_MEM (tmode
, copy_to_mode_reg (Pmode
, op
));
25173 || GET_MODE (target
) != tmode
25174 || ! (*insn_p
->operand
[0].predicate
) (target
, tmode
))
25175 target
= gen_reg_rtx (tmode
);
25178 for (i
= 0; i
< nargs
; i
++)
25180 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
25183 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
25184 op
= expand_normal (arg
);
25185 match
= (*insn_p
->operand
[i
+ 1].predicate
) (op
, mode
);
25187 if (last_arg_constant
&& (i
+ 1) == nargs
)
25193 error ("the last argument must be an 8-bit immediate");
25201 /* This must be the memory operand. */
25202 op
= gen_rtx_MEM (mode
, copy_to_mode_reg (Pmode
, op
));
25203 gcc_assert (GET_MODE (op
) == mode
25204 || GET_MODE (op
) == VOIDmode
);
25208 /* This must be register. */
25209 if (VECTOR_MODE_P (mode
))
25210 op
= safe_vector_operand (op
, mode
);
25212 gcc_assert (GET_MODE (op
) == mode
25213 || GET_MODE (op
) == VOIDmode
);
25214 op
= copy_to_mode_reg (mode
, op
);
25219 args
[i
].mode
= mode
;
25225 pat
= GEN_FCN (icode
) (target
);
25228 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
25231 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
25234 gcc_unreachable ();
25240 return klass
== store
? 0 : target
;
25243 /* Return the integer constant in ARG. Constrain it to be in the range
25244 of the subparts of VEC_TYPE; issue an error if not. */
25247 get_element_number (tree vec_type
, tree arg
)
25249 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
25251 if (!host_integerp (arg
, 1)
25252 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
25254 error ("selector must be an integer constant in the range 0..%wi", max
);
25261 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25262 ix86_expand_vector_init. We DO have language-level syntax for this, in
25263 the form of (type){ init-list }. Except that since we can't place emms
25264 instructions from inside the compiler, we can't allow the use of MMX
25265 registers unless the user explicitly asks for it. So we do *not* define
25266 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25267 we have builtins invoked by mmintrin.h that gives us license to emit
25268 these sorts of instructions. */
25271 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
25273 enum machine_mode tmode
= TYPE_MODE (type
);
25274 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
25275 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
25276 rtvec v
= rtvec_alloc (n_elt
);
25278 gcc_assert (VECTOR_MODE_P (tmode
));
25279 gcc_assert (call_expr_nargs (exp
) == n_elt
);
25281 for (i
= 0; i
< n_elt
; ++i
)
25283 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
25284 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
25287 if (!target
|| !register_operand (target
, tmode
))
25288 target
= gen_reg_rtx (tmode
);
25290 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
25294 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25295 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25296 had a language-level syntax for referencing vector elements. */
25299 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
25301 enum machine_mode tmode
, mode0
;
25306 arg0
= CALL_EXPR_ARG (exp
, 0);
25307 arg1
= CALL_EXPR_ARG (exp
, 1);
25309 op0
= expand_normal (arg0
);
25310 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
25312 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
25313 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
25314 gcc_assert (VECTOR_MODE_P (mode0
));
25316 op0
= force_reg (mode0
, op0
);
25318 if (optimize
|| !target
|| !register_operand (target
, tmode
))
25319 target
= gen_reg_rtx (tmode
);
25321 ix86_expand_vector_extract (true, target
, op0
, elt
);
25326 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25327 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25328 a language-level syntax for referencing vector elements. */
25331 ix86_expand_vec_set_builtin (tree exp
)
25333 enum machine_mode tmode
, mode1
;
25334 tree arg0
, arg1
, arg2
;
25336 rtx op0
, op1
, target
;
25338 arg0
= CALL_EXPR_ARG (exp
, 0);
25339 arg1
= CALL_EXPR_ARG (exp
, 1);
25340 arg2
= CALL_EXPR_ARG (exp
, 2);
25342 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
25343 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
25344 gcc_assert (VECTOR_MODE_P (tmode
));
25346 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
25347 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
25348 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
25350 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
25351 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
25353 op0
= force_reg (tmode
, op0
);
25354 op1
= force_reg (mode1
, op1
);
25356 /* OP0 is the source of these builtin functions and shouldn't be
25357 modified. Create a copy, use it and return it as target. */
25358 target
= gen_reg_rtx (tmode
);
25359 emit_move_insn (target
, op0
);
25360 ix86_expand_vector_set (true, target
, op1
, elt
);
25365 /* Expand an expression EXP that calls a built-in function,
25366 with result going to TARGET if that's convenient
25367 (and in mode MODE if that's convenient).
25368 SUBTARGET may be used as the target for computing one of EXP's operands.
25369 IGNORE is nonzero if the value is to be ignored. */
25372 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
25373 enum machine_mode mode ATTRIBUTE_UNUSED
,
25374 int ignore ATTRIBUTE_UNUSED
)
25376 const struct builtin_description
*d
;
25378 enum insn_code icode
;
25379 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
25380 tree arg0
, arg1
, arg2
;
25381 rtx op0
, op1
, op2
, pat
;
25382 enum machine_mode mode0
, mode1
, mode2
;
25383 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
25385 /* Determine whether the builtin function is available under the current ISA.
25386 Originally the builtin was not created if it wasn't applicable to the
25387 current ISA based on the command line switches. With function specific
25388 options, we need to check in the context of the function making the call
25389 whether it is supported. */
25390 if (ix86_builtins_isa
[fcode
].isa
25391 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
25393 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
25394 NULL
, NULL
, false);
25397 error ("%qE needs unknown isa option", fndecl
);
25400 gcc_assert (opts
!= NULL
);
25401 error ("%qE needs isa option %s", fndecl
, opts
);
25409 case IX86_BUILTIN_MASKMOVQ
:
25410 case IX86_BUILTIN_MASKMOVDQU
:
25411 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
25412 ? CODE_FOR_mmx_maskmovq
25413 : CODE_FOR_sse2_maskmovdqu
);
25414 /* Note the arg order is different from the operand order. */
25415 arg1
= CALL_EXPR_ARG (exp
, 0);
25416 arg2
= CALL_EXPR_ARG (exp
, 1);
25417 arg0
= CALL_EXPR_ARG (exp
, 2);
25418 op0
= expand_normal (arg0
);
25419 op1
= expand_normal (arg1
);
25420 op2
= expand_normal (arg2
);
25421 mode0
= insn_data
[icode
].operand
[0].mode
;
25422 mode1
= insn_data
[icode
].operand
[1].mode
;
25423 mode2
= insn_data
[icode
].operand
[2].mode
;
25425 op0
= force_reg (Pmode
, op0
);
25426 op0
= gen_rtx_MEM (mode1
, op0
);
25428 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
25429 op0
= copy_to_mode_reg (mode0
, op0
);
25430 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
25431 op1
= copy_to_mode_reg (mode1
, op1
);
25432 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
25433 op2
= copy_to_mode_reg (mode2
, op2
);
25434 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
25440 case IX86_BUILTIN_LDMXCSR
:
25441 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
25442 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
25443 emit_move_insn (target
, op0
);
25444 emit_insn (gen_sse_ldmxcsr (target
));
25447 case IX86_BUILTIN_STMXCSR
:
25448 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
25449 emit_insn (gen_sse_stmxcsr (target
));
25450 return copy_to_mode_reg (SImode
, target
);
25452 case IX86_BUILTIN_CLFLUSH
:
25453 arg0
= CALL_EXPR_ARG (exp
, 0);
25454 op0
= expand_normal (arg0
);
25455 icode
= CODE_FOR_sse2_clflush
;
25456 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
25457 op0
= copy_to_mode_reg (Pmode
, op0
);
25459 emit_insn (gen_sse2_clflush (op0
));
25462 case IX86_BUILTIN_MONITOR
:
25463 arg0
= CALL_EXPR_ARG (exp
, 0);
25464 arg1
= CALL_EXPR_ARG (exp
, 1);
25465 arg2
= CALL_EXPR_ARG (exp
, 2);
25466 op0
= expand_normal (arg0
);
25467 op1
= expand_normal (arg1
);
25468 op2
= expand_normal (arg2
);
25470 op0
= copy_to_mode_reg (Pmode
, op0
);
25472 op1
= copy_to_mode_reg (SImode
, op1
);
25474 op2
= copy_to_mode_reg (SImode
, op2
);
25475 emit_insn ((*ix86_gen_monitor
) (op0
, op1
, op2
));
25478 case IX86_BUILTIN_MWAIT
:
25479 arg0
= CALL_EXPR_ARG (exp
, 0);
25480 arg1
= CALL_EXPR_ARG (exp
, 1);
25481 op0
= expand_normal (arg0
);
25482 op1
= expand_normal (arg1
);
25484 op0
= copy_to_mode_reg (SImode
, op0
);
25486 op1
= copy_to_mode_reg (SImode
, op1
);
25487 emit_insn (gen_sse3_mwait (op0
, op1
));
25490 case IX86_BUILTIN_VEC_INIT_V2SI
:
25491 case IX86_BUILTIN_VEC_INIT_V4HI
:
25492 case IX86_BUILTIN_VEC_INIT_V8QI
:
25493 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
25495 case IX86_BUILTIN_VEC_EXT_V2DF
:
25496 case IX86_BUILTIN_VEC_EXT_V2DI
:
25497 case IX86_BUILTIN_VEC_EXT_V4SF
:
25498 case IX86_BUILTIN_VEC_EXT_V4SI
:
25499 case IX86_BUILTIN_VEC_EXT_V8HI
:
25500 case IX86_BUILTIN_VEC_EXT_V2SI
:
25501 case IX86_BUILTIN_VEC_EXT_V4HI
:
25502 case IX86_BUILTIN_VEC_EXT_V16QI
:
25503 return ix86_expand_vec_ext_builtin (exp
, target
);
25505 case IX86_BUILTIN_VEC_SET_V2DI
:
25506 case IX86_BUILTIN_VEC_SET_V4SF
:
25507 case IX86_BUILTIN_VEC_SET_V4SI
:
25508 case IX86_BUILTIN_VEC_SET_V8HI
:
25509 case IX86_BUILTIN_VEC_SET_V4HI
:
25510 case IX86_BUILTIN_VEC_SET_V16QI
:
25511 return ix86_expand_vec_set_builtin (exp
);
25513 case IX86_BUILTIN_INFQ
:
25514 case IX86_BUILTIN_HUGE_VALQ
:
25516 REAL_VALUE_TYPE inf
;
25520 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
25522 tmp
= validize_mem (force_const_mem (mode
, tmp
));
25525 target
= gen_reg_rtx (mode
);
25527 emit_move_insn (target
, tmp
);
25535 for (i
= 0, d
= bdesc_special_args
;
25536 i
< ARRAY_SIZE (bdesc_special_args
);
25538 if (d
->code
== fcode
)
25539 return ix86_expand_special_args_builtin (d
, exp
, target
);
25541 for (i
= 0, d
= bdesc_args
;
25542 i
< ARRAY_SIZE (bdesc_args
);
25544 if (d
->code
== fcode
)
25547 case IX86_BUILTIN_FABSQ
:
25548 case IX86_BUILTIN_COPYSIGNQ
:
25550 /* Emit a normal call if SSE2 isn't available. */
25551 return expand_call (exp
, target
, ignore
);
25553 return ix86_expand_args_builtin (d
, exp
, target
);
25556 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
25557 if (d
->code
== fcode
)
25558 return ix86_expand_sse_comi (d
, exp
, target
);
25560 for (i
= 0, d
= bdesc_pcmpestr
;
25561 i
< ARRAY_SIZE (bdesc_pcmpestr
);
25563 if (d
->code
== fcode
)
25564 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
25566 for (i
= 0, d
= bdesc_pcmpistr
;
25567 i
< ARRAY_SIZE (bdesc_pcmpistr
);
25569 if (d
->code
== fcode
)
25570 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
25572 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
25573 if (d
->code
== fcode
)
25574 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
25575 (enum multi_arg_type
)d
->flag
,
25578 gcc_unreachable ();
25581 /* Returns a function decl for a vectorized version of the builtin function
25582 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25583 if it is not available. */
25586 ix86_builtin_vectorized_function (unsigned int fn
, tree type_out
,
25589 enum machine_mode in_mode
, out_mode
;
25592 if (TREE_CODE (type_out
) != VECTOR_TYPE
25593 || TREE_CODE (type_in
) != VECTOR_TYPE
)
25596 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
25597 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
25598 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
25599 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
25603 case BUILT_IN_SQRT
:
25604 if (out_mode
== DFmode
&& out_n
== 2
25605 && in_mode
== DFmode
&& in_n
== 2)
25606 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
25609 case BUILT_IN_SQRTF
:
25610 if (out_mode
== SFmode
&& out_n
== 4
25611 && in_mode
== SFmode
&& in_n
== 4)
25612 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
25615 case BUILT_IN_LRINT
:
25616 if (out_mode
== SImode
&& out_n
== 4
25617 && in_mode
== DFmode
&& in_n
== 2)
25618 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
25621 case BUILT_IN_LRINTF
:
25622 if (out_mode
== SImode
&& out_n
== 4
25623 && in_mode
== SFmode
&& in_n
== 4)
25624 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
25631 /* Dispatch to a handler for a vectorization library. */
25632 if (ix86_veclib_handler
)
25633 return (*ix86_veclib_handler
) ((enum built_in_function
) fn
, type_out
,
25639 /* Handler for an SVML-style interface to
25640 a library with vectorized intrinsics. */
25643 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
25646 tree fntype
, new_fndecl
, args
;
25649 enum machine_mode el_mode
, in_mode
;
25652 /* The SVML is suitable for unsafe math only. */
25653 if (!flag_unsafe_math_optimizations
)
25656 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
25657 n
= TYPE_VECTOR_SUBPARTS (type_out
);
25658 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
25659 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
25660 if (el_mode
!= in_mode
25668 case BUILT_IN_LOG10
:
25670 case BUILT_IN_TANH
:
25672 case BUILT_IN_ATAN
:
25673 case BUILT_IN_ATAN2
:
25674 case BUILT_IN_ATANH
:
25675 case BUILT_IN_CBRT
:
25676 case BUILT_IN_SINH
:
25678 case BUILT_IN_ASINH
:
25679 case BUILT_IN_ASIN
:
25680 case BUILT_IN_COSH
:
25682 case BUILT_IN_ACOSH
:
25683 case BUILT_IN_ACOS
:
25684 if (el_mode
!= DFmode
|| n
!= 2)
25688 case BUILT_IN_EXPF
:
25689 case BUILT_IN_LOGF
:
25690 case BUILT_IN_LOG10F
:
25691 case BUILT_IN_POWF
:
25692 case BUILT_IN_TANHF
:
25693 case BUILT_IN_TANF
:
25694 case BUILT_IN_ATANF
:
25695 case BUILT_IN_ATAN2F
:
25696 case BUILT_IN_ATANHF
:
25697 case BUILT_IN_CBRTF
:
25698 case BUILT_IN_SINHF
:
25699 case BUILT_IN_SINF
:
25700 case BUILT_IN_ASINHF
:
25701 case BUILT_IN_ASINF
:
25702 case BUILT_IN_COSHF
:
25703 case BUILT_IN_COSF
:
25704 case BUILT_IN_ACOSHF
:
25705 case BUILT_IN_ACOSF
:
25706 if (el_mode
!= SFmode
|| n
!= 4)
25714 bname
= IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls
[fn
]));
25716 if (fn
== BUILT_IN_LOGF
)
25717 strcpy (name
, "vmlsLn4");
25718 else if (fn
== BUILT_IN_LOG
)
25719 strcpy (name
, "vmldLn2");
25722 sprintf (name
, "vmls%s", bname
+10);
25723 name
[strlen (name
)-1] = '4';
25726 sprintf (name
, "vmld%s2", bname
+10);
25728 /* Convert to uppercase. */
25732 for (args
= DECL_ARGUMENTS (implicit_built_in_decls
[fn
]); args
;
25733 args
= TREE_CHAIN (args
))
25737 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
25739 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
25741 /* Build a function declaration for the vectorized function. */
25742 new_fndecl
= build_decl (BUILTINS_LOCATION
,
25743 FUNCTION_DECL
, get_identifier (name
), fntype
);
25744 TREE_PUBLIC (new_fndecl
) = 1;
25745 DECL_EXTERNAL (new_fndecl
) = 1;
25746 DECL_IS_NOVOPS (new_fndecl
) = 1;
25747 TREE_READONLY (new_fndecl
) = 1;
25752 /* Handler for an ACML-style interface to
25753 a library with vectorized intrinsics. */
25756 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
25758 char name
[20] = "__vr.._";
25759 tree fntype
, new_fndecl
, args
;
25762 enum machine_mode el_mode
, in_mode
;
25765 /* The ACML is 64bits only and suitable for unsafe math only as
25766 it does not correctly support parts of IEEE with the required
25767 precision such as denormals. */
25769 || !flag_unsafe_math_optimizations
)
25772 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
25773 n
= TYPE_VECTOR_SUBPARTS (type_out
);
25774 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
25775 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
25776 if (el_mode
!= in_mode
25786 case BUILT_IN_LOG2
:
25787 case BUILT_IN_LOG10
:
25790 if (el_mode
!= DFmode
25795 case BUILT_IN_SINF
:
25796 case BUILT_IN_COSF
:
25797 case BUILT_IN_EXPF
:
25798 case BUILT_IN_POWF
:
25799 case BUILT_IN_LOGF
:
25800 case BUILT_IN_LOG2F
:
25801 case BUILT_IN_LOG10F
:
25804 if (el_mode
!= SFmode
25813 bname
= IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls
[fn
]));
25814 sprintf (name
+ 7, "%s", bname
+10);
25817 for (args
= DECL_ARGUMENTS (implicit_built_in_decls
[fn
]); args
;
25818 args
= TREE_CHAIN (args
))
25822 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
25824 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
25826 /* Build a function declaration for the vectorized function. */
25827 new_fndecl
= build_decl (BUILTINS_LOCATION
,
25828 FUNCTION_DECL
, get_identifier (name
), fntype
);
25829 TREE_PUBLIC (new_fndecl
) = 1;
25830 DECL_EXTERNAL (new_fndecl
) = 1;
25831 DECL_IS_NOVOPS (new_fndecl
) = 1;
25832 TREE_READONLY (new_fndecl
) = 1;
25838 /* Returns a decl of a function that implements conversion of an integer vector
25839 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25840 side of the conversion.
25841 Return NULL_TREE if it is not available. */
25844 ix86_vectorize_builtin_conversion (unsigned int code
, tree type
)
25846 if (TREE_CODE (type
) != VECTOR_TYPE
25847 /* There are only conversions from/to signed integers. */
25848 || TYPE_UNSIGNED (TREE_TYPE (type
)))
25854 switch (TYPE_MODE (type
))
25857 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
25862 case FIX_TRUNC_EXPR
:
25863 switch (TYPE_MODE (type
))
25866 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
25876 /* Returns a code for a target-specific builtin that implements
25877 reciprocal of the function, or NULL_TREE if not available. */
25880 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
25881 bool sqrt ATTRIBUTE_UNUSED
)
25883 if (! (TARGET_SSE_MATH
&& TARGET_RECIP
&& !optimize_insn_for_size_p ()
25884 && flag_finite_math_only
&& !flag_trapping_math
25885 && flag_unsafe_math_optimizations
))
25889 /* Machine dependent builtins. */
25892 /* Vectorized version of sqrt to rsqrt conversion. */
25893 case IX86_BUILTIN_SQRTPS_NR
:
25894 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
25900 /* Normal builtins. */
25903 /* Sqrt to rsqrt conversion. */
25904 case BUILT_IN_SQRTF
:
25905 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
25912 /* Store OPERAND to the memory after reload is completed. This means
25913 that we can't easily use assign_stack_local. */
25915 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
25919 gcc_assert (reload_completed
);
25920 if (!TARGET_64BIT_MS_ABI
&& TARGET_RED_ZONE
)
25922 result
= gen_rtx_MEM (mode
,
25923 gen_rtx_PLUS (Pmode
,
25925 GEN_INT (-RED_ZONE_SIZE
)));
25926 emit_move_insn (result
, operand
);
25928 else if ((TARGET_64BIT_MS_ABI
|| !TARGET_RED_ZONE
) && TARGET_64BIT
)
25934 operand
= gen_lowpart (DImode
, operand
);
25938 gen_rtx_SET (VOIDmode
,
25939 gen_rtx_MEM (DImode
,
25940 gen_rtx_PRE_DEC (DImode
,
25941 stack_pointer_rtx
)),
25945 gcc_unreachable ();
25947 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
25956 split_di (&operand
, 1, operands
, operands
+ 1);
25958 gen_rtx_SET (VOIDmode
,
25959 gen_rtx_MEM (SImode
,
25960 gen_rtx_PRE_DEC (Pmode
,
25961 stack_pointer_rtx
)),
25964 gen_rtx_SET (VOIDmode
,
25965 gen_rtx_MEM (SImode
,
25966 gen_rtx_PRE_DEC (Pmode
,
25967 stack_pointer_rtx
)),
25972 /* Store HImodes as SImodes. */
25973 operand
= gen_lowpart (SImode
, operand
);
25977 gen_rtx_SET (VOIDmode
,
25978 gen_rtx_MEM (GET_MODE (operand
),
25979 gen_rtx_PRE_DEC (SImode
,
25980 stack_pointer_rtx
)),
25984 gcc_unreachable ();
25986 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
25991 /* Free operand from the memory. */
25993 ix86_free_from_memory (enum machine_mode mode
)
25995 if (!TARGET_RED_ZONE
|| TARGET_64BIT_MS_ABI
)
25999 if (mode
== DImode
|| TARGET_64BIT
)
26003 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26004 to pop or add instruction if registers are available. */
26005 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
26006 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
26011 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26012 QImode must go into class Q_REGS.
26013 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26014 movdf to do mem-to-mem moves through integer regs. */
26016 ix86_preferred_reload_class (rtx x
, enum reg_class regclass
)
26018 enum machine_mode mode
= GET_MODE (x
);
26020 /* We're only allowed to return a subclass of CLASS. Many of the
26021 following checks fail for NO_REGS, so eliminate that early. */
26022 if (regclass
== NO_REGS
)
26025 /* All classes can load zeros. */
26026 if (x
== CONST0_RTX (mode
))
26029 /* Force constants into memory if we are loading a (nonzero) constant into
26030 an MMX or SSE register. This is because there are no MMX/SSE instructions
26031 to load from a constant. */
26033 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
26036 /* Prefer SSE regs only, if we can use them for math. */
26037 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
26038 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
26040 /* Floating-point constants need more complex checks. */
26041 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
26043 /* General regs can load everything. */
26044 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
26047 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26048 zero above. We only want to wind up preferring 80387 registers if
26049 we plan on doing computation with them. */
26051 && standard_80387_constant_p (x
))
26053 /* Limit class to non-sse. */
26054 if (regclass
== FLOAT_SSE_REGS
)
26056 if (regclass
== FP_TOP_SSE_REGS
)
26058 if (regclass
== FP_SECOND_SSE_REGS
)
26059 return FP_SECOND_REG
;
26060 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
26067 /* Generally when we see PLUS here, it's the function invariant
26068 (plus soft-fp const_int). Which can only be computed into general
26070 if (GET_CODE (x
) == PLUS
)
26071 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
26073 /* QImode constants are easy to load, but non-constant QImode data
26074 must go into Q_REGS. */
26075 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
26077 if (reg_class_subset_p (regclass
, Q_REGS
))
26079 if (reg_class_subset_p (Q_REGS
, regclass
))
26087 /* Discourage putting floating-point values in SSE registers unless
26088 SSE math is being used, and likewise for the 387 registers. */
26090 ix86_preferred_output_reload_class (rtx x
, enum reg_class regclass
)
26092 enum machine_mode mode
= GET_MODE (x
);
26094 /* Restrict the output reload class to the register bank that we are doing
26095 math on. If we would like not to return a subset of CLASS, reject this
26096 alternative: if reload cannot do this, it will still use its choice. */
26097 mode
= GET_MODE (x
);
26098 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
26099 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
26101 if (X87_FLOAT_MODE_P (mode
))
26103 if (regclass
== FP_TOP_SSE_REGS
)
26105 else if (regclass
== FP_SECOND_SSE_REGS
)
26106 return FP_SECOND_REG
;
26108 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
26114 static enum reg_class
26115 ix86_secondary_reload (bool in_p
, rtx x
, enum reg_class rclass
,
26116 enum machine_mode mode
,
26117 secondary_reload_info
*sri ATTRIBUTE_UNUSED
)
26119 /* QImode spills from non-QI registers require
26120 intermediate register on 32bit targets. */
26121 if (!in_p
&& mode
== QImode
&& !TARGET_64BIT
26122 && (rclass
== GENERAL_REGS
26123 || rclass
== LEGACY_REGS
26124 || rclass
== INDEX_REGS
))
26133 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
26134 regno
= true_regnum (x
);
26136 /* Return Q_REGS if the operand is in memory. */
26144 /* If we are copying between general and FP registers, we need a memory
26145 location. The same is true for SSE and MMX registers.
26147 To optimize register_move_cost performance, allow inline variant.
26149 The macro can't work reliably when one of the CLASSES is class containing
26150 registers from multiple units (SSE, MMX, integer). We avoid this by never
26151 combining those units in single alternative in the machine description.
26152 Ensure that this constraint holds to avoid unexpected surprises.
26154 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26155 enforce these sanity checks. */
26158 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
26159 enum machine_mode mode
, int strict
)
26161 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
26162 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
26163 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
26164 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
26165 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
26166 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
26168 gcc_assert (!strict
);
26172 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
26175 /* ??? This is a lie. We do have moves between mmx/general, and for
26176 mmx/sse2. But by saying we need secondary memory we discourage the
26177 register allocator from using the mmx registers unless needed. */
26178 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
26181 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
26183 /* SSE1 doesn't have any direct moves from other classes. */
26187 /* If the target says that inter-unit moves are more expensive
26188 than moving through memory, then don't generate them. */
26189 if (!TARGET_INTER_UNIT_MOVES
)
26192 /* Between SSE and general, we have moves no larger than word size. */
26193 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
26201 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
26202 enum machine_mode mode
, int strict
)
26204 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
26207 /* Return true if the registers in CLASS cannot represent the change from
26208 modes FROM to TO. */
26211 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
26212 enum reg_class regclass
)
26217 /* x87 registers can't do subreg at all, as all values are reformatted
26218 to extended precision. */
26219 if (MAYBE_FLOAT_CLASS_P (regclass
))
26222 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
26224 /* Vector registers do not support QI or HImode loads. If we don't
26225 disallow a change to these modes, reload will assume it's ok to
26226 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26227 the vec_dupv4hi pattern. */
26228 if (GET_MODE_SIZE (from
) < 4)
26231 /* Vector registers do not support subreg with nonzero offsets, which
26232 are otherwise valid for integer registers. Since we can't see
26233 whether we have a nonzero offset from here, prohibit all
26234 nonparadoxical subregs changing size. */
26235 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
26242 /* Return the cost of moving data of mode M between a
26243 register and memory. A value of 2 is the default; this cost is
26244 relative to those in `REGISTER_MOVE_COST'.
26246 This function is used extensively by register_move_cost that is used to
26247 build tables at startup. Make it inline in this case.
26248 When IN is 2, return maximum of in and out move cost.
26250 If moving between registers and memory is more expensive than
26251 between two registers, you should define this macro to express the
26254 Model also increased moving costs of QImode registers in non
26258 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
26262 if (FLOAT_CLASS_P (regclass
))
26280 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
26281 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
26283 if (SSE_CLASS_P (regclass
))
26286 switch (GET_MODE_SIZE (mode
))
26301 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
26302 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
26304 if (MMX_CLASS_P (regclass
))
26307 switch (GET_MODE_SIZE (mode
))
26319 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
26320 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
26322 switch (GET_MODE_SIZE (mode
))
26325 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
26328 return ix86_cost
->int_store
[0];
26329 if (TARGET_PARTIAL_REG_DEPENDENCY
26330 && optimize_function_for_speed_p (cfun
))
26331 cost
= ix86_cost
->movzbl_load
;
26333 cost
= ix86_cost
->int_load
[0];
26335 return MAX (cost
, ix86_cost
->int_store
[0]);
26341 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
26343 return ix86_cost
->movzbl_load
;
26345 return ix86_cost
->int_store
[0] + 4;
26350 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
26351 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
26353 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26354 if (mode
== TFmode
)
26357 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
26359 cost
= ix86_cost
->int_load
[2];
26361 cost
= ix86_cost
->int_store
[2];
26362 return (cost
* (((int) GET_MODE_SIZE (mode
)
26363 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
26368 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
, int in
)
26370 return inline_memory_move_cost (mode
, regclass
, in
);
26374 /* Return the cost of moving data from a register in class CLASS1 to
26375 one in class CLASS2.
26377 It is not required that the cost always equal 2 when FROM is the same as TO;
26378 on some machines it is expensive to move between registers if they are not
26379 general registers. */
26382 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
26383 enum reg_class class2
)
26385 /* In case we require secondary memory, compute cost of the store followed
26386 by load. In order to avoid bad register allocation choices, we need
26387 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26389 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
26393 cost
+= inline_memory_move_cost (mode
, class1
, 2);
26394 cost
+= inline_memory_move_cost (mode
, class2
, 2);
26396 /* In case of copying from general_purpose_register we may emit multiple
26397 stores followed by single load causing memory size mismatch stall.
26398 Count this as arbitrarily high cost of 20. */
26399 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
26402 /* In the case of FP/MMX moves, the registers actually overlap, and we
26403 have to switch modes in order to treat them differently. */
26404 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
26405 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
26411 /* Moves between SSE/MMX and integer unit are expensive. */
26412 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
26413 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
26415 /* ??? By keeping returned value relatively high, we limit the number
26416 of moves between integer and MMX/SSE registers for all targets.
26417 Additionally, high value prevents problem with x86_modes_tieable_p(),
26418 where integer modes in MMX/SSE registers are not tieable
26419 because of missing QImode and HImode moves to, from or between
26420 MMX/SSE registers. */
26421 return MAX (8, ix86_cost
->mmxsse_to_integer
);
26423 if (MAYBE_FLOAT_CLASS_P (class1
))
26424 return ix86_cost
->fp_move
;
26425 if (MAYBE_SSE_CLASS_P (class1
))
26426 return ix86_cost
->sse_move
;
26427 if (MAYBE_MMX_CLASS_P (class1
))
26428 return ix86_cost
->mmx_move
;
26432 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26435 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
26437 /* Flags and only flags can only hold CCmode values. */
26438 if (CC_REGNO_P (regno
))
26439 return GET_MODE_CLASS (mode
) == MODE_CC
;
26440 if (GET_MODE_CLASS (mode
) == MODE_CC
26441 || GET_MODE_CLASS (mode
) == MODE_RANDOM
26442 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
26444 if (FP_REGNO_P (regno
))
26445 return VALID_FP_MODE_P (mode
);
26446 if (SSE_REGNO_P (regno
))
26448 /* We implement the move patterns for all vector modes into and
26449 out of SSE registers, even when no operation instructions
26450 are available. OImode move is available only when AVX is
26452 return ((TARGET_AVX
&& mode
== OImode
)
26453 || VALID_AVX256_REG_MODE (mode
)
26454 || VALID_SSE_REG_MODE (mode
)
26455 || VALID_SSE2_REG_MODE (mode
)
26456 || VALID_MMX_REG_MODE (mode
)
26457 || VALID_MMX_REG_MODE_3DNOW (mode
));
26459 if (MMX_REGNO_P (regno
))
26461 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26462 so if the register is available at all, then we can move data of
26463 the given mode into or out of it. */
26464 return (VALID_MMX_REG_MODE (mode
)
26465 || VALID_MMX_REG_MODE_3DNOW (mode
));
26468 if (mode
== QImode
)
26470 /* Take care for QImode values - they can be in non-QI regs,
26471 but then they do cause partial register stalls. */
26472 if (regno
<= BX_REG
|| TARGET_64BIT
)
26474 if (!TARGET_PARTIAL_REG_STALL
)
26476 return reload_in_progress
|| reload_completed
;
26478 /* We handle both integer and floats in the general purpose registers. */
26479 else if (VALID_INT_MODE_P (mode
))
26481 else if (VALID_FP_MODE_P (mode
))
26483 else if (VALID_DFP_MODE_P (mode
))
26485 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26486 on to use that value in smaller contexts, this can easily force a
26487 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26488 supporting DImode, allow it. */
26489 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
26495 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26496 tieable integer mode. */
26499 ix86_tieable_integer_mode_p (enum machine_mode mode
)
26508 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
26511 return TARGET_64BIT
;
26518 /* Return true if MODE1 is accessible in a register that can hold MODE2
26519 without copying. That is, all register classes that can hold MODE2
26520 can also hold MODE1. */
26523 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
26525 if (mode1
== mode2
)
26528 if (ix86_tieable_integer_mode_p (mode1
)
26529 && ix86_tieable_integer_mode_p (mode2
))
26532 /* MODE2 being XFmode implies fp stack or general regs, which means we
26533 can tie any smaller floating point modes to it. Note that we do not
26534 tie this with TFmode. */
26535 if (mode2
== XFmode
)
26536 return mode1
== SFmode
|| mode1
== DFmode
;
26538 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26539 that we can tie it with SFmode. */
26540 if (mode2
== DFmode
)
26541 return mode1
== SFmode
;
26543 /* If MODE2 is only appropriate for an SSE register, then tie with
26544 any other mode acceptable to SSE registers. */
26545 if (GET_MODE_SIZE (mode2
) == 16
26546 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
26547 return (GET_MODE_SIZE (mode1
) == 16
26548 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
26550 /* If MODE2 is appropriate for an MMX register, then tie
26551 with any other mode acceptable to MMX registers. */
26552 if (GET_MODE_SIZE (mode2
) == 8
26553 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
26554 return (GET_MODE_SIZE (mode1
) == 8
26555 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
26560 /* Compute a (partial) cost for rtx X. Return true if the complete
26561 cost has been computed, and false if subexpressions should be
26562 scanned. In either case, *TOTAL contains the cost result. */
26565 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int *total
, bool speed
)
26567 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
26568 enum machine_mode mode
= GET_MODE (x
);
26569 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
26577 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
26579 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
26581 else if (flag_pic
&& SYMBOLIC_CONST (x
)
26583 || (!GET_CODE (x
) != LABEL_REF
26584 && (GET_CODE (x
) != SYMBOL_REF
26585 || !SYMBOL_REF_LOCAL_P (x
)))))
26592 if (mode
== VOIDmode
)
26595 switch (standard_80387_constant_p (x
))
26600 default: /* Other constants */
26605 /* Start with (MEM (SYMBOL_REF)), since that's where
26606 it'll probably end up. Add a penalty for size. */
26607 *total
= (COSTS_N_INSNS (1)
26608 + (flag_pic
!= 0 && !TARGET_64BIT
)
26609 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
26615 /* The zero extensions is often completely free on x86_64, so make
26616 it as cheap as possible. */
26617 if (TARGET_64BIT
&& mode
== DImode
26618 && GET_MODE (XEXP (x
, 0)) == SImode
)
26620 else if (TARGET_ZERO_EXTEND_WITH_AND
)
26621 *total
= cost
->add
;
26623 *total
= cost
->movzx
;
26627 *total
= cost
->movsx
;
26631 if (CONST_INT_P (XEXP (x
, 1))
26632 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
26634 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
26637 *total
= cost
->add
;
26640 if ((value
== 2 || value
== 3)
26641 && cost
->lea
<= cost
->shift_const
)
26643 *total
= cost
->lea
;
26653 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
26655 if (CONST_INT_P (XEXP (x
, 1)))
26657 if (INTVAL (XEXP (x
, 1)) > 32)
26658 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
26660 *total
= cost
->shift_const
* 2;
26664 if (GET_CODE (XEXP (x
, 1)) == AND
)
26665 *total
= cost
->shift_var
* 2;
26667 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
26672 if (CONST_INT_P (XEXP (x
, 1)))
26673 *total
= cost
->shift_const
;
26675 *total
= cost
->shift_var
;
26680 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
26682 /* ??? SSE scalar cost should be used here. */
26683 *total
= cost
->fmul
;
26686 else if (X87_FLOAT_MODE_P (mode
))
26688 *total
= cost
->fmul
;
26691 else if (FLOAT_MODE_P (mode
))
26693 /* ??? SSE vector cost should be used here. */
26694 *total
= cost
->fmul
;
26699 rtx op0
= XEXP (x
, 0);
26700 rtx op1
= XEXP (x
, 1);
26702 if (CONST_INT_P (XEXP (x
, 1)))
26704 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
26705 for (nbits
= 0; value
!= 0; value
&= value
- 1)
26709 /* This is arbitrary. */
26712 /* Compute costs correctly for widening multiplication. */
26713 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
26714 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
26715 == GET_MODE_SIZE (mode
))
26717 int is_mulwiden
= 0;
26718 enum machine_mode inner_mode
= GET_MODE (op0
);
26720 if (GET_CODE (op0
) == GET_CODE (op1
))
26721 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
26722 else if (CONST_INT_P (op1
))
26724 if (GET_CODE (op0
) == SIGN_EXTEND
)
26725 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
26728 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
26732 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
26735 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
26736 + nbits
* cost
->mult_bit
26737 + rtx_cost (op0
, outer_code
, speed
) + rtx_cost (op1
, outer_code
, speed
));
26746 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
26747 /* ??? SSE cost should be used here. */
26748 *total
= cost
->fdiv
;
26749 else if (X87_FLOAT_MODE_P (mode
))
26750 *total
= cost
->fdiv
;
26751 else if (FLOAT_MODE_P (mode
))
26752 /* ??? SSE vector cost should be used here. */
26753 *total
= cost
->fdiv
;
26755 *total
= cost
->divide
[MODE_INDEX (mode
)];
26759 if (GET_MODE_CLASS (mode
) == MODE_INT
26760 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
26762 if (GET_CODE (XEXP (x
, 0)) == PLUS
26763 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
26764 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
26765 && CONSTANT_P (XEXP (x
, 1)))
26767 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
26768 if (val
== 2 || val
== 4 || val
== 8)
26770 *total
= cost
->lea
;
26771 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
, speed
);
26772 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
26773 outer_code
, speed
);
26774 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, speed
);
26778 else if (GET_CODE (XEXP (x
, 0)) == MULT
26779 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
26781 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
26782 if (val
== 2 || val
== 4 || val
== 8)
26784 *total
= cost
->lea
;
26785 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, speed
);
26786 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, speed
);
26790 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
26792 *total
= cost
->lea
;
26793 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, speed
);
26794 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
, speed
);
26795 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, speed
);
26802 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
26804 /* ??? SSE cost should be used here. */
26805 *total
= cost
->fadd
;
26808 else if (X87_FLOAT_MODE_P (mode
))
26810 *total
= cost
->fadd
;
26813 else if (FLOAT_MODE_P (mode
))
26815 /* ??? SSE vector cost should be used here. */
26816 *total
= cost
->fadd
;
26824 if (!TARGET_64BIT
&& mode
== DImode
)
26826 *total
= (cost
->add
* 2
26827 + (rtx_cost (XEXP (x
, 0), outer_code
, speed
)
26828 << (GET_MODE (XEXP (x
, 0)) != DImode
))
26829 + (rtx_cost (XEXP (x
, 1), outer_code
, speed
)
26830 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
26836 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
26838 /* ??? SSE cost should be used here. */
26839 *total
= cost
->fchs
;
26842 else if (X87_FLOAT_MODE_P (mode
))
26844 *total
= cost
->fchs
;
26847 else if (FLOAT_MODE_P (mode
))
26849 /* ??? SSE vector cost should be used here. */
26850 *total
= cost
->fchs
;
26856 if (!TARGET_64BIT
&& mode
== DImode
)
26857 *total
= cost
->add
* 2;
26859 *total
= cost
->add
;
26863 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
26864 && XEXP (XEXP (x
, 0), 1) == const1_rtx
26865 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
26866 && XEXP (x
, 1) == const0_rtx
)
26868 /* This kind of construct is implemented using test[bwl].
26869 Treat it as if we had an AND. */
26870 *total
= (cost
->add
26871 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, speed
)
26872 + rtx_cost (const1_rtx
, outer_code
, speed
));
26878 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
26883 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
26884 /* ??? SSE cost should be used here. */
26885 *total
= cost
->fabs
;
26886 else if (X87_FLOAT_MODE_P (mode
))
26887 *total
= cost
->fabs
;
26888 else if (FLOAT_MODE_P (mode
))
26889 /* ??? SSE vector cost should be used here. */
26890 *total
= cost
->fabs
;
26894 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
26895 /* ??? SSE cost should be used here. */
26896 *total
= cost
->fsqrt
;
26897 else if (X87_FLOAT_MODE_P (mode
))
26898 *total
= cost
->fsqrt
;
26899 else if (FLOAT_MODE_P (mode
))
26900 /* ??? SSE vector cost should be used here. */
26901 *total
= cost
->fsqrt
;
26905 if (XINT (x
, 1) == UNSPEC_TP
)
26916 static int current_machopic_label_num
;
26918 /* Given a symbol name and its associated stub, write out the
26919 definition of the stub. */
26922 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
26924 unsigned int length
;
26925 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
26926 int label
= ++current_machopic_label_num
;
26928 /* For 64-bit we shouldn't get here. */
26929 gcc_assert (!TARGET_64BIT
);
26931 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26932 symb
= (*targetm
.strip_name_encoding
) (symb
);
26934 length
= strlen (stub
);
26935 binder_name
= XALLOCAVEC (char, length
+ 32);
26936 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
26938 length
= strlen (symb
);
26939 symbol_name
= XALLOCAVEC (char, length
+ 32);
26940 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
26942 sprintf (lazy_ptr_name
, "L%d$lz", label
);
26945 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
26947 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
26949 fprintf (file
, "%s:\n", stub
);
26950 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
26954 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
26955 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
26956 fprintf (file
, "\tjmp\t*%%edx\n");
26959 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
26961 fprintf (file
, "%s:\n", binder_name
);
26965 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
26966 fprintf (file
, "\tpushl\t%%eax\n");
26969 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
26971 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
26973 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
26974 fprintf (file
, "%s:\n", lazy_ptr_name
);
26975 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
26976 fprintf (file
, "\t.long %s\n", binder_name
);
/* Darwin end-of-file hook: delegates to the generic Darwin epilogue.
   NOTE(review): the extraction elided interior lines of this function
   (the embedded numbering jumps 26980 -> 26982); presumably it also calls
   the generic ix86 file-end hook — confirm against the original before
   editing.  */
26974 fprintf (file
, "%s:\n", lazy_ptr_name
);
26975 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
26976 fprintf (file
, "\t.long %s\n", binder_name
);
26980 darwin_x86_file_end (void)
26982 darwin_file_end ();
26987 /* Order the registers for register allocator. */
26990 x86_order_regs_for_local_alloc (void)
26995 /* First allocate the local general purpose registers. */
26996 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
26997 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
26998 reg_alloc_order
[pos
++] = i
;
27000 /* Global general purpose registers. */
27001 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
27002 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
27003 reg_alloc_order
[pos
++] = i
;
27005 /* x87 registers come first in case we are doing FP math
27007 if (!TARGET_SSE_MATH
)
27008 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
27009 reg_alloc_order
[pos
++] = i
;
27011 /* SSE registers. */
27012 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
27013 reg_alloc_order
[pos
++] = i
;
27014 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
27015 reg_alloc_order
[pos
++] = i
;
27017 /* x87 registers. */
27018 if (TARGET_SSE_MATH
)
27019 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
27020 reg_alloc_order
[pos
++] = i
;
27022 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
27023 reg_alloc_order
[pos
++] = i
;
27025 /* Initialize the rest of array as we do not allocate some registers
27027 while (pos
< FIRST_PSEUDO_REGISTER
)
27028 reg_alloc_order
[pos
++] = 0;
27031 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27032 struct attribute_spec.handler. */
27034 ix86_handle_abi_attribute (tree
*node
, tree name
,
27035 tree args ATTRIBUTE_UNUSED
,
27036 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
27038 if (TREE_CODE (*node
) != FUNCTION_TYPE
27039 && TREE_CODE (*node
) != METHOD_TYPE
27040 && TREE_CODE (*node
) != FIELD_DECL
27041 && TREE_CODE (*node
) != TYPE_DECL
)
27043 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
27045 *no_add_attrs
= true;
27050 warning (OPT_Wattributes
, "%qE attribute only available for 64-bit",
27052 *no_add_attrs
= true;
27056 /* Can combine regparm with all attributes but fastcall. */
27057 if (is_attribute_p ("ms_abi", name
))
27059 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
27061 error ("ms_abi and sysv_abi attributes are not compatible");
27066 else if (is_attribute_p ("sysv_abi", name
))
27068 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
27070 error ("ms_abi and sysv_abi attributes are not compatible");
27079 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27080 struct attribute_spec.handler. */
27082 ix86_handle_struct_attribute (tree
*node
, tree name
,
27083 tree args ATTRIBUTE_UNUSED
,
27084 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
27087 if (DECL_P (*node
))
27089 if (TREE_CODE (*node
) == TYPE_DECL
)
27090 type
= &TREE_TYPE (*node
);
27095 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
27096 || TREE_CODE (*type
) == UNION_TYPE
)))
27098 warning (OPT_Wattributes
, "%qE attribute ignored",
27100 *no_add_attrs
= true;
27103 else if ((is_attribute_p ("ms_struct", name
)
27104 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
27105 || ((is_attribute_p ("gcc_struct", name
)
27106 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
27108 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
27110 *no_add_attrs
= true;
27117 ix86_ms_bitfield_layout_p (const_tree record_type
)
27119 return (TARGET_MS_BITFIELD_LAYOUT
&&
27120 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
27121 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
27124 /* Returns an expression indicating where the this parameter is
27125 located on entry to the FUNCTION. */
27128 x86_this_parameter (tree function
)
27130 tree type
= TREE_TYPE (function
);
27131 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
27136 const int *parm_regs
;
27138 if (ix86_function_type_abi (type
) == MS_ABI
)
27139 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
27141 parm_regs
= x86_64_int_parameter_registers
;
27142 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
27145 nregs
= ix86_function_regparm (type
, function
);
27147 if (nregs
> 0 && !stdarg_p (type
))
27151 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
27152 regno
= aggr
? DX_REG
: CX_REG
;
27160 return gen_rtx_MEM (SImode
,
27161 plus_constant (stack_pointer_rtx
, 4));
27164 return gen_rtx_REG (SImode
, regno
);
27167 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
27170 /* Determine whether x86_output_mi_thunk can succeed. */
27173 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
27174 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
27175 HOST_WIDE_INT vcall_offset
, const_tree function
)
27177 /* 64-bit can handle anything. */
27181 /* For 32-bit, everything's fine if we have one free register. */
27182 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
27185 /* Need a free register for vcall_offset. */
27189 /* Need a free register for GOT references. */
27190 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
27193 /* Otherwise ok. */
27197 /* Output the assembler code for a thunk function. THUNK_DECL is the
27198 declaration for the thunk function itself, FUNCTION is the decl for
27199 the target function. DELTA is an immediate constant offset to be
27200 added to THIS. If VCALL_OFFSET is nonzero, the word at
27201 *(*this + vcall_offset) should be added to THIS. */
27204 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
27205 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
27206 HOST_WIDE_INT vcall_offset
, tree function
)
27209 rtx this_param
= x86_this_parameter (function
);
27212 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27213 pull it in now and let DELTA benefit. */
27214 if (REG_P (this_param
))
27215 this_reg
= this_param
;
27216 else if (vcall_offset
)
27218 /* Put the this parameter into %eax. */
27219 xops
[0] = this_param
;
27220 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
27221 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops
);
27224 this_reg
= NULL_RTX
;
27226 /* Adjust the this parameter by a fixed constant. */
27229 xops
[0] = GEN_INT (delta
);
27230 xops
[1] = this_reg
? this_reg
: this_param
;
27233 if (!x86_64_general_operand (xops
[0], DImode
))
27235 tmp
= gen_rtx_REG (DImode
, R10_REG
);
27237 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
27239 xops
[1] = this_param
;
27241 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
27244 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
27247 /* Adjust the this parameter by a value stored in the vtable. */
27251 tmp
= gen_rtx_REG (DImode
, R10_REG
);
27254 int tmp_regno
= CX_REG
;
27255 if (lookup_attribute ("fastcall",
27256 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
27257 tmp_regno
= AX_REG
;
27258 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
27261 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
27263 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops
);
27265 /* Adjust the this parameter. */
27266 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
27267 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
27269 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
27270 xops
[0] = GEN_INT (vcall_offset
);
27272 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
27273 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
27275 xops
[1] = this_reg
;
27276 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops
);
27279 /* If necessary, drop THIS back to its stack slot. */
27280 if (this_reg
&& this_reg
!= this_param
)
27282 xops
[0] = this_reg
;
27283 xops
[1] = this_param
;
27284 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops
);
27287 xops
[0] = XEXP (DECL_RTL (function
), 0);
27290 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
27291 output_asm_insn ("jmp\t%P0", xops
);
27292 /* All thunks should be in the same object as their target,
27293 and thus binds_local_p should be true. */
27294 else if (TARGET_64BIT
&& cfun
->machine
->call_abi
== MS_ABI
)
27295 gcc_unreachable ();
27298 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
27299 tmp
= gen_rtx_CONST (Pmode
, tmp
);
27300 tmp
= gen_rtx_MEM (QImode
, tmp
);
27302 output_asm_insn ("jmp\t%A0", xops
);
27307 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
27308 output_asm_insn ("jmp\t%P0", xops
);
27313 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
27314 tmp
= (gen_rtx_SYMBOL_REF
27316 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
27317 tmp
= gen_rtx_MEM (QImode
, tmp
);
27319 output_asm_insn ("jmp\t%0", xops
);
27322 #endif /* TARGET_MACHO */
27324 tmp
= gen_rtx_REG (SImode
, CX_REG
);
27325 output_set_got (tmp
, NULL_RTX
);
27328 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
27329 output_asm_insn ("jmp\t{*}%1", xops
);
/* Emit the assembly-file prologue: the generic prologue, then the Darwin
   prologue (the surrounding #if TARGET_MACHO guard was elided by the
   extraction — confirm before editing), then optional ".version",
   "__fltused" and Intel-syntax directives depending on target flags.  */
27335 x86_file_start (void)
27337 default_file_start ();
27339 darwin_file_start ();
27341 if (X86_FILE_START_VERSION_DIRECTIVE
)
27342 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
27343 if (X86_FILE_START_FLTUSED
)
27344 fputs ("\t.global\t__fltused\n", asm_out_file
);
27345 if (ix86_asm_dialect
== ASM_INTEL
)
27346 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
27350 x86_field_alignment (tree field
, int computed
)
27352 enum machine_mode mode
;
27353 tree type
= TREE_TYPE (field
);
27355 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
27357 mode
= TYPE_MODE (strip_array_types (type
));
27358 if (mode
== DFmode
|| mode
== DCmode
27359 || GET_MODE_CLASS (mode
) == MODE_INT
27360 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
27361 return MIN (32, computed
);
27365 /* Output assembler code to FILE to increment profiler label # LABELNO
27366 for profiling a function entry. */
27368 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
27372 #ifndef NO_PROFILE_COUNTERS
27373 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
27376 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
27377 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
27379 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
27383 #ifndef NO_PROFILE_COUNTERS
27384 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27385 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
27387 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
27391 #ifndef NO_PROFILE_COUNTERS
27392 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
27393 PROFILE_COUNT_REGISTER
);
27395 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
27399 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27400 /* We don't have exact information about the insn sizes, but we may assume
27401 quite safely that we are informed about all 1 byte insns and memory
27402 address sizes. This is enough to eliminate unnecessary padding in
27406 min_insn_size (rtx insn
)
27410 if (!INSN_P (insn
) || !active_insn_p (insn
))
27413 /* Discard alignments we've emit and jump instructions. */
27414 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
27415 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
27417 if (JUMP_TABLE_DATA_P (insn
))
27420 /* Important case - calls are always 5 bytes.
27421 It is common to have many calls in the row. */
27423 && symbolic_reference_mentioned_p (PATTERN (insn
))
27424 && !SIBLING_CALL_P (insn
))
27426 len
= get_attr_length (insn
);
27430 /* For normal instructions we rely on get_attr_length being exact,
27431 with a few exceptions. */
27432 if (!JUMP_P (insn
))
27434 enum attr_type type
= get_attr_type (insn
);
27439 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
27440 || asm_noperands (PATTERN (insn
)) >= 0)
27447 /* Otherwise trust get_attr_length. */
27451 l
= get_attr_length_address (insn
);
27452 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
27461 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
27465 ix86_avoid_jump_mispredicts (void)
27467 rtx insn
, start
= get_insns ();
27468 int nbytes
= 0, njumps
= 0;
27471 /* Look for all minimal intervals of instructions containing 4 jumps.
27472 The intervals are bounded by START and INSN. NBYTES is the total
27473 size of instructions in the interval including INSN and not including
27474 START. When the NBYTES is smaller than 16 bytes, it is possible
27475 that the end of START and INSN ends up in the same 16byte page.
27477 The smallest offset in the page INSN can start is the case where START
27478 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27479 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27481 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
27485 if (LABEL_P (insn
))
27487 int align
= label_to_alignment (insn
);
27488 int max_skip
= label_to_max_skip (insn
);
27492 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27493 already in the current 16 byte page, because otherwise
27494 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27495 bytes to reach 16 byte boundary. */
27497 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
27500 fprintf (dump_file
, "Label %i with max_skip %i\n",
27501 INSN_UID (insn
), max_skip
);
27504 while (nbytes
+ max_skip
>= 16)
27506 start
= NEXT_INSN (start
);
27507 if ((JUMP_P (start
)
27508 && GET_CODE (PATTERN (start
)) != ADDR_VEC
27509 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
27511 njumps
--, isjump
= 1;
27514 nbytes
-= min_insn_size (start
);
27520 min_size
= min_insn_size (insn
);
27521 nbytes
+= min_size
;
27523 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
27524 INSN_UID (insn
), min_size
);
27526 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
27527 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
27535 start
= NEXT_INSN (start
);
27536 if ((JUMP_P (start
)
27537 && GET_CODE (PATTERN (start
)) != ADDR_VEC
27538 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
27540 njumps
--, isjump
= 1;
27543 nbytes
-= min_insn_size (start
);
27545 gcc_assert (njumps
>= 0);
27547 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
27548 INSN_UID (start
), INSN_UID (insn
), nbytes
);
27550 if (njumps
== 3 && isjump
&& nbytes
< 16)
27552 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
27555 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
27556 INSN_UID (insn
), padsize
);
27557 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
27563 /* AMD Athlon works faster
27564 when RET is not destination of conditional jump or directly preceded
27565 by other jump instruction. We avoid the penalty by inserting NOP just
27566 before the RET instructions in such cases. */
27568 ix86_pad_returns (void)
27573 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
27575 basic_block bb
= e
->src
;
27576 rtx ret
= BB_END (bb
);
27578 bool replace
= false;
27580 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
27581 || optimize_bb_for_size_p (bb
))
27583 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
27584 if (active_insn_p (prev
) || LABEL_P (prev
))
27586 if (prev
&& LABEL_P (prev
))
27591 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
27592 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
27593 && !(e
->flags
& EDGE_FALLTHRU
))
27598 prev
= prev_active_insn (ret
);
27600 && ((JUMP_P (prev
) && any_condjump_p (prev
))
27603 /* Empty functions get branch mispredict even when the jump destination
27604 is not visible to us. */
27605 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
27610 emit_jump_insn_before (gen_return_internal_long (), ret
);
27616 /* Implement machine specific optimizations. We implement padding of returns
27617 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
27621 if (optimize
&& optimize_function_for_speed_p (cfun
))
27623 if (TARGET_PAD_RETURNS
)
27624 ix86_pad_returns ();
27625 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27626 if (TARGET_FOUR_JUMP_LIMIT
)
27627 ix86_avoid_jump_mispredicts ();
27632 /* Return nonzero when QImode register that must be represented via REX prefix
27635 x86_extended_QIreg_mentioned_p (rtx insn
)
27638 extract_insn_cached (insn
);
27639 for (i
= 0; i
< recog_data
.n_operands
; i
++)
27640 if (REG_P (recog_data
.operand
[i
])
27641 && REGNO (recog_data
.operand
[i
]) > BX_REG
)
27646 /* Return nonzero when P points to register encoded via REX prefix.
27647 Called via for_each_rtx. */
27649 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
27651 unsigned int regno
;
27654 regno
= REGNO (*p
);
27655 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
27658 /* Return true when INSN mentions register that must be encoded using REX
27661 x86_extended_reg_mentioned_p (rtx insn
)
27663 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
27664 extended_reg_mentioned_1
, NULL
);
27667 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27668 optabs would emit if we didn't have TFmode patterns. */
27671 x86_emit_floatuns (rtx operands
[2])
27673 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
27674 enum machine_mode mode
, inmode
;
27676 inmode
= GET_MODE (operands
[1]);
27677 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
27680 in
= force_reg (inmode
, operands
[1]);
27681 mode
= GET_MODE (out
);
27682 neglab
= gen_label_rtx ();
27683 donelab
= gen_label_rtx ();
27684 f0
= gen_reg_rtx (mode
);
27686 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
27688 expand_float (out
, in
, 0);
27690 emit_jump_insn (gen_jump (donelab
));
27693 emit_label (neglab
);
27695 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
27697 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
27699 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
27701 expand_float (f0
, i0
, 0);
27703 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
27705 emit_label (donelab
);
27708 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27709 with all elements equal to VAR. Return true if successful. */
27712 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
27713 rtx target
, rtx val
)
27715 enum machine_mode hmode
, smode
, wsmode
, wvmode
;
27730 val
= force_reg (GET_MODE_INNER (mode
), val
);
27731 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
27732 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
27738 if (TARGET_SSE
|| TARGET_3DNOW_A
)
27740 val
= gen_lowpart (SImode
, val
);
27741 x
= gen_rtx_TRUNCATE (HImode
, val
);
27742 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
27743 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
27765 /* Extend HImode to SImode using a paradoxical SUBREG. */
27766 tmp1
= gen_reg_rtx (SImode
);
27767 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
27768 /* Insert the SImode value as low element of V4SImode vector. */
27769 tmp2
= gen_reg_rtx (V4SImode
);
27770 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
27771 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
27772 CONST0_RTX (V4SImode
),
27774 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
27775 /* Cast the V4SImode vector back to a V8HImode vector. */
27776 tmp1
= gen_reg_rtx (V8HImode
);
27777 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
27778 /* Duplicate the low short through the whole low SImode word. */
27779 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
27780 /* Cast the V8HImode vector back to a V4SImode vector. */
27781 tmp2
= gen_reg_rtx (V4SImode
);
27782 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
27783 /* Replicate the low element of the V4SImode vector. */
27784 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
27785 /* Cast the V2SImode back to V8HImode, and store in target. */
27786 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
27797 /* Extend QImode to SImode using a paradoxical SUBREG. */
27798 tmp1
= gen_reg_rtx (SImode
);
27799 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
27800 /* Insert the SImode value as low element of V4SImode vector. */
27801 tmp2
= gen_reg_rtx (V4SImode
);
27802 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
27803 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
27804 CONST0_RTX (V4SImode
),
27806 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
27807 /* Cast the V4SImode vector back to a V16QImode vector. */
27808 tmp1
= gen_reg_rtx (V16QImode
);
27809 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
27810 /* Duplicate the low byte through the whole low SImode word. */
27811 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
27812 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
27813 /* Cast the V16QImode vector back to a V4SImode vector. */
27814 tmp2
= gen_reg_rtx (V4SImode
);
27815 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
27816 /* Replicate the low element of the V4SImode vector. */
27817 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
27818 /* Cast the V2SImode back to V16QImode, and store in target. */
27819 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
27827 /* Replicate the value once into the next wider mode and recurse. */
27828 val
= convert_modes (wsmode
, smode
, val
, true);
27829 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
27830 GEN_INT (GET_MODE_BITSIZE (smode
)),
27831 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
27832 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
27834 x
= gen_reg_rtx (wvmode
);
27835 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
27836 gcc_unreachable ();
27837 emit_move_insn (target
, gen_lowpart (mode
, x
));
27860 rtx tmp
= gen_reg_rtx (hmode
);
27861 ix86_expand_vector_init_duplicate (mmx_ok
, hmode
, tmp
, val
);
27862 emit_insn (gen_rtx_SET (VOIDmode
, target
,
27863 gen_rtx_VEC_CONCAT (mode
, tmp
, tmp
)));
27872 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27873 whose ONE_VAR element is VAR, and other elements are zero. Return true
27877 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
27878 rtx target
, rtx var
, int one_var
)
27880 enum machine_mode vsimode
;
27883 bool use_vector_set
= false;
27888 /* For SSE4.1, we normally use vector set. But if the second
27889 element is zero and inter-unit moves are OK, we use movq
27891 use_vector_set
= (TARGET_64BIT
27893 && !(TARGET_INTER_UNIT_MOVES
27899 use_vector_set
= TARGET_SSE4_1
;
27902 use_vector_set
= TARGET_SSE2
;
27905 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
27912 use_vector_set
= TARGET_AVX
;
27915 /* Use ix86_expand_vector_set in 64bit mode only. */
27916 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
27922 if (use_vector_set
)
27924 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
27925 var
= force_reg (GET_MODE_INNER (mode
), var
);
27926 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
27942 var
= force_reg (GET_MODE_INNER (mode
), var
);
27943 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
27944 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
27949 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
27950 new_target
= gen_reg_rtx (mode
);
27952 new_target
= target
;
27953 var
= force_reg (GET_MODE_INNER (mode
), var
);
27954 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
27955 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
27956 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
27959 /* We need to shuffle the value to the correct position, so
27960 create a new pseudo to store the intermediate result. */
27962 /* With SSE2, we can use the integer shuffle insns. */
27963 if (mode
!= V4SFmode
&& TARGET_SSE2
)
27965 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
27967 GEN_INT (one_var
== 1 ? 0 : 1),
27968 GEN_INT (one_var
== 2 ? 0 : 1),
27969 GEN_INT (one_var
== 3 ? 0 : 1)));
27970 if (target
!= new_target
)
27971 emit_move_insn (target
, new_target
);
27975 /* Otherwise convert the intermediate result to V4SFmode and
27976 use the SSE1 shuffle instructions. */
27977 if (mode
!= V4SFmode
)
27979 tmp
= gen_reg_rtx (V4SFmode
);
27980 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
27985 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
27987 GEN_INT (one_var
== 1 ? 0 : 1),
27988 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
27989 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
27991 if (mode
!= V4SFmode
)
27992 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
27993 else if (tmp
!= target
)
27994 emit_move_insn (target
, tmp
);
27996 else if (target
!= new_target
)
27997 emit_move_insn (target
, new_target
);
28002 vsimode
= V4SImode
;
28008 vsimode
= V2SImode
;
28014 /* Zero extend the variable element to SImode and recurse. */
28015 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
28017 x
= gen_reg_rtx (vsimode
);
28018 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
28020 gcc_unreachable ();
28022 emit_move_insn (target
, gen_lowpart (mode
, x
));
28030 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28031 consisting of the values in VALS. It is known that all elements
28032 except ONE_VAR are constants. Return true if successful. */
28035 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
28036 rtx target
, rtx vals
, int one_var
)
28038 rtx var
= XVECEXP (vals
, 0, one_var
);
28039 enum machine_mode wmode
;
28042 const_vec
= copy_rtx (vals
);
28043 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
28044 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
28052 /* For the two element vectors, it's just as easy to use
28053 the general case. */
28057 /* Use ix86_expand_vector_set in 64bit mode only. */
28080 /* There's no way to set one QImode entry easily. Combine
28081 the variable value with its adjacent constant value, and
28082 promote to an HImode set. */
28083 x
= XVECEXP (vals
, 0, one_var
^ 1);
28086 var
= convert_modes (HImode
, QImode
, var
, true);
28087 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
28088 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
28089 x
= GEN_INT (INTVAL (x
) & 0xff);
28093 var
= convert_modes (HImode
, QImode
, var
, true);
28094 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
28096 if (x
!= const0_rtx
)
28097 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
28098 1, OPTAB_LIB_WIDEN
);
28100 x
= gen_reg_rtx (wmode
);
28101 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
28102 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
28104 emit_move_insn (target
, gen_lowpart (mode
, x
));
28111 emit_move_insn (target
, const_vec
);
28112 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
28116 /* A subroutine of ix86_expand_vector_init_general. Use vector
28117 concatenate to handle the most general case: all values variable,
28118 and none identical. */
28121 ix86_expand_vector_init_concat (enum machine_mode mode
,
28122 rtx target
, rtx
*ops
, int n
)
28124 enum machine_mode cmode
, hmode
= VOIDmode
;
28125 rtx first
[8], second
[4];
28165 gcc_unreachable ();
28168 if (!register_operand (ops
[1], cmode
))
28169 ops
[1] = force_reg (cmode
, ops
[1]);
28170 if (!register_operand (ops
[0], cmode
))
28171 ops
[0] = force_reg (cmode
, ops
[0]);
28172 emit_insn (gen_rtx_SET (VOIDmode
, target
,
28173 gen_rtx_VEC_CONCAT (mode
, ops
[0],
28193 gcc_unreachable ();
28209 gcc_unreachable ();
28214 /* FIXME: We process inputs backward to help RA. PR 36222. */
28217 for (; i
> 0; i
-= 2, j
--)
28219 first
[j
] = gen_reg_rtx (cmode
);
28220 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
28221 ix86_expand_vector_init (false, first
[j
],
28222 gen_rtx_PARALLEL (cmode
, v
));
28228 gcc_assert (hmode
!= VOIDmode
);
28229 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
28231 second
[j
] = gen_reg_rtx (hmode
);
28232 ix86_expand_vector_init_concat (hmode
, second
[j
],
28236 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
28239 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
28243 gcc_unreachable ();
28247 /* A subroutine of ix86_expand_vector_init_general. Use vector
28248 interleave to handle the most general case: all values variable,
28249 and none identical. */
28252 ix86_expand_vector_init_interleave (enum machine_mode mode
,
28253 rtx target
, rtx
*ops
, int n
)
28255 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
28258 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
28259 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
28260 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
28265 gen_load_even
= gen_vec_setv8hi
;
28266 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
28267 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
28268 inner_mode
= HImode
;
28269 first_imode
= V4SImode
;
28270 second_imode
= V2DImode
;
28271 third_imode
= VOIDmode
;
28274 gen_load_even
= gen_vec_setv16qi
;
28275 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
28276 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
28277 inner_mode
= QImode
;
28278 first_imode
= V8HImode
;
28279 second_imode
= V4SImode
;
28280 third_imode
= V2DImode
;
28283 gcc_unreachable ();
28286 for (i
= 0; i
< n
; i
++)
28288 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
28289 op0
= gen_reg_rtx (SImode
);
28290 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
28292 /* Insert the SImode value as low element of V4SImode vector. */
28293 op1
= gen_reg_rtx (V4SImode
);
28294 op0
= gen_rtx_VEC_MERGE (V4SImode
,
28295 gen_rtx_VEC_DUPLICATE (V4SImode
,
28297 CONST0_RTX (V4SImode
),
28299 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
28301 /* Cast the V4SImode vector back to a vector in orignal mode. */
28302 op0
= gen_reg_rtx (mode
);
28303 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
28305 /* Load even elements into the second positon. */
28306 emit_insn ((*gen_load_even
) (op0
,
28307 force_reg (inner_mode
,
28311 /* Cast vector to FIRST_IMODE vector. */
28312 ops
[i
] = gen_reg_rtx (first_imode
);
28313 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
28316 /* Interleave low FIRST_IMODE vectors. */
28317 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
28319 op0
= gen_reg_rtx (first_imode
);
28320 emit_insn ((*gen_interleave_first_low
) (op0
, ops
[i
], ops
[i
+ 1]));
28322 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28323 ops
[j
] = gen_reg_rtx (second_imode
);
28324 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
28327 /* Interleave low SECOND_IMODE vectors. */
28328 switch (second_imode
)
28331 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
28333 op0
= gen_reg_rtx (second_imode
);
28334 emit_insn ((*gen_interleave_second_low
) (op0
, ops
[i
],
28337 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28339 ops
[j
] = gen_reg_rtx (third_imode
);
28340 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
28342 second_imode
= V2DImode
;
28343 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
28347 op0
= gen_reg_rtx (second_imode
);
28348 emit_insn ((*gen_interleave_second_low
) (op0
, ops
[0],
28351 /* Cast the SECOND_IMODE vector back to a vector on original
28353 emit_insn (gen_rtx_SET (VOIDmode
, target
,
28354 gen_lowpart (mode
, op0
)));
28358 gcc_unreachable ();
28362 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28363 all values variable, and none identical. */
28366 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
28367 rtx target
, rtx vals
)
28369 rtx ops
[32], op0
, op1
;
28370 enum machine_mode half_mode
= VOIDmode
;
28377 if (!mmx_ok
&& !TARGET_SSE
)
28389 n
= GET_MODE_NUNITS (mode
);
28390 for (i
= 0; i
< n
; i
++)
28391 ops
[i
] = XVECEXP (vals
, 0, i
);
28392 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
28396 half_mode
= V16QImode
;
28400 half_mode
= V8HImode
;
28404 n
= GET_MODE_NUNITS (mode
);
28405 for (i
= 0; i
< n
; i
++)
28406 ops
[i
] = XVECEXP (vals
, 0, i
);
28407 op0
= gen_reg_rtx (half_mode
);
28408 op1
= gen_reg_rtx (half_mode
);
28409 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
28411 ix86_expand_vector_init_interleave (half_mode
, op1
,
28412 &ops
[n
>> 1], n
>> 2);
28413 emit_insn (gen_rtx_SET (VOIDmode
, target
,
28414 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
28418 if (!TARGET_SSE4_1
)
28426 /* Don't use ix86_expand_vector_init_interleave if we can't
28427 move from GPR to SSE register directly. */
28428 if (!TARGET_INTER_UNIT_MOVES
)
28431 n
= GET_MODE_NUNITS (mode
);
28432 for (i
= 0; i
< n
; i
++)
28433 ops
[i
] = XVECEXP (vals
, 0, i
);
28434 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
28442 gcc_unreachable ();
28446 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
28447 enum machine_mode inner_mode
;
28448 rtx words
[4], shift
;
28450 inner_mode
= GET_MODE_INNER (mode
);
28451 n_elts
= GET_MODE_NUNITS (mode
);
28452 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
28453 n_elt_per_word
= n_elts
/ n_words
;
28454 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
28456 for (i
= 0; i
< n_words
; ++i
)
28458 rtx word
= NULL_RTX
;
28460 for (j
= 0; j
< n_elt_per_word
; ++j
)
28462 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
28463 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
28469 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
28470 word
, 1, OPTAB_LIB_WIDEN
);
28471 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
28472 word
, 1, OPTAB_LIB_WIDEN
);
28480 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
28481 else if (n_words
== 2)
28483 rtx tmp
= gen_reg_rtx (mode
);
28484 emit_clobber (tmp
);
28485 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
28486 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
28487 emit_move_insn (target
, tmp
);
28489 else if (n_words
== 4)
28491 rtx tmp
= gen_reg_rtx (V4SImode
);
28492 gcc_assert (word_mode
== SImode
);
28493 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
28494 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
28495 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
28498 gcc_unreachable ();
28502 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28503 instructions unless MMX_OK is true. */
28506 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
28508 enum machine_mode mode
= GET_MODE (target
);
28509 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
28510 int n_elts
= GET_MODE_NUNITS (mode
);
28511 int n_var
= 0, one_var
= -1;
28512 bool all_same
= true, all_const_zero
= true;
28516 for (i
= 0; i
< n_elts
; ++i
)
28518 x
= XVECEXP (vals
, 0, i
);
28519 if (!(CONST_INT_P (x
)
28520 || GET_CODE (x
) == CONST_DOUBLE
28521 || GET_CODE (x
) == CONST_FIXED
))
28522 n_var
++, one_var
= i
;
28523 else if (x
!= CONST0_RTX (inner_mode
))
28524 all_const_zero
= false;
28525 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
28529 /* Constants are best loaded from the constant pool. */
28532 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
28536 /* If all values are identical, broadcast the value. */
28538 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
28539 XVECEXP (vals
, 0, 0)))
28542 /* Values where only one field is non-constant are best loaded from
28543 the pool and overwritten via move later. */
28547 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
28548 XVECEXP (vals
, 0, one_var
),
28552 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
28556 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
28560 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
28562 enum machine_mode mode
= GET_MODE (target
);
28563 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
28564 enum machine_mode half_mode
;
28565 bool use_vec_merge
= false;
28567 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
28569 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
28570 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
28571 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
28572 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
28573 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
28574 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
28576 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
28578 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
28579 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
28580 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
28581 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
28582 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
28583 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
28593 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
28594 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
28596 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
28598 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
28599 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
28605 use_vec_merge
= TARGET_SSE4_1
;
28613 /* For the two element vectors, we implement a VEC_CONCAT with
28614 the extraction of the other element. */
28616 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
28617 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
28620 op0
= val
, op1
= tmp
;
28622 op0
= tmp
, op1
= val
;
28624 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
28625 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
28630 use_vec_merge
= TARGET_SSE4_1
;
28637 use_vec_merge
= true;
28641 /* tmp = target = A B C D */
28642 tmp
= copy_to_reg (target
);
28643 /* target = A A B B */
28644 emit_insn (gen_sse_unpcklps (target
, target
, target
));
28645 /* target = X A B B */
28646 ix86_expand_vector_set (false, target
, val
, 0);
28647 /* target = A X C D */
28648 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
28649 GEN_INT (1), GEN_INT (0),
28650 GEN_INT (2+4), GEN_INT (3+4)));
28654 /* tmp = target = A B C D */
28655 tmp
= copy_to_reg (target
);
28656 /* tmp = X B C D */
28657 ix86_expand_vector_set (false, tmp
, val
, 0);
28658 /* target = A B X D */
28659 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
28660 GEN_INT (0), GEN_INT (1),
28661 GEN_INT (0+4), GEN_INT (3+4)));
28665 /* tmp = target = A B C D */
28666 tmp
= copy_to_reg (target
);
28667 /* tmp = X B C D */
28668 ix86_expand_vector_set (false, tmp
, val
, 0);
28669 /* target = A B X D */
28670 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
28671 GEN_INT (0), GEN_INT (1),
28672 GEN_INT (2+4), GEN_INT (0+4)));
28676 gcc_unreachable ();
28681 use_vec_merge
= TARGET_SSE4_1
;
28685 /* Element 0 handled by vec_merge below. */
28688 use_vec_merge
= true;
28694 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28695 store into element 0, then shuffle them back. */
28699 order
[0] = GEN_INT (elt
);
28700 order
[1] = const1_rtx
;
28701 order
[2] = const2_rtx
;
28702 order
[3] = GEN_INT (3);
28703 order
[elt
] = const0_rtx
;
28705 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
28706 order
[1], order
[2], order
[3]));
28708 ix86_expand_vector_set (false, target
, val
, 0);
28710 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
28711 order
[1], order
[2], order
[3]));
28715 /* For SSE1, we have to reuse the V4SF code. */
28716 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
28717 gen_lowpart (SFmode
, val
), elt
);
28722 use_vec_merge
= TARGET_SSE2
;
28725 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
28729 use_vec_merge
= TARGET_SSE4_1
;
28736 half_mode
= V16QImode
;
28742 half_mode
= V8HImode
;
28748 half_mode
= V4SImode
;
28754 half_mode
= V2DImode
;
28760 half_mode
= V4SFmode
;
28766 half_mode
= V2DFmode
;
28772 /* Compute offset. */
28776 gcc_assert (i
<= 1);
28778 /* Extract the half. */
28779 tmp
= gen_reg_rtx (half_mode
);
28780 emit_insn ((*gen_extract
[j
][i
]) (tmp
, target
));
28782 /* Put val in tmp at elt. */
28783 ix86_expand_vector_set (false, tmp
, val
, elt
);
28786 emit_insn ((*gen_insert
[j
][i
]) (target
, target
, tmp
));
28795 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
28796 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
28797 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
28801 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
28803 emit_move_insn (mem
, target
);
28805 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
28806 emit_move_insn (tmp
, val
);
28808 emit_move_insn (target
, mem
);
28813 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
28815 enum machine_mode mode
= GET_MODE (vec
);
28816 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
28817 bool use_vec_extr
= false;
28830 use_vec_extr
= true;
28834 use_vec_extr
= TARGET_SSE4_1
;
28846 tmp
= gen_reg_rtx (mode
);
28847 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
28848 GEN_INT (elt
), GEN_INT (elt
),
28849 GEN_INT (elt
+4), GEN_INT (elt
+4)));
28853 tmp
= gen_reg_rtx (mode
);
28854 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
28858 gcc_unreachable ();
28861 use_vec_extr
= true;
28866 use_vec_extr
= TARGET_SSE4_1
;
28880 tmp
= gen_reg_rtx (mode
);
28881 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
28882 GEN_INT (elt
), GEN_INT (elt
),
28883 GEN_INT (elt
), GEN_INT (elt
)));
28887 tmp
= gen_reg_rtx (mode
);
28888 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
28892 gcc_unreachable ();
28895 use_vec_extr
= true;
28900 /* For SSE1, we have to reuse the V4SF code. */
28901 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
28902 gen_lowpart (V4SFmode
, vec
), elt
);
28908 use_vec_extr
= TARGET_SSE2
;
28911 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
28915 use_vec_extr
= TARGET_SSE4_1
;
28919 /* ??? Could extract the appropriate HImode element and shift. */
28926 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
28927 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
28929 /* Let the rtl optimizers know about the zero extension performed. */
28930 if (inner_mode
== QImode
|| inner_mode
== HImode
)
28932 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
28933 target
= gen_lowpart (SImode
, target
);
28936 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
28940 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
28942 emit_move_insn (mem
, vec
);
28944 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
28945 emit_move_insn (target
, tmp
);
28949 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28950 pattern to reduce; DEST is the destination; IN is the input vector. */
28953 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
28955 rtx tmp1
, tmp2
, tmp3
;
28957 tmp1
= gen_reg_rtx (V4SFmode
);
28958 tmp2
= gen_reg_rtx (V4SFmode
);
28959 tmp3
= gen_reg_rtx (V4SFmode
);
28961 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
28962 emit_insn (fn (tmp2
, tmp1
, in
));
28964 emit_insn (gen_sse_shufps_v4sf (tmp3
, tmp2
, tmp2
,
28965 GEN_INT (1), GEN_INT (1),
28966 GEN_INT (1+4), GEN_INT (1+4)));
28967 emit_insn (fn (dest
, tmp2
, tmp3
));
28970 /* Target hook for scalar_mode_supported_p. */
28972 ix86_scalar_mode_supported_p (enum machine_mode mode
)
28974 if (DECIMAL_FLOAT_MODE_P (mode
))
28976 else if (mode
== TFmode
)
28979 return default_scalar_mode_supported_p (mode
);
28982 /* Implements target hook vector_mode_supported_p. */
28984 ix86_vector_mode_supported_p (enum machine_mode mode
)
28986 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
28988 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
28990 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
28992 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
28994 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
28999 /* Target hook for c_mode_for_suffix. */
29000 static enum machine_mode
29001 ix86_c_mode_for_suffix (char suffix
)
29011 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29013 We do this in the new i386 backend to maintain source compatibility
29014 with the old cc0-based compiler. */
29017 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
29018 tree inputs ATTRIBUTE_UNUSED
,
29021 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
29023 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
29028 /* Implements target vector targetm.asm.encode_section_info. This
29029 is not used by netware. */
29031 static void ATTRIBUTE_UNUSED
29032 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
29034 default_encode_section_info (decl
, rtl
, first
);
29036 if (TREE_CODE (decl
) == VAR_DECL
29037 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
29038 && ix86_in_large_data_p (decl
))
29039 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
29042 /* Worker function for REVERSE_CONDITION. */
29045 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
29047 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
29048 ? reverse_condition (code
)
29049 : reverse_condition_maybe_unordered (code
));
29052 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29056 output_387_reg_move (rtx insn
, rtx
*operands
)
29058 if (REG_P (operands
[0]))
29060 if (REG_P (operands
[1])
29061 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
29063 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
29064 return output_387_ffreep (operands
, 0);
29065 return "fstp\t%y0";
29067 if (STACK_TOP_P (operands
[0]))
29068 return "fld%Z1\t%y1";
29071 else if (MEM_P (operands
[0]))
29073 gcc_assert (REG_P (operands
[1]));
29074 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
29075 return "fstp%Z0\t%y0";
29078 /* There is no non-popping store to memory for XFmode.
29079 So if we need one, follow the store with a load. */
29080 if (GET_MODE (operands
[0]) == XFmode
)
29081 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29083 return "fst%Z0\t%y0";
29090 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29091 FP status register is set. */
29094 ix86_emit_fp_unordered_jump (rtx label
)
29096 rtx reg
= gen_reg_rtx (HImode
);
29099 emit_insn (gen_x86_fnstsw_1 (reg
));
29101 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
29103 emit_insn (gen_x86_sahf_1 (reg
));
29105 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
29106 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
29110 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
29112 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
29113 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
29116 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
29117 gen_rtx_LABEL_REF (VOIDmode
, label
),
29119 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
29121 emit_jump_insn (temp
);
29122 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
29125 /* Output code to perform a log1p XFmode calculation. */
29127 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
29129 rtx label1
= gen_label_rtx ();
29130 rtx label2
= gen_label_rtx ();
29132 rtx tmp
= gen_reg_rtx (XFmode
);
29133 rtx tmp2
= gen_reg_rtx (XFmode
);
29136 emit_insn (gen_absxf2 (tmp
, op1
));
29137 test
= gen_rtx_GE (VOIDmode
, tmp
,
29138 CONST_DOUBLE_FROM_REAL_VALUE (
29139 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
29141 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
29143 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
29144 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
29145 emit_jump (label2
);
29147 emit_label (label1
);
29148 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
29149 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
29150 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
29151 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
29153 emit_label (label2
);
29156 /* Output code to perform a Newton-Rhapson approximation of a single precision
29157 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29159 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
29161 rtx x0
, x1
, e0
, e1
, two
;
29163 x0
= gen_reg_rtx (mode
);
29164 e0
= gen_reg_rtx (mode
);
29165 e1
= gen_reg_rtx (mode
);
29166 x1
= gen_reg_rtx (mode
);
29168 two
= CONST_DOUBLE_FROM_REAL_VALUE (dconst2
, SFmode
);
29170 if (VECTOR_MODE_P (mode
))
29171 two
= ix86_build_const_vector (SFmode
, true, two
);
29173 two
= force_reg (mode
, two
);
29175 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29177 /* x0 = rcp(b) estimate */
29178 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
29179 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
29182 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
29183 gen_rtx_MULT (mode
, x0
, b
)));
29185 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
29186 gen_rtx_MINUS (mode
, two
, e0
)));
29188 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
29189 gen_rtx_MULT (mode
, x0
, e1
)));
29191 emit_insn (gen_rtx_SET (VOIDmode
, res
,
29192 gen_rtx_MULT (mode
, a
, x1
)));
29195 /* Output code to perform a Newton-Rhapson approximation of a
29196 single precision floating point [reciprocal] square root. */
29198 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
29201 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
29204 x0
= gen_reg_rtx (mode
);
29205 e0
= gen_reg_rtx (mode
);
29206 e1
= gen_reg_rtx (mode
);
29207 e2
= gen_reg_rtx (mode
);
29208 e3
= gen_reg_rtx (mode
);
29210 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
29211 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
29213 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
29214 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
29216 if (VECTOR_MODE_P (mode
))
29218 mthree
= ix86_build_const_vector (SFmode
, true, mthree
);
29219 mhalf
= ix86_build_const_vector (SFmode
, true, mhalf
);
29222 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29223 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29225 /* x0 = rsqrt(a) estimate */
29226 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
29227 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
29230 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
29235 zero
= gen_reg_rtx (mode
);
29236 mask
= gen_reg_rtx (mode
);
29238 zero
= force_reg (mode
, CONST0_RTX(mode
));
29239 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
29240 gen_rtx_NE (mode
, zero
, a
)));
29242 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
29243 gen_rtx_AND (mode
, x0
, mask
)));
29247 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
29248 gen_rtx_MULT (mode
, x0
, a
)));
29250 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
29251 gen_rtx_MULT (mode
, e0
, x0
)));
29254 mthree
= force_reg (mode
, mthree
);
29255 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
29256 gen_rtx_PLUS (mode
, e1
, mthree
)));
29258 mhalf
= force_reg (mode
, mhalf
);
29260 /* e3 = -.5 * x0 */
29261 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
29262 gen_rtx_MULT (mode
, x0
, mhalf
)));
29264 /* e3 = -.5 * e0 */
29265 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
29266 gen_rtx_MULT (mode
, e0
, mhalf
)));
29267 /* ret = e2 * e3 */
29268 emit_insn (gen_rtx_SET (VOIDmode
, res
,
29269 gen_rtx_MULT (mode
, e2
, e3
)));
29272 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29274 static void ATTRIBUTE_UNUSED
29275 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
29278 /* With Binutils 2.15, the "@unwind" marker must be specified on
29279 every occurrence of the ".eh_frame" section, not just the first
29282 && strcmp (name
, ".eh_frame") == 0)
29284 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
29285 flags
& SECTION_WRITE
? "aw" : "a");
29288 default_elf_asm_named_section (name
, flags
, decl
);
29291 /* Return the mangling of TYPE if it is an extended fundamental type. */
29293 static const char *
29294 ix86_mangle_type (const_tree type
)
29296 type
= TYPE_MAIN_VARIANT (type
);
29298 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
29299 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
29302 switch (TYPE_MODE (type
))
29305 /* __float128 is "g". */
29308 /* "long double" or __float80 is "e". */
29315 /* For 32-bit code we can save PIC register setup by using
29316 __stack_chk_fail_local hidden function instead of calling
29317 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29318 register, so it is better to call __stack_chk_fail directly. */
29321 ix86_stack_protect_fail (void)
29323 return TARGET_64BIT
29324 ? default_external_stack_protect_fail ()
29325 : default_hidden_stack_protect_fail ();
29328 /* Select a format to encode pointers in exception handling data. CODE
29329 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29330 true if the symbol may be affected by dynamic relocations.
29332 ??? All x86 object file formats are capable of representing this.
29333 After all, the relocation needed is the same as for the call insn.
29334 Whether or not a particular assembler allows us to enter such, I
29335 guess we'll have to see. */
29337 asm_preferred_eh_data_format (int code
, int global
)
29341 int type
= DW_EH_PE_sdata8
;
29343 || ix86_cmodel
== CM_SMALL_PIC
29344 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
29345 type
= DW_EH_PE_sdata4
;
29346 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
29348 if (ix86_cmodel
== CM_SMALL
29349 || (ix86_cmodel
== CM_MEDIUM
&& code
))
29350 return DW_EH_PE_udata4
;
29351 return DW_EH_PE_absptr
;
29354 /* Expand copysign from SIGN to the positive value ABS_VALUE
29355 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29358 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
29360 enum machine_mode mode
= GET_MODE (sign
);
29361 rtx sgn
= gen_reg_rtx (mode
);
29362 if (mask
== NULL_RTX
)
29364 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
29365 if (!VECTOR_MODE_P (mode
))
29367 /* We need to generate a scalar mode mask in this case. */
29368 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
29369 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
29370 mask
= gen_reg_rtx (mode
);
29371 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
29375 mask
= gen_rtx_NOT (mode
, mask
);
29376 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
29377 gen_rtx_AND (mode
, mask
, sign
)));
29378 emit_insn (gen_rtx_SET (VOIDmode
, result
,
29379 gen_rtx_IOR (mode
, abs_value
, sgn
)));
29382 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29383 mask for masking out the sign-bit is stored in *SMASK, if that is
29386 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
29388 enum machine_mode mode
= GET_MODE (op0
);
29391 xa
= gen_reg_rtx (mode
);
29392 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
29393 if (!VECTOR_MODE_P (mode
))
29395 /* We need to generate a scalar mode mask in this case. */
29396 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
29397 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
29398 mask
= gen_reg_rtx (mode
);
29399 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
29401 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
29402 gen_rtx_AND (mode
, op0
, mask
)));
29410 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29411 swapping the operands if SWAP_OPERANDS is true. The expanded
29412 code is a forward jump to a newly created label in case the
29413 comparison is true. The generated label rtx is returned. */
29415 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
29416 bool swap_operands
)
29427 label
= gen_label_rtx ();
29428 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
29429 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
29430 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
29431 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
29432 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
29433 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
29434 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
29435 JUMP_LABEL (tmp
) = label
;
29440 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29441 using comparison code CODE. Operands are swapped for the comparison if
29442 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29444 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
29445 bool swap_operands
)
29447 enum machine_mode mode
= GET_MODE (op0
);
29448 rtx mask
= gen_reg_rtx (mode
);
29457 if (mode
== DFmode
)
29458 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
29459 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
29461 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
29462 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
29467 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29468 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29470 ix86_gen_TWO52 (enum machine_mode mode
)
29472 REAL_VALUE_TYPE TWO52r
;
29475 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
29476 TWO52
= const_double_from_real_value (TWO52r
, mode
);
29477 TWO52
= force_reg (mode
, TWO52
);
29482 /* Expand SSE sequence for computing lround from OP1 storing
29485 ix86_expand_lround (rtx op0
, rtx op1
)
29487 /* C code for the stuff we're doing below:
29488 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29491 enum machine_mode mode
= GET_MODE (op1
);
29492 const struct real_format
*fmt
;
29493 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
29496 /* load nextafter (0.5, 0.0) */
29497 fmt
= REAL_MODE_FORMAT (mode
);
29498 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
29499 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
29501 /* adj = copysign (0.5, op1) */
29502 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
29503 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
29505 /* adj = op1 + adj */
29506 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
29508 /* op0 = (imode)adj */
29509 expand_fix (op0
, adj
, 0);
29512 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
29515 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
29517 /* C code for the stuff we're doing below (for do_floor):
29519 xi -= (double)xi > op1 ? 1 : 0;
29522 enum machine_mode fmode
= GET_MODE (op1
);
29523 enum machine_mode imode
= GET_MODE (op0
);
29524 rtx ireg
, freg
, label
, tmp
;
29526 /* reg = (long)op1 */
29527 ireg
= gen_reg_rtx (imode
);
29528 expand_fix (ireg
, op1
, 0);
29530 /* freg = (double)reg */
29531 freg
= gen_reg_rtx (fmode
);
29532 expand_float (freg
, ireg
, 0);
29534 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29535 label
= ix86_expand_sse_compare_and_jump (UNLE
,
29536 freg
, op1
, !do_floor
);
29537 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
29538 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
29539 emit_move_insn (ireg
, tmp
);
29541 emit_label (label
);
29542 LABEL_NUSES (label
) = 1;
29544 emit_move_insn (op0
, ireg
);
29547 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29548 result in OPERAND0. */
29550 ix86_expand_rint (rtx operand0
, rtx operand1
)
29552 /* C code for the stuff we're doing below:
29553 xa = fabs (operand1);
29554 if (!isless (xa, 2**52))
29556 xa = xa + 2**52 - 2**52;
29557 return copysign (xa, operand1);
29559 enum machine_mode mode
= GET_MODE (operand0
);
29560 rtx res
, xa
, label
, TWO52
, mask
;
29562 res
= gen_reg_rtx (mode
);
29563 emit_move_insn (res
, operand1
);
29565 /* xa = abs (operand1) */
29566 xa
= ix86_expand_sse_fabs (res
, &mask
);
29568 /* if (!isless (xa, TWO52)) goto label; */
29569 TWO52
= ix86_gen_TWO52 (mode
);
29570 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
29572 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
29573 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
29575 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
29577 emit_label (label
);
29578 LABEL_NUSES (label
) = 1;
29580 emit_move_insn (operand0
, res
);
29583 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29586 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
29588 /* C code for the stuff we expand below.
29589 double xa = fabs (x), x2;
29590 if (!isless (xa, TWO52))
29592 xa = xa + TWO52 - TWO52;
29593 x2 = copysign (xa, x);
29602 enum machine_mode mode
= GET_MODE (operand0
);
29603 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
29605 TWO52
= ix86_gen_TWO52 (mode
);
29607 /* Temporary for holding the result, initialized to the input
29608 operand to ease control flow. */
29609 res
= gen_reg_rtx (mode
);
29610 emit_move_insn (res
, operand1
);
29612 /* xa = abs (operand1) */
29613 xa
= ix86_expand_sse_fabs (res
, &mask
);
29615 /* if (!isless (xa, TWO52)) goto label; */
29616 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
29618 /* xa = xa + TWO52 - TWO52; */
29619 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
29620 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
29622 /* xa = copysign (xa, operand1) */
29623 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
29625 /* generate 1.0 or -1.0 */
29626 one
= force_reg (mode
,
29627 const_double_from_real_value (do_floor
29628 ? dconst1
: dconstm1
, mode
));
29630 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29631 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
29632 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
29633 gen_rtx_AND (mode
, one
, tmp
)));
29634 /* We always need to subtract here to preserve signed zero. */
29635 tmp
= expand_simple_binop (mode
, MINUS
,
29636 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
29637 emit_move_insn (res
, tmp
);
29639 emit_label (label
);
29640 LABEL_NUSES (label
) = 1;
29642 emit_move_insn (operand0
, res
);
29645 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29648 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
29650 /* C code for the stuff we expand below.
29651 double xa = fabs (x), x2;
29652 if (!isless (xa, TWO52))
29654 x2 = (double)(long)x;
29661 if (HONOR_SIGNED_ZEROS (mode))
29662 return copysign (x2, x);
29665 enum machine_mode mode
= GET_MODE (operand0
);
29666 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
29668 TWO52
= ix86_gen_TWO52 (mode
);
29670 /* Temporary for holding the result, initialized to the input
29671 operand to ease control flow. */
29672 res
= gen_reg_rtx (mode
);
29673 emit_move_insn (res
, operand1
);
29675 /* xa = abs (operand1) */
29676 xa
= ix86_expand_sse_fabs (res
, &mask
);
29678 /* if (!isless (xa, TWO52)) goto label; */
29679 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
29681 /* xa = (double)(long)x */
29682 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
29683 expand_fix (xi
, res
, 0);
29684 expand_float (xa
, xi
, 0);
29687 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
29689 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29690 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
29691 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
29692 gen_rtx_AND (mode
, one
, tmp
)));
29693 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
29694 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
29695 emit_move_insn (res
, tmp
);
29697 if (HONOR_SIGNED_ZEROS (mode
))
29698 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
29700 emit_label (label
);
29701 LABEL_NUSES (label
) = 1;
29703 emit_move_insn (operand0
, res
);
29706 /* Expand SSE sequence for computing round from OPERAND1 storing
29707 into OPERAND0. Sequence that works without relying on DImode truncation
29708 via cvttsd2siq that is only available on 64bit targets. */
29710 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
29712 /* C code for the stuff we expand below.
29713 double xa = fabs (x), xa2, x2;
29714 if (!isless (xa, TWO52))
29716 Using the absolute value and copying back sign makes
29717 -0.0 -> -0.0 correct.
29718 xa2 = xa + TWO52 - TWO52;
29723 else if (dxa > 0.5)
29725 x2 = copysign (xa2, x);
29728 enum machine_mode mode
= GET_MODE (operand0
);
29729 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
29731 TWO52
= ix86_gen_TWO52 (mode
);
29733 /* Temporary for holding the result, initialized to the input
29734 operand to ease control flow. */
29735 res
= gen_reg_rtx (mode
);
29736 emit_move_insn (res
, operand1
);
29738 /* xa = abs (operand1) */
29739 xa
= ix86_expand_sse_fabs (res
, &mask
);
29741 /* if (!isless (xa, TWO52)) goto label; */
29742 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
29744 /* xa2 = xa + TWO52 - TWO52; */
29745 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
29746 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
29748 /* dxa = xa2 - xa; */
29749 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
29751 /* generate 0.5, 1.0 and -0.5 */
29752 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
29753 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
29754 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
29758 tmp
= gen_reg_rtx (mode
);
29759 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29760 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
29761 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
29762 gen_rtx_AND (mode
, one
, tmp
)));
29763 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
29764 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29765 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
29766 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
29767 gen_rtx_AND (mode
, one
, tmp
)));
29768 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
29770 /* res = copysign (xa2, operand1) */
29771 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
29773 emit_label (label
);
29774 LABEL_NUSES (label
) = 1;
29776 emit_move_insn (operand0
, res
);
29779 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29782 ix86_expand_trunc (rtx operand0
, rtx operand1
)
29784 /* C code for SSE variant we expand below.
29785 double xa = fabs (x), x2;
29786 if (!isless (xa, TWO52))
29788 x2 = (double)(long)x;
29789 if (HONOR_SIGNED_ZEROS (mode))
29790 return copysign (x2, x);
29793 enum machine_mode mode
= GET_MODE (operand0
);
29794 rtx xa
, xi
, TWO52
, label
, res
, mask
;
29796 TWO52
= ix86_gen_TWO52 (mode
);
29798 /* Temporary for holding the result, initialized to the input
29799 operand to ease control flow. */
29800 res
= gen_reg_rtx (mode
);
29801 emit_move_insn (res
, operand1
);
29803 /* xa = abs (operand1) */
29804 xa
= ix86_expand_sse_fabs (res
, &mask
);
29806 /* if (!isless (xa, TWO52)) goto label; */
29807 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
29809 /* x = (double)(long)x */
29810 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
29811 expand_fix (xi
, res
, 0);
29812 expand_float (res
, xi
, 0);
29814 if (HONOR_SIGNED_ZEROS (mode
))
29815 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
29817 emit_label (label
);
29818 LABEL_NUSES (label
) = 1;
29820 emit_move_insn (operand0
, res
);
29823 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29826 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
29828 enum machine_mode mode
= GET_MODE (operand0
);
29829 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
29831 /* C code for SSE variant we expand below.
29832 double xa = fabs (x), x2;
29833 if (!isless (xa, TWO52))
29835 xa2 = xa + TWO52 - TWO52;
29839 x2 = copysign (xa2, x);
29843 TWO52
= ix86_gen_TWO52 (mode
);
29845 /* Temporary for holding the result, initialized to the input
29846 operand to ease control flow. */
29847 res
= gen_reg_rtx (mode
);
29848 emit_move_insn (res
, operand1
);
29850 /* xa = abs (operand1) */
29851 xa
= ix86_expand_sse_fabs (res
, &smask
);
29853 /* if (!isless (xa, TWO52)) goto label; */
29854 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
29856 /* res = xa + TWO52 - TWO52; */
29857 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
29858 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
29859 emit_move_insn (res
, tmp
);
29862 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
29864 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29865 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
29866 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
29867 gen_rtx_AND (mode
, mask
, one
)));
29868 tmp
= expand_simple_binop (mode
, MINUS
,
29869 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
29870 emit_move_insn (res
, tmp
);
29872 /* res = copysign (res, operand1) */
29873 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
29875 emit_label (label
);
29876 LABEL_NUSES (label
) = 1;
29878 emit_move_insn (operand0
, res
);
29881 /* Expand SSE sequence for computing round from OPERAND1 storing
29884 ix86_expand_round (rtx operand0
, rtx operand1
)
29886 /* C code for the stuff we're doing below:
29887 double xa = fabs (x);
29888 if (!isless (xa, TWO52))
29890 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29891 return copysign (xa, x);
29893 enum machine_mode mode
= GET_MODE (operand0
);
29894 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
29895 const struct real_format
*fmt
;
29896 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
29898 /* Temporary for holding the result, initialized to the input
29899 operand to ease control flow. */
29900 res
= gen_reg_rtx (mode
);
29901 emit_move_insn (res
, operand1
);
29903 TWO52
= ix86_gen_TWO52 (mode
);
29904 xa
= ix86_expand_sse_fabs (res
, &mask
);
29905 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
29907 /* load nextafter (0.5, 0.0) */
29908 fmt
= REAL_MODE_FORMAT (mode
);
29909 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
29910 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
29912 /* xa = xa + 0.5 */
29913 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
29914 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
29916 /* xa = (double)(int64_t)xa */
29917 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
29918 expand_fix (xi
, xa
, 0);
29919 expand_float (xa
, xi
, 0);
29921 /* res = copysign (xa, operand1) */
29922 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
29924 emit_label (label
);
29925 LABEL_NUSES (label
) = 1;
29927 emit_move_insn (operand0
, res
);
29931 /* Validate whether a SSE5 instruction is valid or not.
29932 OPERANDS is the array of operands.
29933 NUM is the number of operands.
29934 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29935 NUM_MEMORY is the maximum number of memory operands to accept.
29936 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29939 ix86_sse5_valid_op_p (rtx operands
[], rtx insn ATTRIBUTE_UNUSED
, int num
,
29940 bool uses_oc0
, int num_memory
, bool commutative
)
29946 /* Count the number of memory arguments */
29949 for (i
= 0; i
< num
; i
++)
29951 enum machine_mode mode
= GET_MODE (operands
[i
]);
29952 if (register_operand (operands
[i
], mode
))
29955 else if (memory_operand (operands
[i
], mode
))
29957 mem_mask
|= (1 << i
);
29963 rtx pattern
= PATTERN (insn
);
29965 /* allow 0 for pcmov */
29966 if (GET_CODE (pattern
) != SET
29967 || GET_CODE (SET_SRC (pattern
)) != IF_THEN_ELSE
29969 || operands
[i
] != CONST0_RTX (mode
))
29974 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29975 a memory operation. */
29976 if (num_memory
< 0)
29978 num_memory
= -num_memory
;
29979 if ((mem_mask
& (1 << (num
-1))) != 0)
29981 mem_mask
&= ~(1 << (num
-1));
29986 /* If there were no memory operations, allow the insn */
29990 /* Do not allow the destination register to be a memory operand. */
29991 else if (mem_mask
& (1 << 0))
29994 /* If there are too many memory operations, disallow the instruction. While
29995 the hardware only allows 1 memory reference, before register allocation
29996 for some insns, we allow two memory operations sometimes in order to allow
29997 code like the following to be optimized:
29999 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
30001 or similar cases that are vectorized into using the fmaddss
30003 else if (mem_count
> num_memory
)
30006 /* Don't allow more than one memory operation if not optimizing. */
30007 else if (mem_count
> 1 && !optimize
)
30010 else if (num
== 4 && mem_count
== 1)
30012 /* formats (destination is the first argument), example fmaddss:
30013 xmm1, xmm1, xmm2, xmm3/mem
30014 xmm1, xmm1, xmm2/mem, xmm3
30015 xmm1, xmm2, xmm3/mem, xmm1
30016 xmm1, xmm2/mem, xmm3, xmm1 */
30018 return ((mem_mask
== (1 << 1))
30019 || (mem_mask
== (1 << 2))
30020 || (mem_mask
== (1 << 3)));
30022 /* format, example pmacsdd:
30023 xmm1, xmm2, xmm3/mem, xmm1 */
30025 return (mem_mask
== (1 << 2) || mem_mask
== (1 << 1));
30027 return (mem_mask
== (1 << 2));
30030 else if (num
== 4 && num_memory
== 2)
30032 /* If there are two memory operations, we can load one of the memory ops
30033 into the destination register. This is for optimizing the
30034 multiply/add ops, which the combiner has optimized both the multiply
30035 and the add insns to have a memory operation. We have to be careful
30036 that the destination doesn't overlap with the inputs. */
30037 rtx op0
= operands
[0];
30039 if (reg_mentioned_p (op0
, operands
[1])
30040 || reg_mentioned_p (op0
, operands
[2])
30041 || reg_mentioned_p (op0
, operands
[3]))
30044 /* formats (destination is the first argument), example fmaddss:
30045 xmm1, xmm1, xmm2, xmm3/mem
30046 xmm1, xmm1, xmm2/mem, xmm3
30047 xmm1, xmm2, xmm3/mem, xmm1
30048 xmm1, xmm2/mem, xmm3, xmm1
30050 For the oc0 case, we will load either operands[1] or operands[3] into
30051 operands[0], so any combination of 2 memory operands is ok. */
30055 /* format, example pmacsdd:
30056 xmm1, xmm2, xmm3/mem, xmm1
30058 For the integer multiply/add instructions be more restrictive and
30059 require operands[2] and operands[3] to be the memory operands. */
30061 return (mem_mask
== ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
30063 return (mem_mask
== ((1 << 2) | (1 << 3)));
30066 else if (num
== 3 && num_memory
== 1)
30068 /* formats, example protb:
30069 xmm1, xmm2, xmm3/mem
30070 xmm1, xmm2/mem, xmm3 */
30072 return ((mem_mask
== (1 << 1)) || (mem_mask
== (1 << 2)));
30074 /* format, example comeq:
30075 xmm1, xmm2, xmm3/mem */
30077 return (mem_mask
== (1 << 2));
30081 gcc_unreachable ();
30087 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
30088 hardware will allow by using the destination register to load one of the
30089 memory operations. Presently this is used by the multiply/add routines to
30090 allow 2 memory references. */
30093 ix86_expand_sse5_multiple_memory (rtx operands
[],
30095 enum machine_mode mode
)
30097 rtx op0
= operands
[0];
30099 || memory_operand (op0
, mode
)
30100 || reg_mentioned_p (op0
, operands
[1])
30101 || reg_mentioned_p (op0
, operands
[2])
30102 || reg_mentioned_p (op0
, operands
[3]))
30103 gcc_unreachable ();
30105 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
30106 the destination register. */
30107 if (memory_operand (operands
[1], mode
))
30109 emit_move_insn (op0
, operands
[1]);
30112 else if (memory_operand (operands
[3], mode
))
30114 emit_move_insn (op0
, operands
[3]);
30118 gcc_unreachable ();
30124 /* Table of valid machine attributes. */
30125 static const struct attribute_spec ix86_attribute_table
[] =
30127 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30128 /* Stdcall attribute says callee is responsible for popping arguments
30129 if they are not variable. */
30130 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
30131 /* Fastcall attribute says callee is responsible for popping arguments
30132 if they are not variable. */
30133 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
30134 /* Cdecl attribute says the callee is a normal C declaration */
30135 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
30136 /* Regparm attribute specifies how many integer arguments are to be
30137 passed in registers. */
30138 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
30139 /* Sseregparm attribute says we are using x86_64 calling conventions
30140 for FP arguments. */
30141 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
30142 /* force_align_arg_pointer says this function realigns the stack at entry. */
30143 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
30144 false, true, true, ix86_handle_cconv_attribute
},
30145 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30146 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
30147 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
30148 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
30150 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
30151 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
30152 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30153 SUBTARGET_ATTRIBUTE_TABLE
,
30155 /* ms_abi and sysv_abi calling convention function attributes. */
30156 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
},
30157 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
},
30159 { NULL
, 0, 0, false, false, false, NULL
}
30162 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30164 x86_builtin_vectorization_cost (bool runtime_test
)
30166 /* If the branch of the runtime test is taken - i.e. - the vectorized
30167 version is skipped - this incurs a misprediction cost (because the
30168 vectorized version is expected to be the fall-through). So we subtract
30169 the latency of a mispredicted branch from the costs that are incured
30170 when the vectorized version is executed.
30172 TODO: The values in individual target tables have to be tuned or new
30173 fields may be needed. For eg. on K8, the default branch path is the
30174 not-taken path. If the taken path is predicted correctly, the minimum
30175 penalty of going down the taken-path is 1 cycle. If the taken-path is
30176 not predicted correctly, then the minimum penalty is 10 cycles. */
30180 return (-(ix86_cost
->cond_taken_branch_cost
));
30186 /* This function returns the calling abi specific va_list type node.
30187 It returns the FNDECL specific va_list type. */
30190 ix86_fn_abi_va_list (tree fndecl
)
30193 return va_list_type_node
;
30194 gcc_assert (fndecl
!= NULL_TREE
);
30196 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
30197 return ms_va_list_type_node
;
30199 return sysv_va_list_type_node
;
30202 /* Returns the canonical va_list type specified by TYPE. If there
30203 is no valid TYPE provided, it return NULL_TREE. */
30206 ix86_canonical_va_list_type (tree type
)
30210 /* Resolve references and pointers to va_list type. */
30211 if (INDIRECT_REF_P (type
))
30212 type
= TREE_TYPE (type
);
30213 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
30214 type
= TREE_TYPE (type
);
30218 wtype
= va_list_type_node
;
30219 gcc_assert (wtype
!= NULL_TREE
);
30221 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
30223 /* If va_list is an array type, the argument may have decayed
30224 to a pointer type, e.g. by being passed to another function.
30225 In that case, unwrap both types so that we can compare the
30226 underlying records. */
30227 if (TREE_CODE (htype
) == ARRAY_TYPE
30228 || POINTER_TYPE_P (htype
))
30230 wtype
= TREE_TYPE (wtype
);
30231 htype
= TREE_TYPE (htype
);
30234 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
30235 return va_list_type_node
;
30236 wtype
= sysv_va_list_type_node
;
30237 gcc_assert (wtype
!= NULL_TREE
);
30239 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
30241 /* If va_list is an array type, the argument may have decayed
30242 to a pointer type, e.g. by being passed to another function.
30243 In that case, unwrap both types so that we can compare the
30244 underlying records. */
30245 if (TREE_CODE (htype
) == ARRAY_TYPE
30246 || POINTER_TYPE_P (htype
))
30248 wtype
= TREE_TYPE (wtype
);
30249 htype
= TREE_TYPE (htype
);
30252 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
30253 return sysv_va_list_type_node
;
30254 wtype
= ms_va_list_type_node
;
30255 gcc_assert (wtype
!= NULL_TREE
);
30257 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
30259 /* If va_list is an array type, the argument may have decayed
30260 to a pointer type, e.g. by being passed to another function.
30261 In that case, unwrap both types so that we can compare the
30262 underlying records. */
30263 if (TREE_CODE (htype
) == ARRAY_TYPE
30264 || POINTER_TYPE_P (htype
))
30266 wtype
= TREE_TYPE (wtype
);
30267 htype
= TREE_TYPE (htype
);
30270 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
30271 return ms_va_list_type_node
;
30274 return std_canonical_va_list_type (type
);
30277 /* Iterate through the target-specific builtin types for va_list.
30278 IDX denotes the iterator, *PTREE is set to the result type of
30279 the va_list builtin, and *PNAME to its internal type.
30280 Returns zero if there is no element for this index, otherwise
30281 IDX should be increased upon the next call.
30282 Note, do not iterate a base builtin's name like __builtin_va_list.
30283 Used from c_common_nodes_and_builtins. */
30286 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
30292 *ptree
= ms_va_list_type_node
;
30293 *pname
= "__builtin_ms_va_list";
30296 *ptree
= sysv_va_list_type_node
;
30297 *pname
= "__builtin_sysv_va_list";
30305 /* Initialize the GCC target structure. */
30306 #undef TARGET_RETURN_IN_MEMORY
30307 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30309 #undef TARGET_LEGITIMIZE_ADDRESS
30310 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30312 #undef TARGET_ATTRIBUTE_TABLE
30313 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30314 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30315 # undef TARGET_MERGE_DECL_ATTRIBUTES
30316 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30319 #undef TARGET_COMP_TYPE_ATTRIBUTES
30320 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30322 #undef TARGET_INIT_BUILTINS
30323 #define TARGET_INIT_BUILTINS ix86_init_builtins
30324 #undef TARGET_EXPAND_BUILTIN
30325 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30327 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30328 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30329 ix86_builtin_vectorized_function
30331 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30332 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30334 #undef TARGET_BUILTIN_RECIPROCAL
30335 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30337 #undef TARGET_ASM_FUNCTION_EPILOGUE
30338 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30340 #undef TARGET_ENCODE_SECTION_INFO
30341 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30342 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30344 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30347 #undef TARGET_ASM_OPEN_PAREN
30348 #define TARGET_ASM_OPEN_PAREN ""
30349 #undef TARGET_ASM_CLOSE_PAREN
30350 #define TARGET_ASM_CLOSE_PAREN ""
30352 #undef TARGET_ASM_ALIGNED_HI_OP
30353 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30354 #undef TARGET_ASM_ALIGNED_SI_OP
30355 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30357 #undef TARGET_ASM_ALIGNED_DI_OP
30358 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30361 #undef TARGET_ASM_UNALIGNED_HI_OP
30362 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30363 #undef TARGET_ASM_UNALIGNED_SI_OP
30364 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30365 #undef TARGET_ASM_UNALIGNED_DI_OP
30366 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30368 #undef TARGET_SCHED_ADJUST_COST
30369 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30370 #undef TARGET_SCHED_ISSUE_RATE
30371 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30372 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30373 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30374 ia32_multipass_dfa_lookahead
30376 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30377 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30380 #undef TARGET_HAVE_TLS
30381 #define TARGET_HAVE_TLS true
30383 #undef TARGET_CANNOT_FORCE_CONST_MEM
30384 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30385 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30386 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30388 #undef TARGET_DELEGITIMIZE_ADDRESS
30389 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30391 #undef TARGET_MS_BITFIELD_LAYOUT_P
30392 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30395 #undef TARGET_BINDS_LOCAL_P
30396 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30398 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30399 #undef TARGET_BINDS_LOCAL_P
30400 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30403 #undef TARGET_ASM_OUTPUT_MI_THUNK
30404 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30405 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30406 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30408 #undef TARGET_ASM_FILE_START
30409 #define TARGET_ASM_FILE_START x86_file_start
30411 #undef TARGET_DEFAULT_TARGET_FLAGS
30412 #define TARGET_DEFAULT_TARGET_FLAGS \
30414 | TARGET_SUBTARGET_DEFAULT \
30415 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
/* Command-line option handling.  */
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

/* RTX and addressing-mode cost estimation used by the optimizers.  */
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

/* Condition-code register description and CC-mode compatibility.  */
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

/* Machine-dependent reorganization pass, run late in compilation.  */
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

/* va_list construction and expansion of the stdarg builtins.  */
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

/* Registers implicitly clobbered by asm statements.  */
#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

/* Calling-convention hooks: argument passing, struct returns,
   varargs setup, and the argument pointer.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
/* Stack realignment support (DRAP = dynamic realign argument pointer).  */
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Which scalar/vector machine modes this target supports.  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

/* Mode selection for numeric literal suffixes (e.g. __float128).  */
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* Emission of DWARF dtprel (TLS offset) relocations.
   NOTE(review): upstream GCC guards this pair with "#ifdef HAVE_AS_TLS
   ... #endif"; no such guard is visible here -- confirm it was not
   lost when this file was extracted.  */
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Let the subtarget (e.g. cygwin/mingw) install extra attributes on
   declarations.
   NOTE: the closing #endif for this #ifdef was missing, leaving the
   conditional unterminated through end of file -- restored.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
/* C++ name mangling for target-specific types.  */
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

/* Code emitted when a stack-protector check fails.  */
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

/* Where a function's return value lives.  */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

/* Intermediate-register requirements for reloads.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

/* Cost model queried by the auto-vectorizer.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost

/* Per-function target-option ("target" attribute) machinery: switch,
   validate, save, restore, print, and inlining compatibility.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p

/* Hook run just before expanding to RTL (used to switch 32/64 ABI).  */
#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

/* Validity check for memory addresses.  */
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30531 struct gcc_target targetm
= TARGET_INITIALIZER
;
30533 #include "gt-i386.h"