/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
static int x86_builtin_vectorization_cost (bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
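/* Illustrative sketch of how this index is meant to be used (an assumption
   about usage, not code copied from elsewhere): the mult and divide arrays
   in the cost tables below hold five entries -- QImode, HImode, SImode,
   DImode and "other" -- so a 32-bit multiply cost query looks roughly like

     cost->mult_init[MODE_INDEX (SImode)]   (index 2, the SImode column)

   and any mode wider than DImode falls through to the "other" slot at
   index 4.  */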
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
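/* Reader's note on the memcpy/memset strategy tables below (a descriptive
   sketch of the format, not new policy): each stringop_algs entry pairs the
   algorithm used when the block size is unknown at compile time with a list
   of {max_size, algorithm} steps tried in order for known sizes; a max_size
   of -1 terminates the list and covers all larger blocks.
   DUMMY_STRINGOP_ALGS marks the variant (32-bit or 64-bit) that a given
   table does not describe.  */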
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy
     and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg  Double  FSTORE 4
					    MOVD reg32, xmmreg  Double  FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg  Double  FADD 3
					    MOVD reg32, xmmreg  Double  FADD 3 */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration lea is 2 cycles and more. With
1039 this cost however our current implementation of synth_mult results in
1040 use of unnecessary temporary registers causing regression on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
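/* Note (a usage sketch): &pentium_cost is only the initial default.  The
   -mtune/-march option handling later in this file is expected to repoint
   ix86_cost at the cost table of the selected processor, and the rtx-cost
   and register-move-cost hooks then consult the tables above through this
   pointer.  */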
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
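/* Usage sketch (an assumption about how these masks are consumed, not a
   definition made here): option handling derives a tuning mask along the
   lines of

     ix86_tune_mask = 1u << ix86_tune;

   and a tuning decision then reduces to a bit test such as

     (ix86_tune_features[X86_TUNE_USE_LEAVE] & ix86_tune_mask) != 0

   usually wrapped in TARGET_* convenience macros in i386.h.  */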
/* Feature tests against the various tunings.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_USE_BIT_TEST */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_ADD_ESP_8 */
  m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves.  This knob promotes all store destinations
     to be 128bit to allow register renaming on 128bit SSE units, but usually
     results in one extra microop on 64bit SSE units.  Experimental results
     show that disabling this option on P4 brings over 20% SPECfp regression,
     while enabling it on K8 brings roughly 2.4% regression that can be partly
     masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
  /* X86_TUNE_USE_BT */

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */

  /* X86_TUNE_AVOID_VECTOR_DECODE */

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
     and SImode multiply, but 386 and 486 do HImode multiply faster.  */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
};
/* Feature tests against the various architecture variations.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
};
static const unsigned int x86_accumulate_outgoing_args
  = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;

static enum stringop_alg stringop_alg = no_stringop;
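/* stringop_alg above holds the user override for the strategy tables earlier
   in this file.  A sketch of the intent (assuming this GCC version's
   -mstringop-strategy= option): option parsing maps the requested strategy
   onto one of the enum stringop_alg values, and no_stringop means "no
   override, consult the per-processor tables".  */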
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
1517 static int const x86_64_int_parameter_registers[6] =
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1523 static int const x86_64_ms_abi_int_parameter_registers[4] =
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1529 static int const x86_64_int_return_registers[4] =
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
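/* Editor's illustrative sketch, not part of the original file: the three
   arrays above give argument and return registers by gcc hard register
   number (0 = ax, 1 = dx, 2 = cx, 4 = si, 5 = di, ...).  A hypothetical
   helper using them could look like this; nth_int_arg_regno is an assumed
   name, not an existing GCC function.  */
static inline int
nth_int_arg_regno (int n, bool ms_abi)
{
  /* SysV passes the first integer argument in %rdi (regno 5); the MS ABI
     passes it in %rcx (regno 2).  No bounds checking in this sketch.  */
  return ms_abi ? x86_64_ms_abi_int_parameter_registers[n]
		: x86_64_int_parameter_registers[n];
}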
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8,9,10,11,12,13,14,15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative numbers:
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1600 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
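/* Editor's illustrative sketch, not part of the original file: a lookup in
   svr4_dbx_register_map turns a gcc hard register number into the DWARF
   number expected by the SVR4 tools, e.g. gcc regno 2 (%ecx) maps to
   DWARF 1 and gcc regno 8 (%st(0)) maps to DWARF 11; a negative entry
   means no DWARF number is assigned.  The helper name is assumed.  */
static inline int
example_svr4_dwarf_regno (int gcc_regno)
{
  return svr4_dbx_register_map[gcc_regno];
}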
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1614 rtx ix86_compare_op0 = NULL_RTX;
1615 rtx ix86_compare_op1 = NULL_RTX;
1616 rtx ix86_compare_emitted = NULL_RTX;
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
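/* Editor's note: assuming the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, this evaluates to
   6 * 8 + 8 * 16 = 176 bytes, the size of the va_arg register save area
   described by the x86-64 psABI.  */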
1621 /* Define the structure for the machine field in struct function. */
1623 struct stack_local_entry GTY(())
1625 unsigned short mode;
1628 struct stack_local_entry *next;
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1655 HOST_WIDE_INT frame;
1657 int outgoing_arguments_size;
1660 HOST_WIDE_INT to_allocate;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset;
1663 HOST_WIDE_INT hard_frame_pointer_offset;
1664 HOST_WIDE_INT stack_pointer_offset;
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov;
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel;
1674 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1676 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath;
1681 /* Which cpu are we scheduling for. */
1682 enum processor_type ix86_tune;
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch;
1687 /* true if sse prefetch instruction is not NOOP. */
1688 int x86_prefetch_sse;
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm;
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer;
1695 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary;
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost;
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1706 int ix86_section_threshold = 65536;
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix[16];
1710 int internal_label_prefix_len;
1712 /* Fence to use after loop using movnt. */
1715 /* Register class used for passing given 64bit part of the argument.
1716 These represent classes as documented by the PS ABI, with the exception
1717 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1718 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (upper half does contain padding). */
1722 enum x86_64_reg_class
1725 X86_64_INTEGER_CLASS,
1726 X86_64_INTEGERSI_CLASS,
1733 X86_64_COMPLEX_X87_CLASS,
1736 static const char * const x86_64_reg_class_name[] =
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
1742 #define MAX_CLASSES 4
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table[5];
1746 static bool ext_80387_constants_init = 0;
1749 static struct machine_function * ix86_init_machine_status (void);
1750 static rtx ix86_function_value (const_tree, const_tree, bool);
1751 static int ix86_function_regparm (const_tree, const_tree);
1752 static void ix86_compute_frame_layout (struct ix86_frame *);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit;
1770 /* Define a set of ISAs which aren't available for a given ISA. MMX
1771 and SSE ISAs are handled separately. */
1773 #define OPTION_MASK_ISA_MMX_UNSET \
1774 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1775 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1777 #define OPTION_MASK_ISA_SSE_UNSET \
1778 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1779 #define OPTION_MASK_ISA_SSE2_UNSET \
1780 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1781 #define OPTION_MASK_ISA_SSE3_UNSET \
1782 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1783 #define OPTION_MASK_ISA_SSSE3_UNSET \
1784 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1785 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1786 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1787 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1789 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1790 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1791 #define OPTION_MASK_ISA_SSE4 \
1792 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1793 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1795 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1797 #define OPTION_MASK_ISA_SSE5_UNSET \
1798 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
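/* Editor's note: the _UNSET macros chain, so turning one ISA level off also
   turns off everything layered on top of it.  For example, given the
   definitions above, -mno-sse2 clears SSE2 itself and, through
   OPTION_MASK_ISA_SSE2_UNSET, also

     OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
     | OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4A

   since each _UNSET macro expands into the next level and its _UNSET.  */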
1800 /* Vectorization library interface and handlers. */
1801 tree (*ix86_veclib_handler) (enum built_in_function, tree, tree) = NULL;
1802 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1804 /* Implement TARGET_HANDLE_OPTION. */
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1812 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1815 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1816 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1821 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1824 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1825 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1833 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1836 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1837 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1842 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1845 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1851 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1854 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1855 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1860 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1863 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1864 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1869 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1872 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1873 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1881 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1882 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1887 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1892 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1897 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1900 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1901 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1906 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
1909 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1910 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
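/* Editor's note: every -m<isa> / -mno-<isa> case above follows the same
   pattern -- record the affected bits in ix86_isa_flags_explicit so that
   the defaulting code in override_options will not silently override an
   explicit user choice, and on the "no" side clear the corresponding
   _UNSET cascade from ix86_isa_flags.  */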
1919 /* Sometimes certain combinations of command options do not make
1920 sense on a particular target machine. You can define a macro
1921 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1922 defined, is executed once just after all the command options have been parsed.
1925 Don't use this macro to turn on various extra optimizations for
1926 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1929 override_options (void)
1932 int ix86_tune_defaulted = 0;
1933 int ix86_arch_specified = 0;
1934 unsigned int ix86_arch_mask, ix86_tune_mask;
1936 /* Comes from final.c -- no real reason to change it. */
1937 #define MAX_CODE_ALIGN 16
1941 const struct processor_costs *cost; /* Processor costs */
1942 const int align_loop; /* Default alignments. */
1943 const int align_loop_max_skip;
1944 const int align_jump;
1945 const int align_jump_max_skip;
1946 const int align_func;
1948 const processor_target_table[PROCESSOR_max] =
1950 {&i386_cost, 4, 3, 4, 3, 4},
1951 {&i486_cost, 16, 15, 16, 15, 16},
1952 {&pentium_cost, 16, 7, 16, 7, 16},
1953 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1954 {&geode_cost, 0, 0, 0, 0, 0},
1955 {&k6_cost, 32, 7, 32, 7, 32},
1956 {&athlon_cost, 16, 7, 16, 7, 16},
1957 {&pentium4_cost, 0, 0, 0, 0, 0},
1958 {&k8_cost, 16, 7, 16, 7, 16},
1959 {&nocona_cost, 0, 0, 0, 0, 0},
1960 {&core2_cost, 16, 10, 16, 10, 16},
1961 {&generic32_cost, 16, 7, 16, 7, 16},
1962 {&generic64_cost, 16, 10, 16, 10, 16},
1963 {&amdfam10_cost, 32, 24, 32, 7, 32}
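/* Editor's note: the row format is {cost table, align_loop,
   align_loop_max_skip, align_jump, align_jump_max_skip, align_func}.
   The k8 row above, for instance, aligns loops, jumps and functions to
   16 bytes while skipping at most 7 padding bytes, and these values only
   take effect when no explicit -falign-* option was given (see the
   defaulting code further down in override_options).  */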
1966 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1973 PTA_PREFETCH_SSE = 1 << 4,
1975 PTA_3DNOW_A = 1 << 6,
1979 PTA_POPCNT = 1 << 10,
1981 PTA_SSE4A = 1 << 12,
1982 PTA_NO_SAHF = 1 << 13,
1983 PTA_SSE4_1 = 1 << 14,
1984 PTA_SSE4_2 = 1 << 15,
1990 const char *const name; /* processor name or nickname. */
1991 const enum processor_type processor;
1992 const unsigned /*enum pta_flags*/ flags;
1994 const processor_alias_table[] =
1996 {"i386", PROCESSOR_I386
, 0},
1997 {"i486", PROCESSOR_I486
, 0},
1998 {"i586", PROCESSOR_PENTIUM
, 0},
1999 {"pentium", PROCESSOR_PENTIUM
, 0},
2000 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
2001 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
2002 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
2003 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
2004 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2005 {"i686", PROCESSOR_PENTIUMPRO
, 0},
2006 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
2007 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
2008 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2009 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2010 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2011 {"pentium4", PROCESSOR_PENTIUM4
, PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2012 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2013 {"prescott", PROCESSOR_NOCONA
, PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2014 {"nocona", PROCESSOR_NOCONA
, (PTA_64BIT
2015 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2016 | PTA_CX16
| PTA_NO_SAHF
)},
2017 {"core2", PROCESSOR_CORE2
, (PTA_64BIT
2018 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2021 {"geode", PROCESSOR_GEODE
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2022 |PTA_PREFETCH_SSE
)},
2023 {"k6", PROCESSOR_K6
, PTA_MMX
},
2024 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
2025 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
2026 {"athlon", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2027 | PTA_PREFETCH_SSE
)},
2028 {"athlon-tbird", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2029 | PTA_PREFETCH_SSE
)},
2030 {"athlon-4", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2032 {"athlon-xp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2034 {"athlon-mp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2036 {"x86-64", PROCESSOR_K8
, (PTA_64BIT
2037 | PTA_MMX
| PTA_SSE
| PTA_SSE2
2039 {"k8", PROCESSOR_K8
, (PTA_64BIT
2040 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2041 | PTA_SSE
| PTA_SSE2
2043 {"k8-sse3", PROCESSOR_K8
, (PTA_64BIT
2044 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2045 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2047 {"opteron", PROCESSOR_K8
, (PTA_64BIT
2048 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2049 | PTA_SSE
| PTA_SSE2
2051 {"opteron-sse3", PROCESSOR_K8
, (PTA_64BIT
2052 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2053 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2055 {"athlon64", PROCESSOR_K8
, (PTA_64BIT
2056 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2057 | PTA_SSE
| PTA_SSE2
2059 {"athlon64-sse3", PROCESSOR_K8
, (PTA_64BIT
2060 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2061 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2063 {"athlon-fx", PROCESSOR_K8
, (PTA_64BIT
2064 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2065 | PTA_SSE
| PTA_SSE2
2067 {"amdfam10", PROCESSOR_AMDFAM10
, (PTA_64BIT
2068 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2069 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2071 | PTA_CX16
| PTA_ABM
)},
2072 {"barcelona", PROCESSOR_AMDFAM10
, (PTA_64BIT
2073 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2074 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2076 | PTA_CX16
| PTA_ABM
)},
2077 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
2078 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
2081 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
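/* Editor's note: as an example of how the table is consumed below,
   -march=pentium-m matches the {"pentium-m", PROCESSOR_PENTIUMPRO,
   PTA_MMX | PTA_SSE | PTA_SSE2} entry, so ix86_arch (and, by default,
   ix86_tune) becomes PROCESSOR_PENTIUMPRO and MMX, SSE and SSE2 are
   enabled in ix86_isa_flags unless the user disabled them explicitly.  */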
2083 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2084 SUBTARGET_OVERRIDE_OPTIONS;
2087 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2088 SUBSUBTARGET_OVERRIDE_OPTIONS;
2091 /* -fPIC is the default for x86_64. */
2092 if (TARGET_MACHO && TARGET_64BIT)
2095 /* Set the default values for switches whose default depends on TARGET_64BIT
2096 in case they weren't overwritten by command line options. */
2099 /* Mach-O doesn't support omitting the frame pointer for now. */
2100 if (flag_omit_frame_pointer == 2)
2101 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2102 if (flag_asynchronous_unwind_tables == 2)
2103 flag_asynchronous_unwind_tables = 1;
2104 if (flag_pcc_struct_return == 2)
2105 flag_pcc_struct_return = 0;
2109 if (flag_omit_frame_pointer == 2)
2110 flag_omit_frame_pointer = 0;
2111 if (flag_asynchronous_unwind_tables == 2)
2112 flag_asynchronous_unwind_tables = 0;
2113 if (flag_pcc_struct_return == 2)
2114 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2117 /* Need to check -mtune=generic first. */
2118 if (ix86_tune_string)
2120 if (!strcmp (ix86_tune_string, "generic")
2121 || !strcmp (ix86_tune_string, "i686")
2122 /* As special support for cross compilers we read -mtune=native
2123 as -mtune=generic. With native compilers we won't see the
2124 -mtune=native, as it was changed by the driver. */
2125 || !strcmp (ix86_tune_string, "native"))
2128 ix86_tune_string = "generic64";
2130 ix86_tune_string = "generic32";
2132 else if (!strncmp (ix86_tune_string, "generic", 7))
2133 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2137 if (ix86_arch_string)
2138 ix86_tune_string = ix86_arch_string;
2139 if (!ix86_tune_string)
2141 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2142 ix86_tune_defaulted = 1;
2145 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2146 need to use a sensible tune option. */
2147 if (!strcmp (ix86_tune_string, "generic")
2148 || !strcmp (ix86_tune_string, "x86-64")
2149 || !strcmp (ix86_tune_string, "i686"))
2152 ix86_tune_string = "generic64";
2154 ix86_tune_string = "generic32";
2157 if (ix86_stringop_string)
2159 if (!strcmp (ix86_stringop_string, "rep_byte"))
2160 stringop_alg = rep_prefix_1_byte;
2161 else if (!strcmp (ix86_stringop_string, "libcall"))
2162 stringop_alg = libcall;
2163 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2164 stringop_alg = rep_prefix_4_byte;
2165 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2166 stringop_alg = rep_prefix_8_byte;
2167 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2168 stringop_alg = loop_1_byte;
2169 else if (!strcmp (ix86_stringop_string, "loop"))
2170 stringop_alg = loop;
2171 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2172 stringop_alg = unrolled_loop;
2174 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2176 if (!strcmp (ix86_tune_string, "x86-64"))
2177 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2178 "-mtune=generic instead as appropriate.");
2180 if (!ix86_arch_string)
2181 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2183 ix86_arch_specified = 1;
2185 if (!strcmp (ix86_arch_string, "generic"))
2186 error ("generic CPU can be used only for -mtune= switch");
2187 if (!strncmp (ix86_arch_string, "generic", 7))
2188 error ("bad value (%s) for -march= switch", ix86_arch_string);
2190 if (ix86_cmodel_string != 0)
2192 if (!strcmp (ix86_cmodel_string, "small"))
2193 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2194 else if (!strcmp (ix86_cmodel_string, "medium"))
2195 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2196 else if (!strcmp (ix86_cmodel_string, "large"))
2197 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2199 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2200 else if (!strcmp (ix86_cmodel_string, "32"))
2201 ix86_cmodel = CM_32;
2202 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2203 ix86_cmodel = CM_KERNEL;
2205 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2209 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2210 use of rip-relative addressing. This eliminates fixups that
2211 would otherwise be needed if this object is to be placed in a
2212 DLL, and is essentially just as efficient as direct addressing. */
2213 if (TARGET_64BIT_MS_ABI)
2214 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2215 else if (TARGET_64BIT)
2216 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2218 ix86_cmodel = CM_32;
2220 if (ix86_asm_string != 0)
2223 && !strcmp (ix86_asm_string, "intel"))
2224 ix86_asm_dialect = ASM_INTEL;
2225 else if (!strcmp (ix86_asm_string, "att"))
2226 ix86_asm_dialect = ASM_ATT;
2228 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2230 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2231 error ("code model %qs not supported in the %s bit mode",
2232 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2233 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2234 sorry ("%i-bit mode not compiled in",
2235 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2237 for (i
= 0; i
< pta_size
; i
++)
2238 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2240 ix86_arch
= processor_alias_table
[i
].processor
;
2241 /* Default cpu tuning to the architecture. */
2242 ix86_tune
= ix86_arch
;
2244 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2245 error ("CPU you selected does not support x86-64 "
2248 if (processor_alias_table
[i
].flags
& PTA_MMX
2249 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
2250 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2251 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2252 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
2253 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
2254 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2255 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
2256 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
2257 if (processor_alias_table
[i
].flags
& PTA_SSE
2258 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
2259 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2260 if (processor_alias_table
[i
].flags
& PTA_SSE2
2261 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
2262 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2263 if (processor_alias_table
[i
].flags
& PTA_SSE3
2264 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
2265 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2266 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2267 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
2268 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2269 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
2270 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
2271 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2272 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
2273 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
2274 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
2275 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2276 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
2277 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2278 if (processor_alias_table
[i
].flags
& PTA_SSE5
2279 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE5
))
2280 ix86_isa_flags
|= OPTION_MASK_ISA_SSE5
;
2282 if (processor_alias_table
[i
].flags
& PTA_ABM
)
2284 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2285 x86_cmpxchg16b
= true;
2286 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
))
2288 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
2289 x86_prefetch_sse
= true;
2290 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2297 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2299 ix86_arch_mask
= 1u << ix86_arch
;
2300 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2301 ix86_arch_features
[i
] &= ix86_arch_mask
;
2303 for (i
= 0; i
< pta_size
; i
++)
2304 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2306 ix86_tune
= processor_alias_table
[i
].processor
;
2307 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2309 if (ix86_tune_defaulted
)
2311 ix86_tune_string
= "x86-64";
2312 for (i
= 0; i
< pta_size
; i
++)
2313 if (! strcmp (ix86_tune_string
,
2314 processor_alias_table
[i
].name
))
2316 ix86_tune
= processor_alias_table
[i
].processor
;
2319 error ("CPU you selected does not support x86-64 "
2322 /* Intel CPUs have always interpreted SSE prefetch instructions as
2323 NOPs; so, we can enable SSE prefetch instructions even when
2324 -mtune (rather than -march) points us to a processor that has them.
2325 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2326 higher processors. */
2328 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
2329 x86_prefetch_sse
= true;
2333 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2335 ix86_tune_mask
= 1u << ix86_tune
;
2336 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2337 ix86_tune_features
[i
] &= ix86_tune_mask
;
2340 ix86_cost
= &size_cost
;
2342 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2344 /* Arrange to set up i386_stack_locals for all functions. */
2345 init_machine_status
= ix86_init_machine_status
;
2347 /* Validate -mregparm= value. */
2348 if (ix86_regparm_string
)
2351 warning (0, "-mregparm is ignored in 64-bit mode");
2352 i
= atoi (ix86_regparm_string
);
2353 if (i
< 0 || i
> REGPARM_MAX
)
2354 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2359 ix86_regparm
= REGPARM_MAX
;
2361 /* If the user has provided any of the -malign-* options,
2362 warn and use that value only if -falign-* is not set.
2363 Remove this code in GCC 3.2 or later. */
2364 if (ix86_align_loops_string
)
2366 warning (0, "-malign-loops is obsolete, use -falign-loops");
2367 if (align_loops
== 0)
2369 i
= atoi (ix86_align_loops_string
);
2370 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2371 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2373 align_loops
= 1 << i
;
2377 if (ix86_align_jumps_string
)
2379 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2380 if (align_jumps
== 0)
2382 i
= atoi (ix86_align_jumps_string
);
2383 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2384 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2386 align_jumps
= 1 << i
;
2390 if (ix86_align_funcs_string
)
2392 warning (0, "-malign-functions is obsolete, use -falign-functions");
2393 if (align_functions
== 0)
2395 i
= atoi (ix86_align_funcs_string
);
2396 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2397 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2399 align_functions
= 1 << i
;
2403 /* Default align_* from the processor table. */
2404 if (align_loops
== 0)
2406 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2407 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2409 if (align_jumps
== 0)
2411 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2412 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2414 if (align_functions
== 0)
2416 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2419 /* Validate -mbranch-cost= value, or provide default. */
2420 ix86_branch_cost
= ix86_cost
->branch_cost
;
2421 if (ix86_branch_cost_string
)
2423 i
= atoi (ix86_branch_cost_string
);
2425 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2427 ix86_branch_cost
= i
;
2429 if (ix86_section_threshold_string
)
2431 i
= atoi (ix86_section_threshold_string
);
2433 error ("-mlarge-data-threshold=%d is negative", i
);
2435 ix86_section_threshold
= i
;
2438 if (ix86_tls_dialect_string
)
2440 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2441 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2442 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2443 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2444 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2445 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2447 error ("bad value (%s) for -mtls-dialect= switch",
2448 ix86_tls_dialect_string
);
2451 if (ix87_precision_string
)
2453 i
= atoi (ix87_precision_string
);
2454 if (i
!= 32 && i
!= 64 && i
!= 80)
2455 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
2460 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
2462 /* Enable by default the SSE and MMX builtins. Do allow the user to
2463 explicitly disable any of these. In particular, disabling SSE and
2464 MMX for kernel code is extremely useful. */
2465 if (!ix86_arch_specified
)
2467 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
2468 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
2471 warning (0, "-mrtd is ignored in 64bit mode");
2475 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
2477 if (!ix86_arch_specified
)
2479 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
2481 /* i386 ABI does not specify red zone. It still makes sense to use it
2482 when the programmer takes care to keep the stack from being destroyed. */
2483 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2484 target_flags
|= MASK_NO_RED_ZONE
;
2487 /* Keep nonleaf frame pointers. */
2488 if (flag_omit_frame_pointer
)
2489 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2490 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2491 flag_omit_frame_pointer
= 1;
2493 /* If we're doing fast math, we don't care about comparison order
2494 wrt NaNs. This lets us use a shorter comparison sequence. */
2495 if (flag_finite_math_only
)
2496 target_flags
&= ~MASK_IEEE_FP
;
2498 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2499 since the insns won't need emulation. */
2500 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2501 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2503 /* Likewise, if the target doesn't have a 387, or we've specified
2504 software floating point, don't use 387 inline intrinsics. */
2506 target_flags
|= MASK_NO_FANCY_MATH_387
;
2508 /* Turn on SSE4A builtins for -msse5. */
2510 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2512 /* Turn on SSE4.1 builtins for -msse4.2. */
2514 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2516 /* Turn on SSSE3 builtins for -msse4.1. */
2518 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2520 /* Turn on SSE3 builtins for -mssse3. */
2522 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2524 /* Turn on SSE3 builtins for -msse4a. */
2526 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2528 /* Turn on SSE2 builtins for -msse3. */
2530 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2532 /* Turn on SSE builtins for -msse2. */
2534 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2536 /* Turn on MMX builtins for -msse. */
2539 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
2540 x86_prefetch_sse
= true;
2543 /* Turn on MMX builtins for 3Dnow. */
2545 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2547 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2548 if (TARGET_SSE4_2
|| TARGET_ABM
)
2551 /* Validate -mpreferred-stack-boundary= value, or provide default.
2552 The default of 128 bits is for Pentium III's SSE __m128. We can't
2553 change it because of optimize_size. Otherwise, we can't mix object
2554 files compiled with -Os and -On. */
2555 ix86_preferred_stack_boundary
= 128;
2556 if (ix86_preferred_stack_boundary_string
)
2558 i
= atoi (ix86_preferred_stack_boundary_string
);
2559 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2560 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2561 TARGET_64BIT
? 4 : 2);
2563 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2566 /* Accept -msseregparm only if at least SSE support is enabled. */
2567 if (TARGET_SSEREGPARM
2569 error ("-msseregparm used without SSE enabled");
2571 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2572 if (ix86_fpmath_string
!= 0)
2574 if (! strcmp (ix86_fpmath_string
, "387"))
2575 ix86_fpmath
= FPMATH_387
;
2576 else if (! strcmp (ix86_fpmath_string
, "sse"))
2580 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2581 ix86_fpmath
= FPMATH_387
;
2584 ix86_fpmath
= FPMATH_SSE
;
2586 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2587 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2591 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2592 ix86_fpmath
= FPMATH_387
;
2594 else if (!TARGET_80387
)
2596 warning (0, "387 instruction set disabled, using SSE arithmetics");
2597 ix86_fpmath
= FPMATH_SSE
;
2600 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
2603 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2606 /* If the i387 is disabled, then do not return values in it. */
2608 target_flags
&= ~MASK_FLOAT_RETURNS
;
2610 /* Use external vectorized library in vectorizing intrinsics. */
2611 if (ix86_veclibabi_string
)
2613 if (strcmp (ix86_veclibabi_string
, "acml") == 0)
2614 ix86_veclib_handler
= ix86_veclibabi_acml
;
2616 error ("unknown vectorization library ABI type (%s) for "
2617 "-mveclibabi= switch", ix86_veclibabi_string
);
2620 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2621 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2623 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2625 /* ??? Unwind info is not correct around the CFG unless either a frame
2626 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2627 unwind info generation to be aware of the CFG and propagating states
2629 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2630 || flag_exceptions
|| flag_non_call_exceptions
)
2631 && flag_omit_frame_pointer
2632 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2634 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2635 warning (0, "unwind tables currently require either a frame pointer "
2636 "or -maccumulate-outgoing-args for correctness");
2637 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2640 /* For sane SSE instruction set generation we need fcomi instruction.
2641 It is safe to enable all CMOVE instructions. */
2645 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2648 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2649 p
= strchr (internal_label_prefix
, 'X');
2650 internal_label_prefix_len
= p
- internal_label_prefix
;
2654 /* When scheduling description is not available, disable scheduler pass
2655 so it won't slow down the compilation and make x87 code slower. */
2656 if (!TARGET_SCHEDULE
)
2657 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2659 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2660 set_param_value ("simultaneous-prefetches",
2661 ix86_cost
->simultaneous_prefetches
);
2662 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2663 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2664 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE
))
2665 set_param_value ("l1-cache-size", ix86_cost
->l1_cache_size
);
2666 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE
))
2667 set_param_value ("l2-cache-size", ix86_cost
->l2_cache_size
);
2670 /* Return true if this goes in large data/bss. */
2673 ix86_in_large_data_p (tree exp
)
2675 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
2678 /* Functions are never large data. */
2679 if (TREE_CODE (exp
) == FUNCTION_DECL
)
2682 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
2684 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
2685 if (strcmp (section
, ".ldata") == 0
2686 || strcmp (section
, ".lbss") == 0)
2692 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
2694 /* If this is an incomplete type with size 0, then we can't put it
2695 in data because it might be too big when completed. */
2696 if (!size
|| size
> ix86_section_threshold
)
2703 /* Switch to the appropriate section for output of DECL.
2704 DECL is either a `VAR_DECL' node or a constant of some sort.
2705 RELOC indicates whether forming the initial value of DECL requires
2706 link-time relocations. */
2708 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
2712 x86_64_elf_select_section (tree decl
, int reloc
,
2713 unsigned HOST_WIDE_INT align
)
2715 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2716 && ix86_in_large_data_p (decl
))
2718 const char *sname
= NULL
;
2719 unsigned int flags
= SECTION_WRITE
;
2720 switch (categorize_decl_for_section (decl
, reloc
))
2725 case SECCAT_DATA_REL
:
2726 sname
= ".ldata.rel";
2728 case SECCAT_DATA_REL_LOCAL
:
2729 sname
= ".ldata.rel.local";
2731 case SECCAT_DATA_REL_RO
:
2732 sname
= ".ldata.rel.ro";
2734 case SECCAT_DATA_REL_RO_LOCAL
:
2735 sname
= ".ldata.rel.ro.local";
2739 flags
|= SECTION_BSS
;
2742 case SECCAT_RODATA_MERGE_STR
:
2743 case SECCAT_RODATA_MERGE_STR_INIT
:
2744 case SECCAT_RODATA_MERGE_CONST
:
2748 case SECCAT_SRODATA
:
2755 /* We don't split these for medium model. Place them into
2756 default sections and hope for best. */
2761 /* We might get called with string constants, but get_named_section
2762 doesn't like them as they are not DECLs. Also, we need to set
2763 flags in that case. */
2765 return get_section (sname
, flags
, NULL
);
2766 return get_named_section (decl
, sname
, reloc
);
2769 return default_elf_select_section (decl
, reloc
, align
);
2772 /* Build up a unique section name, expressed as a
2773 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2774 RELOC indicates whether the initial value of EXP requires
2775 link-time relocations. */
2777 static void ATTRIBUTE_UNUSED
2778 x86_64_elf_unique_section (tree decl
, int reloc
)
2780 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2781 && ix86_in_large_data_p (decl
))
2783 const char *prefix
= NULL
;
2784 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2785 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2787 switch (categorize_decl_for_section (decl
, reloc
))
2790 case SECCAT_DATA_REL
:
2791 case SECCAT_DATA_REL_LOCAL
:
2792 case SECCAT_DATA_REL_RO
:
2793 case SECCAT_DATA_REL_RO_LOCAL
:
2794 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2797 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2800 case SECCAT_RODATA_MERGE_STR
:
2801 case SECCAT_RODATA_MERGE_STR_INIT
:
2802 case SECCAT_RODATA_MERGE_CONST
:
2803 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2805 case SECCAT_SRODATA
:
2812 /* We don't split these for medium model. Place them into
2813 default sections and hope for best. */
2821 plen
= strlen (prefix
);
2823 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2824 name
= targetm
.strip_name_encoding (name
);
2825 nlen
= strlen (name
);
2827 string
= (char *) alloca (nlen
+ plen
+ 1);
2828 memcpy (string
, prefix
, plen
);
2829 memcpy (string
+ plen
, name
, nlen
+ 1);
2831 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2835 default_unique_section (decl
, reloc
);
2838 #ifdef COMMON_ASM_OP
2839 /* This says how to output assembler code to declare an
2840 uninitialized external linkage data object.
2842 For medium model x86-64 we need to use .largecomm opcode for
2845 x86_elf_aligned_common (FILE *file
,
2846 const char *name
, unsigned HOST_WIDE_INT size
,
2849 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2850 && size
> (unsigned int)ix86_section_threshold
)
2851 fprintf (file
, ".largecomm\t");
2853 fprintf (file
, "%s", COMMON_ASM_OP
);
2854 assemble_name (file
, name
);
2855 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2856 size
, align
/ BITS_PER_UNIT
);
2860 /* Utility function for targets to use in implementing
2861 ASM_OUTPUT_ALIGNED_BSS. */
2864 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2865 const char *name
, unsigned HOST_WIDE_INT size
,
2868 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2869 && size
> (unsigned int)ix86_section_threshold
)
2870 switch_to_section (get_named_section (decl
, ".lbss", 0));
2872 switch_to_section (bss_section
);
2873 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2874 #ifdef ASM_DECLARE_OBJECT_NAME
2875 last_assemble_variable_decl
= decl
;
2876 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2878 /* Standard thing is just output label for the object. */
2879 ASM_OUTPUT_LABEL (file
, name
);
2880 #endif /* ASM_DECLARE_OBJECT_NAME */
2881 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2885 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2887 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2888 make the problem with not enough registers even worse. */
2889 #ifdef INSN_SCHEDULING
2891 flag_schedule_insns
= 0;
2895 /* The Darwin libraries never set errno, so we might as well
2896 avoid calling them when that's the only reason we would. */
2897 flag_errno_math
= 0;
2899 /* The default values of these switches depend on the TARGET_64BIT
2900 that is not known at this moment. Mark these values with 2 and
2901 let the user override these. In case there is no command line option
2902 specifying them, we will set the defaults in override_options. */
2904 flag_omit_frame_pointer
= 2;
2905 flag_pcc_struct_return
= 2;
2906 flag_asynchronous_unwind_tables
= 2;
2907 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2908 SUBTARGET_OPTIMIZATION_OPTIONS
;
2912 /* Decide whether we can make a sibling call to a function. DECL is the
2913 declaration of the function being targeted by the call and EXP is the
2914 CALL_EXPR representing the call. */
2917 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2922 /* If we are generating position-independent code, we cannot sibcall
2923 optimize any indirect call, or a direct call to a global function,
2924 as the PLT requires %ebx be live. */
2925 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2932 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2933 if (POINTER_TYPE_P (func
))
2934 func
= TREE_TYPE (func
);
2937 /* Check that the return value locations are the same. Like
2938 if we are returning floats on the 80387 register stack, we cannot
2939 make a sibcall from a function that doesn't return a float to a
2940 function that does or, conversely, from a function that does return
2941 a float to a function that doesn't; the necessary stack adjustment
2942 would not be executed. This is also the place we notice
2943 differences in the return value ABI. Note that it is ok for one
2944 of the functions to have void return type as long as the return
2945 value of the other is passed in a register. */
2946 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2947 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2949 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2951 if (!rtx_equal_p (a
, b
))
2954 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2956 else if (!rtx_equal_p (a
, b
))
2959 /* If this call is indirect, we'll need to be able to use a call-clobbered
2960 register for the address of the target function. Make sure that all
2961 such registers are not used for passing parameters. */
2962 if (!decl
&& !TARGET_64BIT
)
2966 /* We're looking at the CALL_EXPR, we need the type of the function. */
2967 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2968 type
= TREE_TYPE (type
); /* pointer type */
2969 type
= TREE_TYPE (type
); /* function type */
2971 if (ix86_function_regparm (type
, NULL
) >= 3)
2973 /* ??? Need to count the actual number of registers to be used,
2974 not the possible number of registers. Fix later. */
2979 /* Dllimport'd functions are also called indirectly. */
2980 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2981 && decl
&& DECL_DLLIMPORT_P (decl
)
2982 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2985 /* If we forced aligned the stack, then sibcalling would unalign the
2986 stack, which may break the called function. */
2987 if (cfun
->machine
->force_align_arg_pointer
)
2990 /* Otherwise okay. That also includes certain types of indirect calls. */
2994 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2995 calling convention attributes;
2996 arguments as in struct attribute_spec.handler. */
2999 ix86_handle_cconv_attribute (tree
*node
, tree name
,
3001 int flags ATTRIBUTE_UNUSED
,
3004 if (TREE_CODE (*node
) != FUNCTION_TYPE
3005 && TREE_CODE (*node
) != METHOD_TYPE
3006 && TREE_CODE (*node
) != FIELD_DECL
3007 && TREE_CODE (*node
) != TYPE_DECL
)
3009 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
3010 IDENTIFIER_POINTER (name
));
3011 *no_add_attrs
= true;
3015 /* Can combine regparm with all attributes but fastcall. */
3016 if (is_attribute_p ("regparm", name
))
3020 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3022 error ("fastcall and regparm attributes are not compatible");
3025 cst
= TREE_VALUE (args
);
3026 if (TREE_CODE (cst
) != INTEGER_CST
)
3028 warning (OPT_Wattributes
,
3029 "%qs attribute requires an integer constant argument",
3030 IDENTIFIER_POINTER (name
));
3031 *no_add_attrs
= true;
3033 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
3035 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
3036 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
3037 *no_add_attrs
= true;
3041 && lookup_attribute (ix86_force_align_arg_pointer_string
,
3042 TYPE_ATTRIBUTES (*node
))
3043 && compare_tree_int (cst
, REGPARM_MAX
-1))
3045 error ("%s functions limited to %d register parameters",
3046 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
3054 /* Do not warn when emulating the MS ABI. */
3055 if (!TARGET_64BIT_MS_ABI
)
3056 warning (OPT_Wattributes
, "%qs attribute ignored",
3057 IDENTIFIER_POINTER (name
));
3058 *no_add_attrs
= true;
3062 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3063 if (is_attribute_p ("fastcall", name
))
3065 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
3067 error ("fastcall and cdecl attributes are not compatible");
3069 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
3071 error ("fastcall and stdcall attributes are not compatible");
3073 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
3075 error ("fastcall and regparm attributes are not compatible");
3079 /* Can combine stdcall with fastcall (redundant), regparm and
3081 else if (is_attribute_p ("stdcall", name
))
3083 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
3085 error ("stdcall and cdecl attributes are not compatible");
3087 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3089 error ("stdcall and fastcall attributes are not compatible");
3093 /* Can combine cdecl with regparm and sseregparm. */
3094 else if (is_attribute_p ("cdecl", name
))
3096 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
3098 error ("stdcall and cdecl attributes are not compatible");
3100 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3102 error ("fastcall and cdecl attributes are not compatible");
3106 /* Can combine sseregparm with all attributes. */
3111 /* Return 0 if the attributes for two types are incompatible, 1 if they
3112 are compatible, and 2 if they are nearly compatible (which causes a
3113 warning to be generated). */
3116 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
3118 /* Check for mismatch of non-default calling convention. */
3119 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
3121 if (TREE_CODE (type1
) != FUNCTION_TYPE
3122 && TREE_CODE (type1
) != METHOD_TYPE
)
3125 /* Check for mismatched fastcall/regparm types. */
3126 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
3127 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
3128 || (ix86_function_regparm (type1
, NULL
)
3129 != ix86_function_regparm (type2
, NULL
)))
3132 /* Check for mismatched sseregparm types. */
3133 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
3134 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
3137 /* Check for mismatched return types (cdecl vs stdcall). */
3138 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
3139 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
3145 /* Return the regparm value for a function with the indicated TYPE and DECL.
3146 DECL may be NULL when calling function indirectly
3147 or considering a libcall. */
3150 ix86_function_regparm (const_tree type
, const_tree decl
)
3153 int regparm
= ix86_regparm
;
3158 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
3160 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
3162 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
3165 /* Use register calling convention for local functions when possible. */
3166 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
3167 && flag_unit_at_a_time
&& !profile_flag
)
3169 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3170 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3173 int local_regparm
, globals
= 0, regno
;
3176 /* Make sure no regparm register is taken by a
3177 global register variable. */
3178 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
3179 if (global_regs
[local_regparm
])
3182 /* We can't use regparm(3) for nested functions as these use
3183 static chain pointer in third argument. */
3184 if (local_regparm
== 3
3185 && (decl_function_context (decl
)
3186 || ix86_force_align_arg_pointer
)
3187 && !DECL_NO_STATIC_CHAIN (decl
))
3190 /* If the function realigns its stack pointer, the prologue will
3191 clobber %ecx. If we've already generated code for the callee,
3192 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3193 scanning the attributes for the self-realigning property. */
3194 f
= DECL_STRUCT_FUNCTION (decl
);
3195 if (local_regparm
== 3
3196 && (f
? !!f
->machine
->force_align_arg_pointer
3197 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
3198 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3201 /* Each global register variable increases register pressure,
3202 so the more global reg vars there are, the less useful the regparm
3203 optimization is, unless requested by the user explicitly. */
3204 for (regno
= 0; regno
< 6; regno
++)
3205 if (global_regs
[regno
])
3208 = globals
< local_regparm
? local_regparm
- globals
: 0;
3210 if (local_regparm
> regparm
)
3211 regparm
= local_regparm
;
3218 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3219 DFmode (2) arguments in SSE registers for a function with the
3220 indicated TYPE and DECL. DECL may be NULL when calling function
3221 indirectly or considering a libcall. Otherwise return 0. */
3224 ix86_function_sseregparm (const_tree type
, const_tree decl
)
3226 gcc_assert (!TARGET_64BIT
);
3228 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3229 by the sseregparm attribute. */
3230 if (TARGET_SSEREGPARM
3231 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3236 error ("Calling %qD with attribute sseregparm without "
3237 "SSE/SSE2 enabled", decl
);
3239 error ("Calling %qT with attribute sseregparm without "
3240 "SSE/SSE2 enabled", type
);
3247 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3248 (and DFmode for SSE2) arguments in SSE registers. */
3249 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3251 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3252 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3254 return TARGET_SSE2
? 2 : 1;
3260 /* Return true if EAX is live at the start of the function. Used by
3261 ix86_expand_prologue to determine if we need special help before
3262 calling allocate_stack_worker. */
3265 ix86_eax_live_at_start_p (void)
3267 /* Cheat. Don't bother working forward from ix86_function_regparm
3268 to the function type to whether an actual argument is located in
3269 eax. Instead just look at cfg info, which is still close enough
3270 to correct at this point. This gives false positives for broken
3271 functions that might use uninitialized data that happens to be
3272 allocated in eax, but who cares? */
3273 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
3276 /* Value is the number of bytes of arguments automatically
3277 popped when returning from a subroutine call.
3278 FUNDECL is the declaration node of the function (as a tree),
3279 FUNTYPE is the data type of the function (as a tree),
3280 or for a library call it is an identifier node for the subroutine name.
3281 SIZE is the number of bytes of arguments passed on the stack.
3283 On the 80386, the RTD insn may be used to pop them if the number
3284 of args is fixed, but if the number is variable then the caller
3285 must pop them all. RTD can't be used for library calls now
3286 because the library is compiled with the Unix compiler.
3287 Use of RTD is a selectable option, since it is incompatible with
3288 standard Unix calling sequences. If the option is not selected,
3289 the caller must always pop the args.
3291 The attribute stdcall is equivalent to RTD on a per module basis. */
3294 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3298 /* None of the 64-bit ABIs pop arguments. */
3302 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3304 /* Cdecl functions override -mrtd, and never pop the stack. */
3305 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
3307 /* Stdcall and fastcall functions will pop the stack if not
3309 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3310 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3313 if (rtd
&& ! stdarg_p (funtype
))
3317 /* Lose any fake structure return argument if it is passed on the stack. */
3318 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3319 && !KEEP_AGGREGATE_RETURN_POINTER
)
3321 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3323 return GET_MODE_SIZE (Pmode
);
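/* Editor's illustrative note: for a 32-bit function declared
   int __attribute__((stdcall)) f (int a, int b); this returns 8, so the
   callee pops its two stack arguments with "ret $8", whereas a plain
   cdecl function returns 0 and leaves the cleanup to the caller.  */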
3329 /* Argument support functions. */
3331 /* Return true when register may be used to pass function parameters. */
3333 ix86_function_arg_regno_p (int regno
)
3336 const int *parm_regs
;
3341 return (regno
< REGPARM_MAX
3342 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3344 return (regno
< REGPARM_MAX
3345 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3346 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3347 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3348 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3353 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3358 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3359 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3363 /* RAX is used as hidden argument to va_arg functions. */
3364 if (!TARGET_64BIT_MS_ABI
&& regno
== 0)
3367 if (TARGET_64BIT_MS_ABI
)
3368 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
3370 parm_regs
= x86_64_int_parameter_registers
;
3371 for (i
= 0; i
< REGPARM_MAX
; i
++)
3372 if (regno
== parm_regs
[i
])
3377 /* Return if we do not know how to pass TYPE solely in registers. */
3380 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
3382 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3385 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3386 The layout_type routine is crafty and tries to trick us into passing
3387 currently unsupported vector types on the stack by using TImode. */
3388 return (!TARGET_64BIT
&& mode
== TImode
3389 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  memset (cum, 0, sizeof (*cum));

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_SSE)
    cum->sse_nregs = SSE_REGPARM_MAX;
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode. */
      if (cum->maybe_vaarg)
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
    }
}

/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */

static enum machine_mode
type_natural_mode (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      return mode;
	}
    }

  return mode;
}

/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}

/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}

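/* Illustrative note (not part of the original sources): merging is what
   turns a mixed eightbyte into a single class.  For a hypothetical
   struct { float f; int i; } the one eightbyte contains an SSESF word and
   an INTEGERSI word; rule #4 above merges them to INTEGERSI, so the whole
   struct travels in a single general-purpose register.  */
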
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];
	  }
	  break;

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode)
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 2;
    case TCmode:
      /* This modes is larger than 16 bytes.  */
      return 0;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}

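/* Worked example (illustration only, not part of the original sources): for
   a hypothetical struct { double d; long l; } the routine above classifies
   the first eightbyte as SSEDF and the second as INTEGER, so the struct is
   passed in %xmm0 plus one integer register.  A struct larger than 16 bytes,
   or one containing a long double field, ends up in the MEMORY class and is
   passed on the stack instead.  */
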
/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}

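/* Illustrative sketch (hypothetical caller, not part of the original
   sources): a caller that only needs to know whether a value lives in
   registers can query the out-parameters like this:

       int ir, sr;
       if (examine_argument (TYPE_MODE (type), type, 0, &ir, &sr))
	 ...ir integer and sr SSE registers are consumed...
       else
	 ...the value is passed in memory...
*/
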
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2 && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));

  if (n == 2
      && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode, *intreg),
					    GEN_INT (i * 8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (SFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (DFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}

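/* Illustrative note (not part of the original sources): for a hypothetical
   struct { long a; double b; } passed by value, the code above builds a
   PARALLEL of two EXPR_LISTs, roughly

       (parallel [(expr_list (reg:DI di)   (const_int 0))
		  (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. byte 0 of the argument travels in a general-purpose register and
   byte 8 in an SSE register.  */
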
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}

static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 tree type, HOST_WIDE_INT words)
{
  int int_nregs, sse_nregs;

  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
    cum->words += words;
  else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    cum->words += words;
}

static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, int named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_64BIT_MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode || mode == DImode)
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == 0)
		regno = 2;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}

static rtx
function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, tree type)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}

static rtx
function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, int named)
{
  unsigned int regno;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
	      tree type, int named)
{
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  if (TARGET_64BIT_MS_ABI)
    return function_arg_ms_64 (cum, mode, omode, named);
  else if (TARGET_64BIT)
    return function_arg_64 (cum, mode, omode, type);
  else
    return function_arg_32 (cum, mode, omode, type, bytes, words);
}

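/* Illustrative sketch (hypothetical prototypes, not part of the original
   sources): under the fastcall convention handled above the first two
   pointer-sized arguments land in ECX and EDX and the rest go on the stack,
   while regparm allocates EAX, EDX and ECX in declaration order.  */
#if 0
int __attribute__ ((fastcall))    example_fast (int a, int b, int c);
				  /* a in ECX, b in EDX, c on the stack.  */
int __attribute__ ((regparm (3))) example_reg (int a, int b, int c);
				  /* a in EAX, b in EDX, c in ECX.  */
#endif
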
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT_MS_ABI)
    {
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      int el2 = exact_log2 (int_size_in_bytes (type));
	      return !(el2 >= 0 && el2 <= 3);
	    }
	}

      /* __m128 is passed by reference.  */
      /* ??? How to handle complex?  For now treat them as structs,
	 and pass them by reference if they're too large.  */
      if (GET_MODE_SIZE (mode) > 8)
	return true;
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}

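/* Illustrative note (not part of the original sources): under the predicate
   above a Win64 aggregate of exactly 1, 2, 4 or 8 bytes is passed by value
   in a register, while e.g. a 12-byte struct, an array parameter or an
   __m128 value is copied to memory and passed by pointer.  */
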
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  */

static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}

/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!TARGET_SSE)
	align = PARM_BOUNDARY;
      else if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > 128)
    align = 128;
  return align;
}

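/* Illustrative note (not part of the original sources): on 32-bit targets a
   plain int argument keeps the default PARM_BOUNDARY alignment, while an
   __m128 argument, or a struct containing one, is reported as 128-bit
   aligned so the caller pads the outgoing stack before storing it.  */
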
/* Return true if N is a possible register number of function value.  */

int
ix86_function_value_regno_p (int regno)
{
  switch (regno)
    {
    case 0:
      return true;
    case FIRST_FLOAT_REG:
      if (TARGET_64BIT_MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;
    case FIRST_SSE_REG:
      return TARGET_SSE;
    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }
  return false;
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = TARGET_SSE ? FIRST_SSE_REG : 0;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = 0;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  return gen_rtx_REG (orig_mode, regno);
}

static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case XFmode:
	case XCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	case TCmode:
	  return NULL;
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     REGPARM_MAX, SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, 0);

  return ret;
}

static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = 0;

  if (TARGET_SSE)
    {
      if (mode == SFmode || mode == DFmode)
	regno = FIRST_SSE_REG;
      else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
	regno = FIRST_SSE_REG;
    }

  return gen_rtx_REG (orig_mode, regno);
}

static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT_MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}

rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}

/* Return true iff type is returned in memory.  */

static int
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exits.  */
      if (size == 8)
	return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return (TARGET_SSE ? 0 : 1);
    }

  if (mode == XFmode)
    return 0;

  return size > 12;
}

static int
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}

static int
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 and friends are returned in xmm0.  */
  if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
    return 0;

  /* Otherwise, the size must be exactly in [1248].  But not for complex.  */
  return (size != 1 && size != 2 && size != 4 && size != 8)
	 || COMPLEX_MODE_P (mode);
}

int
ix86_return_in_memory (const_tree type)
{
  const enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT_MS_ABI)
    return return_in_memory_ms_64 (type, mode);
  else if (TARGET_64BIT)
    return return_in_memory_64 (type, mode);
  else
    return return_in_memory_32 (type, mode);
}

/* Return false iff TYPE is returned in memory.  This version is used
   on Solaris 10.  It is similar to the generic ix86_return_in_memory,
   but differs notably in that when MMX is available, 8-byte vectors
   are returned in memory, rather than in MMX registers.  */

int
ix86_sol10_return_in_memory (const_tree type)
{
  int size;
  enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT)
    return return_in_memory_64 (type, mode);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (VECTOR_MODE_P (mode))
    {
      /* Return in memory only if MMX registers *are* available.  This
	 seems backwards, but it is consistent with the existing
	 Solaris x86 ABI.  */
      if (size == 8)
	return TARGET_MMX;
      if (size == 16)
	return !TARGET_SSE;
    }
  else if (mode == TImode)
    return !TARGET_SSE;
  else if (mode == XFmode)
    return 0;

  return size > 12;
}

/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}

/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

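/* Illustrative note (not part of the original sources): the record built
   above corresponds to the C-level declaration

       typedef struct __va_list_tag {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } va_list[1];

   which is the va_list layout the x86-64 psABI specifies.  */
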
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  alias_set_type set;
  int i;

  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;
  /* We need 16-byte stack alignment to save SSE registers.  If user
     asked for lower preferred_stack_boundary, lets just hope that he knows
     what he is doing and won't varargs SSE values.

     We also may end up assuming that only 64bit values are stored in SSE
     register let some floating point program work.  */
  if (ix86_preferred_stack_boundary >= 128)
    cfun->stack_alignment_needed = 128;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  for (i = cum->regno;
       i < ix86_regparm
       && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (cum->sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (cum->sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (cum->sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (cum->sse_regno), label));
    }
}

static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  for (i = cum->regno; i < REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    function_arg_advance (&next_cum, mode, type, 1);

  if (TARGET_64BIT_MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
		  build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  t = make_tree (type, virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (POINTER_PLUS_EXPR, type, t,
		size_int (words * UNITS_PER_WORD));
  t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}

/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      int need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_convert (sizetype, gpr);
	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_convert (sizetype, fpr);
	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (need_temp)
	{
	  int i;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
				      size_int (src_offset));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
				       size_int (INTVAL (XEXP (slot, 1))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);
	}

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
		  size_int (align - 1));
      t = fold_convert (sizetype, t);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  size_int (-align));
      t = fold_convert (TREE_TYPE (ovf), t);
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
	      size_int (rsize * UNITS_PER_WORD));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}

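/* Illustrative note (not part of the original sources): for
   va_arg (ap, double) the expansion produced above behaves roughly like

       if (ap->fp_offset >= 48 + 8 * 16)
	 goto overflow;
       addr = ap->reg_save_area + ap->fp_offset;
       ap->fp_offset += 16;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     done:
       result = *(double *) addr;

   matching the va_arg algorithm given in the x86-64 psABI.  */
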
/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}

/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);
  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_size || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if mode is a valid mode for sse.  */

static int
standard_sse_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
 */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode)
      && standard_sse_mode_p (mode))
    return TARGET_SSE2 ? 2 : -1;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      if (get_attr_mode (insn) == MODE_V4SF)
	return "xorps\t%0, %0";
      else if (get_attr_mode (insn) == MODE_V2DF)
	return "xorpd\t%0, %0";
      else
	return "pxor\t%0, %0";
    case 2:
      return "pcmpeqd\t%0, %0";
    default:
      break;
    }
  gcc_unreachable ();
}

/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}


/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n", asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Emit code for the SET_GOT patterns.  */

output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)

  if (TARGET_VXWORKS_RTP && flag_pic)

      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)

      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

        output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

        output_asm_insn ("call\t%a2", xops);

      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());

      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

        output_asm_insn ("pop{l}\t%0", xops);

      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());

      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (label));

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);

    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
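/* Illustrative note (not part of the original sources): for the common
   -fpic case with TARGET_DEEP_BRANCH_PREDICTION the sequence printed above
   for %ebx is roughly

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   while the non-thunk variant pops the return address instead:

       call    1f
   1:  popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
*/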
/* Generate an "push" pattern for input ARG.  */

  return gen_rtx_SET (VOIDmode,
                      gen_rtx_PRE_DEC (Pmode,
                                       stack_pointer_rtx)),

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

ix86_select_alt_pic_regnum (void)

  if (current_function_is_leaf && !current_function_profile
      && !ix86_current_function_calls_tls_descriptor)

      for (i = 2; i >= 0; --i)
        if (!df_regs_ever_live_p (i))

  return INVALID_REGNUM;
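/* Note (added): the loop above scans hard registers 2, 1 and 0, which in
   this port's register numbering are %ecx, %edx and %eax -- call-clobbered
   integer registers that may be free in a leaf, non-profiled function.  */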
/* Return 1 if we need to save REGNO.  */

ix86_save_reg (unsigned int regno, int maybe_eh_return)

  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || current_function_profile
          || current_function_calls_eh_return
          || current_function_uses_const_pool))

      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)

  if (current_function_calls_eh_return && maybe_eh_return)

          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)

  if (cfun->machine->force_align_arg_pointer
      && regno == REGNO (cfun->machine->force_align_arg_pointer))

  return (df_regs_ever_live_p (regno)
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
/* Return number of registers to be saved on the stack.  */

ix86_nsaved_regs (void)

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

ix86_initial_elimination_offset (int from, int to)

  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;

      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
/* Fill structure ix86_frame about frame of currently computed function.  */

ix86_compute_frame_layout (struct ix86_frame *frame)

  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)

      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.  */
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;

        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);

  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;

    frame->save_regs_using_mov = false;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (stack_alignment_needed
              <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  if (ix86_save_varrargs_registers)

      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;

    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
                     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca
          || ix86_current_function_calls_tls_descriptor))

      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;

    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     ...  */
  if (!current_function_is_leaf || current_function_calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    frame->padding2 = ((offset + preferred_alignment - 1)
                       & -preferred_alignment) - offset;

    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)

      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;

    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;

  fprintf (stderr, "\n");
  fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
  fprintf (stderr, "size: %ld\n", (long)size);
  fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
  fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
  fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
  fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
  fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
  fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
           (long)frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
  fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
  fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
  fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
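/* Worked example (added; numbers purely illustrative): with
   frame_pointer_needed, UNITS_PER_WORD == 4 and two saved registers the
   code above computes

       offset = 8                         return address + saved %ebp
       hard_frame_pointer_offset = 8
       offset = 8 + 2*4 = 16              register save area
       frame_pointer_offset = 16 + padding1

   and to_allocate = size + padding1 + padding2 + outgoing args + va_arg
   area, which is the amount the prologue later subtracts from %esp.  */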
/* Emit code to save registers in the prologue.  */

ix86_emit_save_regs (void)

  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
    if (ix86_save_reg (regno, true))

        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;

/* Emit code to save registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))

        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   ...  */

pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)

    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));

      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  ATM indirect sibcall
         shouldn't be used together with huge frame sizes in one
         function because of the frame_size check in sibcall.c.  */

      r11 = gen_rtx_REG (DImode, R11_REG);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));

        RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,

    RTX_FRAME_RELATED_P (insn) = 1;
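/* Note (added): the %r11 path above exists because a 64-bit stack
   adjustment whose constant does not fit in a sign-extended 32-bit
   immediate cannot be encoded directly in an add/sub instruction; the
   offset is first loaded into %r11 and then added to the stack pointer.  */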
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

ix86_internal_arg_pointer (void)

  bool has_force_align_arg_pointer =
    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
       && DECL_NAME (current_function_decl)
       && MAIN_NAME_P (DECL_NAME (current_function_decl))
       && DECL_FILE_SCOPE_P (current_function_decl))
      || ix86_force_align_arg_pointer
      || has_force_align_arg_pointer)

      /* Nested functions can't realign the stack due to a register
         ...  */
      if (DECL_CONTEXT (current_function_decl)
          && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)

          if (ix86_force_align_arg_pointer)
            warning (0, "-mstackrealign ignored for nested functions");
          if (has_force_align_arg_pointer)
            error ("%s not supported for nested functions",
                   ix86_force_align_arg_pointer_string);
          return virtual_incoming_args_rtx;

      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
      return copy_to_reg (cfun->machine->force_align_arg_pointer);

  return virtual_incoming_args_rtx;
/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */

ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)

  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

    case UNSPEC_REG_SAVE:
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
                              SET_DEST (pattern));

    case UNSPEC_DEF_CFA:
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
                         INTVAL (XVECEXP (unspec, 0, 0)));
/* Expand the prologue into a bunch of separate insns.  */

ix86_expand_prologue (void)

  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  if (cfun->machine->force_align_arg_pointer)

      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The unwind info consists of two parts: install the fafp as the cfa,
         and record the fafp as the "save register" of the stack pointer.
         The later is there in order that the unwinder can see where it
         should restore the stack pointer across the and insn.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

      /* Align the stack.  */
      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,

      /* And here we cheat like madmen with the unwind info.  We force the
         cfa register back to sp+4, which is exactly what it was at the
         start of the function.  Re-pushing the return address results in
         the return at the same spot relative to the cfa, and thus is
         correct wrt the unwind info.  */
      x = cfun->machine->force_align_arg_pointer;
      x = gen_frame_mem (Pmode, plus_constant (x, -4));
      insn = emit_insn (gen_push (x));
      RTX_FRAME_RELATED_P (insn) = 1;

      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)

      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();

    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
                                   : stack_pointer_rtx,
                                   -frame.nregs * UNITS_PER_WORD);

  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-allocate), -1);

      /* Only valid for Win32.  */
      rtx eax = gen_rtx_REG (Pmode, 0);

      gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);

      if (TARGET_64BIT_MS_ABI)

        eax_live = ix86_eax_live_at_start_p ();

          emit_insn (gen_push (eax));
          allocate -= UNITS_PER_WORD;

      emit_move_insn (eax, GEN_INT (allocate));

        insn = gen_allocate_stack_worker_64 (eax);

        insn = gen_allocate_stack_worker_32 (eax);
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                            t, REG_NOTES (insn));

          if (frame_pointer_needed)
            t = plus_constant (hard_frame_pointer_rtx,
                               - frame.nregs * UNITS_PER_WORD);

            t = plus_constant (stack_pointer_rtx, allocate);
          emit_move_insn (eax, gen_rtx_MEM (Pmode, t));

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)

      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);

        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || current_function_profile))

      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;

      if (ix86_cmodel == CM_LARGE_PIC)

          rtx tmp_reg = gen_rtx_REG (DImode,
                                     FIRST_REX_INT_REG + 3 /* R11 */);
          rtx label = gen_label_rtx ();

          LABEL_PRESERVE_P (label) = 1;
          gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
          insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
          insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
          insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                        pic_offset_table_rtx, tmp_reg));

          insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));

        insn = emit_insn (gen_set_got (pic_offset_table_rtx));

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)

        emit_insn (gen_prologue_use (pic_offset_table_rtx));
      emit_insn (gen_blockage ());
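/* Illustrative note (not part of the original sources): for a typical ia32
   function with a frame pointer and push-style register saves, the insns
   emitted above correspond to assembly of roughly this shape:

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx                 # register saves (push variant)
       subl    $N, %esp             # N == frame.to_allocate

   With frame.save_regs_using_mov the pushes are replaced by moves into the
   already-allocated frame, which newer cores can execute in parallel.  */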
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
                                  int maybe_eh_return)

  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))

        /* Ensure that adjust_address won't be forced to produce pointer
           out of range allowed by x86-64 instruction set.  */
        if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))

            r11 = gen_rtx_REG (DImode, R11_REG);
            emit_move_insn (r11, GEN_INT (offset));
            emit_insn (gen_adddi3 (r11, r11, pointer));
            base_address = gen_rtx_MEM (Pmode, r11);

        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (base_address, Pmode, offset));
        offset += UNITS_PER_WORD;
/* Restore function stack, frame, and registers.  */

ix86_expand_epilogue (int style)

  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)

  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && cfun->machine->use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && cfun->machine->use_fast_prologue_epilogue
          && frame.nregs == 1)
      || current_function_calls_eh_return)

      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  Only exception is esp pointing directly to the
         end of block of saved registers, where we may simplify addressing
         ...  */
      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);

        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */

          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

          if (frame_pointer_needed)

              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,

              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));

      else if (!frame_pointer_needed)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate
                                            + frame.nregs * UNITS_PER_WORD),
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
               || !cfun->machine->use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());

          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,

            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));

            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));

      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */

          gcc_assert (frame_pointer_needed);
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     GEN_INT (offset), style);

      else if (frame.to_allocate)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))

              emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));

              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));

      if (frame_pointer_needed)

          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));

            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));

  if (cfun->machine->force_align_arg_pointer)

      emit_insn (gen_addsi3 (stack_pointer_rtx,
                             cfun->machine->force_align_arg_pointer,

  /* Sibcall epilogues don't want a return instruction.  */

  if (current_function_pops_args && current_function_args_size)

      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
         return address, do explicit add, and jump indirectly to the
         ...  */
      if (current_function_pops_args >= 65536)

          rtx ecx = gen_rtx_REG (SImode, 2);

          /* There is no "pascal" calling convention in any 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));

        emit_jump_insn (gen_return_pop_internal (popc));

    emit_jump_insn (gen_return_internal ());
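/* Illustrative note (not part of the original sources): the two epilogue
   strategies selected above correspond roughly to

       movl    -4(%ebp), %ebx       # restore via moves
       leave                        #   (or movl %ebp,%esp; popl %ebp)
       ret

   versus

       addl    $N, %esp             # deallocate the frame first
       popl    %ebx                 # then pop the saved registers
       popl    %ebp
       ret
*/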
/* Reset from the function's potential modifications.  */

ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)

  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */

      rtx insn = get_last_insn ();

             && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
        insn = PREV_INSN (insn);

                 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
        fputs ("\tnop\n", file);
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

ix86_decompose_address (rtx addr, struct ix86_address *out)

  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  enum ix86_address_seg seg = SEG_DEFAULT;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)

  else if (GET_CODE (addr) == PLUS)

          addends[n++] = XEXP (op, 1);

      while (GET_CODE (op) == PLUS);

      for (i = n; i >= 0; --i)

          switch (GET_CODE (op))

              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);

              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;

  else if (GET_CODE (addr) == MULT)

      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);

  else if (GET_CODE (addr) == ASHIFT)

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)

    disp = addr;                        /* displacement */

  /* Extract the integral value of scale.  */

      if (!CONST_INT_P (scale_rtx))
      scale = INTVAL (scale_rtx);

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))

      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
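/* Example (added): the address
       (plus (reg %ebx)
             (plus (mult (reg %esi) (const_int 4)) (const_int 12)))
   decomposes into base = %ebx, index = %esi, scale = 4, disp = 12,
   i.e. the operand printed as 12(%ebx,%esi,4) in AT&T syntax.  */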
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires to two regs - that would mean more pseudos with longer
   ...  */

ix86_address_cost (rtx x)

  struct ix86_address parts;

  int ok = ix86_decompose_address (x, &parts);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))

      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)

  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
     since it's predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     ...  */
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used for to form addresses to local data when -fPIC is in
   ...  */

darwin_local_data_pic (rtx disp)

  if (GET_CODE (disp) == MINUS)

      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)

            const char *sym_name = XSTR (XEXP (disp, 1), 0);
            if (! strcmp (sym_name, "<pic base>"))
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

legitimate_constant_p (rtx x)

  switch (GET_CODE (x))

      if (GET_CODE (x) == PLUS)

          if (!CONST_INT_P (XEXP (x, 1)))

      if (TARGET_MACHO && darwin_local_data_pic (x))

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))

            return TARGET_64BIT;

            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);

            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
      if (GET_CODE (x) != SYMBOL_REF)

      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
          && SYMBOL_REF_DLLIMPORT_P (x))

      if (GET_MODE (x) == TImode
          && x != CONST0_RTX (TImode)

      if (x == CONST0_RTX (GET_MODE (x)))

  /* Otherwise we handle everything else in the move patterns.  */
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

ix86_cannot_force_const_mem (rtx x)

  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))

  return !legitimate_constant_p (x);

/* Determine if a given RTX is a valid constant address.  */

constant_address_p (rtx x)

  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

legitimate_pic_operand_p (rtx x)

  switch (GET_CODE (x))

      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && CONST_INT_P (XEXP (inner, 1)))
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))

            return TARGET_64BIT;

            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);

      return legitimate_pic_address_disp_p (x);
7016 /* Determine if a given CONST RTX is a valid memory displacement
7020 legitimate_pic_address_disp_p (rtx disp
)
7024 /* In 64bit mode we can allow direct addresses of symbols and labels
7025 when they are not dynamic symbols. */
7028 rtx op0
= disp
, op1
;
7030 switch (GET_CODE (disp
))
7036 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
7038 op0
= XEXP (XEXP (disp
, 0), 0);
7039 op1
= XEXP (XEXP (disp
, 0), 1);
7040 if (!CONST_INT_P (op1
)
7041 || INTVAL (op1
) >= 16*1024*1024
7042 || INTVAL (op1
) < -16*1024*1024)
7044 if (GET_CODE (op0
) == LABEL_REF
)
7046 if (GET_CODE (op0
) != SYMBOL_REF
)
7051 /* TLS references should always be enclosed in UNSPEC. */
7052 if (SYMBOL_REF_TLS_MODEL (op0
))
7054 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
7055 && ix86_cmodel
!= CM_LARGE_PIC
)
7063 if (GET_CODE (disp
) != CONST
)
7065 disp
= XEXP (disp
, 0);
7069 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7070 of GOT tables. We should not need these anyway. */
7071 if (GET_CODE (disp
) != UNSPEC
7072 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
7073 && XINT (disp
, 1) != UNSPEC_GOTOFF
7074 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
7077 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
7078 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
7084 if (GET_CODE (disp
) == PLUS
)
7086 if (!CONST_INT_P (XEXP (disp
, 1)))
7088 disp
= XEXP (disp
, 0);
7092 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
7095 if (GET_CODE (disp
) != UNSPEC
)
7098 switch (XINT (disp
, 1))
7103 /* We need to check for both symbols and labels because VxWorks loads
7104 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7106 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
7107 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
7109 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7110 While ABI specify also 32bit relocation but we don't produce it in
7111 small PIC model at all. */
7112 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
7113 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
7115 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
7117 case UNSPEC_GOTTPOFF
:
7118 case UNSPEC_GOTNTPOFF
:
7119 case UNSPEC_INDNTPOFF
:
7122 disp
= XVECEXP (disp
, 0, 0);
7123 return (GET_CODE (disp
) == SYMBOL_REF
7124 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
7126 disp
= XVECEXP (disp
, 0, 0);
7127 return (GET_CODE (disp
) == SYMBOL_REF
7128 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
7130 disp
= XVECEXP (disp
, 0, 0);
7131 return (GET_CODE (disp
) == SYMBOL_REF
7132 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
7138 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7139 memory address for an instruction. The MODE argument is the machine mode
7140 for the MEM expression that wants to use this address.
7142 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7143 convert common non-canonical forms to canonical form so that they will
7147 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
7148 rtx addr
, int strict
)
7150 struct ix86_address parts
;
7151 rtx base
, index
, disp
;
7152 HOST_WIDE_INT scale
;
7153 const char *reason
= NULL
;
7154 rtx reason_rtx
= NULL_RTX
;
7156 if (ix86_decompose_address (addr
, &parts
) <= 0)
7158 reason
= "decomposition failed";
7163 index
= parts
.index
;
7165 scale
= parts
.scale
;
7167 /* Validate base register.
7169 Don't allow SUBREG's that span more than a word here. It can lead to spill
7170 failures when the base is one word out of a two word structure, which is
7171 represented internally as a DImode int. */
7180 else if (GET_CODE (base
) == SUBREG
7181 && REG_P (SUBREG_REG (base
))
7182 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
7184 reg
= SUBREG_REG (base
);
7187 reason
= "base is not a register";
7191 if (GET_MODE (base
) != Pmode
)
7193 reason
= "base is not in Pmode";
7197 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
7198 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
7200 reason
= "base is not valid";
7205 /* Validate index register.
7207 Don't allow SUBREG's that span more than a word here -- same as above. */
7216 else if (GET_CODE (index
) == SUBREG
7217 && REG_P (SUBREG_REG (index
))
7218 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
7220 reg
= SUBREG_REG (index
);
7223 reason
= "index is not a register";
7227 if (GET_MODE (index
) != Pmode
)
7229 reason
= "index is not in Pmode";
7233 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
7234 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
7236 reason
= "index is not valid";
7241 /* Validate scale factor. */
7244 reason_rtx
= GEN_INT (scale
);
7247 reason
= "scale without index";
7251 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
7253 reason
= "scale is not a valid multiplier";
7258 /* Validate displacement. */
7263 if (GET_CODE (disp
) == CONST
7264 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
7265 switch (XINT (XEXP (disp
, 0), 1))
7267 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7268 used. While ABI specify also 32bit relocations, we don't produce
7269 them at all and use IP relative instead. */
7272 gcc_assert (flag_pic
);
7274 goto is_legitimate_pic
;
7275 reason
= "64bit address unspec";
7278 case UNSPEC_GOTPCREL
:
7279 gcc_assert (flag_pic
);
7280 goto is_legitimate_pic
;
7282 case UNSPEC_GOTTPOFF
:
7283 case UNSPEC_GOTNTPOFF
:
7284 case UNSPEC_INDNTPOFF
:
7290 reason
= "invalid address unspec";
7294 else if (SYMBOLIC_CONST (disp
)
7298 && MACHOPIC_INDIRECT
7299 && !machopic_operand_p (disp
)
7305 if (TARGET_64BIT
&& (index
|| base
))
7307 /* foo@dtpoff(%rX) is ok. */
7308 if (GET_CODE (disp
) != CONST
7309 || GET_CODE (XEXP (disp
, 0)) != PLUS
7310 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7311 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7312 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7313 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7315 reason
= "non-constant pic memory reference";
7319 else if (! legitimate_pic_address_disp_p (disp
))
7321 reason
= "displacement is an invalid pic construct";
7325 /* This code used to verify that a symbolic pic displacement
7326 includes the pic_offset_table_rtx register.
7328 While this is good idea, unfortunately these constructs may
7329 be created by "adds using lea" optimization for incorrect
7338 This code is nonsensical, but results in addressing
7339 GOT table with pic_offset_table_rtx base. We can't
7340 just refuse it easily, since it gets matched by
7341 "addsi3" pattern, that later gets split to lea in the
7342 case output register differs from input. While this
7343 can be handled by separate addsi pattern for this case
7344 that never results in lea, this seems to be easier and
7345 correct fix for crash to disable this test. */
7347 else if (GET_CODE (disp
) != LABEL_REF
7348 && !CONST_INT_P (disp
)
7349 && (GET_CODE (disp
) != CONST
7350 || !legitimate_constant_p (disp
))
7351 && (GET_CODE (disp
) != SYMBOL_REF
7352 || !legitimate_constant_p (disp
)))
7354 reason
= "displacement is not constant";
7357 else if (TARGET_64BIT
7358 && !x86_64_immediate_operand (disp
, VOIDmode
))
7360 reason
= "displacement is out of range";
7365 /* Everything looks valid. */
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)

  static alias_set_type set = -1;

    set = new_alias_set ();
7383 /* Return a legitimate reference for ORIG (an address) using the
7384 register REG. If REG is 0, a new pseudo is generated.
7386 There are two types of references that must be handled:
7388 1. Global data references must load the address from the GOT, via
7389 the PIC reg. An insn is emitted to do this load, and the reg is
7392 2. Static data references, constant pool addresses, and code labels
7393 compute the address as an offset from the GOT, whose base is in
7394 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7395 differentiate them from global data objects. The returned
7396 address is the PIC reg + an unspec constant.
7398 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7399 reg also appears in the address. */
7402 legitimize_pic_address (rtx orig
, rtx reg
)
7409 if (TARGET_MACHO
&& !TARGET_64BIT
)
7412 reg
= gen_reg_rtx (Pmode
);
7413 /* Use the generic Mach-O PIC machinery. */
7414 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7418 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7420 else if (TARGET_64BIT
7421 && ix86_cmodel
!= CM_SMALL_PIC
7422 && gotoff_operand (addr
, Pmode
))
7425 /* This symbol may be referenced via a displacement from the PIC
7426 base address (@GOTOFF). */
7428 if (reload_in_progress
)
7429 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7430 if (GET_CODE (addr
) == CONST
)
7431 addr
= XEXP (addr
, 0);
7432 if (GET_CODE (addr
) == PLUS
)
7434 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7436 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7439 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7440 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7442 tmpreg
= gen_reg_rtx (Pmode
);
7445 emit_move_insn (tmpreg
, new_rtx
);
7449 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7450 tmpreg
, 1, OPTAB_DIRECT
);
7453 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7455 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7457 /* This symbol may be referenced via a displacement from the PIC
7458 base address (@GOTOFF). */
7460 if (reload_in_progress
)
7461 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7462 if (GET_CODE (addr
) == CONST
)
7463 addr
= XEXP (addr
, 0);
7464 if (GET_CODE (addr
) == PLUS
)
7466 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7468 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7471 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7472 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7473 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7477 emit_move_insn (reg
, new_rtx
);
7481 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7482 /* We can't use @GOTOFF for text labels on VxWorks;
7483 see gotoff_operand. */
7484 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7486 /* Given that we've already handled dllimport variables separately
7487 in legitimize_address, and all other variables should satisfy
7488 legitimate_pic_address_disp_p, we should never arrive here. */
7489 gcc_assert (!TARGET_64BIT_MS_ABI
);
7491 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7493 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7494 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7495 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7496 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7499 reg
= gen_reg_rtx (Pmode
);
7500 /* Use directly gen_movsi, otherwise the address is loaded
7501 into register for CSE. We don't want to CSE this addresses,
7502 instead we CSE addresses from the GOT table, so skip this. */
7503 emit_insn (gen_movsi (reg
, new_rtx
));
7508 /* This symbol must be referenced via a load from the
7509 Global Offset Table (@GOT). */
7511 if (reload_in_progress
)
7512 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7513 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7514 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7516 new_rtx
= force_reg (Pmode
, new_rtx
);
7517 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7518 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7519 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7522 reg
= gen_reg_rtx (Pmode
);
7523 emit_move_insn (reg
, new_rtx
);
7529 if (CONST_INT_P (addr
)
7530 && !x86_64_immediate_operand (addr
, VOIDmode
))
7534 emit_move_insn (reg
, addr
);
7538 new_rtx
= force_reg (Pmode
, addr
);
7540 else if (GET_CODE (addr
) == CONST
)
7542 addr
= XEXP (addr
, 0);
7544 /* We must match stuff we generate before. Assume the only
7545 unspecs that can get here are ours. Not that we could do
7546 anything with them anyway.... */
7547 if (GET_CODE (addr
) == UNSPEC
7548 || (GET_CODE (addr
) == PLUS
7549 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7551 gcc_assert (GET_CODE (addr
) == PLUS
);
7553 if (GET_CODE (addr
) == PLUS
)
7555 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7557 /* Check first to see if this is a constant offset from a @GOTOFF
7558 symbol reference. */
7559 if (gotoff_operand (op0
, Pmode
)
7560 && CONST_INT_P (op1
))
7564 if (reload_in_progress
)
7565 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7566 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7568 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
7569 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7570 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7574 emit_move_insn (reg
, new_rtx
);
7580 if (INTVAL (op1
) < -16*1024*1024
7581 || INTVAL (op1
) >= 16*1024*1024)
7583 if (!x86_64_immediate_operand (op1
, Pmode
))
7584 op1
= force_reg (Pmode
, op1
);
7585 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7591 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7592 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
7593 base
== reg
? NULL_RTX
: reg
);
7595 if (CONST_INT_P (new_rtx
))
7596 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
7599 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
7601 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
7602 new_rtx
= XEXP (new_rtx
, 1);
7604 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
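/* Illustrative note (not part of the original sources): on ia32,
   legitimize_pic_address turns a reference to a global symbol into a load
   through the GOT, roughly

       movl    foo@GOT(%ebx), %eax

   while a local/static symbol is addressed relative to the GOT base,
   e.g. leal foo@GOTOFF(%ebx), %eax, matching the UNSPEC_GOT and
   UNSPEC_GOTOFF forms built above.  */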
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

get_thread_pointer (int to_reg)

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);
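/* Note (added): the UNSPEC_TP rtx built here is what the move patterns
   later expand into the thread-pointer read (via the %gs segment on 32-bit
   GNU targets, %fs on 64-bit ones), matching the SEG_GS/SEG_FS choice made
   for UNSPEC_TP in ix86_decompose_address.  */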
7630 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7631 false if we expect this to be used for a memory address and true if
7632 we expect to load the address into a register. */
7635 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7637 rtx dest
, base
, off
, pic
, tp
;
7642 case TLS_MODEL_GLOBAL_DYNAMIC
:
7643 dest
= gen_reg_rtx (Pmode
);
7644 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7646 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7648 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7651 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7652 insns
= get_insns ();
7655 CONST_OR_PURE_CALL_P (insns
) = 1;
7656 emit_libcall_block (insns
, dest
, rax
, x
);
7658 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7659 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7661 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7663 if (TARGET_GNU2_TLS
)
7665 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7667 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7671 case TLS_MODEL_LOCAL_DYNAMIC
:
7672 base
= gen_reg_rtx (Pmode
);
7673 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7675 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7677 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7680 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7681 insns
= get_insns ();
7684 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7685 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7686 CONST_OR_PURE_CALL_P (insns
) = 1;
7687 emit_libcall_block (insns
, base
, rax
, note
);
7689 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7690 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7692 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7694 if (TARGET_GNU2_TLS
)
7696 rtx x
= ix86_tls_module_base ();
7698 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7699 gen_rtx_MINUS (Pmode
, x
, tp
));
7702 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7703 off
= gen_rtx_CONST (Pmode
, off
);
7705 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7707 if (TARGET_GNU2_TLS
)
7709 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7711 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7716 case TLS_MODEL_INITIAL_EXEC
:
7720 type
= UNSPEC_GOTNTPOFF
;
7724 if (reload_in_progress
)
7725 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7726 pic
= pic_offset_table_rtx
;
7727 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7729 else if (!TARGET_ANY_GNU_TLS
)
7731 pic
= gen_reg_rtx (Pmode
);
7732 emit_insn (gen_set_got (pic
));
7733 type
= UNSPEC_GOTTPOFF
;
7738 type
= UNSPEC_INDNTPOFF
;
7741 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7742 off
= gen_rtx_CONST (Pmode
, off
);
7744 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7745 off
= gen_const_mem (Pmode
, off
);
7746 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7748 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7750 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7751 off
= force_reg (Pmode
, off
);
7752 return gen_rtx_PLUS (Pmode
, base
, off
);
7756 base
= get_thread_pointer (true);
7757 dest
= gen_reg_rtx (Pmode
);
7758 emit_insn (gen_subsi3 (dest
, base
, off
));
7762 case TLS_MODEL_LOCAL_EXEC
:
7763 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7764 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7765 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7766 off
= gen_rtx_CONST (Pmode
, off
);
7768 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7770 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7771 return gen_rtx_PLUS (Pmode
, base
, off
);
7775 base
= get_thread_pointer (true);
7776 dest
= gen_reg_rtx (Pmode
);
7777 emit_insn (gen_subsi3 (dest
, base
, off
));
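/* Note (added): legitimize_tls_address above handles the four TLS access
   models (global-dynamic, local-dynamic, initial-exec and local-exec); on
   the TARGET_GNU2_TLS paths the thread pointer obtained from
   get_thread_pointer is additionally added to the descriptor-based
   result.  */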
7788 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7791 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
7792 htab_t dllimport_map
;
7795 get_dllimport_decl (tree decl
)
7797 struct tree_map
*h
, in
;
7801 size_t namelen
, prefixlen
;
7807 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
7809 in
.hash
= htab_hash_pointer (decl
);
7810 in
.base
.from
= decl
;
7811 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
7812 h
= (struct tree_map
*) *loc
;
7816 *loc
= h
= GGC_NEW (struct tree_map
);
7818 h
->base
.from
= decl
;
7819 h
->to
= to
= build_decl (VAR_DECL
, NULL
, ptr_type_node
);
7820 DECL_ARTIFICIAL (to
) = 1;
7821 DECL_IGNORED_P (to
) = 1;
7822 DECL_EXTERNAL (to
) = 1;
7823 TREE_READONLY (to
) = 1;
7825 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
7826 name
= targetm
.strip_name_encoding (name
);
7827 prefix
= name
[0] == FASTCALL_PREFIX
? "*__imp_": "*__imp__";
7828 namelen
= strlen (name
);
7829 prefixlen
= strlen (prefix
);
7830 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
7831 memcpy (imp_name
, prefix
, prefixlen
);
7832 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
7834 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
7835 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
7836 SET_SYMBOL_REF_DECL (rtl
, to
);
7837 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
7839 rtl
= gen_const_mem (Pmode
, rtl
);
7840 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
7842 SET_DECL_RTL (to
, rtl
);
7843 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
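/* Example (added): for a dllimported declaration named foo, the code above
   builds an artificial VAR_DECL whose RTL is a load through the import
   pointer, i.e. roughly the memory reference __imp__foo; later references
   to foo are redirected through that indirection.  */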
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

legitimize_dllimport_symbol (rtx symbol, bool want_reg)

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);

    x = force_reg (Pmode, x);
7866 /* Try machine-dependent ways of modifying an illegitimate address
7867 to be legitimate. If we find one, return the new, valid address.
7868 This macro is used in only one place: `memory_address' in explow.c.
7870 OLDX is the address as it was before break_out_memory_refs was called.
7871 In some cases it is useful to look at this to decide what needs to be done.
7873 MODE and WIN are passed so that this macro can use
7874 GO_IF_LEGITIMATE_ADDRESS.
7876 It is always safe for this macro to do nothing. It exists to recognize
7877 opportunities to optimize the output.
7879 For the 80386, we handle X+REG by loading X into a register R and
7880 using R+REG. R will go in a general reg and indexing will be used.
7881 However, if REG is a broken-out memory address or multiplication,
7882 nothing needs to be done because REG can certainly go in a general reg.
7884 When -fpic is used, special handling is needed for symbolic references.
7885 See comments by legitimize_pic_address in i386.c for details. */
7888 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7893 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7895 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
7896 if (GET_CODE (x
) == CONST
7897 && GET_CODE (XEXP (x
, 0)) == PLUS
7898 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7899 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7901 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
7902 (enum tls_model
) log
, false);
7903 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7906 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
7908 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
7909 return legitimize_dllimport_symbol (x
, true);
7910 if (GET_CODE (x
) == CONST
7911 && GET_CODE (XEXP (x
, 0)) == PLUS
7912 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7913 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
7915 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
7916 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7920 if (flag_pic
&& SYMBOLIC_CONST (x
))
7921 return legitimize_pic_address (x
, 0);
7923 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7924 if (GET_CODE (x
) == ASHIFT
7925 && CONST_INT_P (XEXP (x
, 1))
7926 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7929 log
= INTVAL (XEXP (x
, 1));
7930 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7931 GEN_INT (1 << log
));
7934 if (GET_CODE (x
) == PLUS
)
7936 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7938 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7939 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7940 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7943 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7944 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7945 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7946 GEN_INT (1 << log
));
7949 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7950 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7951 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7954 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7955 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7956 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7957 GEN_INT (1 << log
));
7960 /* Put multiply first if it isn't already. */
7961 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7963 rtx tmp
= XEXP (x
, 0);
7964 XEXP (x
, 0) = XEXP (x
, 1);
7969 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7970 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7971 created by virtual register instantiation, register elimination, and
7972 similar optimizations. */
7973 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7976 x
= gen_rtx_PLUS (Pmode
,
7977 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7978 XEXP (XEXP (x
, 1), 0)),
7979 XEXP (XEXP (x
, 1), 1));
7983 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7984 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7985 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7986 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7987 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7988 && CONSTANT_P (XEXP (x
, 1)))
7991 rtx other
= NULL_RTX
;
7993 if (CONST_INT_P (XEXP (x
, 1)))
7995 constant
= XEXP (x
, 1);
7996 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7998 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
8000 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
8001 other
= XEXP (x
, 1);
8009 x
= gen_rtx_PLUS (Pmode
,
8010 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
8011 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
8012 plus_constant (other
, INTVAL (constant
)));
8016 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
8019 if (GET_CODE (XEXP (x
, 0)) == MULT
)
8022 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
8025 if (GET_CODE (XEXP (x
, 1)) == MULT
)
8028 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
8032 && REG_P (XEXP (x
, 1))
8033 && REG_P (XEXP (x
, 0)))
8036 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
8039 x
= legitimize_pic_address (x
, 0);
8042 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
8045 if (REG_P (XEXP (x
, 0)))
8047 rtx temp
= gen_reg_rtx (Pmode
);
8048 rtx val
= force_operand (XEXP (x
, 1), temp
);
8050 emit_move_insn (temp
, val
);
8056 else if (REG_P (XEXP (x
, 1)))
8058 rtx temp
= gen_reg_rtx (Pmode
);
8059 rtx val
= force_operand (XEXP (x
, 0), temp
);
8061 emit_move_insn (temp
, val
);
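/* Illustrative aside (not part of the original GCC source): the
   canonicalizations performed above rely on the fact that an x86 effective
   address has the shape base + index*scale + displacement, with the scale
   restricted to 1, 2, 4 or 8.  The minimal stand-alone sketch below
   (hypothetical helper, never called by the compiler) only demonstrates the
   arithmetic identities being exploited: a left shift by 0..3 is a multiply
   by 1, 2, 4 or 8, and sums may be re-associated so that the constant term
   ends up as the displacement.  */

static unsigned long ATTRIBUTE_UNUSED
ix86_doc_effective_address (unsigned long base, unsigned long index,
                            unsigned int shift, long disp)
{
  /* Shift counts 0..3 correspond to the legal scale factors 1, 2, 4, 8.  */
  unsigned long scale = 1UL << (shift & 3);

  /* (index << shift) + (base + disp) == (base + index*scale) + disp,
     which is exactly the re-association done by legitimize_address.  */
  return base + index * scale + (unsigned long) disp;
}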
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */
8077 output_pic_addr_const (FILE *file
, rtx x
, int code
)
8081 switch (GET_CODE (x
))
8084 gcc_assert (flag_pic
);
8089 if (! TARGET_MACHO
|| TARGET_64BIT
)
8090 output_addr_const (file
, x
);
8093 const char *name
= XSTR (x
, 0);
8095 /* Mark the decl as referenced so that cgraph will
8096 output the function. */
8097 if (SYMBOL_REF_DECL (x
))
8098 mark_decl_referenced (SYMBOL_REF_DECL (x
));
8101 if (MACHOPIC_INDIRECT
8102 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
8103 name
= machopic_indirection_name (x
, /*stub_p=*/true);
8105 assemble_name (file
, name
);
8107 if (!TARGET_MACHO
&& !TARGET_64BIT_MS_ABI
8108 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
8109 fputs ("@PLT", file
);
8116 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
8117 assemble_name (asm_out_file
, buf
);
8121 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8125 /* This used to output parentheses around the expression,
8126 but that does not work on the 386 (either ATT or BSD assembler). */
8127 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8131 if (GET_MODE (x
) == VOIDmode
)
8133 /* We can use %d if the number is <32 bits and positive. */
8134 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
8135 fprintf (file
, "0x%lx%08lx",
8136 (unsigned long) CONST_DOUBLE_HIGH (x
),
8137 (unsigned long) CONST_DOUBLE_LOW (x
));
8139 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
8142 /* We can't handle floating point constants;
8143 PRINT_OPERAND must handle them. */
8144 output_operand_lossage ("floating constant misused");
8148 /* Some assemblers need integer constants to appear first. */
8149 if (CONST_INT_P (XEXP (x
, 0)))
8151 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8153 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8157 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
8158 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8160 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8166 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
8167 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8169 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8171 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
8175 gcc_assert (XVECLEN (x
, 0) == 1);
8176 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
8177 switch (XINT (x
, 1))
8180 fputs ("@GOT", file
);
8183 fputs ("@GOTOFF", file
);
8186 fputs ("@PLTOFF", file
);
8188 case UNSPEC_GOTPCREL
:
8189 fputs ("@GOTPCREL(%rip)", file
);
8191 case UNSPEC_GOTTPOFF
:
8192 /* FIXME: This might be @TPOFF in Sun ld too. */
8193 fputs ("@GOTTPOFF", file
);
8196 fputs ("@TPOFF", file
);
8200 fputs ("@TPOFF", file
);
8202 fputs ("@NTPOFF", file
);
8205 fputs ("@DTPOFF", file
);
8207 case UNSPEC_GOTNTPOFF
:
8209 fputs ("@GOTTPOFF(%rip)", file
);
8211 fputs ("@GOTNTPOFF", file
);
8213 case UNSPEC_INDNTPOFF
:
8214 fputs ("@INDNTPOFF", file
);
8217 output_operand_lossage ("invalid UNSPEC as operand");
8223 output_operand_lossage ("invalid expression as operand");
8227 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8228 We need to emit DTP-relative relocations. */
8230 static void ATTRIBUTE_UNUSED
8231 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
8233 fputs (ASM_LONG
, file
);
8234 output_addr_const (file
, x
);
8235 fputs ("@DTPOFF", file
);
8241 fputs (", 0", file
);
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */
8259 ix86_delegitimize_address (rtx orig_x
)
8262 /* reg_addend is NULL or a multiple of some register. */
8263 rtx reg_addend
= NULL_RTX
;
8264 /* const_addend is NULL or a const_int. */
8265 rtx const_addend
= NULL_RTX
;
8266 /* This is the result, or NULL. */
8267 rtx result
= NULL_RTX
;
8274 if (GET_CODE (x
) != CONST
8275 || GET_CODE (XEXP (x
, 0)) != UNSPEC
8276 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
8279 return XVECEXP (XEXP (x
, 0), 0, 0);
8282 if (GET_CODE (x
) != PLUS
8283 || GET_CODE (XEXP (x
, 1)) != CONST
)
8286 if (REG_P (XEXP (x
, 0))
8287 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8288 /* %ebx + GOT/GOTOFF */
8290 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
8292 /* %ebx + %reg * scale + GOT/GOTOFF */
8293 reg_addend
= XEXP (x
, 0);
8294 if (REG_P (XEXP (reg_addend
, 0))
8295 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8296 reg_addend
= XEXP (reg_addend
, 1);
8297 else if (REG_P (XEXP (reg_addend
, 1))
8298 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
8299 reg_addend
= XEXP (reg_addend
, 0);
8302 if (!REG_P (reg_addend
)
8303 && GET_CODE (reg_addend
) != MULT
8304 && GET_CODE (reg_addend
) != ASHIFT
)
8310 x
= XEXP (XEXP (x
, 1), 0);
8311 if (GET_CODE (x
) == PLUS
8312 && CONST_INT_P (XEXP (x
, 1)))
8314 const_addend
= XEXP (x
, 1);
8318 if (GET_CODE (x
) == UNSPEC
8319 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
8320 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
8321 result
= XVECEXP (x
, 0, 0);
8323 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
8325 result
= XEXP (x
, 0);
8331 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
8333 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */
8342 ix86_find_base_term (rtx x
)
8348 if (GET_CODE (x
) != CONST
)
8351 if (GET_CODE (term
) == PLUS
8352 && (CONST_INT_P (XEXP (term
, 1))
8353 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
8354 term
= XEXP (term
, 0);
8355 if (GET_CODE (term
) != UNSPEC
8356 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
8359 term
= XVECEXP (term
, 0, 0);
8361 if (GET_CODE (term
) != SYMBOL_REF
8362 && GET_CODE (term
) != LABEL_REF
)
8368 term
= ix86_delegitimize_address (x
);
8370 if (GET_CODE (term
) != SYMBOL_REF
8371 && GET_CODE (term
) != LABEL_REF
)
8378 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
8383 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
8385 enum rtx_code second_code
, bypass_code
;
8386 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
8387 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
8388 code
= ix86_fp_compare_code_to_integer (code
);
8392 code
= reverse_condition (code
);
8443 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
8447 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8448 Those same assemblers have the same but opposite lossage on cmov. */
8450 suffix
= fp
? "nbe" : "a";
8451 else if (mode
== CCCmode
)
8474 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8496 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8497 suffix
= fp
? "nb" : "ae";
8500 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8507 else if (mode
== CCCmode
)
8508 suffix
= fp
? "nb" : "ae";
8513 suffix
= fp
? "u" : "p";
8516 suffix
= fp
? "nu" : "np";
8521 fputs (suffix
, file
);
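/* Illustrative aside (not part of the original GCC source): the size codes
   handled by print_reg below simply select which width of the same
   architectural register gets printed.  The hypothetical stand-alone lookup
   below shows the idea for hard register 0 ("ax"); the real code indexes the
   hi_reg_name/qi_reg_name/qi_high_reg_name tables instead.  */

static const char * ATTRIBUTE_UNUSED
ix86_doc_reg0_name (int size_code)
{
  switch (size_code)
    {
    case 'b': return "al";   /* QImode: low byte */
    case 'h': return "ah";   /* the "high" byte register */
    case 'w': return "ax";   /* HImode: 16 bits */
    case 'k': return "eax";  /* SImode: 32 bits */
    case 'q': return "rax";  /* DImode: 64 bits, 64-bit targets only */
    default:  return "ax";   /* fall back to the natural 16-bit name */
    }
}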
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
8533 print_reg (rtx x
, int code
, FILE *file
)
8535 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8536 && REGNO (x
) != FRAME_POINTER_REGNUM
8537 && REGNO (x
) != FLAGS_REG
8538 && REGNO (x
) != FPSR_REG
8539 && REGNO (x
) != FPCR_REG
);
8541 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8544 if (code
== 'w' || MMX_REG_P (x
))
8546 else if (code
== 'b')
8548 else if (code
== 'k')
8550 else if (code
== 'q')
8552 else if (code
== 'y')
8554 else if (code
== 'h')
8557 code
= GET_MODE_SIZE (GET_MODE (x
));
8559 /* Irritatingly, AMD extended registers use different naming convention
8560 from the normal registers. */
8561 if (REX_INT_REG_P (x
))
8563 gcc_assert (TARGET_64BIT
);
8567 error ("extended registers have no high halves");
8570 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8573 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8576 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8579 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8582 error ("unsupported operand size for extended register");
8590 if (STACK_TOP_P (x
))
8592 fputs ("st(0)", file
);
8599 if (! ANY_FP_REG_P (x
))
8600 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8605 fputs (hi_reg_name
[REGNO (x
)], file
);
8608 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8610 fputs (qi_reg_name
[REGNO (x
)], file
);
8613 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8615 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8622 /* Locate some local-dynamic symbol still in use by this function
8623 so that we can print its name in some tls_local_dynamic_base
8627 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8631 if (GET_CODE (x
) == SYMBOL_REF
8632 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8634 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8642 get_some_local_dynamic_name (void)
8646 if (cfun
->machine
->some_ld_name
)
8647 return cfun
->machine
->some_ld_name
;
8649 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8651 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8652 return cfun
->machine
->some_ld_name
;
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
        otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for SSE5 com* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).  */
8689 print_operand (FILE *file
, rtx x
, int code
)
8696 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8701 assemble_name (file
, get_some_local_dynamic_name ());
8705 switch (ASSEMBLER_DIALECT
)
8712 /* Intel syntax. For absolute addresses, registers should not
8713 be surrounded by braces. */
8717 PRINT_OPERAND (file
, x
, 0);
8727 PRINT_OPERAND (file
, x
, 0);
8732 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8737 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8742 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8747 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8752 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8757 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8762 /* 387 opcodes don't get size suffixes if the operands are
8764 if (STACK_REG_P (x
))
8767 /* Likewise if using Intel opcodes. */
8768 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8771 /* This is the size of op from size of operand. */
8772 switch (GET_MODE_SIZE (GET_MODE (x
)))
8781 #ifdef HAVE_GAS_FILDS_FISTS
8791 if (GET_MODE (x
) == SFmode
)
8806 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8808 #ifdef GAS_MNEMONICS
8834 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8836 PRINT_OPERAND (file
, x
, 0);
      /* Little bit of braindamage here.  The SSE compare instructions
         use completely different names for the comparisons than the
         fp conditional moves do.  */
8845 switch (GET_CODE (x
))
8860 fputs ("unord", file
);
8864 fputs ("neq", file
);
8868 fputs ("nlt", file
);
8872 fputs ("nle", file
);
8875 fputs ("ord", file
);
8882 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8883 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8885 switch (GET_MODE (x
))
8887 case HImode
: putc ('w', file
); break;
8889 case SFmode
: putc ('l', file
); break;
8891 case DFmode
: putc ('q', file
); break;
8892 default: gcc_unreachable ();
8899 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8902 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8903 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8906 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8909 /* Like above, but reverse condition */
8911 /* Check to see if argument to %c is really a constant
8912 and not a condition code which needs to be reversed. */
8913 if (!COMPARISON_P (x
))
8915 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8918 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8921 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8922 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8925 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8929 /* It doesn't actually matter what mode we use here, as we're
8930 only going to use this for printing. */
8931 x
= adjust_address_nv (x
, DImode
, 8);
8938 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8941 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8944 int pred_val
= INTVAL (XEXP (x
, 0));
8946 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8947 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8949 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8950 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8952 /* Emit hints only in the case default branch prediction
8953 heuristics would fail. */
8954 if (taken
!= cputaken
)
8956 /* We use 3e (DS) prefix for taken branches and
8957 2e (CS) prefix for not taken branches. */
8959 fputs ("ds ; ", file
);
8961 fputs ("cs ; ", file
);
8969 switch (GET_CODE (x
))
8972 fputs ("neq", file
);
8979 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
8983 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
8994 fputs ("unord", file
);
8997 fputs ("ord", file
);
9000 fputs ("ueq", file
);
9003 fputs ("nlt", file
);
9006 fputs ("nle", file
);
9009 fputs ("ule", file
);
9012 fputs ("ult", file
);
9015 fputs ("une", file
);
9024 fputs (" ; ", file
);
9031 output_operand_lossage ("invalid operand code '%c'", code
);
9036 print_reg (x
, code
, file
);
9040 /* No `byte ptr' prefix for call instructions. */
9041 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
9044 switch (GET_MODE_SIZE (GET_MODE (x
)))
9046 case 1: size
= "BYTE"; break;
9047 case 2: size
= "WORD"; break;
9048 case 4: size
= "DWORD"; break;
9049 case 8: size
= "QWORD"; break;
9050 case 12: size
= "XWORD"; break;
9051 case 16: size
= "XMMWORD"; break;
9056 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9059 else if (code
== 'w')
9061 else if (code
== 'k')
9065 fputs (" PTR ", file
);
9069 /* Avoid (%rip) for call operands. */
9070 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
9071 && !CONST_INT_P (x
))
9072 output_addr_const (file
, x
);
9073 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
9074 output_operand_lossage ("invalid constraints for operand");
9079 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
9084 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
9085 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
9087 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9089 fprintf (file
, "0x%08lx", l
);
9092 /* These float cases don't actually occur as immediate operands. */
9093 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
9097 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
9098 fprintf (file
, "%s", dstr
);
9101 else if (GET_CODE (x
) == CONST_DOUBLE
9102 && GET_MODE (x
) == XFmode
)
9106 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
9107 fprintf (file
, "%s", dstr
);
9112 /* We have patterns that allow zero sets of memory, for instance.
9113 In 64-bit mode, we should probably support all 8-byte vectors,
9114 since we can in fact encode that into an immediate. */
9115 if (GET_CODE (x
) == CONST_VECTOR
)
9117 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
9123 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
9125 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9128 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
9129 || GET_CODE (x
) == LABEL_REF
)
9131 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9134 fputs ("OFFSET FLAT:", file
);
9137 if (CONST_INT_P (x
))
9138 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
9140 output_pic_addr_const (file
, x
, code
);
9142 output_addr_const (file
, x
);
9146 /* Print a memory operand whose address is ADDR. */
9149 print_operand_address (FILE *file
, rtx addr
)
9151 struct ix86_address parts
;
9152 rtx base
, index
, disp
;
9154 int ok
= ix86_decompose_address (addr
, &parts
);
9159 index
= parts
.index
;
9161 scale
= parts
.scale
;
9169 if (USER_LABEL_PREFIX
[0] == 0)
9171 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
9177 if (!base
&& !index
)
9179 /* Displacement only requires special attention. */
9181 if (CONST_INT_P (disp
))
9183 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
9185 if (USER_LABEL_PREFIX
[0] == 0)
9187 fputs ("ds:", file
);
9189 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
9192 output_pic_addr_const (file
, disp
, 0);
9194 output_addr_const (file
, disp
);
9196 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9199 if (GET_CODE (disp
) == CONST
9200 && GET_CODE (XEXP (disp
, 0)) == PLUS
9201 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9202 disp
= XEXP (XEXP (disp
, 0), 0);
9203 if (GET_CODE (disp
) == LABEL_REF
9204 || (GET_CODE (disp
) == SYMBOL_REF
9205 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
9206 fputs ("(%rip)", file
);
9211 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9216 output_pic_addr_const (file
, disp
, 0);
9217 else if (GET_CODE (disp
) == LABEL_REF
)
9218 output_asm_label (disp
);
9220 output_addr_const (file
, disp
);
9225 print_reg (base
, 0, file
);
9229 print_reg (index
, 0, file
);
9231 fprintf (file
, ",%d", scale
);
9237 rtx offset
= NULL_RTX
;
9241 /* Pull out the offset of a symbol; print any symbol itself. */
9242 if (GET_CODE (disp
) == CONST
9243 && GET_CODE (XEXP (disp
, 0)) == PLUS
9244 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9246 offset
= XEXP (XEXP (disp
, 0), 1);
9247 disp
= gen_rtx_CONST (VOIDmode
,
9248 XEXP (XEXP (disp
, 0), 0));
9252 output_pic_addr_const (file
, disp
, 0);
9253 else if (GET_CODE (disp
) == LABEL_REF
)
9254 output_asm_label (disp
);
9255 else if (CONST_INT_P (disp
))
9258 output_addr_const (file
, disp
);
9264 print_reg (base
, 0, file
);
9267 if (INTVAL (offset
) >= 0)
9269 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9273 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9280 print_reg (index
, 0, file
);
9282 fprintf (file
, "*%d", scale
);
9290 output_addr_const_extra (FILE *file
, rtx x
)
9294 if (GET_CODE (x
) != UNSPEC
)
9297 op
= XVECEXP (x
, 0, 0);
9298 switch (XINT (x
, 1))
9300 case UNSPEC_GOTTPOFF
:
9301 output_addr_const (file
, op
);
9302 /* FIXME: This might be @TPOFF in Sun ld. */
9303 fputs ("@GOTTPOFF", file
);
9306 output_addr_const (file
, op
);
9307 fputs ("@TPOFF", file
);
9310 output_addr_const (file
, op
);
9312 fputs ("@TPOFF", file
);
9314 fputs ("@NTPOFF", file
);
9317 output_addr_const (file
, op
);
9318 fputs ("@DTPOFF", file
);
9320 case UNSPEC_GOTNTPOFF
:
9321 output_addr_const (file
, op
);
9323 fputs ("@GOTTPOFF(%rip)", file
);
9325 fputs ("@GOTNTPOFF", file
);
9327 case UNSPEC_INDNTPOFF
:
9328 output_addr_const (file
, op
);
9329 fputs ("@INDNTPOFF", file
);
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
9346 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9350 rtx op
= operands
[num
];
9352 /* simplify_subreg refuse to split volatile memory addresses,
9353 but we still have to handle it. */
9356 lo_half
[num
] = adjust_address (op
, SImode
, 0);
9357 hi_half
[num
] = adjust_address (op
, SImode
, 4);
9361 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
9362 GET_MODE (op
) == VOIDmode
9363 ? DImode
: GET_MODE (op
), 0);
9364 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
9365 GET_MODE (op
) == VOIDmode
9366 ? DImode
: GET_MODE (op
), 4);
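/* Illustrative aside (not part of the original GCC source): for memory
   operands, split_di above simply forms two SImode references at byte
   offsets 0 and 4, matching the little-endian layout of a DImode value.
   A plain-C sketch of the same split (hypothetical helper, not used by the
   compiler):  */

static void ATTRIBUTE_UNUSED
ix86_doc_split_u64 (unsigned long long value,
                    unsigned int *lo_half, unsigned int *hi_half)
{
  /* Offset 0 holds the low 32 bits, offset 4 the high 32 bits.  */
  *lo_half = (unsigned int) (value & 0xffffffffu);
  *hi_half = (unsigned int) (value >> 32);
}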
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
9377 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9381 rtx op
= operands
[num
];
9383 /* simplify_subreg refuse to split volatile memory addresses, but we
9384 still have to handle it. */
9387 lo_half
[num
] = adjust_address (op
, DImode
, 0);
9388 hi_half
[num
] = adjust_address (op
, DImode
, 8);
9392 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
9393 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */
#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
9416 output_387_binary_op (rtx insn
, rtx
*operands
)
9418 static char buf
[30];
9421 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
9423 #ifdef ENABLE_CHECKING
9424 /* Even if we do not want to check the inputs, this documents input
9425 constraints. Which helps in understanding the following code. */
9426 if (STACK_REG_P (operands
[0])
9427 && ((REG_P (operands
[1])
9428 && REGNO (operands
[0]) == REGNO (operands
[1])
9429 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
9430 || (REG_P (operands
[2])
9431 && REGNO (operands
[0]) == REGNO (operands
[2])
9432 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
9433 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
9436 gcc_assert (is_sse
);
9439 switch (GET_CODE (operands
[3]))
9442 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9443 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9451 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9452 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9460 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9461 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9469 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9470 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9484 if (GET_MODE (operands
[0]) == SFmode
)
9485 strcat (buf
, "ss\t{%2, %0|%0, %2}");
9487 strcat (buf
, "sd\t{%2, %0|%0, %2}");
9492 switch (GET_CODE (operands
[3]))
9496 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
9498 rtx temp
= operands
[2];
9499 operands
[2] = operands
[1];
9503 /* know operands[0] == operands[1]. */
9505 if (MEM_P (operands
[2]))
9511 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9513 if (STACK_TOP_P (operands
[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
9521 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9523 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9527 if (STACK_TOP_P (operands
[0]))
9528 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9530 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9535 if (MEM_P (operands
[1]))
9541 if (MEM_P (operands
[2]))
9547 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
9556 if (STACK_TOP_P (operands
[0]))
9557 p
= "{p\t%0, %2|rp\t%2, %0}";
9559 p
= "{rp\t%2, %0|p\t%0, %2}";
9561 if (STACK_TOP_P (operands
[0]))
9562 /* As above for fmul/fadd, we can't store to st(0). */
9563 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9565 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9570 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9573 if (STACK_TOP_P (operands
[0]))
9574 p
= "{rp\t%0, %1|p\t%1, %0}";
9576 p
= "{p\t%1, %0|rp\t%0, %1}";
9578 if (STACK_TOP_P (operands
[0]))
9579 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9581 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9586 if (STACK_TOP_P (operands
[0]))
9588 if (STACK_TOP_P (operands
[1]))
9589 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9591 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9594 else if (STACK_TOP_P (operands
[1]))
9597 p
= "{\t%1, %0|r\t%0, %1}";
9599 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9605 p
= "{r\t%2, %0|\t%0, %2}";
9607 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9620 /* Return needed mode for entity in optimize_mode_switching pass. */
9623 ix86_mode_needed (int entity
, rtx insn
)
9625 enum attr_i387_cw mode
;
  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the function
     has no requirements on the control word and makes no changes in the
     bits we are interested in.  */
9633 || (NONJUMP_INSN_P (insn
)
9634 && (asm_noperands (PATTERN (insn
)) >= 0
9635 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9636 return I387_CW_UNINITIALIZED
;
9638 if (recog_memoized (insn
) < 0)
9641 mode
= get_attr_i387_cw (insn
);
9646 if (mode
== I387_CW_TRUNC
)
9651 if (mode
== I387_CW_FLOOR
)
9656 if (mode
== I387_CW_CEIL
)
9661 if (mode
== I387_CW_MASK_PM
)
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */
9677 emit_i387_cw_initialization (int mode
)
9679 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9682 enum ix86_stack_slot slot
;
9684 rtx reg
= gen_reg_rtx (HImode
);
9686 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9687 emit_move_insn (reg
, copy_rtx (stored_mode
));
9689 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9694 /* round toward zero (truncate) */
9695 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9696 slot
= SLOT_CW_TRUNC
;
9700 /* round down toward -oo */
9701 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9702 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9703 slot
= SLOT_CW_FLOOR
;
9707 /* round up toward +oo */
9708 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9709 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9710 slot
= SLOT_CW_CEIL
;
9713 case I387_CW_MASK_PM
:
9714 /* mask precision exception for nearbyint() */
9715 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9716 slot
= SLOT_CW_MASK_PM
;
9728 /* round toward zero (truncate) */
9729 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9730 slot
= SLOT_CW_TRUNC
;
9734 /* round down toward -oo */
9735 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9736 slot
= SLOT_CW_FLOOR
;
9740 /* round up toward +oo */
9741 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9742 slot
= SLOT_CW_CEIL
;
9745 case I387_CW_MASK_PM
:
9746 /* mask precision exception for nearbyint() */
9747 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9748 slot
= SLOT_CW_MASK_PM
;
9756 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9758 new_mode
= assign_386_stack_local (HImode
, slot
);
9759 emit_move_insn (new_mode
, reg
);
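/* Illustrative aside (not part of the original GCC source): the constants
   used above manipulate the rounding-control field (bits 10-11, mask 0x0c00)
   and the precision-exception mask bit (0x0020) of the x87 control word.
   A stand-alone sketch of the same bit twiddling (hypothetical helper, not
   used by the compiler):  */

static unsigned short ATTRIBUTE_UNUSED
ix86_doc_new_control_word (unsigned short cw, int i387_cw_mode)
{
  switch (i387_cw_mode)
    {
    case I387_CW_TRUNC:                 /* round toward zero */
      return cw | 0x0c00;
    case I387_CW_FLOOR:                 /* round down toward -oo */
      return (cw & ~0x0c00) | 0x0400;
    case I387_CW_CEIL:                  /* round up toward +oo */
      return (cw & ~0x0c00) | 0x0800;
    case I387_CW_MASK_PM:               /* mask the precision exception */
      return cw | 0x0020;
    default:
      return cw;
    }
}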
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */
9767 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9769 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9770 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9771 int round_mode
= get_attr_i387_cw (insn
);
  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
9776 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9777 output_asm_insn ("fld\t%y1", operands
);
9779 gcc_assert (STACK_TOP_P (operands
[1]));
9780 gcc_assert (MEM_P (operands
[0]));
9781 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
9784 output_asm_insn ("fisttp%z0\t%0", operands
);
9787 if (round_mode
!= I387_CW_ANY
)
9788 output_asm_insn ("fldcw\t%3", operands
);
9789 if (stack_top_dies
|| dimode_p
)
9790 output_asm_insn ("fistp%z0\t%0", operands
);
9792 output_asm_insn ("fist%z0\t%0", operands
);
9793 if (round_mode
!= I387_CW_ANY
)
9794 output_asm_insn ("fldcw\t%2", operands
);
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */
9805 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9807 if (TARGET_USE_FFREEP
)
9808 #if HAVE_AS_IX86_FFREEP
9809 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9812 static char retval
[] = ".word\t0xc_df";
9813 int regno
= REGNO (operands
[opno
]);
9815 gcc_assert (FP_REGNO_P (regno
));
9817 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9822 return opno
? "fstp\t%y1" : "fstp\t%y0";
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */
9830 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9833 rtx cmp_op0
, cmp_op1
;
9834 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9838 cmp_op0
= operands
[0];
9839 cmp_op1
= operands
[1];
9843 cmp_op0
= operands
[1];
9844 cmp_op1
= operands
[2];
9849 if (GET_MODE (operands
[0]) == SFmode
)
9851 return "ucomiss\t{%1, %0|%0, %1}";
9853 return "comiss\t{%1, %0|%0, %1}";
9856 return "ucomisd\t{%1, %0|%0, %1}";
9858 return "comisd\t{%1, %0|%0, %1}";
9861 gcc_assert (STACK_TOP_P (cmp_op0
));
9863 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9865 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9869 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9870 return output_387_ffreep (operands
, 1);
9873 return "ftst\n\tfnstsw\t%0";
9876 if (STACK_REG_P (cmp_op1
)
9878 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9879 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
          /* If the top of the 387 stack dies, and the other operand
             is also a stack register that dies, then this must be a
             `fcompp' float compare.  */
9887 /* There is no double popping fcomi variant. Fortunately,
9888 eflags is immune from the fstp's cc clobbering. */
9890 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9892 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9893 return output_387_ffreep (operands
, 0);
9898 return "fucompp\n\tfnstsw\t%0";
9900 return "fcompp\n\tfnstsw\t%0";
9905 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9907 static const char * const alt
[16] =
9909 "fcom%z2\t%y2\n\tfnstsw\t%0",
9910 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9911 "fucom%z2\t%y2\n\tfnstsw\t%0",
9912 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9914 "ficom%z2\t%y2\n\tfnstsw\t%0",
9915 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9919 "fcomi\t{%y1, %0|%0, %y1}",
9920 "fcomip\t{%y1, %0|%0, %y1}",
9921 "fucomi\t{%y1, %0|%0, %y1}",
9922 "fucomip\t{%y1, %0|%0, %y1}",
9933 mask
= eflags_p
<< 3;
9934 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9935 mask
|= unordered_p
<< 1;
9936 mask
|= stack_top_dies
;
9938 gcc_assert (mask
< 16);
9947 ix86_output_addr_vec_elt (FILE *file
, int value
)
9949 const char *directive
= ASM_LONG
;
9953 directive
= ASM_QUAD
;
9955 gcc_assert (!TARGET_64BIT
);
9958 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9962 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9964 const char *directive
= ASM_LONG
;
9967 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9968 directive
= ASM_QUAD
;
9970 gcc_assert (!TARGET_64BIT
);
9972 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9973 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9974 fprintf (file
, "%s%s%d-%s%d\n",
9975 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9976 else if (HAVE_AS_GOTOFF_IN_DATA
)
9977 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9979 else if (TARGET_MACHO
)
9981 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9982 machopic_output_function_base_name (file
);
9983 fprintf(file
, "\n");
9987 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9988 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9991 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9995 ix86_expand_clear (rtx dest
)
9999 /* We play register width games, which are only valid after reload. */
10000 gcc_assert (reload_completed
);
10002 /* Avoid HImode and its attendant prefix byte. */
10003 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
10004 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
10005 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
10007 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10008 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
10010 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10011 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
10017 /* X is an unchanging MEM. If it is a constant pool reference, return
10018 the constant pool rtx, else NULL. */
10021 maybe_get_pool_constant (rtx x
)
10023 x
= ix86_delegitimize_address (XEXP (x
, 0));
10025 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
10026 return get_pool_constant (x
);
10032 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
10034 int strict
= (reload_in_progress
|| reload_completed
);
10036 enum tls_model model
;
10041 if (GET_CODE (op1
) == SYMBOL_REF
)
10043 model
= SYMBOL_REF_TLS_MODEL (op1
);
10046 op1
= legitimize_tls_address (op1
, model
, true);
10047 op1
= force_operand (op1
, op0
);
10051 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10052 && SYMBOL_REF_DLLIMPORT_P (op1
))
10053 op1
= legitimize_dllimport_symbol (op1
, false);
10055 else if (GET_CODE (op1
) == CONST
10056 && GET_CODE (XEXP (op1
, 0)) == PLUS
10057 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
10059 rtx addend
= XEXP (XEXP (op1
, 0), 1);
10060 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
10063 model
= SYMBOL_REF_TLS_MODEL (symbol
);
10065 tmp
= legitimize_tls_address (symbol
, model
, true);
10066 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10067 && SYMBOL_REF_DLLIMPORT_P (symbol
))
10068 tmp
= legitimize_dllimport_symbol (symbol
, true);
10072 tmp
= force_operand (tmp
, NULL
);
10073 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
10074 op0
, 1, OPTAB_DIRECT
);
10080 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
10082 if (TARGET_MACHO
&& !TARGET_64BIT
)
10087 rtx temp
= ((reload_in_progress
10088 || ((op0
&& REG_P (op0
))
10090 ? op0
: gen_reg_rtx (Pmode
));
10091 op1
= machopic_indirect_data_reference (op1
, temp
);
10092 op1
= machopic_legitimize_pic_address (op1
, mode
,
10093 temp
== op1
? 0 : temp
);
10095 else if (MACHOPIC_INDIRECT
)
10096 op1
= machopic_indirect_data_reference (op1
, 0);
10104 op1
= force_reg (Pmode
, op1
);
10105 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
10107 rtx reg
= !can_create_pseudo_p () ? op0
: NULL_RTX
;
10108 op1
= legitimize_pic_address (op1
, reg
);
10117 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
10118 || !push_operand (op0
, mode
))
10120 op1
= force_reg (mode
, op1
);
10122 if (push_operand (op0
, mode
)
10123 && ! general_no_elim_operand (op1
, mode
))
10124 op1
= copy_to_mode_reg (mode
, op1
);
10126 /* Force large constants in 64bit compilation into register
10127 to get them CSEed. */
10128 if (TARGET_64BIT
&& mode
== DImode
10129 && immediate_operand (op1
, mode
)
10130 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
10131 && !register_operand (op0
, mode
)
10132 && optimize
&& !reload_completed
&& !reload_in_progress
)
10133 op1
= copy_to_mode_reg (mode
, op1
);
10135 if (FLOAT_MODE_P (mode
))
10137 /* If we are loading a floating point constant to a register,
10138 force the value to memory now, since we'll get better code
10139 out the back end. */
10143 else if (GET_CODE (op1
) == CONST_DOUBLE
)
10145 op1
= validize_mem (force_const_mem (mode
, op1
));
10146 if (!register_operand (op0
, mode
))
10148 rtx temp
= gen_reg_rtx (mode
);
10149 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
10150 emit_move_insn (op0
, temp
);
10157 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
10161 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
10163 rtx op0
= operands
[0], op1
= operands
[1];
10164 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
10170 if ((reload_in_progress
| reload_completed
) == 0
10171 && register_operand (op0
, mode
)
10172 && (CONSTANT_P (op1
)
10173 || (GET_CODE (op1
) == SUBREG
10174 && CONSTANT_P (SUBREG_REG (op1
))))
10175 && standard_sse_constant_p (op1
) <= 0)
10176 op1
= validize_mem (force_const_mem (mode
, op1
));
  /* TDmode values are passed as TImode on the stack.  TImode values
     are moved via xmm registers, and moving them to stack can result in
     unaligned memory access.  Use ix86_expand_vector_move_misalign()
     if memory operand is not aligned correctly.  */
10182 if (can_create_pseudo_p ()
10183 && (mode
== TImode
) && !TARGET_64BIT
10184 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
10185 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
10189 /* ix86_expand_vector_move_misalign() does not like constants ... */
10190 if (CONSTANT_P (op1
)
10191 || (GET_CODE (op1
) == SUBREG
10192 && CONSTANT_P (SUBREG_REG (op1
))))
10193 op1
= validize_mem (force_const_mem (mode
, op1
));
10195 /* ... nor both arguments in memory. */
10196 if (!register_operand (op0
, mode
)
10197 && !register_operand (op1
, mode
))
10198 op1
= force_reg (mode
, op1
);
10200 tmp
[0] = op0
; tmp
[1] = op1
;
10201 ix86_expand_vector_move_misalign (mode
, tmp
);
10205 /* Make operand1 a register if it isn't already. */
10206 if (can_create_pseudo_p ()
10207 && !register_operand (op0
, mode
)
10208 && !register_operand (op1
, mode
))
10210 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
10214 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
     if (x86_sse_partial_reg_dependency == true)

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
     if (x86_sse_partial_reg_dependency == true)

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
     if (x86_sse_split_regs == true)  */
10270 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
10279 /* If we're optimizing for size, movups is the smallest. */
10282 op0
= gen_lowpart (V4SFmode
, op0
);
10283 op1
= gen_lowpart (V4SFmode
, op1
);
10284 emit_insn (gen_sse_movups (op0
, op1
));
10288 /* ??? If we have typed data, then it would appear that using
10289 movdqu is the only way to get unaligned data loaded with
10291 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10293 op0
= gen_lowpart (V16QImode
, op0
);
10294 op1
= gen_lowpart (V16QImode
, op1
);
10295 emit_insn (gen_sse2_movdqu (op0
, op1
));
10299 if (TARGET_SSE2
&& mode
== V2DFmode
)
10303 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10305 op0
= gen_lowpart (V2DFmode
, op0
);
10306 op1
= gen_lowpart (V2DFmode
, op1
);
10307 emit_insn (gen_sse2_movupd (op0
, op1
));
10311 /* When SSE registers are split into halves, we can avoid
10312 writing to the top half twice. */
10313 if (TARGET_SSE_SPLIT_REGS
)
10315 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
          /* ??? Not sure about the best option for the Intel chips.
             The following would seem to satisfy; the register is
             entirely cleared, breaking the dependency chain.  We
             then store to the upper half, with a dependency depth
             of one.  A rumor has it that Intel recommends two movsd
             followed by an unpacklpd, but this is unconfirmed.  And
             given that the dependency depth of the unpacklpd would
             still be one, I'm not sure why this would be better.  */
10328 zero
= CONST0_RTX (V2DFmode
);
10331 m
= adjust_address (op1
, DFmode
, 0);
10332 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
10333 m
= adjust_address (op1
, DFmode
, 8);
10334 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
10338 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10340 op0
= gen_lowpart (V4SFmode
, op0
);
10341 op1
= gen_lowpart (V4SFmode
, op1
);
10342 emit_insn (gen_sse_movups (op0
, op1
));
10346 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
10347 emit_move_insn (op0
, CONST0_RTX (mode
));
10349 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10351 if (mode
!= V4SFmode
)
10352 op0
= gen_lowpart (V4SFmode
, op0
);
10353 m
= adjust_address (op1
, V2SFmode
, 0);
10354 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
10355 m
= adjust_address (op1
, V2SFmode
, 8);
10356 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
10359 else if (MEM_P (op0
))
10361 /* If we're optimizing for size, movups is the smallest. */
10364 op0
= gen_lowpart (V4SFmode
, op0
);
10365 op1
= gen_lowpart (V4SFmode
, op1
);
10366 emit_insn (gen_sse_movups (op0
, op1
));
10370 /* ??? Similar to above, only less clear because of quote
10371 typeless stores unquote. */
10372 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
10373 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10375 op0
= gen_lowpart (V16QImode
, op0
);
10376 op1
= gen_lowpart (V16QImode
, op1
);
10377 emit_insn (gen_sse2_movdqu (op0
, op1
));
10381 if (TARGET_SSE2
&& mode
== V2DFmode
)
10383 m
= adjust_address (op0
, DFmode
, 0);
10384 emit_insn (gen_sse2_storelpd (m
, op1
));
10385 m
= adjust_address (op0
, DFmode
, 8);
10386 emit_insn (gen_sse2_storehpd (m
, op1
));
10390 if (mode
!= V4SFmode
)
10391 op1
= gen_lowpart (V4SFmode
, op1
);
10392 m
= adjust_address (op0
, V2SFmode
, 0);
10393 emit_insn (gen_sse_storelps (m
, op1
));
10394 m
= adjust_address (op0
, V2SFmode
, 8);
10395 emit_insn (gen_sse_storehps (m
, op1
));
10399 gcc_unreachable ();
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */
10407 ix86_expand_push (enum machine_mode mode
, rtx x
)
10411 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
10412 GEN_INT (-GET_MODE_SIZE (mode
)),
10413 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
10414 if (tmp
!= stack_pointer_rtx
)
10415 emit_move_insn (stack_pointer_rtx
, tmp
);
10417 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
10418 emit_move_insn (tmp
, x
);
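/* Illustrative aside (not part of the original GCC source): the expander
   above open-codes a push as "sp -= size; *sp = value".  The stand-alone
   sketch below (hypothetical helper operating on a plain byte buffer) shows
   the same two steps for a downward-growing stack:  */

static unsigned char * ATTRIBUTE_UNUSED
ix86_doc_emulate_push (unsigned char *sp, const void *value, unsigned int size)
{
  unsigned int i;

  sp -= size;                   /* first adjust the stack pointer ...  */
  for (i = 0; i < size; i++)    /* ... then store the value at the new top.  */
    sp[i] = ((const unsigned char *) value)[i];
  return sp;
}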
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */
10425 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
10428 rtx dst
= operands
[0];
10429 rtx src1
= operands
[1];
10430 rtx src2
= operands
[2];
10432 /* If the operation is not commutative, we can't do anything. */
10433 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
10436 /* Highest priority is that src1 should match dst. */
10437 if (rtx_equal_p (dst
, src1
))
10439 if (rtx_equal_p (dst
, src2
))
10442 /* Next highest priority is that immediate constants come second. */
10443 if (immediate_operand (src2
, mode
))
10445 if (immediate_operand (src1
, mode
))
10448 /* Lowest priority is that memory references should come second. */
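/* Illustrative aside (not part of the original GCC source): the priorities
   described in the comments above can be summarised as a small predicate.
   The hypothetical sketch below returns nonzero when src1 and src2 of a
   commutative operation should be swapped: prefer the operand equal to the
   destination in the first slot, then push immediates (and, with lowest
   priority, memory references) into the second slot.  */

static int ATTRIBUTE_UNUSED
ix86_doc_should_swap (int src1_is_dst, int src2_is_dst,
                      int src1_is_imm, int src2_is_imm,
                      int src1_is_mem, int src2_is_mem)
{
  if (src1_is_dst)
    return 0;           /* highest priority: src1 already matches dst */
  if (src2_is_dst)
    return 1;
  if (src2_is_imm)
    return 0;           /* next: immediate constants should come second */
  if (src1_is_imm)
    return 1;
  if (src2_is_mem)
    return 0;           /* lowest: memory references should come second */
  if (src1_is_mem)
    return 1;
  return 0;
}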
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */
10463 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
10466 rtx dst
= operands
[0];
10467 rtx src1
= operands
[1];
10468 rtx src2
= operands
[2];
10470 /* Canonicalize operand order. */
10471 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10478 /* Both source operands cannot be in memory. */
10479 if (MEM_P (src1
) && MEM_P (src2
))
10481 /* Optimization: Only read from memory once. */
10482 if (rtx_equal_p (src1
, src2
))
10484 src2
= force_reg (mode
, src2
);
10488 src2
= force_reg (mode
, src2
);
10491 /* If the destination is memory, and we do not have matching source
10492 operands, do things in registers. */
10493 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10494 dst
= gen_reg_rtx (mode
);
10496 /* Source 1 cannot be a constant. */
10497 if (CONSTANT_P (src1
))
10498 src1
= force_reg (mode
, src1
);
10500 /* Source 1 cannot be a non-matching memory. */
10501 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10502 src1
= force_reg (mode
, src1
);
10504 operands
[1] = src1
;
10505 operands
[2] = src2
;
10509 /* Similarly, but assume that the destination has already been
10510 set up properly. */
10513 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
10514 enum machine_mode mode
, rtx operands
[])
10516 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10517 gcc_assert (dst
== operands
[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, rather than just to general_operand, which would allow
   3 separate memory references (one output, two input) in a single insn.  */
10525 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
10528 rtx src1
, src2
, dst
, op
, clob
;
10530 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10531 src1
= operands
[1];
10532 src2
= operands
[2];
10534 /* Emit the instruction. */
10536 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
10537 if (reload_in_progress
)
10539 /* Reload doesn't know about the flags register, and doesn't know that
10540 it doesn't want to clobber it. We can only do this with PLUS. */
10541 gcc_assert (code
== PLUS
);
10546 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10547 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10550 /* Fix up the destination if needed. */
10551 if (dst
!= operands
[0])
10552 emit_move_insn (operands
[0], dst
);
10555 /* Return TRUE or FALSE depending on whether the binary operator meets the
10556 appropriate constraints. */
10559 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
10562 rtx dst
= operands
[0];
10563 rtx src1
= operands
[1];
10564 rtx src2
= operands
[2];
10566 /* Both source operands cannot be in memory. */
10567 if (MEM_P (src1
) && MEM_P (src2
))
10570 /* Canonicalize operand order for commutative operators. */
10571 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10578 /* If the destination is memory, we must have a matching source operand. */
10579 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10582 /* Source 1 cannot be a constant. */
10583 if (CONSTANT_P (src1
))
10586 /* Source 1 cannot be a non-matching memory. */
10587 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, rather than just to general_operand, which would allow
   2 separate memory references (one output, one input) in a single insn.  */
10598 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10601 int matching_memory
;
10602 rtx src
, dst
, op
, clob
;
10607 /* If the destination is memory, and we do not have matching source
10608 operands, do things in registers. */
10609 matching_memory
= 0;
10612 if (rtx_equal_p (dst
, src
))
10613 matching_memory
= 1;
10615 dst
= gen_reg_rtx (mode
);
10618 /* When source operand is memory, destination must match. */
10619 if (MEM_P (src
) && !matching_memory
)
10620 src
= force_reg (mode
, src
);
10622 /* Emit the instruction. */
10624 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10625 if (reload_in_progress
|| code
== NOT
)
10627 /* Reload doesn't know about the flags register, and doesn't know that
10628 it doesn't want to clobber it. */
10629 gcc_assert (code
== NOT
);
10634 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10635 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10638 /* Fix up the destination if needed. */
10639 if (dst
!= operands
[0])
10640 emit_move_insn (operands
[0], dst
);
10643 /* Return TRUE or FALSE depending on whether the unary operator meets the
10644 appropriate constraints. */
10647 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10648 enum machine_mode mode ATTRIBUTE_UNUSED
,
10649 rtx operands
[2] ATTRIBUTE_UNUSED
)
10651 /* If one of operands is memory, source and destination must match. */
10652 if ((MEM_P (operands
[0])
10653 || MEM_P (operands
[1]))
10654 && ! rtx_equal_p (operands
[0], operands
[1]))
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */
10663 ix86_split_convert_uns_si_sse (rtx operands
[])
10665 enum machine_mode vecmode
;
10666 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10668 large
= operands
[1];
10669 zero_or_two31
= operands
[2];
10670 input
= operands
[3];
10671 two31
= operands
[4];
10672 vecmode
= GET_MODE (large
);
10673 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10675 /* Load up the value into the low element. We must ensure that the other
10676 elements are valid floats -- zero is the easiest such value. */
10679 if (vecmode
== V4SFmode
)
10680 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10682 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10686 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10687 emit_move_insn (value
, CONST0_RTX (vecmode
));
10688 if (vecmode
== V4SFmode
)
10689 emit_insn (gen_sse_movss (value
, value
, input
));
10691 emit_insn (gen_sse2_movsd (value
, value
, input
));
10694 emit_move_insn (large
, two31
);
10695 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10697 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10698 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10700 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10701 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10703 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10704 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10706 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10707 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10709 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10710 if (vecmode
== V4SFmode
)
10711 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10713 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10716 emit_insn (gen_xorv4si3 (value
, value
, large
));
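/* Illustrative aside (not part of the original GCC source): the splitter
   above implements the usual trick for converting a float/double that may
   exceed INT_MAX to an unsigned 32-bit integer using only signed conversion
   instructions: conditionally subtract 2^31 before converting, then xor the
   sign bit back in afterwards.  A scalar sketch of the same idea
   (hypothetical helper; assumes the value is in [0, 2^32)):  */

static unsigned int ATTRIBUTE_UNUSED
ix86_doc_double_to_u32 (double value)
{
  if (value < 2147483648.0)     /* below 2^31: a plain signed convert works */
    return (unsigned int) (int) value;
  /* Subtract 2^31, convert in the signed range, then restore the top bit.  */
  return (unsigned int) (int) (value - 2147483648.0) ^ 0x80000000u;
}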
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */
10725 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10727 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10728 rtx int_xmm
, fp_xmm
;
10729 rtx biases
, exponents
;
10732 int_xmm
= gen_reg_rtx (V4SImode
);
10733 if (TARGET_INTER_UNIT_MOVES
)
10734 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10735 else if (TARGET_SSE_SPLIT_REGS
)
10737 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10738 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10742 x
= gen_reg_rtx (V2DImode
);
10743 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10744 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10747 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10748 gen_rtvec (4, GEN_INT (0x43300000UL
),
10749 GEN_INT (0x45300000UL
),
10750 const0_rtx
, const0_rtx
));
10751 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10753 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10754 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10756 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10757 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10758 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10759 (0x1.0p84 + double(fp_value_hi_xmm)).
10760 Note these exponents differ by 32. */
10762 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10764 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10765 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10766 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10767 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10768 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10769 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10770 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10771 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10772 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10774 /* Add the upper and lower DFmode values together. */
10776 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10779 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10780 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10781 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10784 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
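/* A worked example of the exponent-juxtaposition trick used above, written
   as plain C.  It assumes little-endian IEEE doubles; the union and names
   are illustrative only and the snippet is excluded from the build.  */
#if 0
static double
convert_uns_didf_sketch (unsigned int lo, unsigned int hi)
{
  union { unsigned int u[2]; double d; } dlo, dhi;

  dlo.u[0] = lo; dlo.u[1] = 0x43300000;  /* == 0x1.0p52 + (double) lo        */
  dhi.u[0] = hi; dhi.u[1] = 0x45300000;  /* == 0x1.0p84 + (double) hi * 2^32 */

  return (dhi.d - 0x1.0p84) + (dlo.d - 0x1.0p52);
}
#endif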
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
                           NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
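/* The bias trick above in scalar form, for reference; two's complement
   wrap-around is assumed, the name is illustrative only, and the snippet
   is excluded from the build.  */
#if 0
static double
convert_uns_sidf_sketch (unsigned int input)
{
  int biased = (int) (input + 0x80000000u);  /* input - 2^31, wrapped  */
  return (double) biased + 0x1.0p31;         /* add the bias back      */
}
#endif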
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
                           0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
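/* Scalar outline of the split performed above: convert the signed high
   half, scale it by 2^32, and add the unsigned conversion of the low half.
   Names are illustrative only; the snippet is excluded from the build.  */
#if 0
static double
convert_sign_didf_sketch (int hi, unsigned int lo)
{
  double fp_hi = (double) hi * 0x1.0p32;  /* floatsidf then multiply by TWO32r */
  double fp_lo = (double) lo;             /* stands in for the unsigned SImode
                                             conversion above                  */
  return fp_hi + fp_lo;
}
#endif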
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
                                NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
                               0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
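/* The 16-bit split above, written out in scalar form; illustrative only
   and excluded from the build.  Both halves convert exactly, so the only
   rounding happens in the final addition.  */
#if 0
static float
convert_uns_sisf_sketch (unsigned int input)
{
  float fp_hi = (float) (input >> 16);     /* high 16 bits, exact  */
  float fp_lo = (float) (input & 0xffff);  /* low 16 bits, exact   */
  return fp_hi * 0x1.0p16f + fp_lo;        /* ONE16r multiply      */
}
#endif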
/* A subroutine of ix86_build_signbit_mask_vector.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  rtvec v;

  switch (mode)
    {
    case SImode:
      gcc_assert (vect);
      v = gen_rtvec (4, value, value, value, value);
      return gen_rtx_CONST_VECTOR (V4SImode, v);

    case DImode:
      gcc_assert (vect);
      v = gen_rtvec (2, value, value);
      return gen_rtx_CONST_VECTOR (V2DImode, v);

    case SFmode:
      if (vect)
        v = gen_rtvec (4, value, value, value, value);
      else
        v = gen_rtvec (4, value, CONST0_RTX (SFmode),
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      return gen_rtx_CONST_VECTOR (V4SFmode, v);

    case DFmode:
      if (vect)
        v = gen_rtvec (2, value, value);
      else
        v = gen_rtvec (2, value, CONST0_RTX (DFmode));
      return gen_rtx_CONST_VECTOR (V2DFmode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case SImode:
    case SFmode:
      imode = SImode;
      vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case DImode:
    case DFmode:
      imode = DImode;
      vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
        lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      imode = TImode;
      vec_mode = VOIDmode;
      gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
      lo = 0, hi = (HOST_WIDE_INT)1 << shift;
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (mode, vect, mask);
  return force_reg (vec_mode, v);
}
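/* For example, for DFmode the masks built above come out as follows
   (shown here only for illustration):
     vect=false, invert=false  ->  { 0x8000000000000000, 0 }
     vect=true,  invert=false  ->  { 0x8000000000000000, 0x8000000000000000 }
     vect=true,  invert=true   ->  { 0x7fffffffffffffff, 0x7fffffffffffffff }
   NEG is then an XOR with the uninverted mask and ABS an AND with the
   inverted one, as done in ix86_expand_fp_absneg_operator below.  */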
10963 /* Generate code for floating point ABS or NEG. */
10966 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10969 rtx mask
, set
, use
, clob
, dst
, src
;
10970 bool matching_memory
;
10971 bool use_sse
= false;
10972 bool vector_mode
= VECTOR_MODE_P (mode
);
10973 enum machine_mode elt_mode
= mode
;
10977 elt_mode
= GET_MODE_INNER (mode
);
10980 else if (mode
== TFmode
)
10982 else if (TARGET_SSE_MATH
)
10983 use_sse
= SSE_FLOAT_MODE_P (mode
);
10985 /* NEG and ABS performed with SSE use bitwise mask operations.
10986 Create the appropriate mask now. */
10988 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10995 /* If the destination is memory, and we don't have matching source
10996 operands or we're using the x87, do things in registers. */
10997 matching_memory
= false;
11000 if (use_sse
&& rtx_equal_p (dst
, src
))
11001 matching_memory
= true;
11003 dst
= gen_reg_rtx (mode
);
11005 if (MEM_P (src
) && !matching_memory
)
11006 src
= force_reg (mode
, src
);
11010 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
11011 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
11016 set
= gen_rtx_fmt_e (code
, mode
, src
);
11017 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
11020 use
= gen_rtx_USE (VOIDmode
, mask
);
11021 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
11022 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
11023 gen_rtvec (3, set
, use
, clob
)));
11029 if (dst
!= operands
[0])
11030 emit_move_insn (operands
[0], dst
);
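/* Scalar sketch of the SSE mask operations emitted above; the union and
   function name are illustrative only and the snippet is excluded from
   the build.  */
#if 0
static double
fp_absneg_sketch (double src, int is_abs)
{
  union { double d; unsigned long long u; } v;
  v.d = src;
  if (is_abs)
    v.u &= 0x7fffffffffffffffULL;  /* AND with the inverted sign mask */
  else
    v.u ^= 0x8000000000000000ULL;  /* XOR with the sign mask (NEG)    */
  return v.d;
}
#endif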
11033 /* Expand a copysign operation. Special case operand 0 being a constant. */
11036 ix86_expand_copysign (rtx operands
[])
11038 enum machine_mode mode
, vmode
;
11039 rtx dest
, op0
, op1
, mask
, nmask
;
11041 dest
= operands
[0];
11045 mode
= GET_MODE (dest
);
11046 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
11048 if (GET_CODE (op0
) == CONST_DOUBLE
)
11050 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
11052 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
11053 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
11055 if (mode
== SFmode
|| mode
== DFmode
)
11057 if (op0
== CONST0_RTX (mode
))
11058 op0
= CONST0_RTX (vmode
);
11063 if (mode
== SFmode
)
11064 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
11065 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
11067 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
11068 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
11072 mask
= ix86_build_signbit_mask (mode
, 0, 0);
11074 if (mode
== SFmode
)
11075 copysign_insn
= gen_copysignsf3_const
;
11076 else if (mode
== DFmode
)
11077 copysign_insn
= gen_copysigndf3_const
;
11079 copysign_insn
= gen_copysigntf3_const
;
11081 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
11085 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
11087 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
11088 mask
= ix86_build_signbit_mask (mode
, 0, 0);
11090 if (mode
== SFmode
)
11091 copysign_insn
= gen_copysignsf3_var
;
11092 else if (mode
== DFmode
)
11093 copysign_insn
= gen_copysigndf3_var
;
11095 copysign_insn
= gen_copysigntf3_var
;
11097 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
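/* Scalar sketch of the split above, assuming the destination already holds
   the sign source (otherwise the AND above could not pick up its sign bit):
   one AND keeps that sign bit, one IOR merges in the constant magnitude.
   Names are illustrative only; the snippet is excluded from the build.  */
#if 0
static double
copysign_const_sketch (double sign_src, double magnitude_abs)
{
  union { double d; unsigned long long u; } dest, mag;
  dest.d = sign_src;
  mag.d = magnitude_abs;
  dest.u &= 0x8000000000000000ULL;  /* dest = dest & mask */
  dest.u |= mag.u;                  /* dest = dest | op0  */
  return dest.d;
}
#endif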
11129 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11130 so we have to do two masks. */
11133 ix86_split_copysign_var (rtx operands
[])
11135 enum machine_mode mode
, vmode
;
11136 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
11138 dest
= operands
[0];
11139 scratch
= operands
[1];
11142 nmask
= operands
[4];
11143 mask
= operands
[5];
11145 mode
= GET_MODE (dest
);
11146 vmode
= GET_MODE (mask
);
11148 if (rtx_equal_p (op0
, op1
))
11150 /* Shouldn't happen often (it's useless, obviously), but when it does
11151 we'd generate incorrect code if we continue below. */
11152 emit_move_insn (dest
, op0
);
11156 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
11158 gcc_assert (REGNO (op1
) == REGNO (scratch
));
11160 x
= gen_rtx_AND (vmode
, scratch
, mask
);
11161 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
11164 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11165 x
= gen_rtx_NOT (vmode
, dest
);
11166 x
= gen_rtx_AND (vmode
, x
, op0
);
11167 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11171 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
11173 x
= gen_rtx_AND (vmode
, scratch
, mask
);
11175 else /* alternative 2,4 */
11177 gcc_assert (REGNO (mask
) == REGNO (scratch
));
11178 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
11179 x
= gen_rtx_AND (vmode
, scratch
, op1
);
11181 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
11183 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
11185 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11186 x
= gen_rtx_AND (vmode
, dest
, nmask
);
11188 else /* alternative 3,4 */
11190 gcc_assert (REGNO (nmask
) == REGNO (dest
));
11192 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11193 x
= gen_rtx_AND (vmode
, dest
, op0
);
11195 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11198 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
11199 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11202 /* Return TRUE or FALSE depending on whether the first SET in INSN
11203 has source and destination with matching CC modes, and that the
11204 CC mode is at least as constrained as REQ_MODE. */
11207 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
11210 enum machine_mode set_mode
;
11212 set
= PATTERN (insn
);
11213 if (GET_CODE (set
) == PARALLEL
)
11214 set
= XVECEXP (set
, 0, 0);
11215 gcc_assert (GET_CODE (set
) == SET
);
11216 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
11218 set_mode
= GET_MODE (SET_DEST (set
));
11222 if (req_mode
!= CCNOmode
11223 && (req_mode
!= CCmode
11224 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
11228 if (req_mode
== CCGCmode
)
11232 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
11236 if (req_mode
== CCZmode
)
11243 gcc_unreachable ();
11246 return (GET_MODE (SET_SRC (set
)) == set_mode
);
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
11285 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
11287 enum machine_mode mode
= GET_MODE (op0
);
11289 if (SCALAR_FLOAT_MODE_P (mode
))
11291 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
11292 return ix86_fp_compare_mode (code
);
11297 /* Only zero flag is needed. */
11298 case EQ
: /* ZF=0 */
11299 case NE
: /* ZF!=0 */
11301 /* Codes needing carry flag. */
11302 case GEU
: /* CF=0 */
11303 case LTU
: /* CF=1 */
11304 /* Detect overflow checks. They need just the carry flag. */
11305 if (GET_CODE (op0
) == PLUS
11306 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11310 case GTU
: /* CF=0 & ZF=0 */
11311 case LEU
: /* CF=1 | ZF=1 */
11312 /* Detect overflow checks. They need just the carry flag. */
11313 if (GET_CODE (op0
) == MINUS
11314 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11318 /* Codes possibly doable only with sign flag when
11319 comparing against zero. */
11320 case GE
: /* SF=OF or SF=0 */
11321 case LT
: /* SF<>OF or SF=1 */
11322 if (op1
== const0_rtx
)
11325 /* For other cases Carry flag is not required. */
11327 /* Codes doable only with sign flag when comparing
11328 against zero, but we miss jump instruction for it
11329 so we need to use relational tests against overflow
11330 that thus needs to be zero. */
11331 case GT
: /* ZF=0 & SF=OF */
11332 case LE
: /* ZF=1 | SF<>OF */
11333 if (op1
== const0_rtx
)
11337 /* strcmp pattern do (use flags) and combine may ask us for proper
11342 gcc_unreachable ();
11346 /* Return the fixed registers used for condition codes. */
11349 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
11356 /* If two condition code modes are compatible, return a condition code
11357 mode which is compatible with both. Otherwise, return
11360 static enum machine_mode
11361 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
11366 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
11369 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
11370 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
11376 gcc_unreachable ();
11406 /* These are only compatible with themselves, which we already
11412 /* Split comparison code CODE into comparisons we can do using branch
11413 instructions. BYPASS_CODE is comparison code for branch that will
11414 branch around FIRST_CODE and SECOND_CODE. If some of branches
11415 is not required, set value to UNKNOWN.
11416 We never require more than two branches. */
11419 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
11420 enum rtx_code
*first_code
,
11421 enum rtx_code
*second_code
)
11423 *first_code
= code
;
11424 *bypass_code
= UNKNOWN
;
11425 *second_code
= UNKNOWN
;
11427 /* The fcomi comparison sets flags as follows:
11437 case GT
: /* GTU - CF=0 & ZF=0 */
11438 case GE
: /* GEU - CF=0 */
11439 case ORDERED
: /* PF=0 */
11440 case UNORDERED
: /* PF=1 */
11441 case UNEQ
: /* EQ - ZF=1 */
11442 case UNLT
: /* LTU - CF=1 */
11443 case UNLE
: /* LEU - CF=1 | ZF=1 */
11444 case LTGT
: /* EQ - ZF=0 */
11446 case LT
: /* LTU - CF=1 - fails on unordered */
11447 *first_code
= UNLT
;
11448 *bypass_code
= UNORDERED
;
11450 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
11451 *first_code
= UNLE
;
11452 *bypass_code
= UNORDERED
;
11454 case EQ
: /* EQ - ZF=1 - fails on unordered */
11455 *first_code
= UNEQ
;
11456 *bypass_code
= UNORDERED
;
11458 case NE
: /* NE - ZF=0 - fails on unordered */
11459 *first_code
= LTGT
;
11460 *second_code
= UNORDERED
;
11462 case UNGE
: /* GEU - CF=0 - fails on unordered */
11464 *second_code
= UNORDERED
;
11466 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
11468 *second_code
= UNORDERED
;
11471 gcc_unreachable ();
11473 if (!TARGET_IEEE_FP
)
11475 *second_code
= UNKNOWN
;
11476 *bypass_code
= UNKNOWN
;
11480 /* Return cost of comparison done fcom + arithmetics operations on AX.
11481 All following functions do use number of instructions as a cost metrics.
11482 In future this should be tweaked to compute bytes for optimize_size and
11483 take into account performance of various instructions on various CPUs. */
11485 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
11487 if (!TARGET_IEEE_FP
)
11489 /* The cost of code output by ix86_expand_fp_compare. */
11513 gcc_unreachable ();
/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
}

/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     prevents gcc from using it.  */
  if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
}
11545 /* Compute cost of the comparison done using any method.
11546 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11548 ix86_fp_comparison_cost (enum rtx_code code
)
11550 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11553 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11554 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11556 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11557 if (min
> sahf_cost
)
11559 if (min
> fcomi_cost
)
11564 /* Return true if we should use an FCOMI instruction for this
11568 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
11570 enum rtx_code swapped_code
= swap_condition (code
);
11572 return ((ix86_fp_comparison_cost (code
)
11573 == ix86_fp_comparison_fcomi_cost (code
))
11574 || (ix86_fp_comparison_cost (swapped_code
)
11575 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
11578 /* Swap, force into registers, or otherwise massage the two operands
11579 to a fp comparison. The operands are updated in place; the new
11580 comparison code is returned. */
11582 static enum rtx_code
11583 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
11585 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
11586 rtx op0
= *pop0
, op1
= *pop1
;
11587 enum machine_mode op_mode
= GET_MODE (op0
);
11588 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
11590 /* All of the unordered compare instructions only work on registers.
11591 The same is true of the fcomi compare instructions. The XFmode
11592 compare instructions require registers except when comparing
11593 against zero or when converting operand 1 from fixed point to
11597 && (fpcmp_mode
== CCFPUmode
11598 || (op_mode
== XFmode
11599 && ! (standard_80387_constant_p (op0
) == 1
11600 || standard_80387_constant_p (op1
) == 1)
11601 && GET_CODE (op1
) != FLOAT
)
11602 || ix86_use_fcomi_compare (code
)))
11604 op0
= force_reg (op_mode
, op0
);
11605 op1
= force_reg (op_mode
, op1
);
11609 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11610 things around if they appear profitable, otherwise force op0
11611 into a register. */
11613 if (standard_80387_constant_p (op0
) == 0
11615 && ! (standard_80387_constant_p (op1
) == 0
11619 tmp
= op0
, op0
= op1
, op1
= tmp
;
11620 code
= swap_condition (code
);
11624 op0
= force_reg (op_mode
, op0
);
11626 if (CONSTANT_P (op1
))
11628 int tmp
= standard_80387_constant_p (op1
);
11630 op1
= validize_mem (force_const_mem (op_mode
, op1
));
11634 op1
= force_reg (op_mode
, op1
);
11637 op1
= force_reg (op_mode
, op1
);
11641 /* Try to rearrange the comparison to make it cheaper. */
11642 if (ix86_fp_comparison_cost (code
)
11643 > ix86_fp_comparison_cost (swap_condition (code
))
11644 && (REG_P (op1
) || can_create_pseudo_p ()))
11647 tmp
= op0
, op0
= op1
, op1
= tmp
;
11648 code
= swap_condition (code
);
11650 op0
= force_reg (op_mode
, op0
);
11658 /* Convert comparison codes we use to represent FP comparison to integer
11659 code that will result in proper branch. Return UNKNOWN if no such code
11663 ix86_fp_compare_code_to_integer (enum rtx_code code
)
11692 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11695 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11696 rtx
*second_test
, rtx
*bypass_test
)
11698 enum machine_mode fpcmp_mode
, intcmp_mode
;
11700 int cost
= ix86_fp_comparison_cost (code
);
11701 enum rtx_code bypass_code
, first_code
, second_code
;
11703 fpcmp_mode
= ix86_fp_compare_mode (code
);
11704 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11707 *second_test
= NULL_RTX
;
11709 *bypass_test
= NULL_RTX
;
11711 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11713 /* Do fcomi/sahf based test when profitable. */
11714 if (ix86_fp_comparison_arithmetics_cost (code
) > cost
11715 && (bypass_code
== UNKNOWN
|| bypass_test
)
11716 && (second_code
== UNKNOWN
|| second_test
))
11718 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11719 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11725 gcc_assert (TARGET_SAHF
);
11728 scratch
= gen_reg_rtx (HImode
);
11729 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
11731 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
11734 /* The FP codes work out to act like unsigned. */
11735 intcmp_mode
= fpcmp_mode
;
11737 if (bypass_code
!= UNKNOWN
)
11738 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11739 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11741 if (second_code
!= UNKNOWN
)
11742 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11743 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11748 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11749 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11750 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11752 scratch
= gen_reg_rtx (HImode
);
11753 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11755 /* In the unordered case, we have to check C2 for NaN's, which
11756 doesn't happen to work out to anything nice combination-wise.
11757 So do some bit twiddling on the value we've got in AH to come
11758 up with an appropriate set of condition codes. */
11760 intcmp_mode
= CCNOmode
;
11765 if (code
== GT
|| !TARGET_IEEE_FP
)
11767 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11772 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11773 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11774 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11775 intcmp_mode
= CCmode
;
11781 if (code
== LT
&& TARGET_IEEE_FP
)
11783 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11784 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11785 intcmp_mode
= CCmode
;
11790 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11796 if (code
== GE
|| !TARGET_IEEE_FP
)
11798 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11803 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11804 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11811 if (code
== LE
&& TARGET_IEEE_FP
)
11813 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11814 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11815 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11816 intcmp_mode
= CCmode
;
11821 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11827 if (code
== EQ
&& TARGET_IEEE_FP
)
11829 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11830 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11831 intcmp_mode
= CCmode
;
11836 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11843 if (code
== NE
&& TARGET_IEEE_FP
)
11845 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11846 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11852 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11858 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11862 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11867 gcc_unreachable ();
11871 /* Return the test that should be put into the flags user, i.e.
11872 the bcc, scc, or cmov instruction. */
11873 return gen_rtx_fmt_ee (code
, VOIDmode
,
11874 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
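/* A summary of the magic constants tested above, for reference only:
   after FNSTSW the FPU condition codes land in AH as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40, so 0x45 tests all three at once.  FCOM sets
   none of them for "greater", C0 for "less", C3 for "equal" and all
   three for "unordered"; e.g. the GT case above tests $0x45, whose result
   is zero exactly when the comparison was ordered and strictly greater.  */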
11879 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11882 op0
= ix86_compare_op0
;
11883 op1
= ix86_compare_op1
;
11886 *second_test
= NULL_RTX
;
11888 *bypass_test
= NULL_RTX
;
11890 if (ix86_compare_emitted
)
11892 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11893 ix86_compare_emitted
= NULL_RTX
;
11895 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11897 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
11898 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11899 second_test
, bypass_test
);
11902 ret
= ix86_expand_int_compare (code
, op0
, op1
);
/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}
11919 ix86_expand_branch (enum rtx_code code
, rtx label
)
11923 /* If we have emitted a compare insn, go straight to simple.
11924 ix86_expand_compare won't emit anything if ix86_compare_emitted
11926 if (ix86_compare_emitted
)
11929 switch (GET_MODE (ix86_compare_op0
))
11935 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11936 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11937 gen_rtx_LABEL_REF (VOIDmode
, label
),
11939 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11948 enum rtx_code bypass_code
, first_code
, second_code
;
11950 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11951 &ix86_compare_op1
);
11953 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11955 /* Check whether we will use the natural sequence with one jump. If
11956 so, we can expand jump early. Otherwise delay expansion by
11957 creating compound insn to not confuse optimizers. */
11958 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
)
11960 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11961 gen_rtx_LABEL_REF (VOIDmode
, label
),
11962 pc_rtx
, NULL_RTX
, NULL_RTX
);
11966 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11967 ix86_compare_op0
, ix86_compare_op1
);
11968 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11969 gen_rtx_LABEL_REF (VOIDmode
, label
),
11971 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11973 use_fcomi
= ix86_use_fcomi_compare (code
);
11974 vec
= rtvec_alloc (3 + !use_fcomi
);
11975 RTVEC_ELT (vec
, 0) = tmp
;
11977 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FPSR_REG
));
11979 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FLAGS_REG
));
11982 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11984 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11993 /* Expand DImode branch into multiple compare+branch. */
11995 rtx lo
[2], hi
[2], label2
;
11996 enum rtx_code code1
, code2
, code3
;
11997 enum machine_mode submode
;
11999 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
12001 tmp
= ix86_compare_op0
;
12002 ix86_compare_op0
= ix86_compare_op1
;
12003 ix86_compare_op1
= tmp
;
12004 code
= swap_condition (code
);
12006 if (GET_MODE (ix86_compare_op0
) == DImode
)
12008 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
12009 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
12014 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
12015 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
12019 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12020 avoid two branches. This costs one extra insn, so disable when
12021 optimizing for size. */
12023 if ((code
== EQ
|| code
== NE
)
12025 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
12030 if (hi
[1] != const0_rtx
)
12031 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
12032 NULL_RTX
, 0, OPTAB_WIDEN
);
12035 if (lo
[1] != const0_rtx
)
12036 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
12037 NULL_RTX
, 0, OPTAB_WIDEN
);
12039 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
12040 NULL_RTX
, 0, OPTAB_WIDEN
);
12042 ix86_compare_op0
= tmp
;
12043 ix86_compare_op1
= const0_rtx
;
12044 ix86_expand_branch (code
, label
);
12048 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12049 op1 is a constant and the low word is zero, then we can just
12050 examine the high word. */
12052 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
12055 case LT
: case LTU
: case GE
: case GEU
:
12056 ix86_compare_op0
= hi
[0];
12057 ix86_compare_op1
= hi
[1];
12058 ix86_expand_branch (code
, label
);
12064 /* Otherwise, we need two or three jumps. */
12066 label2
= gen_label_rtx ();
12069 code2
= swap_condition (code
);
12070 code3
= unsigned_condition (code
);
12074 case LT
: case GT
: case LTU
: case GTU
:
12077 case LE
: code1
= LT
; code2
= GT
; break;
12078 case GE
: code1
= GT
; code2
= LT
; break;
12079 case LEU
: code1
= LTU
; code2
= GTU
; break;
12080 case GEU
: code1
= GTU
; code2
= LTU
; break;
12082 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
12083 case NE
: code2
= UNKNOWN
; break;
12086 gcc_unreachable ();
12091 * if (hi(a) < hi(b)) goto true;
12092 * if (hi(a) > hi(b)) goto false;
12093 * if (lo(a) < lo(b)) goto true;
12097 ix86_compare_op0
= hi
[0];
12098 ix86_compare_op1
= hi
[1];
12100 if (code1
!= UNKNOWN
)
12101 ix86_expand_branch (code1
, label
);
12102 if (code2
!= UNKNOWN
)
12103 ix86_expand_branch (code2
, label2
);
12105 ix86_compare_op0
= lo
[0];
12106 ix86_compare_op1
= lo
[1];
12107 ix86_expand_branch (code3
, label
);
12109 if (code2
!= UNKNOWN
)
12110 emit_label (label2
);
12115 gcc_unreachable ();
12119 /* Split branch based on floating point condition. */
12121 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
12122 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
12124 rtx second
, bypass
;
12125 rtx label
= NULL_RTX
;
12127 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
12130 if (target2
!= pc_rtx
)
12133 code
= reverse_condition_maybe_unordered (code
);
12138 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
12139 tmp
, &second
, &bypass
);
12141 /* Remove pushed operand from stack. */
12143 ix86_free_from_memory (GET_MODE (pushed
));
12145 if (split_branch_probability
>= 0)
12147 /* Distribute the probabilities across the jumps.
12148 Assume the BYPASS and SECOND to be always test
12150 probability
= split_branch_probability
;
12152 /* Value of 1 is low enough to make no need for probability
12153 to be updated. Later we may run some experiments and see
12154 if unordered values are more frequent in practice. */
12156 bypass_probability
= 1;
12158 second_probability
= 1;
12160 if (bypass
!= NULL_RTX
)
12162 label
= gen_label_rtx ();
12163 i
= emit_jump_insn (gen_rtx_SET
12165 gen_rtx_IF_THEN_ELSE (VOIDmode
,
12167 gen_rtx_LABEL_REF (VOIDmode
,
12170 if (bypass_probability
>= 0)
12172 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12173 GEN_INT (bypass_probability
),
12176 i
= emit_jump_insn (gen_rtx_SET
12178 gen_rtx_IF_THEN_ELSE (VOIDmode
,
12179 condition
, target1
, target2
)));
12180 if (probability
>= 0)
12182 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12183 GEN_INT (probability
),
12185 if (second
!= NULL_RTX
)
12187 i
= emit_jump_insn (gen_rtx_SET
12189 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
12191 if (second_probability
>= 0)
12193 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12194 GEN_INT (second_probability
),
12197 if (label
!= NULL_RTX
)
12198 emit_label (label
);
12202 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
12204 rtx ret
, tmp
, tmpreg
, equiv
;
12205 rtx second_test
, bypass_test
;
12207 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
12208 return 0; /* FAIL */
12210 gcc_assert (GET_MODE (dest
) == QImode
);
12212 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12213 PUT_MODE (ret
, QImode
);
12218 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
12219 if (bypass_test
|| second_test
)
12221 rtx test
= second_test
;
12223 rtx tmp2
= gen_reg_rtx (QImode
);
12226 gcc_assert (!second_test
);
12227 test
= bypass_test
;
12229 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
12231 PUT_MODE (test
, QImode
);
12232 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
12235 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
12237 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
12240 /* Attach a REG_EQUAL note describing the comparison result. */
12241 if (ix86_compare_op0
&& ix86_compare_op1
)
12243 equiv
= simplify_gen_relational (code
, QImode
,
12244 GET_MODE (ix86_compare_op0
),
12245 ix86_compare_op0
, ix86_compare_op1
);
12246 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
12249 return 1; /* DONE */
12252 /* Expand comparison setting or clearing carry flag. Return true when
12253 successful and set pop for the operation. */
12255 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
12257 enum machine_mode mode
=
12258 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
12260 /* Do not handle DImode compares that go through special path. */
12261 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
12264 if (SCALAR_FLOAT_MODE_P (mode
))
12266 rtx second_test
= NULL
, bypass_test
= NULL
;
12267 rtx compare_op
, compare_seq
;
12269 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
12271 /* Shortcut: following common codes never translate
12272 into carry flag compares. */
12273 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
12274 || code
== ORDERED
|| code
== UNORDERED
)
12277 /* These comparisons require zero flag; swap operands so they won't. */
12278 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
12279 && !TARGET_IEEE_FP
)
12284 code
= swap_condition (code
);
12287 /* Try to expand the comparison and verify that we end up with
12288 carry flag based comparison. This fails to be true only when
12289 we decide to expand comparison using arithmetic that is not
12290 too common scenario. */
12292 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
12293 &second_test
, &bypass_test
);
12294 compare_seq
= get_insns ();
12297 if (second_test
|| bypass_test
)
12300 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12301 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12302 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
12304 code
= GET_CODE (compare_op
);
12306 if (code
!= LTU
&& code
!= GEU
)
12309 emit_insn (compare_seq
);
12314 if (!INTEGRAL_MODE_P (mode
))
12323 /* Convert a==0 into (unsigned)a<1. */
12326 if (op1
!= const0_rtx
)
12329 code
= (code
== EQ
? LTU
: GEU
);
12332 /* Convert a>b into b<a or a>=b-1. */
12335 if (CONST_INT_P (op1
))
12337 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
12338 /* Bail out on overflow. We still can swap operands but that
12339 would force loading of the constant into register. */
12340 if (op1
== const0_rtx
12341 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
12343 code
= (code
== GTU
? GEU
: LTU
);
12350 code
= (code
== GTU
? LTU
: GEU
);
12354 /* Convert a>=0 into (unsigned)a<0x80000000. */
12357 if (mode
== DImode
|| op1
!= const0_rtx
)
12359 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12360 code
= (code
== LT
? GEU
: LTU
);
12364 if (mode
== DImode
|| op1
!= constm1_rtx
)
12366 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12367 code
= (code
== LE
? GEU
: LTU
);
12373 /* Swapping operands may cause constant to appear as first operand. */
12374 if (!nonimmediate_operand (op0
, VOIDmode
))
12376 if (!can_create_pseudo_p ())
12378 op0
= force_reg (mode
, op0
);
12380 ix86_compare_op0
= op0
;
12381 ix86_compare_op1
= op1
;
12382 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
12383 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
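/* Worked examples of the rewrites above, for reference only:
     a == 0            becomes   (unsigned) a < 1             (LTU)
     a >u 7            becomes   (unsigned) a >= 8            (GEU)
     a >= 0  (signed)  becomes   (unsigned) a < 0x80000000    (LTU)
   Each form leaves its answer in the carry flag, which the sbb/adc based
   sequences in ix86_expand_int_movcc can then consume directly.  */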
12388 ix86_expand_int_movcc (rtx operands
[])
12390 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
12391 rtx compare_seq
, compare_op
;
12392 rtx second_test
, bypass_test
;
12393 enum machine_mode mode
= GET_MODE (operands
[0]);
12394 bool sign_bit_compare_p
= false;;
12397 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12398 compare_seq
= get_insns ();
12401 compare_code
= GET_CODE (compare_op
);
12403 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
12404 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
12405 sign_bit_compare_p
= true;
12407 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12408 HImode insns, we'd be swallowed in word prefix ops. */
12410 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
12411 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
12412 && CONST_INT_P (operands
[2])
12413 && CONST_INT_P (operands
[3]))
12415 rtx out
= operands
[0];
12416 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
12417 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
12418 HOST_WIDE_INT diff
;
12421 /* Sign bit compares are better done using shifts than we do by using
12423 if (sign_bit_compare_p
12424 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12425 ix86_compare_op1
, &compare_op
))
12427 /* Detect overlap between destination and compare sources. */
12430 if (!sign_bit_compare_p
)
12432 bool fpcmp
= false;
12434 compare_code
= GET_CODE (compare_op
);
12436 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12437 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12440 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
12443 /* To simplify rest of code, restrict to the GEU case. */
12444 if (compare_code
== LTU
)
12446 HOST_WIDE_INT tmp
= ct
;
12449 compare_code
= reverse_condition (compare_code
);
12450 code
= reverse_condition (code
);
12455 PUT_CODE (compare_op
,
12456 reverse_condition_maybe_unordered
12457 (GET_CODE (compare_op
)));
12459 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12463 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
12464 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
12465 tmp
= gen_reg_rtx (mode
);
12467 if (mode
== DImode
)
12468 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
12470 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
12474 if (code
== GT
|| code
== GE
)
12475 code
= reverse_condition (code
);
12478 HOST_WIDE_INT tmp
= ct
;
12483 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
12484 ix86_compare_op1
, VOIDmode
, 0, -1);
12497 tmp
= expand_simple_binop (mode
, PLUS
,
12499 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12510 tmp
= expand_simple_binop (mode
, IOR
,
12512 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12514 else if (diff
== -1 && ct
)
12524 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12526 tmp
= expand_simple_binop (mode
, PLUS
,
12527 copy_rtx (tmp
), GEN_INT (cf
),
12528 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12536 * andl cf - ct, dest
12546 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12549 tmp
= expand_simple_binop (mode
, AND
,
12551 gen_int_mode (cf
- ct
, mode
),
12552 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12554 tmp
= expand_simple_binop (mode
, PLUS
,
12555 copy_rtx (tmp
), GEN_INT (ct
),
12556 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12559 if (!rtx_equal_p (tmp
, out
))
12560 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
12562 return 1; /* DONE */
12567 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12570 tmp
= ct
, ct
= cf
, cf
= tmp
;
12573 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12575 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12577 /* We may be reversing unordered compare to normal compare, that
12578 is not valid in general (we may convert non-trapping condition
12579 to trapping one), however on i386 we currently emit all
12580 comparisons unordered. */
12581 compare_code
= reverse_condition_maybe_unordered (compare_code
);
12582 code
= reverse_condition_maybe_unordered (code
);
12586 compare_code
= reverse_condition (compare_code
);
12587 code
= reverse_condition (code
);
12591 compare_code
= UNKNOWN
;
12592 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
12593 && CONST_INT_P (ix86_compare_op1
))
12595 if (ix86_compare_op1
== const0_rtx
12596 && (code
== LT
|| code
== GE
))
12597 compare_code
= code
;
12598 else if (ix86_compare_op1
== constm1_rtx
)
12602 else if (code
== GT
)
12607 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12608 if (compare_code
!= UNKNOWN
12609 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
12610 && (cf
== -1 || ct
== -1))
12612 /* If lea code below could be used, only optimize
12613 if it results in a 2 insn sequence. */
12615 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12616 || diff
== 3 || diff
== 5 || diff
== 9)
12617 || (compare_code
== LT
&& ct
== -1)
12618 || (compare_code
== GE
&& cf
== -1))
12621 * notl op1 (if necessary)
12629 code
= reverse_condition (code
);
12632 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12633 ix86_compare_op1
, VOIDmode
, 0, -1);
12635 out
= expand_simple_binop (mode
, IOR
,
12637 out
, 1, OPTAB_DIRECT
);
12638 if (out
!= operands
[0])
12639 emit_move_insn (operands
[0], out
);
12641 return 1; /* DONE */
12646 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12647 || diff
== 3 || diff
== 5 || diff
== 9)
12648 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
12650 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
12656 * lea cf(dest*(ct-cf)),dest
12660 * This also catches the degenerate setcc-only case.
12666 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12667 ix86_compare_op1
, VOIDmode
, 0, 1);
12670 /* On x86_64 the lea instruction operates on Pmode, so we need
12671 to get arithmetics done in proper mode to match. */
12673 tmp
= copy_rtx (out
);
12677 out1
= copy_rtx (out
);
12678 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12682 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12688 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12691 if (!rtx_equal_p (tmp
, out
))
12694 out
= force_operand (tmp
, copy_rtx (out
));
12696 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12698 if (!rtx_equal_p (out
, operands
[0]))
12699 emit_move_insn (operands
[0], copy_rtx (out
));
12701 return 1; /* DONE */
12705 * General case: Jumpful:
12706 * xorl dest,dest cmpl op1, op2
12707 * cmpl op1, op2 movl ct, dest
12708 * setcc dest jcc 1f
12709 * decl dest movl cf, dest
12710 * andl (cf-ct),dest 1:
12713 * Size 20. Size 14.
12715 * This is reasonably steep, but branch mispredict costs are
12716 * high on modern cpus, so consider failing only if optimizing
12720 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12721 && BRANCH_COST
>= 2)
12725 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12730 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12732 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12734 /* We may be reversing unordered compare to normal compare,
12735 that is not valid in general (we may convert non-trapping
12736 condition to trapping one), however on i386 we currently
12737 emit all comparisons unordered. */
12738 code
= reverse_condition_maybe_unordered (code
);
12742 code
= reverse_condition (code
);
12743 if (compare_code
!= UNKNOWN
)
12744 compare_code
= reverse_condition (compare_code
);
12748 if (compare_code
!= UNKNOWN
)
12750 /* notl op1 (if needed)
12755 For x < 0 (resp. x <= -1) there will be no notl,
12756 so if possible swap the constants to get rid of the
12758 True/false will be -1/0 while code below (store flag
12759 followed by decrement) is 0/-1, so the constants need
12760 to be exchanged once more. */
12762 if (compare_code
== GE
|| !cf
)
12764 code
= reverse_condition (code
);
12769 HOST_WIDE_INT tmp
= cf
;
12774 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12775 ix86_compare_op1
, VOIDmode
, 0, -1);
12779 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12780 ix86_compare_op1
, VOIDmode
, 0, 1);
12782 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12783 copy_rtx (out
), 1, OPTAB_DIRECT
);
12786 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12787 gen_int_mode (cf
- ct
, mode
),
12788 copy_rtx (out
), 1, OPTAB_DIRECT
);
12790 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12791 copy_rtx (out
), 1, OPTAB_DIRECT
);
12792 if (!rtx_equal_p (out
, operands
[0]))
12793 emit_move_insn (operands
[0], copy_rtx (out
));
12795 return 1; /* DONE */
12799 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12801 /* Try a few things more with specific constants and a variable. */
12804 rtx var
, orig_out
, out
, tmp
;
12806 if (BRANCH_COST
<= 2)
12807 return 0; /* FAIL */
12809 /* If one of the two operands is an interesting constant, load a
12810 constant with the above and mask it in with a logical operation. */
12812 if (CONST_INT_P (operands
[2]))
12815 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12816 operands
[3] = constm1_rtx
, op
= and_optab
;
12817 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12818 operands
[3] = const0_rtx
, op
= ior_optab
;
12820 return 0; /* FAIL */
12822 else if (CONST_INT_P (operands
[3]))
12825 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12826 operands
[2] = constm1_rtx
, op
= and_optab
;
12827 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12828 operands
[2] = const0_rtx
, op
= ior_optab
;
12830 return 0; /* FAIL */
12833 return 0; /* FAIL */
12835 orig_out
= operands
[0];
12836 tmp
= gen_reg_rtx (mode
);
12839 /* Recurse to get the constant loaded. */
12840 if (ix86_expand_int_movcc (operands
) == 0)
12841 return 0; /* FAIL */
12843 /* Mask in the interesting variable. */
12844 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12846 if (!rtx_equal_p (out
, orig_out
))
12847 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12849 return 1; /* DONE */
12853 * For comparison with above,
12863 if (! nonimmediate_operand (operands
[2], mode
))
12864 operands
[2] = force_reg (mode
, operands
[2]);
12865 if (! nonimmediate_operand (operands
[3], mode
))
12866 operands
[3] = force_reg (mode
, operands
[3]);
12868 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12870 rtx tmp
= gen_reg_rtx (mode
);
12871 emit_move_insn (tmp
, operands
[3]);
12874 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12876 rtx tmp
= gen_reg_rtx (mode
);
12877 emit_move_insn (tmp
, operands
[2]);
12881 if (! register_operand (operands
[2], VOIDmode
)
12883 || ! register_operand (operands
[3], VOIDmode
)))
12884 operands
[2] = force_reg (mode
, operands
[2]);
12887 && ! register_operand (operands
[3], VOIDmode
))
12888 operands
[3] = force_reg (mode
, operands
[3]);
12890 emit_insn (compare_seq
);
12891 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12892 gen_rtx_IF_THEN_ELSE (mode
,
12893 compare_op
, operands
[2],
12896 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12897 gen_rtx_IF_THEN_ELSE (mode
,
12899 copy_rtx (operands
[3]),
12900 copy_rtx (operands
[0]))));
12902 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12903 gen_rtx_IF_THEN_ELSE (mode
,
12905 copy_rtx (operands
[2]),
12906 copy_rtx (operands
[0]))));
12908 return 1; /* DONE */
12911 /* Swap, force into registers, or otherwise massage the two operands
12912 to an sse comparison with a mask result. Thus we differ a bit from
12913 ix86_prepare_fp_compare_args which expects to produce a flags result.
12915 The DEST operand exists to help determine whether to commute commutative
12916 operators. The POP0/POP1 operands are updated in place. The new
12917 comparison code is returned, or UNKNOWN if not implementable. */
12919 static enum rtx_code
12920 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12921 rtx
*pop0
, rtx
*pop1
)
12929 /* We have no LTGT as an operator. We could implement it with
12930 NE & ORDERED, but this requires an extra temporary. It's
12931 not clear that it's worth it. */
12938 /* These are supported directly. */
12945 /* For commutative operators, try to canonicalize the destination
12946 operand to be first in the comparison - this helps reload to
12947 avoid extra moves. */
12948 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12956 /* These are not supported directly. Swap the comparison operands
12957 to transform into something that is supported. */
12961 code
= swap_condition (code
);
12965 gcc_unreachable ();
12971 /* Detect conditional moves that exactly match min/max operational
12972 semantics. Note that this is IEEE safe, as long as we don't
12973 interchange the operands.
12975 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12976 and TRUE if the operation is successful and instructions are emitted. */
12979 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12980 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12982 enum machine_mode mode
;
12988 else if (code
== UNGE
)
12991 if_true
= if_false
;
12997 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12999 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
13004 mode
= GET_MODE (dest
);
13006 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13007 but MODE may be a vector mode and thus not appropriate. */
13008 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
13010 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
13013 if_true
= force_reg (mode
, if_true
);
13014 v
= gen_rtvec (2, if_true
, if_false
);
13015 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
13019 code
= is_min
? SMIN
: SMAX
;
13020 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
13023 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
13027 /* Expand an sse vector comparison. Return the register with the result. */
13030 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
13031 rtx op_true
, rtx op_false
)
13033 enum machine_mode mode
= GET_MODE (dest
);
13036 cmp_op0
= force_reg (mode
, cmp_op0
);
13037 if (!nonimmediate_operand (cmp_op1
, mode
))
13038 cmp_op1
= force_reg (mode
, cmp_op1
);
13041 || reg_overlap_mentioned_p (dest
, op_true
)
13042 || reg_overlap_mentioned_p (dest
, op_false
))
13043 dest
= gen_reg_rtx (mode
);
13045 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
13046 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13051 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13052 operations. This is used for both scalar and vector conditional moves. */
13055 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
13057 enum machine_mode mode
= GET_MODE (dest
);
13062 rtx pcmov
= gen_rtx_SET (mode
, dest
,
13063 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
13068 else if (op_false
== CONST0_RTX (mode
))
13070 op_true
= force_reg (mode
, op_true
);
13071 x
= gen_rtx_AND (mode
, cmp
, op_true
);
13072 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13074 else if (op_true
== CONST0_RTX (mode
))
13076 op_false
= force_reg (mode
, op_false
);
13077 x
= gen_rtx_NOT (mode
, cmp
);
13078 x
= gen_rtx_AND (mode
, x
, op_false
);
13079 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13083 op_true
= force_reg (mode
, op_true
);
13084 op_false
= force_reg (mode
, op_false
);
13086 t2
= gen_reg_rtx (mode
);
13088 t3
= gen_reg_rtx (mode
);
13092 x
= gen_rtx_AND (mode
, op_true
, cmp
);
13093 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
13095 x
= gen_rtx_NOT (mode
, cmp
);
13096 x
= gen_rtx_AND (mode
, x
, op_false
);
13097 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
13099 x
= gen_rtx_IOR (mode
, t3
, t2
);
13100 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13104 /* Expand a floating-point conditional move. Return true if successful. */
13107 ix86_expand_fp_movcc (rtx operands
[])
13109 enum machine_mode mode
= GET_MODE (operands
[0]);
13110 enum rtx_code code
= GET_CODE (operands
[1]);
13111 rtx tmp
, compare_op
, second_test
, bypass_test
;
13113 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
13115 enum machine_mode cmode
;
13117 /* Since we've no cmove for sse registers, don't force bad register
13118 allocation just to gain access to it. Deny movcc when the
13119 comparison mode doesn't match the move mode. */
13120 cmode
= GET_MODE (ix86_compare_op0
);
13121 if (cmode
== VOIDmode
)
13122 cmode
= GET_MODE (ix86_compare_op1
);
13126 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
13128 &ix86_compare_op1
);
13129 if (code
== UNKNOWN
)
13132 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
13133 ix86_compare_op1
, operands
[2],
13137 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
13138 ix86_compare_op1
, operands
[2], operands
[3]);
13139 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
13143 /* The floating point conditional move instructions don't directly
13144 support conditions resulting from a signed integer comparison. */
13146 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
13148 /* The floating point conditional move instructions don't directly
13149 support signed integer comparisons. */
13151 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
13153 gcc_assert (!second_test
&& !bypass_test
);
13154 tmp
= gen_reg_rtx (QImode
);
13155 ix86_expand_setcc (code
, tmp
);
13157 ix86_compare_op0
= tmp
;
13158 ix86_compare_op1
= const0_rtx
;
13159 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
13161 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
13163 tmp
= gen_reg_rtx (mode
);
13164 emit_move_insn (tmp
, operands
[3]);
13167 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
13169 tmp
= gen_reg_rtx (mode
);
13170 emit_move_insn (tmp
, operands
[2]);
13174 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13175 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
13176 operands
[2], operands
[3])));
13178 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13179 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
13180 operands
[3], operands
[0])));
13182 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13183 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
13184 operands
[2], operands
[0])));
13189 /* Expand a floating-point vector conditional move; a vcond operation
13190 rather than a movcc operation. */
13193 ix86_expand_fp_vcond (rtx operands
[])
13195 enum rtx_code code
= GET_CODE (operands
[3]);
13198 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
13199 &operands
[4], &operands
[5]);
13200 if (code
== UNKNOWN
)
13203 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
13204 operands
[5], operands
[1], operands
[2]))
13207 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
13208 operands
[1], operands
[2]);
13209 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
13213 /* Expand a signed/unsigned integral vector conditional move. */
13216 ix86_expand_int_vcond (rtx operands
[])
13218 enum machine_mode mode
= GET_MODE (operands
[0]);
13219 enum rtx_code code
= GET_CODE (operands
[3]);
13220 bool negate
= false;
13223 cop0
= operands
[4];
13224 cop1
= operands
[5];
13226 /* Canonicalize the comparison to EQ, GT, GTU. */
13237 code
= reverse_condition (code
);
13243 code
= reverse_condition (code
);
13249 code
= swap_condition (code
);
13250 x
= cop0
, cop0
= cop1
, cop1
= x
;
13254 gcc_unreachable ();
13257 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13258 if (mode
== V2DImode
)
13263 /* SSE4.1 supports EQ. */
13264 if (!TARGET_SSE4_1
)
13270 /* SSE4.2 supports GT/GTU. */
13271 if (!TARGET_SSE4_2
)
13276 gcc_unreachable ();
13280 /* Unsigned parallel compare is not supported by the hardware. Play some
13281 tricks to turn this into a signed comparison against 0. */
13284 cop0
= force_reg (mode
, cop0
);
13293 /* Perform a parallel modulo subtraction. */
13294 t1
= gen_reg_rtx (mode
);
13295 emit_insn ((mode
== V4SImode
13297 : gen_subv2di3
) (t1
, cop0
, cop1
));
13299 /* Extract the original sign bit of op0. */
13300 mask
= ix86_build_signbit_mask (GET_MODE_INNER (mode
),
13302 t2
= gen_reg_rtx (mode
);
13303 emit_insn ((mode
== V4SImode
13305 : gen_andv2di3
) (t2
, cop0
, mask
));
13307 /* XOR it back into the result of the subtraction. This results
13308 in the sign bit set iff we saw unsigned underflow. */
13309 x
= gen_reg_rtx (mode
);
13310 emit_insn ((mode
== V4SImode
13312 : gen_xorv2di3
) (x
, t1
, t2
));
13320 /* Perform a parallel unsigned saturating subtraction. */
13321 x
= gen_reg_rtx (mode
);
13322 emit_insn (gen_rtx_SET (VOIDmode
, x
,
13323 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
13330 gcc_unreachable ();
13334 cop1
= CONST0_RTX (mode
);
13337 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
13338 operands
[1+negate
], operands
[2-negate
]);
13340 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
13341 operands
[2-negate
]);
13345 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13346 true if we should do zero extension, else sign extension. HIGH_P is
13347 true if we want the N/2 high elements, else the low elements. */
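
/* Editor's note (not part of the original file): the SSE2 path below widens
   by interleaving.  Zero extension interleaves OP[1] with a zero vector;
   sign extension interleaves it with the mask computed as (0 > OP[1]),
   which is all-ones exactly for the negative elements, so each narrow
   element ends up followed by its correct upper half.  */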
13350 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13352 enum machine_mode imode
= GET_MODE (operands
[1]);
13353 rtx (*unpack
)(rtx
, rtx
, rtx
);
13360 unpack
= gen_vec_interleave_highv16qi
;
13362 unpack
= gen_vec_interleave_lowv16qi
;
13366 unpack
= gen_vec_interleave_highv8hi
;
13368 unpack
= gen_vec_interleave_lowv8hi
;
13372 unpack
= gen_vec_interleave_highv4si
;
13374 unpack
= gen_vec_interleave_lowv4si
;
13377 gcc_unreachable ();
13380 dest
= gen_lowpart (imode
, operands
[0]);
13383 se
= force_reg (imode
, CONST0_RTX (imode
));
13385 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
13386 operands
[1], pc_rtx
, pc_rtx
);
13388 emit_insn (unpack (dest
, operands
[1], se
));
13391 /* This function performs the same task as ix86_expand_sse_unpack,
13392 but with SSE4.1 instructions. */
13395 ix86_expand_sse4_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13397 enum machine_mode imode
= GET_MODE (operands
[1]);
13398 rtx (*unpack
)(rtx
, rtx
);
13405 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
13407 unpack
= gen_sse4_1_extendv8qiv8hi2
;
13411 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
13413 unpack
= gen_sse4_1_extendv4hiv4si2
;
13417 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
13419 unpack
= gen_sse4_1_extendv2siv2di2
;
13422 gcc_unreachable ();
13425 dest
= operands
[0];
13428 /* Shift higher 8 bytes to lower 8 bytes. */
13429 src
= gen_reg_rtx (imode
);
13430 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode
, src
),
13431 gen_lowpart (TImode
, operands
[1]),
13437 emit_insn (unpack (dest
, src
));
13440 /* This function performs the same task as ix86_expand_sse_unpack,
13441 but with amdfam15 instructions. */
#define PPERM_SRC 0x00		/* copy source */
#define PPERM_INVERT 0x20	/* invert source */
#define PPERM_REVERSE 0x40	/* bit reverse source */
#define PPERM_REV_INV 0x60	/* bit reverse & invert src */
#define PPERM_ZERO 0x80		/* all 0's */
#define PPERM_ONES 0xa0		/* all 1's */
#define PPERM_SIGN 0xc0		/* propagate sign bit */
#define PPERM_INV_SIGN 0xe0	/* invert & propagate sign */

#define PPERM_SRC1 0x00		/* use first source byte */
#define PPERM_SRC2 0x10		/* use second source byte */
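
/* Editor's note (not part of the original file): each of the 16 control
   bytes in a pperm selector picks one source byte through its low five bits
   (bit 4 chooses between the two sources, bits 0-3 the byte index within
   it) and applies one of the operations defined above through bits 5-7;
   e.g. PPERM_SIGN | PPERM_SRC2 | i fills the result byte with the sign of
   byte i of the second source.  */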
13456 ix86_expand_sse5_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13458 enum machine_mode imode
= GET_MODE (operands
[1]);
13459 int pperm_bytes
[16];
13461 int h
= (high_p
) ? 8 : 0;
13464 rtvec v
= rtvec_alloc (16);
13467 rtx op0
= operands
[0], op1
= operands
[1];
13472 vs
= rtvec_alloc (8);
13473 h2
= (high_p
) ? 8 : 0;
13474 for (i
= 0; i
< 8; i
++)
13476 pperm_bytes
[2*i
+0] = PPERM_SRC
| PPERM_SRC2
| i
| h
;
13477 pperm_bytes
[2*i
+1] = ((unsigned_p
)
13479 : PPERM_SIGN
| PPERM_SRC2
| i
| h
);
13482 for (i
= 0; i
< 16; i
++)
13483 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13485 for (i
= 0; i
< 8; i
++)
13486 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
13488 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
13489 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13491 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0
, op1
, p
, x
));
13493 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0
, op1
, p
, x
));
13497 vs
= rtvec_alloc (4);
13498 h2
= (high_p
) ? 4 : 0;
13499 for (i
= 0; i
< 4; i
++)
13501 sign_extend
= ((unsigned_p
)
13503 : PPERM_SIGN
| PPERM_SRC2
| ((2*i
) + 1 + h
));
13504 pperm_bytes
[4*i
+0] = PPERM_SRC
| PPERM_SRC2
| ((2*i
) + 0 + h
);
13505 pperm_bytes
[4*i
+1] = PPERM_SRC
| PPERM_SRC2
| ((2*i
) + 1 + h
);
13506 pperm_bytes
[4*i
+2] = sign_extend
;
13507 pperm_bytes
[4*i
+3] = sign_extend
;
13510 for (i
= 0; i
< 16; i
++)
13511 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13513 for (i
= 0; i
< 4; i
++)
13514 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
13516 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
13517 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13519 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0
, op1
, p
, x
));
13521 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0
, op1
, p
, x
));
13525 vs
= rtvec_alloc (2);
13526 h2
= (high_p
) ? 2 : 0;
13527 for (i
= 0; i
< 2; i
++)
13529 sign_extend
= ((unsigned_p
)
13531 : PPERM_SIGN
| PPERM_SRC2
| ((4*i
) + 3 + h
));
13532 pperm_bytes
[8*i
+0] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 0 + h
);
13533 pperm_bytes
[8*i
+1] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 1 + h
);
13534 pperm_bytes
[8*i
+2] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 2 + h
);
13535 pperm_bytes
[8*i
+3] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 3 + h
);
13536 pperm_bytes
[8*i
+4] = sign_extend
;
13537 pperm_bytes
[8*i
+5] = sign_extend
;
13538 pperm_bytes
[8*i
+6] = sign_extend
;
13539 pperm_bytes
[8*i
+7] = sign_extend
;
13542 for (i
= 0; i
< 16; i
++)
13543 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13545 for (i
= 0; i
< 2; i
++)
13546 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
13548 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
13549 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13551 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0
, op1
, p
, x
));
13553 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0
, op1
, p
, x
));
13557 gcc_unreachable ();
13563 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13564 next narrower integer vector type */
13566 ix86_expand_sse5_pack (rtx operands
[3])
13568 enum machine_mode imode
= GET_MODE (operands
[0]);
13569 int pperm_bytes
[16];
13571 rtvec v
= rtvec_alloc (16);
13573 rtx op0
= operands
[0];
13574 rtx op1
= operands
[1];
13575 rtx op2
= operands
[2];
13580 for (i
= 0; i
< 8; i
++)
13582 pperm_bytes
[i
+0] = PPERM_SRC
| PPERM_SRC1
| (i
*2);
13583 pperm_bytes
[i
+8] = PPERM_SRC
| PPERM_SRC2
| (i
*2);
13586 for (i
= 0; i
< 16; i
++)
13587 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13589 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13590 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0
, op1
, op2
, x
));
13594 for (i
= 0; i
< 4; i
++)
13596 pperm_bytes
[(2*i
)+0] = PPERM_SRC
| PPERM_SRC1
| ((i
*4) + 0);
13597 pperm_bytes
[(2*i
)+1] = PPERM_SRC
| PPERM_SRC1
| ((i
*4) + 1);
13598 pperm_bytes
[(2*i
)+8] = PPERM_SRC
| PPERM_SRC2
| ((i
*4) + 0);
13599 pperm_bytes
[(2*i
)+9] = PPERM_SRC
| PPERM_SRC2
| ((i
*4) + 1);
13602 for (i
= 0; i
< 16; i
++)
13603 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13605 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13606 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0
, op1
, op2
, x
));
13610 for (i
= 0; i
< 2; i
++)
13612 pperm_bytes
[(4*i
)+0] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 0);
13613 pperm_bytes
[(4*i
)+1] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 1);
13614 pperm_bytes
[(4*i
)+2] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 2);
13615 pperm_bytes
[(4*i
)+3] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 3);
13616 pperm_bytes
[(4*i
)+8] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 0);
13617 pperm_bytes
[(4*i
)+9] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 1);
13618 pperm_bytes
[(4*i
)+10] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 2);
13619 pperm_bytes
[(4*i
)+11] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 3);
13622 for (i
= 0; i
< 16; i
++)
13623 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13625 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13626 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0
, op1
, op2
, x
));
13630 gcc_unreachable ();
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
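
/* Editor's sketch (not part of the original file; register names are only
   illustrative): for "x += (a < b)" with unsigned operands the sequence
   produced below corresponds to

	cmpl	%ebx, %eax	; carry flag = (a < b)
	adcl	$0, %ecx	; x += carry

   and the decrement case uses sbb in the same way.  */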
13640 ix86_expand_int_addcc (rtx operands
[])
13642 enum rtx_code code
= GET_CODE (operands
[1]);
13644 rtx val
= const0_rtx
;
13645 bool fpcmp
= false;
13646 enum machine_mode mode
= GET_MODE (operands
[0]);
13648 if (operands
[3] != const1_rtx
13649 && operands
[3] != constm1_rtx
)
13651 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
13652 ix86_compare_op1
, &compare_op
))
13654 code
= GET_CODE (compare_op
);
13656 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
13657 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
13660 code
= ix86_fp_compare_code_to_integer (code
);
13667 PUT_CODE (compare_op
,
13668 reverse_condition_maybe_unordered
13669 (GET_CODE (compare_op
)));
13671 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
13673 PUT_MODE (compare_op
, mode
);
13675 /* Construct either adc or sbb insn. */
13676 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
13678 switch (GET_MODE (operands
[0]))
13681 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13684 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13687 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13690 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13693 gcc_unreachable ();
13698 switch (GET_MODE (operands
[0]))
13701 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13704 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13707 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13710 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13713 gcc_unreachable ();
13716 return 1; /* DONE */
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */
13726 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
13731 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
13733 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
13735 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
13736 gcc_assert (size
>= 2 && size
<= 3);
13738 /* Optimize constant pool reference to immediates. This is used by fp
13739 moves, that force all constants to memory to allow combining. */
13740 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
13742 rtx tmp
= maybe_get_pool_constant (operand
);
13747 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
      /* The only non-offsettable memories we handle are pushes.  */
13750 int ok
= push_operand (operand
, VOIDmode
);
13754 operand
= copy_rtx (operand
);
13755 PUT_MODE (operand
, Pmode
);
13756 parts
[0] = parts
[1] = parts
[2] = operand
;
13760 if (GET_CODE (operand
) == CONST_VECTOR
)
13762 enum machine_mode imode
= int_mode_for_mode (mode
);
13763 /* Caution: if we looked through a constant pool memory above,
13764 the operand may actually have a different mode now. That's
13765 ok, since we want to pun this all the way back to an integer. */
13766 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
13767 gcc_assert (operand
!= NULL
);
13773 if (mode
== DImode
)
13774 split_di (&operand
, 1, &parts
[0], &parts
[1]);
13777 if (REG_P (operand
))
13779 gcc_assert (reload_completed
);
13780 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
13781 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
13783 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
13785 else if (offsettable_memref_p (operand
))
13787 operand
= adjust_address (operand
, SImode
, 0);
13788 parts
[0] = operand
;
13789 parts
[1] = adjust_address (operand
, SImode
, 4);
13791 parts
[2] = adjust_address (operand
, SImode
, 8);
13793 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13798 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13802 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
13803 parts
[2] = gen_int_mode (l
[2], SImode
);
13806 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
13809 gcc_unreachable ();
13811 parts
[1] = gen_int_mode (l
[1], SImode
);
13812 parts
[0] = gen_int_mode (l
[0], SImode
);
13815 gcc_unreachable ();
13820 if (mode
== TImode
)
13821 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
13822 if (mode
== XFmode
|| mode
== TFmode
)
13824 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
13825 if (REG_P (operand
))
13827 gcc_assert (reload_completed
);
13828 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
13829 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
13831 else if (offsettable_memref_p (operand
))
13833 operand
= adjust_address (operand
, DImode
, 0);
13834 parts
[0] = operand
;
13835 parts
[1] = adjust_address (operand
, upper_mode
, 8);
13837 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13842 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13843 real_to_target (l
, &r
, mode
);
13845 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13846 if (HOST_BITS_PER_WIDE_INT
>= 64)
13849 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13850 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
13853 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
13855 if (upper_mode
== SImode
)
13856 parts
[1] = gen_int_mode (l
[2], SImode
);
13857 else if (HOST_BITS_PER_WIDE_INT
>= 64)
13860 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13861 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
13864 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
13867 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
13880 ix86_split_long_move (rtx operands
[])
13885 int collisions
= 0;
13886 enum machine_mode mode
= GET_MODE (operands
[0]);
13888 /* The DFmode expanders may ask us to move double.
13889 For 64bit target this is single move. By hiding the fact
13890 here we simplify i386.md splitters. */
13891 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
13893 /* Optimize constant pool reference to immediates. This is used by
13894 fp moves, that force all constants to memory to allow combining. */
13896 if (MEM_P (operands
[1])
13897 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
13898 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
13899 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
13900 if (push_operand (operands
[0], VOIDmode
))
13902 operands
[0] = copy_rtx (operands
[0]);
13903 PUT_MODE (operands
[0], Pmode
);
13906 operands
[0] = gen_lowpart (DImode
, operands
[0]);
13907 operands
[1] = gen_lowpart (DImode
, operands
[1]);
13908 emit_move_insn (operands
[0], operands
[1]);
13912 /* The only non-offsettable memory we handle is push. */
13913 if (push_operand (operands
[0], VOIDmode
))
13916 gcc_assert (!MEM_P (operands
[0])
13917 || offsettable_memref_p (operands
[0]));
13919 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
13920 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
13922 /* When emitting push, take care for source operands on the stack. */
13923 if (push
&& MEM_P (operands
[1])
13924 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
13927 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
13928 XEXP (part
[1][2], 0));
13929 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
13930 XEXP (part
[1][1], 0));
  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
13935 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
13937 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
13939 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13942 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
13945 /* Collision in the middle part can be handled by reordering. */
13946 if (collisions
== 1 && nparts
== 3
13947 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13950 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
13951 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
13954 /* If there are more collisions, we can't handle it by reordering.
13955 Do an lea to the last part and use only one colliding move. */
13956 else if (collisions
> 1)
13962 base
= part
[0][nparts
- 1];
13964 /* Handle the case when the last part isn't valid for lea.
13965 Happens in 64-bit mode storing the 12-byte XFmode. */
13966 if (GET_MODE (base
) != Pmode
)
13967 base
= gen_rtx_REG (Pmode
, REGNO (base
));
13969 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
13970 part
[1][0] = replace_equiv_address (part
[1][0], base
);
13971 part
[1][1] = replace_equiv_address (part
[1][1],
13972 plus_constant (base
, UNITS_PER_WORD
));
13974 part
[1][2] = replace_equiv_address (part
[1][2],
13975 plus_constant (base
, 8));
13985 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13986 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13987 emit_move_insn (part
[0][2], part
[1][2]);
      /* In 64-bit mode we don't have a 32-bit push available.  If this is a
	 register, that is OK - we will just use the larger counterpart.  We
	 also retype memory - these come from an attempt to avoid the REX
	 prefix on moving the second half of a TFmode value.  */
13996 if (GET_MODE (part
[1][1]) == SImode
)
13998 switch (GET_CODE (part
[1][1]))
14001 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
14005 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
14009 gcc_unreachable ();
14012 if (GET_MODE (part
[1][0]) == SImode
)
14013 part
[1][0] = part
[1][1];
14016 emit_move_insn (part
[0][1], part
[1][1]);
14017 emit_move_insn (part
[0][0], part
[1][0]);
14021 /* Choose correct order to not overwrite the source before it is copied. */
14022 if ((REG_P (part
[0][0])
14023 && REG_P (part
[1][1])
14024 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
14026 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
14028 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
14032 operands
[2] = part
[0][2];
14033 operands
[3] = part
[0][1];
14034 operands
[4] = part
[0][0];
14035 operands
[5] = part
[1][2];
14036 operands
[6] = part
[1][1];
14037 operands
[7] = part
[1][0];
14041 operands
[2] = part
[0][1];
14042 operands
[3] = part
[0][0];
14043 operands
[5] = part
[1][1];
14044 operands
[6] = part
[1][0];
14051 operands
[2] = part
[0][0];
14052 operands
[3] = part
[0][1];
14053 operands
[4] = part
[0][2];
14054 operands
[5] = part
[1][0];
14055 operands
[6] = part
[1][1];
14056 operands
[7] = part
[1][2];
14060 operands
[2] = part
[0][0];
14061 operands
[3] = part
[0][1];
14062 operands
[5] = part
[1][0];
14063 operands
[6] = part
[1][1];
14067 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14070 if (CONST_INT_P (operands
[5])
14071 && operands
[5] != const0_rtx
14072 && REG_P (operands
[2]))
14074 if (CONST_INT_P (operands
[6])
14075 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
14076 operands
[6] = operands
[2];
14079 && CONST_INT_P (operands
[7])
14080 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
14081 operands
[7] = operands
[2];
14085 && CONST_INT_P (operands
[6])
14086 && operands
[6] != const0_rtx
14087 && REG_P (operands
[3])
14088 && CONST_INT_P (operands
[7])
14089 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
14090 operands
[7] = operands
[3];
14093 emit_move_insn (operands
[2], operands
[5]);
14094 emit_move_insn (operands
[3], operands
[6]);
14096 emit_move_insn (operands
[4], operands
[7]);
14101 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14102 left shift by a constant, either using a single shift or
14103 a sequence of add instructions. */
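
/* Editor's sketch (not part of the original file): this exploits
   x + x == x << 1, so e.g. "x <<= 3" can be emitted as three add
   instructions when 3 * ix86_cost->add <= ix86_cost->shift_const,
   which is what the loop below does for !optimize_size.  */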
14106 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
14110 emit_insn ((mode
== DImode
14112 : gen_adddi3
) (operand
, operand
, operand
));
14114 else if (!optimize_size
14115 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
14118 for (i
=0; i
<count
; i
++)
14120 emit_insn ((mode
== DImode
14122 : gen_adddi3
) (operand
, operand
, operand
));
14126 emit_insn ((mode
== DImode
14128 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
14132 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14134 rtx low
[2], high
[2];
14136 const int single_width
= mode
== DImode
? 32 : 64;
14138 if (CONST_INT_P (operands
[2]))
14140 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14141 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14143 if (count
>= single_width
)
14145 emit_move_insn (high
[0], low
[1]);
14146 emit_move_insn (low
[0], const0_rtx
);
14148 if (count
> single_width
)
14149 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
14153 if (!rtx_equal_p (operands
[0], operands
[1]))
14154 emit_move_insn (operands
[0], operands
[1]);
14155 emit_insn ((mode
== DImode
14157 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
14158 ix86_expand_ashl_const (low
[0], count
, mode
);
14163 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14165 if (operands
[1] == const1_rtx
)
      /* Assuming we've chosen QImode-capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
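
      /* Editor's note (not part of the original file): the trick is that
	 for 1 << n on a double-word value one half is (n < word width) and
	 the other is (n >= word width); shifting each half left by n modulo
	 the word width (which is what the hardware shift does anyway) then
	 yields the correct pair with no branches.  */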
14169 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
14171 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
14173 ix86_expand_clear (low
[0]);
14174 ix86_expand_clear (high
[0]);
14175 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
14177 d
= gen_lowpart (QImode
, low
[0]);
14178 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
14179 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
14180 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
14182 d
= gen_lowpart (QImode
, high
[0]);
14183 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
14184 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
14185 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
14188 /* Otherwise, we can get the same results by manually performing
14189 a bit extract operation on bit 5/6, and then performing the two
14190 shifts. The two methods of getting 0/1 into low/high are exactly
14191 the same size. Avoiding the shift in the bit extract case helps
14192 pentium4 a bit; no one else seems to care much either way. */
14197 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
14198 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
14200 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
14201 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
14203 emit_insn ((mode
== DImode
14205 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
14206 emit_insn ((mode
== DImode
14208 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
14209 emit_move_insn (low
[0], high
[0]);
14210 emit_insn ((mode
== DImode
14212 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
14215 emit_insn ((mode
== DImode
14217 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
14218 emit_insn ((mode
== DImode
14220 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
14224 if (operands
[1] == constm1_rtx
)
14226 /* For -1 << N, we can avoid the shld instruction, because we
14227 know that we're shifting 0...31/63 ones into a -1. */
14228 emit_move_insn (low
[0], constm1_rtx
);
14230 emit_move_insn (high
[0], low
[0]);
14232 emit_move_insn (high
[0], constm1_rtx
);
14236 if (!rtx_equal_p (operands
[0], operands
[1]))
14237 emit_move_insn (operands
[0], operands
[1]);
14239 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14240 emit_insn ((mode
== DImode
14242 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
14245 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
14247 if (TARGET_CMOVE
&& scratch
)
14249 ix86_expand_clear (scratch
);
14250 emit_insn ((mode
== DImode
14251 ? gen_x86_shift_adj_1
14252 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
14255 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
14259 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14261 rtx low
[2], high
[2];
14263 const int single_width
= mode
== DImode
? 32 : 64;
14265 if (CONST_INT_P (operands
[2]))
14267 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14268 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14270 if (count
== single_width
* 2 - 1)
14272 emit_move_insn (high
[0], high
[1]);
14273 emit_insn ((mode
== DImode
14275 : gen_ashrdi3
) (high
[0], high
[0],
14276 GEN_INT (single_width
- 1)));
14277 emit_move_insn (low
[0], high
[0]);
14280 else if (count
>= single_width
)
14282 emit_move_insn (low
[0], high
[1]);
14283 emit_move_insn (high
[0], low
[0]);
14284 emit_insn ((mode
== DImode
14286 : gen_ashrdi3
) (high
[0], high
[0],
14287 GEN_INT (single_width
- 1)));
14288 if (count
> single_width
)
14289 emit_insn ((mode
== DImode
14291 : gen_ashrdi3
) (low
[0], low
[0],
14292 GEN_INT (count
- single_width
)));
14296 if (!rtx_equal_p (operands
[0], operands
[1]))
14297 emit_move_insn (operands
[0], operands
[1]);
14298 emit_insn ((mode
== DImode
14300 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
14301 emit_insn ((mode
== DImode
14303 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
14308 if (!rtx_equal_p (operands
[0], operands
[1]))
14309 emit_move_insn (operands
[0], operands
[1]);
14311 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14313 emit_insn ((mode
== DImode
14315 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
14316 emit_insn ((mode
== DImode
14318 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
14320 if (TARGET_CMOVE
&& scratch
)
14322 emit_move_insn (scratch
, high
[0]);
14323 emit_insn ((mode
== DImode
14325 : gen_ashrdi3
) (scratch
, scratch
,
14326 GEN_INT (single_width
- 1)));
14327 emit_insn ((mode
== DImode
14328 ? gen_x86_shift_adj_1
14329 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
14333 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
14338 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14340 rtx low
[2], high
[2];
14342 const int single_width
= mode
== DImode
? 32 : 64;
14344 if (CONST_INT_P (operands
[2]))
14346 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14347 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14349 if (count
>= single_width
)
14351 emit_move_insn (low
[0], high
[1]);
14352 ix86_expand_clear (high
[0]);
14354 if (count
> single_width
)
14355 emit_insn ((mode
== DImode
14357 : gen_lshrdi3
) (low
[0], low
[0],
14358 GEN_INT (count
- single_width
)));
14362 if (!rtx_equal_p (operands
[0], operands
[1]))
14363 emit_move_insn (operands
[0], operands
[1]);
14364 emit_insn ((mode
== DImode
14366 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
14367 emit_insn ((mode
== DImode
14369 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
14374 if (!rtx_equal_p (operands
[0], operands
[1]))
14375 emit_move_insn (operands
[0], operands
[1]);
14377 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14379 emit_insn ((mode
== DImode
14381 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
14382 emit_insn ((mode
== DImode
14384 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
14386 /* Heh. By reversing the arguments, we can reuse this pattern. */
14387 if (TARGET_CMOVE
&& scratch
)
14389 ix86_expand_clear (scratch
);
14390 emit_insn ((mode
== DImode
14391 ? gen_x86_shift_adj_1
14392 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
14396 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  REG_NOTES (insn)
    = gen_rtx_EXPR_LIST (REG_BR_PROB,
			 GEN_INT (prob),
			 REG_NOTES (insn));
}
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If true, jump to the label.  */
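
/* Editor's note (not part of the original file): in effect this emits
   "if ((VARIABLE & VALUE) == 0) goto label;", so the caller places the
   alignment fix-up between this call and emit_label, and the fix-up is
   skipped when the tested bit is already clear.  */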
14415 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
14417 rtx label
= gen_label_rtx ();
14418 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
14419 if (GET_MODE (variable
) == DImode
)
14420 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
14422 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
14423 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
14426 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
14428 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}
14456 /* Divide COUNTREG by SCALE. */
14458 scale_counter (rtx countreg
, int scale
)
14461 rtx piece_size_mask
;
14465 if (CONST_INT_P (countreg
))
14466 return GEN_INT (INTVAL (countreg
) / scale
);
14467 gcc_assert (REG_P (countreg
));
14469 piece_size_mask
= GEN_INT (scale
- 1);
14470 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
14471 GEN_INT (exact_log2 (scale
)),
14472 NULL
, 1, OPTAB_DIRECT
);
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (GET_CODE (count_exp) != CONST_INT)
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop to move memory from SRCPTR
   to DESTPTR via chunks of MODE, unrolled UNROLL times; the overall size is
   COUNT, specified in bytes.  When SRCPTR is NULL, output the equivalent
   loop to set memory to VALUE (supposed to be in MODE).

   The size is rounded down to a whole number of the chunk size moved at
   once.  SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
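
/* Editor's sketch (not part of the original file): ignoring the branch
   prediction notes and the early exit used for byte-sized pieces, the
   emitted control flow corresponds roughly to

	rounded = count & -piece;	   piece = GET_MODE_SIZE (mode) * unroll
	iter = 0;
     top:
	... copy (or store) one piece at dest + iter (and src + iter) ...
	iter += piece;
	if (iter < rounded) goto top;
	dest += iter;
	src += iter;
     out:
   */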
14501 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
14502 rtx destptr
, rtx srcptr
, rtx value
,
14503 rtx count
, enum machine_mode mode
, int unroll
,
14506 rtx out_label
, top_label
, iter
, tmp
;
14507 enum machine_mode iter_mode
= counter_mode (count
);
14508 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
14509 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
14515 top_label
= gen_label_rtx ();
14516 out_label
= gen_label_rtx ();
14517 iter
= gen_reg_rtx (iter_mode
);
14519 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
14520 NULL
, 1, OPTAB_DIRECT
);
14521 /* Those two should combine. */
14522 if (piece_size
== const1_rtx
)
14524 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
14526 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
14528 emit_move_insn (iter
, const0_rtx
);
14530 emit_label (top_label
);
14532 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
14533 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
14534 destmem
= change_address (destmem
, mode
, x_addr
);
14538 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
14539 srcmem
= change_address (srcmem
, mode
, y_addr
);
      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using a single temporary.
	 Also, using 4 temporaries is overkill in 32-bit mode.  */
14544 if (!TARGET_64BIT
&& 0)
14546 for (i
= 0; i
< unroll
; i
++)
14551 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14553 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14555 emit_move_insn (destmem
, srcmem
);
14561 gcc_assert (unroll
<= 4);
14562 for (i
= 0; i
< unroll
; i
++)
14564 tmpreg
[i
] = gen_reg_rtx (mode
);
14568 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14570 emit_move_insn (tmpreg
[i
], srcmem
);
14572 for (i
= 0; i
< unroll
; i
++)
14577 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14579 emit_move_insn (destmem
, tmpreg
[i
]);
14584 for (i
= 0; i
< unroll
; i
++)
14588 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14589 emit_move_insn (destmem
, value
);
14592 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
14593 true, OPTAB_LIB_WIDEN
);
14595 emit_move_insn (iter
, tmp
);
14597 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
14599 if (expected_size
!= -1)
14601 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
14602 if (expected_size
== 0)
14604 else if (expected_size
> REG_BR_PROB_BASE
)
14605 predict_jump (REG_BR_PROB_BASE
- 1);
14607 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
14610 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
14611 iter
= ix86_zero_extend_to_Pmode (iter
);
14612 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
14613 true, OPTAB_LIB_WIDEN
);
14614 if (tmp
!= destptr
)
14615 emit_move_insn (destptr
, tmp
);
14618 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
14619 true, OPTAB_LIB_WIDEN
);
14621 emit_move_insn (srcptr
, tmp
);
14623 emit_label (out_label
);
/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
14629 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
14630 rtx destptr
, rtx srcptr
,
14632 enum machine_mode mode
)
14638 /* If the size is known, it is shorter to use rep movs. */
14639 if (mode
== QImode
&& CONST_INT_P (count
)
14640 && !(INTVAL (count
) & 3))
14643 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14644 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14645 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
14646 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
14647 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14648 if (mode
!= QImode
)
14650 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14651 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14652 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14653 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14654 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14655 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
14659 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14660 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
14662 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
14669 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
14671 enum machine_mode mode
)
14676 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14677 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14678 value
= force_reg (mode
, gen_lowpart (mode
, value
));
14679 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14680 if (mode
!= QImode
)
14682 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14683 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14684 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14687 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14688 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
14700 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
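
/* Editor's note (not part of the original file): the remainder of at most
   max_size - 1 bytes is handled bit by bit - each set bit of the count
   selects one fixed-size move (8/4/2/1 bytes, plus 16 on 64-bit targets),
   either resolved at compile time for constant counts or through the small
   branch tree built with ix86_expand_aligntest for variable counts.  */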
14702 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
14703 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
14706 if (CONST_INT_P (count
))
14708 HOST_WIDE_INT countval
= INTVAL (count
);
14711 if ((countval
& 0x10) && max_size
> 16)
14715 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14716 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
14719 gcc_unreachable ();
14722 if ((countval
& 0x08) && max_size
> 8)
14725 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14728 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14729 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
14733 if ((countval
& 0x04) && max_size
> 4)
14735 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14738 if ((countval
& 0x02) && max_size
> 2)
14740 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
14743 if ((countval
& 0x01) && max_size
> 1)
14745 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
14752 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
14753 count
, 1, OPTAB_DIRECT
);
14754 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
14755 count
, QImode
, 1, 4);
14759 /* When there are stringops, we can cheaply increase dest and src pointers.
14760 Otherwise we save code size by maintaining offset (zero is readily
14761 available from preceding rep operation) and using x86 addressing modes.
14763 if (TARGET_SINGLE_STRINGOP
)
14767 rtx label
= ix86_expand_aligntest (count
, 4, true);
14768 src
= change_address (srcmem
, SImode
, srcptr
);
14769 dest
= change_address (destmem
, SImode
, destptr
);
14770 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14771 emit_label (label
);
14772 LABEL_NUSES (label
) = 1;
14776 rtx label
= ix86_expand_aligntest (count
, 2, true);
14777 src
= change_address (srcmem
, HImode
, srcptr
);
14778 dest
= change_address (destmem
, HImode
, destptr
);
14779 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14780 emit_label (label
);
14781 LABEL_NUSES (label
) = 1;
14785 rtx label
= ix86_expand_aligntest (count
, 1, true);
14786 src
= change_address (srcmem
, QImode
, srcptr
);
14787 dest
= change_address (destmem
, QImode
, destptr
);
14788 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14789 emit_label (label
);
14790 LABEL_NUSES (label
) = 1;
14795 rtx offset
= force_reg (Pmode
, const0_rtx
);
14800 rtx label
= ix86_expand_aligntest (count
, 4, true);
14801 src
= change_address (srcmem
, SImode
, srcptr
);
14802 dest
= change_address (destmem
, SImode
, destptr
);
14803 emit_move_insn (dest
, src
);
14804 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
14805 true, OPTAB_LIB_WIDEN
);
14807 emit_move_insn (offset
, tmp
);
14808 emit_label (label
);
14809 LABEL_NUSES (label
) = 1;
14813 rtx label
= ix86_expand_aligntest (count
, 2, true);
14814 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14815 src
= change_address (srcmem
, HImode
, tmp
);
14816 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14817 dest
= change_address (destmem
, HImode
, tmp
);
14818 emit_move_insn (dest
, src
);
14819 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
14820 true, OPTAB_LIB_WIDEN
);
14822 emit_move_insn (offset
, tmp
);
14823 emit_label (label
);
14824 LABEL_NUSES (label
) = 1;
14828 rtx label
= ix86_expand_aligntest (count
, 1, true);
14829 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14830 src
= change_address (srcmem
, QImode
, tmp
);
14831 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14832 dest
= change_address (destmem
, QImode
, tmp
);
14833 emit_move_insn (dest
, src
);
14834 emit_label (label
);
14835 LABEL_NUSES (label
) = 1;
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
14842 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
14843 rtx count
, int max_size
)
14846 expand_simple_binop (counter_mode (count
), AND
, count
,
14847 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
14848 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
14849 gen_lowpart (QImode
, value
), count
, QImode
,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
14855 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
14859 if (CONST_INT_P (count
))
14861 HOST_WIDE_INT countval
= INTVAL (count
);
14864 if ((countval
& 0x10) && max_size
> 16)
14868 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14869 emit_insn (gen_strset (destptr
, dest
, value
));
14870 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
14871 emit_insn (gen_strset (destptr
, dest
, value
));
14874 gcc_unreachable ();
14877 if ((countval
& 0x08) && max_size
> 8)
14881 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14882 emit_insn (gen_strset (destptr
, dest
, value
));
14886 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14887 emit_insn (gen_strset (destptr
, dest
, value
));
14888 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
14889 emit_insn (gen_strset (destptr
, dest
, value
));
14893 if ((countval
& 0x04) && max_size
> 4)
14895 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14896 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14899 if ((countval
& 0x02) && max_size
> 2)
14901 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
14902 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14905 if ((countval
& 0x01) && max_size
> 1)
14907 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
14908 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14915 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
14920 rtx label
= ix86_expand_aligntest (count
, 16, true);
14923 dest
= change_address (destmem
, DImode
, destptr
);
14924 emit_insn (gen_strset (destptr
, dest
, value
));
14925 emit_insn (gen_strset (destptr
, dest
, value
));
14929 dest
= change_address (destmem
, SImode
, destptr
);
14930 emit_insn (gen_strset (destptr
, dest
, value
));
14931 emit_insn (gen_strset (destptr
, dest
, value
));
14932 emit_insn (gen_strset (destptr
, dest
, value
));
14933 emit_insn (gen_strset (destptr
, dest
, value
));
14935 emit_label (label
);
14936 LABEL_NUSES (label
) = 1;
14940 rtx label
= ix86_expand_aligntest (count
, 8, true);
14943 dest
= change_address (destmem
, DImode
, destptr
);
14944 emit_insn (gen_strset (destptr
, dest
, value
));
14948 dest
= change_address (destmem
, SImode
, destptr
);
14949 emit_insn (gen_strset (destptr
, dest
, value
));
14950 emit_insn (gen_strset (destptr
, dest
, value
));
14952 emit_label (label
);
14953 LABEL_NUSES (label
) = 1;
14957 rtx label
= ix86_expand_aligntest (count
, 4, true);
14958 dest
= change_address (destmem
, SImode
, destptr
);
14959 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14960 emit_label (label
);
14961 LABEL_NUSES (label
) = 1;
14965 rtx label
= ix86_expand_aligntest (count
, 2, true);
14966 dest
= change_address (destmem
, HImode
, destptr
);
14967 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14968 emit_label (label
);
14969 LABEL_NUSES (label
) = 1;
14973 rtx label
= ix86_expand_aligntest (count
, 1, true);
14974 dest
= change_address (destmem
, QImode
, destptr
);
14975 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14976 emit_label (label
);
14977 LABEL_NUSES (label
) = 1;
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
14984 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14985 rtx destptr
, rtx srcptr
, rtx count
,
14986 int align
, int desired_alignment
)
14988 if (align
<= 1 && desired_alignment
> 1)
14990 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14991 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14992 destmem
= change_address (destmem
, QImode
, destptr
);
14993 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14994 ix86_adjust_counter (count
, 1);
14995 emit_label (label
);
14996 LABEL_NUSES (label
) = 1;
14998 if (align
<= 2 && desired_alignment
> 2)
15000 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
15001 srcmem
= change_address (srcmem
, HImode
, srcptr
);
15002 destmem
= change_address (destmem
, HImode
, destptr
);
15003 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
15004 ix86_adjust_counter (count
, 2);
15005 emit_label (label
);
15006 LABEL_NUSES (label
) = 1;
15008 if (align
<= 4 && desired_alignment
> 4)
15010 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
15011 srcmem
= change_address (srcmem
, SImode
, srcptr
);
15012 destmem
= change_address (destmem
, SImode
, destptr
);
15013 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
15014 ix86_adjust_counter (count
, 4);
15015 emit_label (label
);
15016 LABEL_NUSES (label
) = 1;
15018 gcc_assert (desired_alignment
<= 8);
/* Set enough of DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
15024 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
15025 int align
, int desired_alignment
)
15027 if (align
<= 1 && desired_alignment
> 1)
15029 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
15030 destmem
= change_address (destmem
, QImode
, destptr
);
15031 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
15032 ix86_adjust_counter (count
, 1);
15033 emit_label (label
);
15034 LABEL_NUSES (label
) = 1;
15036 if (align
<= 2 && desired_alignment
> 2)
15038 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
15039 destmem
= change_address (destmem
, HImode
, destptr
);
15040 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
15041 ix86_adjust_counter (count
, 2);
15042 emit_label (label
);
15043 LABEL_NUSES (label
) = 1;
15045 if (align
<= 4 && desired_alignment
> 4)
15047 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
15048 destmem
= change_address (destmem
, SImode
, destptr
);
15049 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
15050 ix86_adjust_counter (count
, 4);
15051 emit_label (label
);
15052 LABEL_NUSES (label
) = 1;
15054 gcc_assert (desired_alignment
<= 8);
15057 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15058 static enum stringop_alg
15059 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
15060 int *dynamic_check
)
15062 const struct stringop_algs
* algs
;
15064 *dynamic_check
= -1;
15066 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
15068 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
15069 if (stringop_alg
!= no_stringop
)
15070 return stringop_alg
;
15071 /* rep; movq or rep; movl is the smallest variant. */
15072 else if (optimize_size
)
15074 if (!count
|| (count
& 3))
15075 return rep_prefix_1_byte
;
15077 return rep_prefix_4_byte
;
  /* Very tiny blocks are best handled via the loop; REP is expensive to set up.  */
15081 else if (expected_size
!= -1 && expected_size
< 4)
15082 return loop_1_byte
;
15083 else if (expected_size
!= -1)
15086 enum stringop_alg alg
= libcall
;
15087 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
15089 gcc_assert (algs
->size
[i
].max
);
15090 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
15092 if (algs
->size
[i
].alg
!= libcall
)
15093 alg
= algs
->size
[i
].alg
;
15094 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15095 last non-libcall inline algorithm. */
15096 if (TARGET_INLINE_ALL_STRINGOPS
)
	      /* When the current size is best copied by a libcall,
		 but we are still forced to inline, run the heuristic below
		 that will pick code for medium-sized blocks.  */
15101 if (alg
!= libcall
)
15106 return algs
->size
[i
].alg
;
15109 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
15118 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
15119 && algs
->unknown_size
== libcall
)
15122 enum stringop_alg alg
;
15125 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
15126 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
15127 max
= algs
->size
[i
].max
;
15130 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
15131 gcc_assert (*dynamic_check
== -1);
15132 gcc_assert (alg
!= libcall
);
15133 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
15134 *dynamic_check
= max
;
15137 return algs
->unknown_size
;
15140 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15141 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15143 decide_alignment (int align
,
15144 enum stringop_alg alg
,
15147 int desired_align
= 0;
15151 gcc_unreachable ();
15153 case unrolled_loop
:
15154 desired_align
= GET_MODE_SIZE (Pmode
);
15156 case rep_prefix_8_byte
:
15159 case rep_prefix_4_byte
:
      /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
	 copying a whole cache line at once.  */
15162 if (TARGET_PENTIUMPRO
)
15167 case rep_prefix_1_byte
:
      /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
	 copying a whole cache line at once.  */
15170 if (TARGET_PENTIUMPRO
)
15184 if (desired_align
< align
)
15185 desired_align
= align
;
15186 if (expected_size
!= -1 && expected_size
< 4)
15187 desired_align
= align
;
15188 return desired_align
;
15191 /* Return the smallest power of 2 greater than VAL. */
15193 smallest_pow2_greater_than (int val
)
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrmem contains similar code.  The code depends upon
   architecture, block size and alignment, but always has the same
   blocks:

   1) Prologue guard: Conditional that jumps up to the epilogue for small
      blocks that can be handled by the epilogue alone.  This is faster but
      also needed for correctness, since the prologue assumes the block is
      larger than the desired alignment.

      An optional dynamic check for size and a libcall for large blocks is
      emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
      DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
      We emit either a jump tree on power-of-two sized blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to be
      handled by the main body (or up to size guarded by the prologue guard).  */
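
/* Editor's sketch (not part of the original file): for a non-constant count
   the emitted structure is roughly

	if (count < epilogue_size_needed) goto epilogue;
	if (dynamic check enabled && count >= dynamic_check)
	  { call memcpy; goto done; }
	... prologue: align the destination, adjusting count ...
	... main body: copy size_needed bytes per step ...
	count &= size_needed - 1;	   when compensation is needed
     epilogue:
	... copy the remaining count bytes ...
     done:

   matching steps 1-4 of the comment above.  */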
15226 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
15227 rtx expected_align_exp
, rtx expected_size_exp
)
15233 rtx jump_around_label
= NULL
;
15234 HOST_WIDE_INT align
= 1;
15235 unsigned HOST_WIDE_INT count
= 0;
15236 HOST_WIDE_INT expected_size
= -1;
15237 int size_needed
= 0, epilogue_size_needed
;
15238 int desired_align
= 0;
15239 enum stringop_alg alg
;
15242 if (CONST_INT_P (align_exp
))
15243 align
= INTVAL (align_exp
);
  /* i386 can do misaligned access at a reasonably increased cost.  */
15245 if (CONST_INT_P (expected_align_exp
)
15246 && INTVAL (expected_align_exp
) > align
)
15247 align
= INTVAL (expected_align_exp
);
15248 if (CONST_INT_P (count_exp
))
15249 count
= expected_size
= INTVAL (count_exp
);
15250 if (CONST_INT_P (expected_size_exp
) && count
== 0)
15251 expected_size
= INTVAL (expected_size_exp
);
15253 /* Step 0: Decide on preferred algorithm, desired alignment and
15254 size of chunks to be copied by main loop. */
15256 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
15257 desired_align
= decide_alignment (align
, alg
, expected_size
);
15259 if (!TARGET_ALIGN_STRINGOPS
)
15260 align
= desired_align
;
15262 if (alg
== libcall
)
15264 gcc_assert (alg
!= no_stringop
);
15266 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
15267 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15268 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
15273 gcc_unreachable ();
15275 size_needed
= GET_MODE_SIZE (Pmode
);
15277 case unrolled_loop
:
15278 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
15280 case rep_prefix_8_byte
:
15283 case rep_prefix_4_byte
:
15286 case rep_prefix_1_byte
:
15292 epilogue_size_needed
= size_needed
;
15294 /* Step 1: Prologue guard. */
15296 /* Alignment code needs count to be in register. */
15297 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15299 enum machine_mode mode
= SImode
;
15300 if (TARGET_64BIT
&& (count
& ~0xffffffff))
15302 count_exp
= force_reg (mode
, count_exp
);
15304 gcc_assert (desired_align
>= 1 && align
>= 1);
15306 /* Ensure that alignment prologue won't copy past end of block. */
15307 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15309 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
15310 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15311 Make sure it is power of 2. */
15312 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
15314 label
= gen_label_rtx ();
15315 emit_cmp_and_jump_insns (count_exp
,
15316 GEN_INT (epilogue_size_needed
),
15317 LTU
, 0, counter_mode (count_exp
), 1, label
);
15318 if (GET_CODE (count_exp
) == CONST_INT
)
15320 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
15321 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15323 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, GET_MODE (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      emit_block_move_via_libcall (dst, src, count_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }
15339 /* Step 2: Alignment prologue. */
15341 if (desired_align
> align
)
      /* Except for the first move in the epilogue, we no longer know
	 the constant offset in the aliasing info.  It doesn't seem worth
	 the pain to maintain it for the first move, so throw away
	 the info early.  */
15347 src
= change_address (src
, BLKmode
, srcreg
);
15348 dst
= change_address (dst
, BLKmode
, destreg
);
15349 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
15352 if (label
&& size_needed
== 1)
15354 emit_label (label
);
15355 LABEL_NUSES (label
) = 1;
15359 /* Step 3: Main loop. */
15365 gcc_unreachable ();
15367 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15368 count_exp
, QImode
, 1, expected_size
);
15371 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15372 count_exp
, Pmode
, 1, expected_size
);
15374 case unrolled_loop
:
      /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
	 enough registers for 4 temporaries anyway.  */
15377 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15378 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
15381 case rep_prefix_8_byte
:
15382 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15385 case rep_prefix_4_byte
:
15386 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15389 case rep_prefix_1_byte
:
15390 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
  /* Properly adjust the offsets of the src and dest memory for aliasing.  */
15395 if (CONST_INT_P (count_exp
))
15397 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
15398 (count
/ size_needed
) * size_needed
);
15399 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15400 (count
/ size_needed
) * size_needed
);
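      /* E.g. (illustrative numbers only): with COUNT = 103 and
	 SIZE_NEEDED = 16 the main loop advanced both pointers by
	 (103 / 16) * 16 = 96 bytes, so the epilogue's known constant
	 offset starts there.  */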
15404 src
= change_address (src
, BLKmode
, srcreg
);
15405 dst
= change_address (dst
, BLKmode
, destreg
);
15408 /* Step 4: Epilogue to copy the remaining bytes. */
  /* When the main loop is done, COUNT_EXP might hold the original count,
     while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
     The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
     bytes.  Compensate if needed.  */
15417 if (size_needed
< epilogue_size_needed
)
15420 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15421 GEN_INT (size_needed
- 1), count_exp
, 1,
15423 if (tmp
!= count_exp
)
15424 emit_move_insn (count_exp
, tmp
);
15426 emit_label (label
);
15427 LABEL_NUSES (label
) = 1;
15430 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15431 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
15432 epilogue_size_needed
);
15433 if (jump_around_label
)
15434 emit_label (jump_around_label
);
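
/* The four numbered steps described before ix86_expand_movmem are easiest
   to see in a plain C model.  This is an illustrative sketch only (not code
   GCC emits or uses): it assumes a chunk size of sizeof (long), byte-granular
   prologue and epilogue, and it casts the pointer to size_t just to test
   alignment.  */

static void
ref_movmem_sketch (char *dst, const char *src, size_t count)
{
  const size_t chunk = sizeof (long);
  size_t i;

  /* Step 1: guard; blocks smaller than one chunk go straight to the
     epilogue (the real expander may also branch to a library call here).  */
  if (count >= chunk)
    {
      /* Step 2: prologue; copy bytes until DST is aligned.  */
      while ((size_t) dst % chunk)
	{
	  *dst++ = *src++;
	  count--;
	}
      /* Step 3: main body; copy one chunk per iteration.  */
      for (; count >= chunk; count -= chunk)
	for (i = 0; i < chunk; i++)
	  *dst++ = *src++;
    }
  /* Step 4: epilogue; finish the remaining COUNT < CHUNK bytes.  */
  while (count--)
    *dst++ = *src++;
}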
/* Helper function for memcpy.  For a QImode value 0xXY, produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
15444 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
15446 enum machine_mode valmode
= GET_MODE (val
);
15448 int nops
= mode
== DImode
? 3 : 2;
15450 gcc_assert (mode
== SImode
|| mode
== DImode
);
15451 if (val
== const0_rtx
)
15452 return copy_to_mode_reg (mode
, const0_rtx
);
15453 if (CONST_INT_P (val
))
15455 HOST_WIDE_INT v
= INTVAL (val
) & 255;
15459 if (mode
== DImode
)
15460 v
|= (v
<< 16) << 16;
15461 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
15464 if (valmode
== VOIDmode
)
15466 if (valmode
!= QImode
)
15467 val
= gen_lowpart (QImode
, val
);
15468 if (mode
== QImode
)
15470 if (!TARGET_PARTIAL_REG_STALL
)
15472 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
15473 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
15474 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
15475 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
15477 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15478 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
15479 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
15484 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15486 if (!TARGET_PARTIAL_REG_STALL
)
15487 if (mode
== SImode
)
15488 emit_insn (gen_movsi_insv_1 (reg
, reg
));
15490 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
15493 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
15494 NULL
, 1, OPTAB_DIRECT
);
15496 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15498 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
15499 NULL
, 1, OPTAB_DIRECT
);
15500 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15501 if (mode
== SImode
)
15503 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
15504 NULL
, 1, OPTAB_DIRECT
);
15505 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15510 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15511 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15512 alignment from ALIGN to DESIRED_ALIGN. */
15514 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
15519 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
15520 promoted_val
= promote_duplicated_reg (DImode
, val
);
15521 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
15522 promoted_val
= promote_duplicated_reg (SImode
, val
);
15523 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
15524 promoted_val
= promote_duplicated_reg (HImode
, val
);
15526 promoted_val
= val
;
15528 return promoted_val
;
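
/* Plain C model of the byte duplication performed by promote_duplicated_reg
   above (an illustrative sketch, not used by GCC): the same shift-and-IOR
   sequence the function emits as RTL, equivalent to the multiply by
   0x10101010.  */

static unsigned HOST_WIDE_INT
dup_byte_sketch (unsigned HOST_WIDE_INT v, int mode_is_dimode)
{
  v &= 0xff;
  v |= v << 8;			/* 0x000000XY -> 0x0000XYXY */
  v |= v << 16;			/* 0x0000XYXY -> 0xXYXYXYXY */
  if (mode_is_dimode)
    v |= (v << 16) << 16;	/* widen to 0xXYXYXYXYXYXYXYXY for DImode */
  return v;
}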
15531 /* Expand string clear operation (bzero). Use i386 string operations when
15532 profitable. See expand_movmem comment for explanation of individual
15533 steps performed. */
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);
15563 /* Step 0: Decide on preferred algorithm, desired alignment and
15564 size of chunks to be copied by main loop. */
  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;
15572 if (alg
== libcall
)
15574 gcc_assert (alg
!= no_stringop
);
15576 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
15577 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15582 gcc_unreachable ();
15584 size_needed
= GET_MODE_SIZE (Pmode
);
15586 case unrolled_loop
:
15587 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
15589 case rep_prefix_8_byte
:
15592 case rep_prefix_4_byte
:
15595 case rep_prefix_1_byte
:
15600 epilogue_size_needed
= size_needed
;
15602 /* Step 1: Prologue guard. */
15604 /* Alignment code needs count to be in register. */
15605 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15607 enum machine_mode mode
= SImode
;
15608 if (TARGET_64BIT
&& (count
& ~0xffffffff))
15610 count_exp
= force_reg (mode
, count_exp
);
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).  */
15615 if (CONST_INT_P (val_exp
))
15616 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15617 desired_align
, align
);
15618 /* Ensure that alignment prologue won't copy past end of block. */
15619 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15621 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
      /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
	 bytes.  Make sure it is a power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
      /* To improve performance of small blocks, we jump around the VAL
	 promoting code.  This means that if the promoted VAL is not a
	 constant, we might not use it in the epilogue and have to use a
	 byte loop variant.  */
15630 if (epilogue_size_needed
> 2 && !promoted_val
)
15631 force_loopy_epilogue
= true;
15632 label
= gen_label_rtx ();
15633 emit_cmp_and_jump_insns (count_exp
,
15634 GEN_INT (epilogue_size_needed
),
15635 LTU
, 0, counter_mode (count_exp
), 1, label
);
15636 if (GET_CODE (count_exp
) == CONST_INT
)
15638 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
15639 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15641 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
15643 if (dynamic_check
!= -1)
15645 rtx hot_label
= gen_label_rtx ();
15646 jump_around_label
= gen_label_rtx ();
15647 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
15648 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
15649 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
15650 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
15651 emit_jump (jump_around_label
);
15652 emit_label (hot_label
);
15655 /* Step 2: Alignment prologue. */
  /* Do the expensive promotion once we have branched off the small blocks.  */
15659 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15660 desired_align
, align
);
15661 gcc_assert (desired_align
>= 1 && align
>= 1);
15663 if (desired_align
> align
)
      /* Except for the first move in the epilogue, we no longer know
	 the constant offset in the aliasing info.  It doesn't seem worth
	 the pain to maintain it for the first move, so throw away
	 the info early.  */
15669 dst
= change_address (dst
, BLKmode
, destreg
);
15670 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
15673 if (label
&& size_needed
== 1)
15675 emit_label (label
);
15676 LABEL_NUSES (label
) = 1;
15680 /* Step 3: Main loop. */
15686 gcc_unreachable ();
15688 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15689 count_exp
, QImode
, 1, expected_size
);
15692 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15693 count_exp
, Pmode
, 1, expected_size
);
15695 case unrolled_loop
:
15696 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15697 count_exp
, Pmode
, 4, expected_size
);
15699 case rep_prefix_8_byte
:
15700 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15703 case rep_prefix_4_byte
:
15704 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15707 case rep_prefix_1_byte
:
15708 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
  /* Properly adjust the offset of the dest memory for aliasing.  */
15713 if (CONST_INT_P (count_exp
))
15714 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15715 (count
/ size_needed
) * size_needed
);
15717 dst
= change_address (dst
, BLKmode
, destreg
);
15719 /* Step 4: Epilogue to copy the remaining bytes. */
  /* When the main loop is done, COUNT_EXP might hold the original count,
     while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
     The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
     bytes.  Compensate if needed.  */
15728 if (size_needed
< desired_align
- align
)
15731 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15732 GEN_INT (size_needed
- 1), count_exp
, 1,
15734 size_needed
= desired_align
- align
+ 1;
15735 if (tmp
!= count_exp
)
15736 emit_move_insn (count_exp
, tmp
);
15738 emit_label (label
);
15739 LABEL_NUSES (label
) = 1;
15741 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15743 if (force_loopy_epilogue
)
15744 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
15747 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
15750 if (jump_around_label
)
15751 emit_label (jump_around_label
);
15755 /* Expand the appropriate insns for doing strlen if not just doing
15758 out = result, initialized with the start address
15759 align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	     not aligned, otherwise undefined
15763 This is just the body. It needs the initializations mentioned above and
15764 some address computing at the end. These things are done in i386.md. */
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
15781 if (CONST_INT_P (align_rtx
))
15782 align
= INTVAL (align_rtx
);
15784 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15786 /* Is there a known alignment and is it less than 4? */
15789 rtx scratch1
= gen_reg_rtx (Pmode
);
15790 emit_move_insn (scratch1
, out
);
15791 /* Is there a known alignment and is it not 2? */
15794 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
15795 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
15797 /* Leave just the 3 lower bits. */
15798 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
15799 NULL_RTX
, 0, OPTAB_WIDEN
);
15801 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15802 Pmode
, 1, align_4_label
);
15803 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
15804 Pmode
, 1, align_2_label
);
15805 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
15806 Pmode
, 1, align_3_label
);
      /* Since the alignment is 2, we have to check 2 or 0 bytes;
	 check if it is aligned to a 4-byte boundary.  */
15813 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
15814 NULL_RTX
, 0, OPTAB_WIDEN
);
15816 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15817 Pmode
, 1, align_4_label
);
15820 mem
= change_address (src
, QImode
, out
);
15822 /* Now compare the bytes. */
15824 /* Compare the first n unaligned byte on a byte per byte basis. */
15825 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
15826 QImode
, 1, end_0_label
);
15828 /* Increment the address. */
15830 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15832 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15834 /* Not needed with an alignment of 2 */
15837 emit_label (align_2_label
);
15839 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15843 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15845 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15847 emit_label (align_3_label
);
15850 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15854 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15856 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
     align this loop: it only makes the program bigger and does not speed
     it up.  */
15862 emit_label (align_4_label
);
15864 mem
= change_address (src
, SImode
, out
);
15865 emit_move_insn (scratch
, mem
);
15867 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
15869 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
15874 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
15875 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
15876 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
15877 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
15878 gen_int_mode (0x80808080, SImode
)));
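  /* Worked example of the formula above (illustrative): for a word whose
     bytes in memory order are 'B', 'B', 0, 'A' (x = 0x41004242 on a
     little-endian load), the insns above compute

	(x - 0x01010101) & ~x & 0x80808080
	  = 0x3fff4141 & 0xbeffbdbd & 0x80808080
	  = 0x00800000,

     nonzero exactly because the third byte is zero; a word with no zero
     byte gives 0 and the loop continues.  */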
15879 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
15884 rtx reg
= gen_reg_rtx (SImode
);
15885 rtx reg2
= gen_reg_rtx (Pmode
);
15886 emit_move_insn (reg
, tmpreg
);
15887 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
15889 /* If zero is not in the first two bytes, move two bytes forward. */
15890 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15891 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15892 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15893 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
15894 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15897 /* Emit lea manually to avoid clobbering of flags. */
15898 emit_insn (gen_rtx_SET (SImode
, reg2
,
15899 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15901 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15902 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15903 emit_insn (gen_rtx_SET (VOIDmode
, out
,
15904 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
15911 rtx end_2_label
= gen_label_rtx ();
15912 /* Is zero in the first two bytes? */
15914 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15915 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15916 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15917 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15918 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15920 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15921 JUMP_LABEL (tmp
) = end_2_label
;
15923 /* Not in the first two. Move two bytes forward. */
15924 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15926 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15928 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15930 emit_label (end_2_label
);
15934 /* Avoid branch in fixing the byte. */
15935 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15936 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
15937 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, FLAGS_REG
), const0_rtx
);
15939 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15941 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
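  /* The addqi3_cc/sbb pair above is a branch-free "OUT -= 3 + FLAG", where
     FLAG is bit 7 of the low byte of TMPREG, i.e. set when the lower byte
     of the remaining pair is the zero byte.  Illustrative walk-through: at
     this point OUT is 4 bytes past the start of that pair; if the zero is
     its first byte, doubling the mask byte sets the carry and OUT - 3 - 1
     lands exactly on the terminating zero, otherwise the carry is clear
     and OUT - 3 points at the second byte.  */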
15943 emit_label (end_0_label
);
15946 /* Expand strlen. */
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;
  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
15956 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15957 && !TARGET_INLINE_ALL_STRINGOPS
15959 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
15962 addr
= force_reg (Pmode
, XEXP (src
, 0));
15963 scratch1
= gen_reg_rtx (Pmode
);
15965 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
      /* Well it seems that some optimizer does not combine a call like
	 foo (strlen (bar), strlen (bar));
	 when the move and the subtraction are done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think that since &bar[strlen (bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll(), this is better.  */
15976 emit_move_insn (out
, addr
);
15978 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
15980 /* strlensi_unroll_1 returns the address of the zero at the end of
15981 the string, like memchr(), so compute the length by subtracting
15982 the start address. */
15984 emit_insn (gen_subdi3 (out
, out
, addr
));
15986 emit_insn (gen_subsi3 (out
, out
, addr
));
15991 scratch2
= gen_reg_rtx (Pmode
);
15992 scratch3
= gen_reg_rtx (Pmode
);
15993 scratch4
= force_reg (Pmode
, constm1_rtx
);
15995 emit_move_insn (scratch3
, addr
);
15996 eoschar
= force_reg (QImode
, eoschar
);
15998 src
= replace_equiv_address_nv (src
, scratch3
);
16000 /* If .md starts supporting :P, this can be done in .md. */
16001 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
16002 scratch4
), UNSPEC_SCAS
);
16003 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
16006 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
16007 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
16011 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
16012 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
/* For the given symbol (function), construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
16021 construct_plt_address (rtx symbol
)
16023 rtx tmp
= gen_reg_rtx (Pmode
);
16024 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
16026 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
16027 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
16029 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
16030 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
16039 rtx use
= NULL
, call
;
16041 if (pop
== const0_rtx
)
16043 gcc_assert (!TARGET_64BIT
|| !pop
);
16045 if (TARGET_MACHO
&& !TARGET_64BIT
)
16048 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
16049 fnaddr
= machopic_indirect_call_target (fnaddr
);
16054 /* Static functions and indirect calls don't need the pic register. */
16055 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
16056 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
16057 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
16058 use_reg (&use
, pic_offset_table_rtx
);
16061 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
16063 rtx al
= gen_rtx_REG (QImode
, 0);
16064 emit_move_insn (al
, callarg2
);
16065 use_reg (&use
, al
);
16068 if (ix86_cmodel
== CM_LARGE_PIC
16069 && GET_CODE (fnaddr
) == MEM
16070 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
16071 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
16072 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
16073 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
16075 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
16076 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
16078 if (sibcall
&& TARGET_64BIT
16079 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
16082 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
16083 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
16084 emit_move_insn (fnaddr
, addr
);
16085 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
16088 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
16090 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
16093 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
16094 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
16095 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
16098 call
= emit_call_insn (call
);
16100 CALL_INSN_FUNCTION_USAGE (call
) = use
;
16104 /* Clear stack slot assignments remembered from previous functions.
16105 This is called from INIT_EXPANDERS once before RTL is emitted for each
16108 static struct machine_function
*
16109 ix86_init_machine_status (void)
16111 struct machine_function
*f
;
16113 f
= GGC_CNEW (struct machine_function
);
16114 f
->use_fast_prologue_epilogue_nregs
= -1;
16115 f
->tls_descriptor_call_expanded_p
= 0;
16120 /* Return a MEM corresponding to a stack slot with mode MODE.
16121 Allocate a new slot if necessary.
16123 The RTL for a function can have several slots available: N is
16124 which slot to use. */
16127 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
16129 struct stack_local_entry
*s
;
16131 gcc_assert (n
< MAX_386_STACK_LOCALS
);
16133 /* Virtual slot is valid only before vregs are instantiated. */
16134 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
16136 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
16137 if (s
->mode
== mode
&& s
->n
== n
)
16138 return copy_rtx (s
->rtl
);
16140 s
= (struct stack_local_entry
*)
16141 ggc_alloc (sizeof (struct stack_local_entry
));
16144 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
16146 s
->next
= ix86_stack_locals
;
16147 ix86_stack_locals
= s
;
16151 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16153 static GTY(()) rtx ix86_tls_symbol
;
16155 ix86_tls_get_addr (void)
16158 if (!ix86_tls_symbol
)
16160 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
16161 (TARGET_ANY_GNU_TLS
16163 ? "___tls_get_addr"
16164 : "__tls_get_addr");
16167 return ix86_tls_symbol
;
16170 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16172 static GTY(()) rtx ix86_tls_module_base_symbol
;
16174 ix86_tls_module_base (void)
16177 if (!ix86_tls_module_base_symbol
)
16179 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
16180 "_TLS_MODULE_BASE_");
16181 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
16182 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
16185 return ix86_tls_module_base_symbol
;
16188 /* Calculate the length of the memory address in the instruction
16189 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16192 memory_address_length (rtx addr
)
16194 struct ix86_address parts
;
16195 rtx base
, index
, disp
;
16199 if (GET_CODE (addr
) == PRE_DEC
16200 || GET_CODE (addr
) == POST_INC
16201 || GET_CODE (addr
) == PRE_MODIFY
16202 || GET_CODE (addr
) == POST_MODIFY
)
16205 ok
= ix86_decompose_address (addr
, &parts
);
16208 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
16209 parts
.base
= SUBREG_REG (parts
.base
);
16210 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
16211 parts
.index
= SUBREG_REG (parts
.index
);
16214 index
= parts
.index
;
16219 - esp as the base always wants an index,
16220 - ebp as the base always wants a displacement. */
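  /* Concrete examples of these rules (illustrative, 32-bit operands):
     (%eax) needs no extra bytes; (%esp) needs one SIB byte; 8(%ebp) needs
     one disp8 byte; 0x1234(%eax) needs four disp32 bytes; a bare symbol
     address needs four bytes; 4(%eax,%ebx,2) needs one SIB byte plus one
     disp8 byte.  */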
16222 /* Register Indirect. */
16223 if (base
&& !index
&& !disp
)
16225 /* esp (for its index) and ebp (for its displacement) need
16226 the two-byte modrm form. */
16227 if (addr
== stack_pointer_rtx
16228 || addr
== arg_pointer_rtx
16229 || addr
== frame_pointer_rtx
16230 || addr
== hard_frame_pointer_rtx
)
16234 /* Direct Addressing. */
16235 else if (disp
&& !base
&& !index
)
16240 /* Find the length of the displacement constant. */
16243 if (base
&& satisfies_constraint_K (disp
))
16248 /* ebp always wants a displacement. */
16249 else if (base
== hard_frame_pointer_rtx
)
16252 /* An index requires the two-byte modrm form.... */
16254 /* ...like esp, which always wants an index. */
16255 || base
== stack_pointer_rtx
16256 || base
== arg_pointer_rtx
16257 || base
== frame_pointer_rtx
)
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
16267 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
16271 extract_insn_cached (insn
);
16272 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16273 if (CONSTANT_P (recog_data
.operand
[i
]))
16276 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
16280 switch (get_attr_mode (insn
))
      /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
16296 fatal_insn ("unknown insn mode", insn
);
16302 /* Compute default value for "length_address" attribute. */
16304 ix86_attr_length_address_default (rtx insn
)
16308 if (get_attr_type (insn
) == TYPE_LEA
)
16310 rtx set
= PATTERN (insn
);
16312 if (GET_CODE (set
) == PARALLEL
)
16313 set
= XVECEXP (set
, 0, 0);
16315 gcc_assert (GET_CODE (set
) == SET
);
16317 return memory_address_length (SET_SRC (set
));
16320 extract_insn_cached (insn
);
16321 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16322 if (MEM_P (recog_data
.operand
[i
]))
16324 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
16330 /* Return the maximum number of instructions a cpu can issue. */
16333 ix86_issue_rate (void)
16337 case PROCESSOR_PENTIUM
:
16341 case PROCESSOR_PENTIUMPRO
:
16342 case PROCESSOR_PENTIUM4
:
16343 case PROCESSOR_ATHLON
:
16345 case PROCESSOR_AMDFAM10
:
16346 case PROCESSOR_NOCONA
:
16347 case PROCESSOR_GENERIC32
:
16348 case PROCESSOR_GENERIC64
:
16351 case PROCESSOR_CORE2
:
16359 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16360 by DEP_INSN and nothing set by DEP_INSN. */
16363 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
16367 /* Simplify the test for uninteresting insns. */
16368 if (insn_type
!= TYPE_SETCC
16369 && insn_type
!= TYPE_ICMOV
16370 && insn_type
!= TYPE_FCMOV
16371 && insn_type
!= TYPE_IBR
)
16374 if ((set
= single_set (dep_insn
)) != 0)
16376 set
= SET_DEST (set
);
16379 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
16380 && XVECLEN (PATTERN (dep_insn
), 0) == 2
16381 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
16382 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
16384 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
16385 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
16390 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
16393 /* This test is true if the dependent insn reads the flags but
16394 not any other potentially set register. */
16395 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
16398 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
16404 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16405 address with operands set by DEP_INSN. */
16408 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
16412 if (insn_type
== TYPE_LEA
16415 addr
= PATTERN (insn
);
16417 if (GET_CODE (addr
) == PARALLEL
)
16418 addr
= XVECEXP (addr
, 0, 0);
16420 gcc_assert (GET_CODE (addr
) == SET
);
16422 addr
= SET_SRC (addr
);
16427 extract_insn_cached (insn
);
16428 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16429 if (MEM_P (recog_data
.operand
[i
]))
16431 addr
= XEXP (recog_data
.operand
[i
], 0);
16438 return modified_in_p (addr
, dep_insn
);
16442 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
16444 enum attr_type insn_type
, dep_insn_type
;
16445 enum attr_memory memory
;
16447 int dep_insn_code_number
;
16449 /* Anti and output dependencies have zero cost on all CPUs. */
16450 if (REG_NOTE_KIND (link
) != 0)
16453 dep_insn_code_number
= recog_memoized (dep_insn
);
16455 /* If we can't recognize the insns, we can't really do anything. */
16456 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
16459 insn_type
= get_attr_type (insn
);
16460 dep_insn_type
= get_attr_type (dep_insn
);
16464 case PROCESSOR_PENTIUM
:
16465 /* Address Generation Interlock adds a cycle of latency. */
16466 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16469 /* ??? Compares pair with jump/setcc. */
16470 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
16473 /* Floating point stores require value to be ready one cycle earlier. */
16474 if (insn_type
== TYPE_FMOV
16475 && get_attr_memory (insn
) == MEMORY_STORE
16476 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16480 case PROCESSOR_PENTIUMPRO
:
16481 memory
= get_attr_memory (insn
);
16483 /* INT->FP conversion is expensive. */
16484 if (get_attr_fp_int_src (dep_insn
))
16487 /* There is one cycle extra latency between an FP op and a store. */
16488 if (insn_type
== TYPE_FMOV
16489 && (set
= single_set (dep_insn
)) != NULL_RTX
16490 && (set2
= single_set (insn
)) != NULL_RTX
16491 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
16492 && MEM_P (SET_DEST (set2
)))
16495 /* Show ability of reorder buffer to hide latency of load by executing
16496 in parallel with previous instruction in case
16497 previous instruction is not needed to compute the address. */
16498 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16499 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
16503 if (dep_insn_type
== TYPE_IMOV
16504 || dep_insn_type
== TYPE_FMOV
)
16512 memory
= get_attr_memory (insn
);
16514 /* The esp dependency is resolved before the instruction is really
16516 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
16517 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
16520 /* INT->FP conversion is expensive. */
16521 if (get_attr_fp_int_src (dep_insn
))
16524 /* Show ability of reorder buffer to hide latency of load by executing
16525 in parallel with previous instruction in case
16526 previous instruction is not needed to compute the address. */
16527 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16528 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
16532 if (dep_insn_type
== TYPE_IMOV
16533 || dep_insn_type
== TYPE_FMOV
)
16542 case PROCESSOR_ATHLON
:
16544 case PROCESSOR_AMDFAM10
:
16545 case PROCESSOR_GENERIC32
:
16546 case PROCESSOR_GENERIC64
:
16547 memory
= get_attr_memory (insn
);
16549 /* Show ability of reorder buffer to hide latency of load by executing
16550 in parallel with previous instruction in case
16551 previous instruction is not needed to compute the address. */
16552 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16553 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16555 enum attr_unit unit
= get_attr_unit (insn
);
16558 /* Because of the difference between the length of integer and
16559 floating unit pipeline preparation stages, the memory operands
16560 for floating point are cheaper.
	 ??? For Athlon the difference is most probably 2.  */
16563 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
16566 loadcost
= TARGET_ATHLON
? 2 : 0;
16568 if (cost
>= loadcost
)
16581 /* How many alternative schedules to try. This should be as wide as the
16582 scheduling freedom in the DFA, but no wider. Making this value too
   large results in extra work for the scheduler.  */
16586 ia32_multipass_dfa_lookahead (void)
16588 if (ix86_tune
== PROCESSOR_PENTIUM
)
16591 if (ix86_tune
== PROCESSOR_PENTIUMPRO
16592 || ix86_tune
== PROCESSOR_K6
)
16600 /* Compute the alignment given to a constant that is being placed in memory.
16601 EXP is the constant and ALIGN is the alignment that the object would
16603 The value of this function is used instead of that alignment to align
16607 ix86_constant_alignment (tree exp
, int align
)
16609 if (TREE_CODE (exp
) == REAL_CST
)
16611 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
16613 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
16616 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
16617 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
16618 return BITS_PER_WORD
;
16623 /* Compute the alignment for a static variable.
16624 TYPE is the data type, and ALIGN is the alignment that
16625 the object would ordinarily have. The value of this function is used
16626 instead of that alignment to align the object. */
16629 ix86_data_alignment (tree type
, int align
)
16631 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
16633 if (AGGREGATE_TYPE_P (type
)
16634 && TYPE_SIZE (type
)
16635 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16636 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
16637 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
16638 && align
< max_align
)
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
16645 if (AGGREGATE_TYPE_P (type
)
16646 && TYPE_SIZE (type
)
16647 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16648 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
16649 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16653 if (TREE_CODE (type
) == ARRAY_TYPE
)
16655 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16657 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16660 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16663 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16665 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16668 else if ((TREE_CODE (type
) == RECORD_TYPE
16669 || TREE_CODE (type
) == UNION_TYPE
16670 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16671 && TYPE_FIELDS (type
))
16673 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16675 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16678 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16679 || TREE_CODE (type
) == INTEGER_TYPE
)
16681 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16683 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16690 /* Compute the alignment for a local variable.
16691 TYPE is the data type, and ALIGN is the alignment that
16692 the object would ordinarily have. The value of this macro is used
16693 instead of that alignment to align the object. */
16696 ix86_local_alignment (tree type
, int align
)
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
16702 if (AGGREGATE_TYPE_P (type
)
16703 && TYPE_SIZE (type
)
16704 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16705 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
16706 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16709 if (TREE_CODE (type
) == ARRAY_TYPE
)
16711 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16713 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16716 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16718 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16720 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16723 else if ((TREE_CODE (type
) == RECORD_TYPE
16724 || TREE_CODE (type
) == UNION_TYPE
16725 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16726 && TYPE_FIELDS (type
))
16728 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16730 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16733 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16734 || TREE_CODE (type
) == INTEGER_TYPE
)
16737 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16739 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16745 /* Emit RTL insns to initialize the variable parts of a trampoline.
16746 FNADDR is an RTX for the address of the function's pure code.
16747 CXT is an RTX for the static chain value for the function. */
16749 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
16753 /* Compute offset from the end of the jmp to the target function. */
16754 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
16755 plus_constant (tramp
, 10),
16756 NULL_RTX
, 1, OPTAB_DIRECT
);
16757 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
16758 gen_int_mode (0xb9, QImode
));
16759 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
16760 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
16761 gen_int_mode (0xe9, QImode
));
16762 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does not
	 use trampolines at the moment.  */
16770 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
16772 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
16773 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16774 gen_int_mode (0xbb41, HImode
));
16775 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
16776 gen_lowpart (SImode
, fnaddr
));
16781 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16782 gen_int_mode (0xbb49, HImode
));
16783 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16787 /* Load static chain using movabs to r10. */
16788 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16789 gen_int_mode (0xba49, HImode
));
16790 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
      /* Jump to r11.  */
16794 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16795 gen_int_mode (0xff49, HImode
));
16796 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
16797 gen_int_mode (0xe3, QImode
));
16799 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
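  /* For reference (an illustrative summary of the bytes written above):
     in 32-bit mode the trampoline is

	offset 0:  b9 <cxt>     movl  $CXT, %ecx
	offset 5:  e9 <disp>    jmp   FNADDR   (disp = FNADDR - (TRAMP + 10))

     and in 64-bit mode it loads FNADDR into %r11 with movl (41 bb) or
     movabs (49 bb), loads the static chain into %r10 with movabs (49 ba),
     and finishes with jmp *%r11 (49 ff e3).  */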
16802 #ifdef ENABLE_EXECUTE_STACK
16803 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
16804 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
16808 /* Codes for all the SSE/MMX builtins. */
16811 IX86_BUILTIN_ADDPS
,
16812 IX86_BUILTIN_ADDSS
,
16813 IX86_BUILTIN_DIVPS
,
16814 IX86_BUILTIN_DIVSS
,
16815 IX86_BUILTIN_MULPS
,
16816 IX86_BUILTIN_MULSS
,
16817 IX86_BUILTIN_SUBPS
,
16818 IX86_BUILTIN_SUBSS
,
16820 IX86_BUILTIN_CMPEQPS
,
16821 IX86_BUILTIN_CMPLTPS
,
16822 IX86_BUILTIN_CMPLEPS
,
16823 IX86_BUILTIN_CMPGTPS
,
16824 IX86_BUILTIN_CMPGEPS
,
16825 IX86_BUILTIN_CMPNEQPS
,
16826 IX86_BUILTIN_CMPNLTPS
,
16827 IX86_BUILTIN_CMPNLEPS
,
16828 IX86_BUILTIN_CMPNGTPS
,
16829 IX86_BUILTIN_CMPNGEPS
,
16830 IX86_BUILTIN_CMPORDPS
,
16831 IX86_BUILTIN_CMPUNORDPS
,
16832 IX86_BUILTIN_CMPEQSS
,
16833 IX86_BUILTIN_CMPLTSS
,
16834 IX86_BUILTIN_CMPLESS
,
16835 IX86_BUILTIN_CMPNEQSS
,
16836 IX86_BUILTIN_CMPNLTSS
,
16837 IX86_BUILTIN_CMPNLESS
,
16838 IX86_BUILTIN_CMPNGTSS
,
16839 IX86_BUILTIN_CMPNGESS
,
16840 IX86_BUILTIN_CMPORDSS
,
16841 IX86_BUILTIN_CMPUNORDSS
,
16843 IX86_BUILTIN_COMIEQSS
,
16844 IX86_BUILTIN_COMILTSS
,
16845 IX86_BUILTIN_COMILESS
,
16846 IX86_BUILTIN_COMIGTSS
,
16847 IX86_BUILTIN_COMIGESS
,
16848 IX86_BUILTIN_COMINEQSS
,
16849 IX86_BUILTIN_UCOMIEQSS
,
16850 IX86_BUILTIN_UCOMILTSS
,
16851 IX86_BUILTIN_UCOMILESS
,
16852 IX86_BUILTIN_UCOMIGTSS
,
16853 IX86_BUILTIN_UCOMIGESS
,
16854 IX86_BUILTIN_UCOMINEQSS
,
16856 IX86_BUILTIN_CVTPI2PS
,
16857 IX86_BUILTIN_CVTPS2PI
,
16858 IX86_BUILTIN_CVTSI2SS
,
16859 IX86_BUILTIN_CVTSI642SS
,
16860 IX86_BUILTIN_CVTSS2SI
,
16861 IX86_BUILTIN_CVTSS2SI64
,
16862 IX86_BUILTIN_CVTTPS2PI
,
16863 IX86_BUILTIN_CVTTSS2SI
,
16864 IX86_BUILTIN_CVTTSS2SI64
,
16866 IX86_BUILTIN_MAXPS
,
16867 IX86_BUILTIN_MAXSS
,
16868 IX86_BUILTIN_MINPS
,
16869 IX86_BUILTIN_MINSS
,
16871 IX86_BUILTIN_LOADUPS
,
16872 IX86_BUILTIN_STOREUPS
,
16873 IX86_BUILTIN_MOVSS
,
16875 IX86_BUILTIN_MOVHLPS
,
16876 IX86_BUILTIN_MOVLHPS
,
16877 IX86_BUILTIN_LOADHPS
,
16878 IX86_BUILTIN_LOADLPS
,
16879 IX86_BUILTIN_STOREHPS
,
16880 IX86_BUILTIN_STORELPS
,
16882 IX86_BUILTIN_MASKMOVQ
,
16883 IX86_BUILTIN_MOVMSKPS
,
16884 IX86_BUILTIN_PMOVMSKB
,
16886 IX86_BUILTIN_MOVNTPS
,
16887 IX86_BUILTIN_MOVNTQ
,
16889 IX86_BUILTIN_LOADDQU
,
16890 IX86_BUILTIN_STOREDQU
,
16892 IX86_BUILTIN_PACKSSWB
,
16893 IX86_BUILTIN_PACKSSDW
,
16894 IX86_BUILTIN_PACKUSWB
,
16896 IX86_BUILTIN_PADDB
,
16897 IX86_BUILTIN_PADDW
,
16898 IX86_BUILTIN_PADDD
,
16899 IX86_BUILTIN_PADDQ
,
16900 IX86_BUILTIN_PADDSB
,
16901 IX86_BUILTIN_PADDSW
,
16902 IX86_BUILTIN_PADDUSB
,
16903 IX86_BUILTIN_PADDUSW
,
16904 IX86_BUILTIN_PSUBB
,
16905 IX86_BUILTIN_PSUBW
,
16906 IX86_BUILTIN_PSUBD
,
16907 IX86_BUILTIN_PSUBQ
,
16908 IX86_BUILTIN_PSUBSB
,
16909 IX86_BUILTIN_PSUBSW
,
16910 IX86_BUILTIN_PSUBUSB
,
16911 IX86_BUILTIN_PSUBUSW
,
16914 IX86_BUILTIN_PANDN
,
16918 IX86_BUILTIN_PAVGB
,
16919 IX86_BUILTIN_PAVGW
,
16921 IX86_BUILTIN_PCMPEQB
,
16922 IX86_BUILTIN_PCMPEQW
,
16923 IX86_BUILTIN_PCMPEQD
,
16924 IX86_BUILTIN_PCMPGTB
,
16925 IX86_BUILTIN_PCMPGTW
,
16926 IX86_BUILTIN_PCMPGTD
,
16928 IX86_BUILTIN_PMADDWD
,
16930 IX86_BUILTIN_PMAXSW
,
16931 IX86_BUILTIN_PMAXUB
,
16932 IX86_BUILTIN_PMINSW
,
16933 IX86_BUILTIN_PMINUB
,
16935 IX86_BUILTIN_PMULHUW
,
16936 IX86_BUILTIN_PMULHW
,
16937 IX86_BUILTIN_PMULLW
,
16939 IX86_BUILTIN_PSADBW
,
16940 IX86_BUILTIN_PSHUFW
,
16942 IX86_BUILTIN_PSLLW
,
16943 IX86_BUILTIN_PSLLD
,
16944 IX86_BUILTIN_PSLLQ
,
16945 IX86_BUILTIN_PSRAW
,
16946 IX86_BUILTIN_PSRAD
,
16947 IX86_BUILTIN_PSRLW
,
16948 IX86_BUILTIN_PSRLD
,
16949 IX86_BUILTIN_PSRLQ
,
16950 IX86_BUILTIN_PSLLWI
,
16951 IX86_BUILTIN_PSLLDI
,
16952 IX86_BUILTIN_PSLLQI
,
16953 IX86_BUILTIN_PSRAWI
,
16954 IX86_BUILTIN_PSRADI
,
16955 IX86_BUILTIN_PSRLWI
,
16956 IX86_BUILTIN_PSRLDI
,
16957 IX86_BUILTIN_PSRLQI
,
16959 IX86_BUILTIN_PUNPCKHBW
,
16960 IX86_BUILTIN_PUNPCKHWD
,
16961 IX86_BUILTIN_PUNPCKHDQ
,
16962 IX86_BUILTIN_PUNPCKLBW
,
16963 IX86_BUILTIN_PUNPCKLWD
,
16964 IX86_BUILTIN_PUNPCKLDQ
,
16966 IX86_BUILTIN_SHUFPS
,
16968 IX86_BUILTIN_RCPPS
,
16969 IX86_BUILTIN_RCPSS
,
16970 IX86_BUILTIN_RSQRTPS
,
16971 IX86_BUILTIN_RSQRTSS
,
16972 IX86_BUILTIN_RSQRTF
,
16973 IX86_BUILTIN_SQRTPS
,
16974 IX86_BUILTIN_SQRTSS
,
16976 IX86_BUILTIN_UNPCKHPS
,
16977 IX86_BUILTIN_UNPCKLPS
,
16979 IX86_BUILTIN_ANDPS
,
16980 IX86_BUILTIN_ANDNPS
,
16982 IX86_BUILTIN_XORPS
,
16985 IX86_BUILTIN_LDMXCSR
,
16986 IX86_BUILTIN_STMXCSR
,
16987 IX86_BUILTIN_SFENCE
,
16989 /* 3DNow! Original */
16990 IX86_BUILTIN_FEMMS
,
16991 IX86_BUILTIN_PAVGUSB
,
16992 IX86_BUILTIN_PF2ID
,
16993 IX86_BUILTIN_PFACC
,
16994 IX86_BUILTIN_PFADD
,
16995 IX86_BUILTIN_PFCMPEQ
,
16996 IX86_BUILTIN_PFCMPGE
,
16997 IX86_BUILTIN_PFCMPGT
,
16998 IX86_BUILTIN_PFMAX
,
16999 IX86_BUILTIN_PFMIN
,
17000 IX86_BUILTIN_PFMUL
,
17001 IX86_BUILTIN_PFRCP
,
17002 IX86_BUILTIN_PFRCPIT1
,
17003 IX86_BUILTIN_PFRCPIT2
,
17004 IX86_BUILTIN_PFRSQIT1
,
17005 IX86_BUILTIN_PFRSQRT
,
17006 IX86_BUILTIN_PFSUB
,
17007 IX86_BUILTIN_PFSUBR
,
17008 IX86_BUILTIN_PI2FD
,
17009 IX86_BUILTIN_PMULHRW
,
17011 /* 3DNow! Athlon Extensions */
17012 IX86_BUILTIN_PF2IW
,
17013 IX86_BUILTIN_PFNACC
,
17014 IX86_BUILTIN_PFPNACC
,
17015 IX86_BUILTIN_PI2FW
,
17016 IX86_BUILTIN_PSWAPDSI
,
17017 IX86_BUILTIN_PSWAPDSF
,
17020 IX86_BUILTIN_ADDPD
,
17021 IX86_BUILTIN_ADDSD
,
17022 IX86_BUILTIN_DIVPD
,
17023 IX86_BUILTIN_DIVSD
,
17024 IX86_BUILTIN_MULPD
,
17025 IX86_BUILTIN_MULSD
,
17026 IX86_BUILTIN_SUBPD
,
17027 IX86_BUILTIN_SUBSD
,
17029 IX86_BUILTIN_CMPEQPD
,
17030 IX86_BUILTIN_CMPLTPD
,
17031 IX86_BUILTIN_CMPLEPD
,
17032 IX86_BUILTIN_CMPGTPD
,
17033 IX86_BUILTIN_CMPGEPD
,
17034 IX86_BUILTIN_CMPNEQPD
,
17035 IX86_BUILTIN_CMPNLTPD
,
17036 IX86_BUILTIN_CMPNLEPD
,
17037 IX86_BUILTIN_CMPNGTPD
,
17038 IX86_BUILTIN_CMPNGEPD
,
17039 IX86_BUILTIN_CMPORDPD
,
17040 IX86_BUILTIN_CMPUNORDPD
,
17041 IX86_BUILTIN_CMPEQSD
,
17042 IX86_BUILTIN_CMPLTSD
,
17043 IX86_BUILTIN_CMPLESD
,
17044 IX86_BUILTIN_CMPNEQSD
,
17045 IX86_BUILTIN_CMPNLTSD
,
17046 IX86_BUILTIN_CMPNLESD
,
17047 IX86_BUILTIN_CMPORDSD
,
17048 IX86_BUILTIN_CMPUNORDSD
,
17050 IX86_BUILTIN_COMIEQSD
,
17051 IX86_BUILTIN_COMILTSD
,
17052 IX86_BUILTIN_COMILESD
,
17053 IX86_BUILTIN_COMIGTSD
,
17054 IX86_BUILTIN_COMIGESD
,
17055 IX86_BUILTIN_COMINEQSD
,
17056 IX86_BUILTIN_UCOMIEQSD
,
17057 IX86_BUILTIN_UCOMILTSD
,
17058 IX86_BUILTIN_UCOMILESD
,
17059 IX86_BUILTIN_UCOMIGTSD
,
17060 IX86_BUILTIN_UCOMIGESD
,
17061 IX86_BUILTIN_UCOMINEQSD
,
17063 IX86_BUILTIN_MAXPD
,
17064 IX86_BUILTIN_MAXSD
,
17065 IX86_BUILTIN_MINPD
,
17066 IX86_BUILTIN_MINSD
,
17068 IX86_BUILTIN_ANDPD
,
17069 IX86_BUILTIN_ANDNPD
,
17071 IX86_BUILTIN_XORPD
,
17073 IX86_BUILTIN_SQRTPD
,
17074 IX86_BUILTIN_SQRTSD
,
17076 IX86_BUILTIN_UNPCKHPD
,
17077 IX86_BUILTIN_UNPCKLPD
,
17079 IX86_BUILTIN_SHUFPD
,
17081 IX86_BUILTIN_LOADUPD
,
17082 IX86_BUILTIN_STOREUPD
,
17083 IX86_BUILTIN_MOVSD
,
17085 IX86_BUILTIN_LOADHPD
,
17086 IX86_BUILTIN_LOADLPD
,
17088 IX86_BUILTIN_CVTDQ2PD
,
17089 IX86_BUILTIN_CVTDQ2PS
,
17091 IX86_BUILTIN_CVTPD2DQ
,
17092 IX86_BUILTIN_CVTPD2PI
,
17093 IX86_BUILTIN_CVTPD2PS
,
17094 IX86_BUILTIN_CVTTPD2DQ
,
17095 IX86_BUILTIN_CVTTPD2PI
,
17097 IX86_BUILTIN_CVTPI2PD
,
17098 IX86_BUILTIN_CVTSI2SD
,
17099 IX86_BUILTIN_CVTSI642SD
,
17101 IX86_BUILTIN_CVTSD2SI
,
17102 IX86_BUILTIN_CVTSD2SI64
,
17103 IX86_BUILTIN_CVTSD2SS
,
17104 IX86_BUILTIN_CVTSS2SD
,
17105 IX86_BUILTIN_CVTTSD2SI
,
17106 IX86_BUILTIN_CVTTSD2SI64
,
17108 IX86_BUILTIN_CVTPS2DQ
,
17109 IX86_BUILTIN_CVTPS2PD
,
17110 IX86_BUILTIN_CVTTPS2DQ
,
17112 IX86_BUILTIN_MOVNTI
,
17113 IX86_BUILTIN_MOVNTPD
,
17114 IX86_BUILTIN_MOVNTDQ
,
17117 IX86_BUILTIN_MASKMOVDQU
,
17118 IX86_BUILTIN_MOVMSKPD
,
17119 IX86_BUILTIN_PMOVMSKB128
,
17121 IX86_BUILTIN_PACKSSWB128
,
17122 IX86_BUILTIN_PACKSSDW128
,
17123 IX86_BUILTIN_PACKUSWB128
,
17125 IX86_BUILTIN_PADDB128
,
17126 IX86_BUILTIN_PADDW128
,
17127 IX86_BUILTIN_PADDD128
,
17128 IX86_BUILTIN_PADDQ128
,
17129 IX86_BUILTIN_PADDSB128
,
17130 IX86_BUILTIN_PADDSW128
,
17131 IX86_BUILTIN_PADDUSB128
,
17132 IX86_BUILTIN_PADDUSW128
,
17133 IX86_BUILTIN_PSUBB128
,
17134 IX86_BUILTIN_PSUBW128
,
17135 IX86_BUILTIN_PSUBD128
,
17136 IX86_BUILTIN_PSUBQ128
,
17137 IX86_BUILTIN_PSUBSB128
,
17138 IX86_BUILTIN_PSUBSW128
,
17139 IX86_BUILTIN_PSUBUSB128
,
17140 IX86_BUILTIN_PSUBUSW128
,
17142 IX86_BUILTIN_PAND128
,
17143 IX86_BUILTIN_PANDN128
,
17144 IX86_BUILTIN_POR128
,
17145 IX86_BUILTIN_PXOR128
,
17147 IX86_BUILTIN_PAVGB128
,
17148 IX86_BUILTIN_PAVGW128
,
17150 IX86_BUILTIN_PCMPEQB128
,
17151 IX86_BUILTIN_PCMPEQW128
,
17152 IX86_BUILTIN_PCMPEQD128
,
17153 IX86_BUILTIN_PCMPGTB128
,
17154 IX86_BUILTIN_PCMPGTW128
,
17155 IX86_BUILTIN_PCMPGTD128
,
17157 IX86_BUILTIN_PMADDWD128
,
17159 IX86_BUILTIN_PMAXSW128
,
17160 IX86_BUILTIN_PMAXUB128
,
17161 IX86_BUILTIN_PMINSW128
,
17162 IX86_BUILTIN_PMINUB128
,
17164 IX86_BUILTIN_PMULUDQ
,
17165 IX86_BUILTIN_PMULUDQ128
,
17166 IX86_BUILTIN_PMULHUW128
,
17167 IX86_BUILTIN_PMULHW128
,
17168 IX86_BUILTIN_PMULLW128
,
17170 IX86_BUILTIN_PSADBW128
,
17171 IX86_BUILTIN_PSHUFHW
,
17172 IX86_BUILTIN_PSHUFLW
,
17173 IX86_BUILTIN_PSHUFD
,
17175 IX86_BUILTIN_PSLLDQI128
,
17176 IX86_BUILTIN_PSLLWI128
,
17177 IX86_BUILTIN_PSLLDI128
,
17178 IX86_BUILTIN_PSLLQI128
,
17179 IX86_BUILTIN_PSRAWI128
,
17180 IX86_BUILTIN_PSRADI128
,
17181 IX86_BUILTIN_PSRLDQI128
,
17182 IX86_BUILTIN_PSRLWI128
,
17183 IX86_BUILTIN_PSRLDI128
,
17184 IX86_BUILTIN_PSRLQI128
,
17186 IX86_BUILTIN_PSLLDQ128
,
17187 IX86_BUILTIN_PSLLW128
,
17188 IX86_BUILTIN_PSLLD128
,
17189 IX86_BUILTIN_PSLLQ128
,
17190 IX86_BUILTIN_PSRAW128
,
17191 IX86_BUILTIN_PSRAD128
,
17192 IX86_BUILTIN_PSRLW128
,
17193 IX86_BUILTIN_PSRLD128
,
17194 IX86_BUILTIN_PSRLQ128
,
17196 IX86_BUILTIN_PUNPCKHBW128
,
17197 IX86_BUILTIN_PUNPCKHWD128
,
17198 IX86_BUILTIN_PUNPCKHDQ128
,
17199 IX86_BUILTIN_PUNPCKHQDQ128
,
17200 IX86_BUILTIN_PUNPCKLBW128
,
17201 IX86_BUILTIN_PUNPCKLWD128
,
17202 IX86_BUILTIN_PUNPCKLDQ128
,
17203 IX86_BUILTIN_PUNPCKLQDQ128
,
17205 IX86_BUILTIN_CLFLUSH
,
17206 IX86_BUILTIN_MFENCE
,
17207 IX86_BUILTIN_LFENCE
,
17209 /* Prescott New Instructions. */
17210 IX86_BUILTIN_ADDSUBPS
,
17211 IX86_BUILTIN_HADDPS
,
17212 IX86_BUILTIN_HSUBPS
,
17213 IX86_BUILTIN_MOVSHDUP
,
17214 IX86_BUILTIN_MOVSLDUP
,
17215 IX86_BUILTIN_ADDSUBPD
,
17216 IX86_BUILTIN_HADDPD
,
17217 IX86_BUILTIN_HSUBPD
,
17218 IX86_BUILTIN_LDDQU
,
17220 IX86_BUILTIN_MONITOR
,
17221 IX86_BUILTIN_MWAIT
,
17224 IX86_BUILTIN_PHADDW
,
17225 IX86_BUILTIN_PHADDD
,
17226 IX86_BUILTIN_PHADDSW
,
17227 IX86_BUILTIN_PHSUBW
,
17228 IX86_BUILTIN_PHSUBD
,
17229 IX86_BUILTIN_PHSUBSW
,
17230 IX86_BUILTIN_PMADDUBSW
,
17231 IX86_BUILTIN_PMULHRSW
,
17232 IX86_BUILTIN_PSHUFB
,
17233 IX86_BUILTIN_PSIGNB
,
17234 IX86_BUILTIN_PSIGNW
,
17235 IX86_BUILTIN_PSIGND
,
17236 IX86_BUILTIN_PALIGNR
,
17237 IX86_BUILTIN_PABSB
,
17238 IX86_BUILTIN_PABSW
,
17239 IX86_BUILTIN_PABSD
,
17241 IX86_BUILTIN_PHADDW128
,
17242 IX86_BUILTIN_PHADDD128
,
17243 IX86_BUILTIN_PHADDSW128
,
17244 IX86_BUILTIN_PHSUBW128
,
17245 IX86_BUILTIN_PHSUBD128
,
17246 IX86_BUILTIN_PHSUBSW128
,
17247 IX86_BUILTIN_PMADDUBSW128
,
17248 IX86_BUILTIN_PMULHRSW128
,
17249 IX86_BUILTIN_PSHUFB128
,
17250 IX86_BUILTIN_PSIGNB128
,
17251 IX86_BUILTIN_PSIGNW128
,
17252 IX86_BUILTIN_PSIGND128
,
17253 IX86_BUILTIN_PALIGNR128
,
17254 IX86_BUILTIN_PABSB128
,
17255 IX86_BUILTIN_PABSW128
,
17256 IX86_BUILTIN_PABSD128
,
17258 /* AMDFAM10 - SSE4A New Instructions. */
17259 IX86_BUILTIN_MOVNTSD
,
17260 IX86_BUILTIN_MOVNTSS
,
17261 IX86_BUILTIN_EXTRQI
,
17262 IX86_BUILTIN_EXTRQ
,
17263 IX86_BUILTIN_INSERTQI
,
17264 IX86_BUILTIN_INSERTQ
,
17267 IX86_BUILTIN_BLENDPD
,
17268 IX86_BUILTIN_BLENDPS
,
17269 IX86_BUILTIN_BLENDVPD
,
17270 IX86_BUILTIN_BLENDVPS
,
17271 IX86_BUILTIN_PBLENDVB128
,
17272 IX86_BUILTIN_PBLENDW128
,
17277 IX86_BUILTIN_INSERTPS128
,
17279 IX86_BUILTIN_MOVNTDQA
,
17280 IX86_BUILTIN_MPSADBW128
,
17281 IX86_BUILTIN_PACKUSDW128
,
17282 IX86_BUILTIN_PCMPEQQ
,
17283 IX86_BUILTIN_PHMINPOSUW128
,
17285 IX86_BUILTIN_PMAXSB128
,
17286 IX86_BUILTIN_PMAXSD128
,
17287 IX86_BUILTIN_PMAXUD128
,
17288 IX86_BUILTIN_PMAXUW128
,
17290 IX86_BUILTIN_PMINSB128
,
17291 IX86_BUILTIN_PMINSD128
,
17292 IX86_BUILTIN_PMINUD128
,
17293 IX86_BUILTIN_PMINUW128
,
17295 IX86_BUILTIN_PMOVSXBW128
,
17296 IX86_BUILTIN_PMOVSXBD128
,
17297 IX86_BUILTIN_PMOVSXBQ128
,
17298 IX86_BUILTIN_PMOVSXWD128
,
17299 IX86_BUILTIN_PMOVSXWQ128
,
17300 IX86_BUILTIN_PMOVSXDQ128
,
17302 IX86_BUILTIN_PMOVZXBW128
,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,
  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,

  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* TFmode support builtins.  */
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* SSE5 instructions */
  IX86_BUILTIN_FMADDSS,
  IX86_BUILTIN_FMADDSD,
  IX86_BUILTIN_FMADDPS,
  IX86_BUILTIN_FMADDPD,
  IX86_BUILTIN_FMSUBSS,
  IX86_BUILTIN_FMSUBSD,
  IX86_BUILTIN_FMSUBPS,
  IX86_BUILTIN_FMSUBPD,
  IX86_BUILTIN_FNMADDSS,
  IX86_BUILTIN_FNMADDSD,
  IX86_BUILTIN_FNMADDPS,
  IX86_BUILTIN_FNMADDPD,
  IX86_BUILTIN_FNMSUBSS,
  IX86_BUILTIN_FNMSUBSD,
  IX86_BUILTIN_FNMSUBPS,
  IX86_BUILTIN_FNMSUBPD,
  IX86_BUILTIN_PCMOV_V2DI,
  IX86_BUILTIN_PCMOV_V4SI,
  IX86_BUILTIN_PCMOV_V8HI,
  IX86_BUILTIN_PCMOV_V16QI,
  IX86_BUILTIN_PCMOV_V4SF,
  IX86_BUILTIN_PCMOV_V2DF,
  IX86_BUILTIN_PPERM,
  IX86_BUILTIN_PERMPS,
  IX86_BUILTIN_PERMPD,
  IX86_BUILTIN_PMACSSWW,
  IX86_BUILTIN_PMACSWW,
  IX86_BUILTIN_PMACSSWD,
  IX86_BUILTIN_PMACSWD,
  IX86_BUILTIN_PMACSSDD,
  IX86_BUILTIN_PMACSDD,
  IX86_BUILTIN_PMACSSDQL,
  IX86_BUILTIN_PMACSSDQH,
  IX86_BUILTIN_PMACSDQL,
  IX86_BUILTIN_PMACSDQH,
  IX86_BUILTIN_PMADCSSWD,
  IX86_BUILTIN_PMADCSWD,
  IX86_BUILTIN_PHADDBW,
  IX86_BUILTIN_PHADDBD,
  IX86_BUILTIN_PHADDBQ,
  IX86_BUILTIN_PHADDWD,
  IX86_BUILTIN_PHADDWQ,
  IX86_BUILTIN_PHADDDQ,
  IX86_BUILTIN_PHADDUBW,
  IX86_BUILTIN_PHADDUBD,
  IX86_BUILTIN_PHADDUBQ,
  IX86_BUILTIN_PHADDUWD,
  IX86_BUILTIN_PHADDUWQ,
  IX86_BUILTIN_PHADDUDQ,
  IX86_BUILTIN_PHSUBBW,
  IX86_BUILTIN_PHSUBWD,
  IX86_BUILTIN_PHSUBDQ,
  IX86_BUILTIN_PROTB,
  IX86_BUILTIN_PROTW,
  IX86_BUILTIN_PROTD,
  IX86_BUILTIN_PROTQ,
  IX86_BUILTIN_PROTB_IMM,
  IX86_BUILTIN_PROTW_IMM,
  IX86_BUILTIN_PROTD_IMM,
  IX86_BUILTIN_PROTQ_IMM,
  IX86_BUILTIN_PSHLB,
  IX86_BUILTIN_PSHLW,
  IX86_BUILTIN_PSHLD,
  IX86_BUILTIN_PSHLQ,
  IX86_BUILTIN_PSHAB,
  IX86_BUILTIN_PSHAW,
  IX86_BUILTIN_PSHAD,
  IX86_BUILTIN_PSHAQ,
  IX86_BUILTIN_FRCZSS,
  IX86_BUILTIN_FRCZSD,
  IX86_BUILTIN_FRCZPS,
  IX86_BUILTIN_FRCZPD,
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPS2PH,

  IX86_BUILTIN_COMEQSS,
  IX86_BUILTIN_COMNESS,
  IX86_BUILTIN_COMLTSS,
  IX86_BUILTIN_COMLESS,
  IX86_BUILTIN_COMGTSS,
  IX86_BUILTIN_COMGESS,
  IX86_BUILTIN_COMUEQSS,
  IX86_BUILTIN_COMUNESS,
  IX86_BUILTIN_COMULTSS,
  IX86_BUILTIN_COMULESS,
  IX86_BUILTIN_COMUGTSS,
  IX86_BUILTIN_COMUGESS,
  IX86_BUILTIN_COMORDSS,
  IX86_BUILTIN_COMUNORDSS,
  IX86_BUILTIN_COMFALSESS,
  IX86_BUILTIN_COMTRUESS,

  IX86_BUILTIN_COMEQSD,
  IX86_BUILTIN_COMNESD,
  IX86_BUILTIN_COMLTSD,
  IX86_BUILTIN_COMLESD,
  IX86_BUILTIN_COMGTSD,
  IX86_BUILTIN_COMGESD,
  IX86_BUILTIN_COMUEQSD,
  IX86_BUILTIN_COMUNESD,
  IX86_BUILTIN_COMULTSD,
  IX86_BUILTIN_COMULESD,
  IX86_BUILTIN_COMUGTSD,
  IX86_BUILTIN_COMUGESD,
  IX86_BUILTIN_COMORDSD,
  IX86_BUILTIN_COMUNORDSD,
  IX86_BUILTIN_COMFALSESD,
  IX86_BUILTIN_COMTRUESD,

  IX86_BUILTIN_COMEQPS,
  IX86_BUILTIN_COMNEPS,
  IX86_BUILTIN_COMLTPS,
  IX86_BUILTIN_COMLEPS,
  IX86_BUILTIN_COMGTPS,
  IX86_BUILTIN_COMGEPS,
  IX86_BUILTIN_COMUEQPS,
  IX86_BUILTIN_COMUNEPS,
  IX86_BUILTIN_COMULTPS,
  IX86_BUILTIN_COMULEPS,
  IX86_BUILTIN_COMUGTPS,
  IX86_BUILTIN_COMUGEPS,
  IX86_BUILTIN_COMORDPS,
  IX86_BUILTIN_COMUNORDPS,
  IX86_BUILTIN_COMFALSEPS,
  IX86_BUILTIN_COMTRUEPS,

  IX86_BUILTIN_COMEQPD,
  IX86_BUILTIN_COMNEPD,
  IX86_BUILTIN_COMLTPD,
  IX86_BUILTIN_COMLEPD,
  IX86_BUILTIN_COMGTPD,
  IX86_BUILTIN_COMGEPD,
  IX86_BUILTIN_COMUEQPD,
  IX86_BUILTIN_COMUNEPD,
  IX86_BUILTIN_COMULTPD,
  IX86_BUILTIN_COMULEPD,
  IX86_BUILTIN_COMUGTPD,
  IX86_BUILTIN_COMUGEPD,
  IX86_BUILTIN_COMORDPD,
  IX86_BUILTIN_COMUNORDPD,
  IX86_BUILTIN_COMFALSEPD,
  IX86_BUILTIN_COMTRUEPD,

  IX86_BUILTIN_PCOMEQUB,
  IX86_BUILTIN_PCOMNEUB,
  IX86_BUILTIN_PCOMLTUB,
  IX86_BUILTIN_PCOMLEUB,
  IX86_BUILTIN_PCOMGTUB,
  IX86_BUILTIN_PCOMGEUB,
  IX86_BUILTIN_PCOMFALSEUB,
  IX86_BUILTIN_PCOMTRUEUB,
  IX86_BUILTIN_PCOMEQUW,
  IX86_BUILTIN_PCOMNEUW,
  IX86_BUILTIN_PCOMLTUW,
  IX86_BUILTIN_PCOMLEUW,
  IX86_BUILTIN_PCOMGTUW,
  IX86_BUILTIN_PCOMGEUW,
  IX86_BUILTIN_PCOMFALSEUW,
  IX86_BUILTIN_PCOMTRUEUW,
  IX86_BUILTIN_PCOMEQUD,
  IX86_BUILTIN_PCOMNEUD,
  IX86_BUILTIN_PCOMLTUD,
  IX86_BUILTIN_PCOMLEUD,
  IX86_BUILTIN_PCOMGTUD,
  IX86_BUILTIN_PCOMGEUD,
  IX86_BUILTIN_PCOMFALSEUD,
  IX86_BUILTIN_PCOMTRUEUD,
  IX86_BUILTIN_PCOMEQUQ,
  IX86_BUILTIN_PCOMNEUQ,
  IX86_BUILTIN_PCOMLTUQ,
  IX86_BUILTIN_PCOMLEUQ,
  IX86_BUILTIN_PCOMGTUQ,
  IX86_BUILTIN_PCOMGEUQ,
  IX86_BUILTIN_PCOMFALSEUQ,
  IX86_BUILTIN_PCOMTRUEUQ,

  IX86_BUILTIN_PCOMEQB,
  IX86_BUILTIN_PCOMNEB,
  IX86_BUILTIN_PCOMLTB,
  IX86_BUILTIN_PCOMLEB,
  IX86_BUILTIN_PCOMGTB,
  IX86_BUILTIN_PCOMGEB,
  IX86_BUILTIN_PCOMFALSEB,
  IX86_BUILTIN_PCOMTRUEB,
  IX86_BUILTIN_PCOMEQW,
  IX86_BUILTIN_PCOMNEW,
  IX86_BUILTIN_PCOMLTW,
  IX86_BUILTIN_PCOMLEW,
  IX86_BUILTIN_PCOMGTW,
  IX86_BUILTIN_PCOMGEW,
  IX86_BUILTIN_PCOMFALSEW,
  IX86_BUILTIN_PCOMTRUEW,
  IX86_BUILTIN_PCOMEQD,
  IX86_BUILTIN_PCOMNED,
  IX86_BUILTIN_PCOMLTD,
  IX86_BUILTIN_PCOMLED,
  IX86_BUILTIN_PCOMGTD,
  IX86_BUILTIN_PCOMGED,
  IX86_BUILTIN_PCOMFALSED,
  IX86_BUILTIN_PCOMTRUED,
  IX86_BUILTIN_PCOMEQQ,
  IX86_BUILTIN_PCOMNEQ,
  IX86_BUILTIN_PCOMLTQ,
  IX86_BUILTIN_PCOMLEQ,
  IX86_BUILTIN_PCOMGTQ,
  IX86_BUILTIN_PCOMGEQ,
  IX86_BUILTIN_PCOMFALSEQ,
  IX86_BUILTIN_PCOMTRUEQ,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so,
   if the target_flags include one of MASK.  Stores the function decl
   in the ix86_builtins array.
   Returns the function decl or NULL_TREE, if the builtin was not added.  */

static inline tree
def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (mask & ix86_isa_flags
      && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
    {
      decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      ix86_builtins[(int) code] = decl;
    }

  return decl;
}
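
/* Illustrative sketch only, not part of the original source: a typical call
   from the builtin-initialization code looks roughly like

     def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_example",
                  ftype, IX86_BUILTIN_EXAMPLE);

   where "ftype" is a previously constructed function type node and
   IX86_BUILTIN_EXAMPLE stands for a hypothetical enumerator.  The decl is
   only registered when the requested ISA bits are currently enabled (and,
   for OPTION_MASK_ISA_64BIT, only when targeting 64-bit).  */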
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name, tree type,
                   enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, type, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  return decl;
}
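
/* Illustrative sketch only, not part of the original source: marking the
   decl TREE_READONLY gives it "const" semantics, so identical calls can be
   combined by the optimizers.  A plausible use, with a hypothetical function
   type node V2DF_FTYPE_V2DF, would be

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd",
                        V2DF_FTYPE_V2DF, IX86_BUILTIN_SQRTPD);  */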
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
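
/* For example (taken from the 2-argument table below), a "greater than"
   packed-float compare is described with the reversed rtx code LT plus
   BUILTIN_DESC_SWAP_OPERANDS, since the hardware only provides the
   less-than form of the predicate directly:

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3,
       "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS,
       LT, BUILTIN_DESC_SWAP_OPERANDS },

   The expander swaps the two operands before emitting the insn.  */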
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_ptest[] =
{
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
};
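
/* Note added for clarity (not in the original source): the rtx codes above
   encode which flag the corresponding intrinsic tests after PTEST -- EQ for
   ptestz (ZF), LTU for ptestc (CF) and GTU for ptestnzc (neither flag set),
   following the usual ix86 mapping of unsigned comparisons onto CF/ZF.  */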
static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_crc32[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
};
/* SSE builtins with 3 arguments; the last argument must be an immediate or xmm0.  */
static const struct builtin_description bdesc_sse_3arg[] =
{
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
};
static const struct builtin_description bdesc_2arg[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
};
static const struct builtin_description bdesc_1arg[] =
{
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },

  /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg.  */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
};
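
/* Illustrative note, not part of the original source: these "fake 1 arg"
   round builtins still take a rounding-control immediate as their second
   argument.  For instance, assuming the SSE4.1 encoding of the immediate,

     __builtin_ia32_roundpd (x, 0x09);

   rounds each element of x toward negative infinity (mode 01b) while
   suppressing precision exceptions (bit 3 set).  */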
enum multi_arg_type
{
  MULTI_ARG_3_PERMPS,
  MULTI_ARG_3_PERMPD,
  MULTI_ARG_2_DI_IMM,
  MULTI_ARG_2_SI_IMM,
  MULTI_ARG_2_HI_IMM,
  MULTI_ARG_2_QI_IMM,
  MULTI_ARG_2_SF_CMP,
  MULTI_ARG_2_DF_CMP,
  MULTI_ARG_2_DI_CMP,
  MULTI_ARG_2_SI_CMP,
  MULTI_ARG_2_HI_CMP,
  MULTI_ARG_2_QI_CMP,
18155 static const struct builtin_description bdesc_multi_arg
[] =
18157 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmaddv4sf4
, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS
, 0, (int)MULTI_ARG_3_SF
},
18158 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmaddv2df4
, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD
, 0, (int)MULTI_ARG_3_DF
},
18159 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmaddv4sf4
, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS
, 0, (int)MULTI_ARG_3_SF
},
18160 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmaddv2df4
, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD
, 0, (int)MULTI_ARG_3_DF
},
18161 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmsubv4sf4
, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS
, 0, (int)MULTI_ARG_3_SF
},
18162 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmsubv2df4
, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD
, 0, (int)MULTI_ARG_3_DF
},
18163 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmsubv4sf4
, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS
, 0, (int)MULTI_ARG_3_SF
},
18164 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmsubv2df4
, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD
, 0, (int)MULTI_ARG_3_DF
},
18165 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmaddv4sf4
, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS
, 0, (int)MULTI_ARG_3_SF
},
18166 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmaddv2df4
, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD
, 0, (int)MULTI_ARG_3_DF
},
18167 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmaddv4sf4
, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS
, 0, (int)MULTI_ARG_3_SF
},
18168 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmaddv2df4
, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD
, 0, (int)MULTI_ARG_3_DF
},
18169 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmsubv4sf4
, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS
, 0, (int)MULTI_ARG_3_SF
},
18170 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmsubv2df4
, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD
, 0, (int)MULTI_ARG_3_DF
},
18171 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmsubv4sf4
, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS
, 0, (int)MULTI_ARG_3_SF
},
18172 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmsubv2df4
, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD
, 0, (int)MULTI_ARG_3_DF
},
18173 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2di
, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI
, 0, (int)MULTI_ARG_3_DI
},
18174 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2di
, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI
, 0, (int)MULTI_ARG_3_DI
},
18175 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v4si
, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI
, 0, (int)MULTI_ARG_3_SI
},
18176 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v8hi
, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI
, 0, (int)MULTI_ARG_3_HI
},
18177 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v16qi
, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI
,0, (int)MULTI_ARG_3_QI
},
18178 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2df
, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF
, 0, (int)MULTI_ARG_3_DF
},
18179 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v4sf
, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF
, 0, (int)MULTI_ARG_3_SF
},
18180 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pperm
, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM
, 0, (int)MULTI_ARG_3_QI
},
18181 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_permv4sf
, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS
, 0, (int)MULTI_ARG_3_PERMPS
},
18182 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_permv2df
, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD
, 0, (int)MULTI_ARG_3_PERMPD
},
18183 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssww
, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW
, 0, (int)MULTI_ARG_3_HI
},
18184 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsww
, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW
, 0, (int)MULTI_ARG_3_HI
},
18185 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsswd
, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD
, 0, (int)MULTI_ARG_3_HI_SI
},
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
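
  /* SSE5 scalar and packed floating-point compares.  In the entries that
     follow, the fifth field is the RTX comparison code used when the builtin
     is expanded; several builtin names map to the same IX86_BUILTIN_* code
     (e.g. "comness" and "comneqss") and are simply alternate spellings of
     the same operation.  */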
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
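
  /* SSE5 vector integer compares: first the signed forms, then the unsigned
     forms (which use the *_uns* patterns together with the LTU/LEU/GTU/GEU
     comparison codes).  */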
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
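
  /* SSE5 degenerate compares that always yield an all-zero or all-one
     result; the COM_*/PCOM_* values below select the constant-false or
     constant-true form for the scalar (_S) and packed (_P) variants.  */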
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub", IX86_BUILTIN_PCOMFALSEUB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw", IX86_BUILTIN_PCOMFALSEUW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud", IX86_BUILTIN_PCOMFALSEUD, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq", IX86_BUILTIN_PCOMFALSEUQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
};
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   portion of the builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description *d;
  size_t i;
  tree ftype;
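
  /* Vector and function type nodes used by the builtin definitions below.
     Each *_ftype_* name encodes the return type followed by the argument
     types, e.g. v4sf_ftype_v4sf_int is "V4SF function (V4SF, int)".  */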
  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node
    = build_pointer_type (build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node
    = build_pointer_type (build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node, V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node
    = build_pointer_type (build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4si_ftype_v2df_v2df
    = build_function_type_list (V4SI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v16qi
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v2df_ftype_v2df_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				V4SF_type_node, NULL_TREE);
  tree v8hi_ftype_v16qi
    = build_function_type_list (V8HI_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_v16qi
    = build_function_type_list (V4SI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi
    = build_function_type_list (V2DI_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi
    = build_function_type_list (V4SI_type_node, V8HI_type_node, NULL_TREE);
  tree v2di_ftype_v8hi
    = build_function_type_list (V2DI_type_node, V8HI_type_node, NULL_TREE);
  tree v2di_ftype_v4si
    = build_function_type_list (V2DI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_pv2di
    = build_function_type_list (V2DI_type_node, pv2di_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_int
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
				V16QI_type_node, integer_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi_int
    = build_function_type_list (V8HI_type_node, V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si_int
    = build_function_type_list (V4SI_type_node, V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v2di_v2di
    = build_function_type_list (integer_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree int_ftype_v16qi_int_v16qi_int_int
    = build_function_type_list (integer_type_node,
				V16QI_type_node, integer_type_node,
				V16QI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_int_v16qi_int_int
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, integer_type_node,
				V16QI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree int_ftype_v16qi_v16qi_int
    = build_function_type_list (integer_type_node,
				V16QI_type_node, V16QI_type_node,
				integer_type_node, NULL_TREE);
  /* SSE5 instructions.  */
  tree v2di_ftype_v2di_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node,
				V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node,
				V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si_v2di
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node,
				V2DI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node,
				V8HI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi_v4si
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node,
				V4SI_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_v16qi
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				V16QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_v16qi
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_si
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_si
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_si
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4hi
    = build_function_type_list (V4SF_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4sf
    = build_function_type_list (V4HI_type_node, V4SF_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
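
  /* From here on the builtins themselves are registered: def_builtin makes a
     builtin available under the given ISA mask, and def_builtin_const does
     the same while also marking the function as having no side effects, so
     calls to it may be CSEd.  */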
  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* The __float80 type.  */
      tree float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
      (*lang_hooks.types.register_builtin_type) (float80_type_node,
						 "__float80");
    }

  if (TARGET_64BIT)
    {
      tree float128_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float128_type_node) = 128;
      layout_type (float128_type_node);
      (*lang_hooks.types.register_builtin_type) (float128_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type (float128_type_node, void_list_node);
      def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);

      ftype = build_function_type_list (float128_type_node,
					float128_type_node, NULL_TREE);
      def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);

      ftype = build_function_type_list (float128_type_node,
					float128_type_node, float128_type_node,
					NULL_TREE);
      def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
    }
  /* Add all SSE builtins that are more or less simple operations on
     three operands.  */
  for (i = 0, d = bdesc_sse_3arg;
       i < ARRAY_SIZE (bdesc_sse_3arg);
       i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi_int;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi_int;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si_int;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di_int;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df_int;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf_int;
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Override for variable blends.  */
      switch (d->icode)
	{
	case CODE_FOR_sse4_1_blendvpd:
	  type = v2df_ftype_v2df_v2df_v2df;
	  break;
	case CODE_FOR_sse4_1_blendvps:
	  type = v4sf_ftype_v4sf_v4sf_v4sf;
	  break;
	case CODE_FOR_sse4_1_pblendvb:
	  type = v16qi_ftype_v16qi_v16qi_v16qi;
	  break;
	default:
	  break;
	}

      def_builtin_const (d->mask, d->name, type, d->code);
    }
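
  /* The next two loops follow the same pattern: the function type for each
     builtin is selected from the mode of one of the insn's operands, with a
     few explicit overrides afterwards.  */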
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	type = v2di_ftype_v2df_v2df;

      if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
	type = v4si_ftype_v2df_v2df;

      def_builtin_const (d->mask, d->name, type, d->code);
    }
  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si;
	  break;
	default:
	  gcc_unreachable ();
	}

      def_builtin_const (d->mask, d->name, type, d->code);
    }
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
      else
	ftype = int_ftype_v16qi_int_v16qi_int_int;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = v16qi_ftype_v16qi_v16qi_int;
      else
	ftype = int_ftype_v16qi_v16qi_int;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == OPTION_MASK_ISA_SSE2)
      def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
    def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  ftype = build_function_type_list (float_type_node,
				    float_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
  /* Original 3DNow!  */
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128
);
19261 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSLLW128
);
19262 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSLLD128
);
19263 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
19265 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
19266 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
19267 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
19268 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
19269 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRLW128
);
19270 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRLD128
);
19271 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
19273 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
19274 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
19275 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRAW128
);
19276 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRAD128
);
19278 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
19280 /* Prescott New Instructions. */
19281 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned
, IX86_BUILTIN_MONITOR
);
19282 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned
, IX86_BUILTIN_MWAIT
);
19283 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_lddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
19286 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
19287 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
, IX86_BUILTIN_PALIGNR
);
19290 def_builtin (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_movntdqa", v2di_ftype_pv2di
, IX86_BUILTIN_MOVNTDQA
);
19291 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVSXBW128
);
19292 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVSXBD128
);
19293 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVSXBQ128
);
19294 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVSXWD128
);
19295 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVSXWQ128
);
19296 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVSXDQ128
);
19297 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVZXBW128
);
19298 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVZXBD128
);
19299 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVZXBQ128
);
19300 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVZXWD128
);
19301 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVZXWQ128
);
19302 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVZXDQ128
);
19303 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULDQ128
);
19305 /* SSE4.1 and SSE5 */
19306 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundpd", v2df_ftype_v2df_int
, IX86_BUILTIN_ROUNDPD
);
19307 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int
, IX86_BUILTIN_ROUNDPS
);
19308 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_ROUNDSD
);
19309 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_ROUNDSS
);
  ftype = build_function_type_list (unsigned_type_node,
                                    unsigned_type_node,
                                    unsigned_char_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
  ftype = build_function_type_list (unsigned_type_node,
                                    unsigned_type_node,
                                    short_unsigned_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
  ftype = build_function_type_list (unsigned_type_node,
                                    unsigned_type_node,
                                    unsigned_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
  ftype = build_function_type_list (long_long_unsigned_type_node,
                                    long_long_unsigned_type_node,
                                    long_long_unsigned_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
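
  /* Added commentary: the four __builtin_ia32_crc32* builtins above expose
     the SSE4.2 crc32 instruction, which accumulates a CRC-32C (Castagnoli)
     checksum over 8-, 16-, 32- or 64-bit data.  As an illustration (a sketch,
     not code from this file), <smmintrin.h> wraps them roughly as

         unsigned int _mm_crc32_u8 (unsigned int crc, unsigned char v)
         {
           return __builtin_ia32_crc32qi (crc, v);
         }

     so the first argument is always the running CRC and the second is the
     new data element.  */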
  /* AMDFAM10 SSE4A New built-ins  */
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
                                    V2DI_type_node, integer_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                    intDI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);

  ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
                                    float_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);

  ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
                                    intSI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);

  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);

  ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
                                    intQI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
  /* Add SSE5 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      tree mtype = NULL_TREE;

      switch ((enum multi_arg_type)d->flag)
        {
        case MULTI_ARG_3_SF:     mtype = v4sf_ftype_v4sf_v4sf_v4sf;     break;
        case MULTI_ARG_3_DF:     mtype = v2df_ftype_v2df_v2df_v2df;     break;
        case MULTI_ARG_3_DI:     mtype = v2di_ftype_v2di_v2di_v2di;     break;
        case MULTI_ARG_3_SI:     mtype = v4si_ftype_v4si_v4si_v4si;     break;
        case MULTI_ARG_3_SI_DI:  mtype = v4si_ftype_v4si_v4si_v2di;     break;
        case MULTI_ARG_3_HI:     mtype = v8hi_ftype_v8hi_v8hi_v8hi;     break;
        case MULTI_ARG_3_HI_SI:  mtype = v8hi_ftype_v8hi_v8hi_v4si;     break;
        case MULTI_ARG_3_QI:     mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
        case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi;    break;
        case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi;    break;
        case MULTI_ARG_2_SF:     mtype = v4sf_ftype_v4sf_v4sf;          break;
        case MULTI_ARG_2_DF:     mtype = v2df_ftype_v2df_v2df;          break;
        case MULTI_ARG_2_DI:     mtype = v2di_ftype_v2di_v2di;          break;
        case MULTI_ARG_2_SI:     mtype = v4si_ftype_v4si_v4si;          break;
        case MULTI_ARG_2_HI:     mtype = v8hi_ftype_v8hi_v8hi;          break;
        case MULTI_ARG_2_QI:     mtype = v16qi_ftype_v16qi_v16qi;       break;
        case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si;            break;
        case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si;            break;
        case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si;            break;
        case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si;          break;
        case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf;          break;
        case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df;          break;
        case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di;          break;
        case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si;          break;
        case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi;          break;
        case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi;       break;
        case MULTI_ARG_2_SF_TF:  mtype = v4sf_ftype_v4sf_v4sf;          break;
        case MULTI_ARG_2_DF_TF:  mtype = v2df_ftype_v2df_v2df;          break;
        case MULTI_ARG_2_DI_TF:  mtype = v2di_ftype_v2di_v2di;          break;
        case MULTI_ARG_2_SI_TF:  mtype = v4si_ftype_v4si_v4si;          break;
        case MULTI_ARG_2_HI_TF:  mtype = v8hi_ftype_v8hi_v8hi;          break;
        case MULTI_ARG_2_QI_TF:  mtype = v16qi_ftype_v16qi_v16qi;       break;
        case MULTI_ARG_1_SF:     mtype = v4sf_ftype_v4sf;               break;
        case MULTI_ARG_1_DF:     mtype = v2df_ftype_v2df;               break;
        case MULTI_ARG_1_DI:     mtype = v2di_ftype_v2di;               break;
        case MULTI_ARG_1_SI:     mtype = v4si_ftype_v4si;               break;
        case MULTI_ARG_1_HI:     mtype = v8hi_ftype_v8hi;               break;
        case MULTI_ARG_1_QI:     mtype = v16qi_ftype_v16qi;             break;
        case MULTI_ARG_1_SI_DI:  mtype = v2di_ftype_v4si;               break;
        case MULTI_ARG_1_HI_DI:  mtype = v2di_ftype_v8hi;               break;
        case MULTI_ARG_1_HI_SI:  mtype = v4si_ftype_v8hi;               break;
        case MULTI_ARG_1_QI_DI:  mtype = v2di_ftype_v16qi;              break;
        case MULTI_ARG_1_QI_SI:  mtype = v4si_ftype_v16qi;              break;
        case MULTI_ARG_1_QI_HI:  mtype = v8hi_ftype_v16qi;              break;
        case MULTI_ARG_1_PH2PS:  mtype = v4sf_ftype_v4hi;               break;
        case MULTI_ARG_1_PS2PH:  mtype = v4hi_ftype_v4sf;               break;
        case MULTI_ARG_UNKNOWN:
        default:
          gcc_unreachable ();
        }

      if (mtype)
        def_builtin_const (d->mask, d->name, mtype, d->code);
    }
}

static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of SSE insns with
   4 operands.  The third argument must be a constant smaller than 8
   bits or xmm0.  */

static rtx
ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
  enum machine_mode mode2 = insn_data[icode].operand[2].mode;
  enum machine_mode mode3 = insn_data[icode].operand[3].mode;

  if (VECTOR_MODE_P (mode1))
    op0 = safe_vector_operand (op0, mode1);
  if (VECTOR_MODE_P (mode2))
    op1 = safe_vector_operand (op1, mode2);
  if (VECTOR_MODE_P (mode3))
    op2 = safe_vector_operand (op2, mode3);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
    op0 = copy_to_mode_reg (mode1, op0);
  if ((optimize && !register_operand (op1, mode2))
      || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
    op1 = copy_to_mode_reg (mode2, op1);

  if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
    switch (icode)
      {
      case CODE_FOR_sse4_1_blendvpd:
      case CODE_FOR_sse4_1_blendvps:
      case CODE_FOR_sse4_1_pblendvb:
        op2 = copy_to_mode_reg (mode3, op2);
        break;

      case CODE_FOR_sse4_1_roundsd:
      case CODE_FOR_sse4_1_roundss:
        error ("the third argument must be a 4-bit immediate");
        return const0_rtx;

      default:
        error ("the third argument must be an 8-bit immediate");
        return const0_rtx;
      }

  pat = GEN_FCN (icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
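
/* Added commentary on the expander above: when the third operand fails its
   predicate, the fixup depends on the instruction.  The SSE4.1
   blendvpd/blendvps/pblendvb patterns accept a register operand, so the
   value is simply copied into a register, while roundsd/roundss (and the
   remaining patterns) insist on an immediate and report an error
   otherwise.  */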

/* Subroutine of ix86_expand_builtin to take care of crc32 insns.  */

static rtx
ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    {
      op1 = copy_to_reg (op1);
      op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
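
/* Added commentary: in the crc32 expander above the QImode and HImode insn
   variants want a data operand narrower than the value produced by
   expand_normal, which is why the operand is first copied to a register and
   then narrowed with simplify_gen_subreg to the mode the pattern expects.  */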

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum multi_arg_type m_type,
                               enum insn_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_PERMPS:
    case MULTI_ARG_3_PERMPD:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
    case MULTI_ARG_1_PH2PS:
    case MULTI_ARG_1_PS2PH:
      nargs = 1;
      break;

    case MULTI_ARG_2_SF_CMP:
    case MULTI_ARG_2_DF_CMP:
    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    case MULTI_ARG_UNKNOWN:
    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
        {
          if (GET_CODE (op) != CONST_INT)
            {
              error ("last argument must be an immediate");
              return gen_reg_rtx (tmode);
            }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op, args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree exp,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
          || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  switch (icode)
    {
    case CODE_FOR_sse4_1_roundpd:
    case CODE_FOR_sse4_1_roundps:
      {
        tree arg1 = CALL_EXPR_ARG (exp, 1);
        rtx op1 = expand_normal (arg1);
        enum machine_mode mode1 = insn_data[icode].operand[2].mode;

        if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
          {
            error ("the second argument must be a 4-bit immediate");
            return const0_rtx;
          }
        pat = GEN_FCN (icode) (target, op0, op1);
      }
      break;
    default:
      pat = GEN_FCN (icode) (target, op0);
      break;
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
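
/* Added commentary: in ix86_expand_unop_builtin, DO_LOAD distinguishes
   builtins whose single argument is a pointer (the unaligned-load builtins,
   expanded with a nonzero last argument) from ordinary unary operations on
   vector values; in the former case the pointer is wrapped in a MEM, in the
   latter the value itself is forced to satisfy the operand predicate.  */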

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
                         rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
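
/* Added commentary: as an example of the check above, a V4SF argument has
   TYPE_VECTOR_SUBPARTS equal to 4, so the selector passed to
   __builtin_ia32_vec_ext_v4sf must be a constant in the range 0..3;
   anything else triggers the "selector must be an integer constant"
   error.  */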

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */
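
/* Added illustration (a sketch, not code from this file): mmintrin.h maps
   something like _mm_set_pi32 (i1, i0) onto
   __builtin_ia32_vec_init_v2si (i0, i1), whereas for SSE vector types the
   (type){ x, y, ... } initializer syntax is handled directly by the
   vec_init patterns without going through any builtin.  */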

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
20433 /* Expand an expression EXP that calls a built-in function,
20434 with result going to TARGET if that's convenient
20435 (and in mode MODE if that's convenient).
20436 SUBTARGET may be used as the target for computing one of EXP's operands.
20437 IGNORE is nonzero if the value is to be ignored. */
20440 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
20441 enum machine_mode mode ATTRIBUTE_UNUSED
,
20442 int ignore ATTRIBUTE_UNUSED
)
20444 const struct builtin_description
*d
;
20446 enum insn_code icode
;
20447 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
20448 tree arg0
, arg1
, arg2
, arg3
;
20449 rtx op0
, op1
, op2
, op3
, pat
;
20450 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
20451 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
20455 case IX86_BUILTIN_EMMS
:
20456 emit_insn (gen_mmx_emms ());
20459 case IX86_BUILTIN_SFENCE
:
20460 emit_insn (gen_sse_sfence ());
20463 case IX86_BUILTIN_MASKMOVQ
:
20464 case IX86_BUILTIN_MASKMOVDQU
:
20465 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
20466 ? CODE_FOR_mmx_maskmovq
20467 : CODE_FOR_sse2_maskmovdqu
);
20468 /* Note the arg order is different from the operand order. */
20469 arg1
= CALL_EXPR_ARG (exp
, 0);
20470 arg2
= CALL_EXPR_ARG (exp
, 1);
20471 arg0
= CALL_EXPR_ARG (exp
, 2);
20472 op0
= expand_normal (arg0
);
20473 op1
= expand_normal (arg1
);
20474 op2
= expand_normal (arg2
);
20475 mode0
= insn_data
[icode
].operand
[0].mode
;
20476 mode1
= insn_data
[icode
].operand
[1].mode
;
20477 mode2
= insn_data
[icode
].operand
[2].mode
;
20479 op0
= force_reg (Pmode
, op0
);
20480 op0
= gen_rtx_MEM (mode1
, op0
);
20482 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
20483 op0
= copy_to_mode_reg (mode0
, op0
);
20484 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
20485 op1
= copy_to_mode_reg (mode1
, op1
);
20486 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
20487 op2
= copy_to_mode_reg (mode2
, op2
);
20488 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
20494 case IX86_BUILTIN_RSQRTF
:
20495 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2
, exp
, target
);
20497 case IX86_BUILTIN_SQRTSS
:
20498 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
20499 case IX86_BUILTIN_RSQRTSS
:
20500 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
20501 case IX86_BUILTIN_RCPSS
:
20502 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
20504 case IX86_BUILTIN_LOADUPS
:
20505 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
20507 case IX86_BUILTIN_STOREUPS
:
20508 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
20510 case IX86_BUILTIN_LOADHPS
:
20511 case IX86_BUILTIN_LOADLPS
:
20512 case IX86_BUILTIN_LOADHPD
:
20513 case IX86_BUILTIN_LOADLPD
:
20514 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
20515 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
20516 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
20517 : CODE_FOR_sse2_loadlpd
);
20518 arg0
= CALL_EXPR_ARG (exp
, 0);
20519 arg1
= CALL_EXPR_ARG (exp
, 1);
20520 op0
= expand_normal (arg0
);
20521 op1
= expand_normal (arg1
);
20522 tmode
= insn_data
[icode
].operand
[0].mode
;
20523 mode0
= insn_data
[icode
].operand
[1].mode
;
20524 mode1
= insn_data
[icode
].operand
[2].mode
;
20526 op0
= force_reg (mode0
, op0
);
20527 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
20528 if (optimize
|| target
== 0
20529 || GET_MODE (target
) != tmode
20530 || !register_operand (target
, tmode
))
20531 target
= gen_reg_rtx (tmode
);
20532 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20538 case IX86_BUILTIN_STOREHPS
:
20539 case IX86_BUILTIN_STORELPS
:
20540 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
20541 : CODE_FOR_sse_storelps
);
20542 arg0
= CALL_EXPR_ARG (exp
, 0);
20543 arg1
= CALL_EXPR_ARG (exp
, 1);
20544 op0
= expand_normal (arg0
);
20545 op1
= expand_normal (arg1
);
20546 mode0
= insn_data
[icode
].operand
[0].mode
;
20547 mode1
= insn_data
[icode
].operand
[1].mode
;
20549 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
20550 op1
= force_reg (mode1
, op1
);
20552 pat
= GEN_FCN (icode
) (op0
, op1
);
20558 case IX86_BUILTIN_MOVNTPS
:
20559 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
20560 case IX86_BUILTIN_MOVNTQ
:
20561 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
20563 case IX86_BUILTIN_LDMXCSR
:
20564 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
20565 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
20566 emit_move_insn (target
, op0
);
20567 emit_insn (gen_sse_ldmxcsr (target
));
20570 case IX86_BUILTIN_STMXCSR
:
20571 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
20572 emit_insn (gen_sse_stmxcsr (target
));
20573 return copy_to_mode_reg (SImode
, target
);
20575 case IX86_BUILTIN_SHUFPS
:
20576 case IX86_BUILTIN_SHUFPD
:
20577 icode
= (fcode
== IX86_BUILTIN_SHUFPS
20578 ? CODE_FOR_sse_shufps
20579 : CODE_FOR_sse2_shufpd
);
20580 arg0
= CALL_EXPR_ARG (exp
, 0);
20581 arg1
= CALL_EXPR_ARG (exp
, 1);
20582 arg2
= CALL_EXPR_ARG (exp
, 2);
20583 op0
= expand_normal (arg0
);
20584 op1
= expand_normal (arg1
);
20585 op2
= expand_normal (arg2
);
20586 tmode
= insn_data
[icode
].operand
[0].mode
;
20587 mode0
= insn_data
[icode
].operand
[1].mode
;
20588 mode1
= insn_data
[icode
].operand
[2].mode
;
20589 mode2
= insn_data
[icode
].operand
[3].mode
;
20591 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20592 op0
= copy_to_mode_reg (mode0
, op0
);
20593 if ((optimize
&& !register_operand (op1
, mode1
))
20594 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20595 op1
= copy_to_mode_reg (mode1
, op1
);
20596 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
20598 /* @@@ better error message */
20599 error ("mask must be an immediate");
20600 return gen_reg_rtx (tmode
);
20602 if (optimize
|| target
== 0
20603 || GET_MODE (target
) != tmode
20604 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20605 target
= gen_reg_rtx (tmode
);
20606 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
20612 case IX86_BUILTIN_PSHUFW
:
20613 case IX86_BUILTIN_PSHUFD
:
20614 case IX86_BUILTIN_PSHUFHW
:
20615 case IX86_BUILTIN_PSHUFLW
:
20616 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
20617 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
20618 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
20619 : CODE_FOR_mmx_pshufw
);
20620 arg0
= CALL_EXPR_ARG (exp
, 0);
20621 arg1
= CALL_EXPR_ARG (exp
, 1);
20622 op0
= expand_normal (arg0
);
20623 op1
= expand_normal (arg1
);
20624 tmode
= insn_data
[icode
].operand
[0].mode
;
20625 mode1
= insn_data
[icode
].operand
[1].mode
;
20626 mode2
= insn_data
[icode
].operand
[2].mode
;
20628 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20629 op0
= copy_to_mode_reg (mode1
, op0
);
20630 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
20632 /* @@@ better error message */
20633 error ("mask must be an immediate");
20637 || GET_MODE (target
) != tmode
20638 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20639 target
= gen_reg_rtx (tmode
);
20640 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20646 case IX86_BUILTIN_PSLLWI128
:
20647 icode
= CODE_FOR_ashlv8hi3
;
20649 case IX86_BUILTIN_PSLLDI128
:
20650 icode
= CODE_FOR_ashlv4si3
;
20652 case IX86_BUILTIN_PSLLQI128
:
20653 icode
= CODE_FOR_ashlv2di3
;
20655 case IX86_BUILTIN_PSRAWI128
:
20656 icode
= CODE_FOR_ashrv8hi3
;
20658 case IX86_BUILTIN_PSRADI128
:
20659 icode
= CODE_FOR_ashrv4si3
;
20661 case IX86_BUILTIN_PSRLWI128
:
20662 icode
= CODE_FOR_lshrv8hi3
;
20664 case IX86_BUILTIN_PSRLDI128
:
20665 icode
= CODE_FOR_lshrv4si3
;
20667 case IX86_BUILTIN_PSRLQI128
:
20668 icode
= CODE_FOR_lshrv2di3
;
20671 arg0
= CALL_EXPR_ARG (exp
, 0);
20672 arg1
= CALL_EXPR_ARG (exp
, 1);
20673 op0
= expand_normal (arg0
);
20674 op1
= expand_normal (arg1
);
20676 if (!CONST_INT_P (op1
))
20678 error ("shift must be an immediate");
20681 if (INTVAL (op1
) < 0 || INTVAL (op1
) > 255)
20682 op1
= GEN_INT (255);
20684 tmode
= insn_data
[icode
].operand
[0].mode
;
20685 mode1
= insn_data
[icode
].operand
[1].mode
;
20686 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20687 op0
= copy_to_reg (op0
);
20689 target
= gen_reg_rtx (tmode
);
20690 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20696 case IX86_BUILTIN_PSLLW128
:
20697 icode
= CODE_FOR_ashlv8hi3
;
20699 case IX86_BUILTIN_PSLLD128
:
20700 icode
= CODE_FOR_ashlv4si3
;
20702 case IX86_BUILTIN_PSLLQ128
:
20703 icode
= CODE_FOR_ashlv2di3
;
20705 case IX86_BUILTIN_PSRAW128
:
20706 icode
= CODE_FOR_ashrv8hi3
;
20708 case IX86_BUILTIN_PSRAD128
:
20709 icode
= CODE_FOR_ashrv4si3
;
20711 case IX86_BUILTIN_PSRLW128
:
20712 icode
= CODE_FOR_lshrv8hi3
;
20714 case IX86_BUILTIN_PSRLD128
:
20715 icode
= CODE_FOR_lshrv4si3
;
20717 case IX86_BUILTIN_PSRLQ128
:
20718 icode
= CODE_FOR_lshrv2di3
;
20721 arg0
= CALL_EXPR_ARG (exp
, 0);
20722 arg1
= CALL_EXPR_ARG (exp
, 1);
20723 op0
= expand_normal (arg0
);
20724 op1
= expand_normal (arg1
);
20726 tmode
= insn_data
[icode
].operand
[0].mode
;
20727 mode1
= insn_data
[icode
].operand
[1].mode
;
20729 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20730 op0
= copy_to_reg (op0
);
20732 op1
= simplify_gen_subreg (SImode
, op1
, GET_MODE (op1
), 0);
20733 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, SImode
))
20734 op1
= copy_to_reg (op1
);
20736 target
= gen_reg_rtx (tmode
);
20737 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20743 case IX86_BUILTIN_PSLLDQI128
:
20744 case IX86_BUILTIN_PSRLDQI128
:
20745 icode
= (fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
20746 : CODE_FOR_sse2_lshrti3
);
20747 arg0
= CALL_EXPR_ARG (exp
, 0);
20748 arg1
= CALL_EXPR_ARG (exp
, 1);
20749 op0
= expand_normal (arg0
);
20750 op1
= expand_normal (arg1
);
20751 tmode
= insn_data
[icode
].operand
[0].mode
;
20752 mode1
= insn_data
[icode
].operand
[1].mode
;
20753 mode2
= insn_data
[icode
].operand
[2].mode
;
20755 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20757 op0
= copy_to_reg (op0
);
20758 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
20760 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
20762 error ("shift must be an immediate");
20765 target
= gen_reg_rtx (V2DImode
);
20766 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0),
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return 0;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
	emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
				       target, 1);

    case IX86_BUILTIN_PALIGNR:
    case IX86_BUILTIN_PALIGNR128:
      if (fcode == IX86_BUILTIN_PALIGNR)
	{
	  icode = CODE_FOR_ssse3_palignrdi;
	  mode = DImode;
	}
      else
	{
	  icode = CODE_FOR_ssse3_palignrti;
	  mode = V2DImode;
	}

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  op1 = copy_to_reg (op1);
	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	error ("shift must be an immediate");
      target = gen_reg_rtx (mode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
			     op0, op1, op2);
    case IX86_BUILTIN_MOVNTDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
				       target, 1);

    case IX86_BUILTIN_MOVNTSD:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);

    case IX86_BUILTIN_MOVNTSS:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);

    case IX86_BUILTIN_INSERTQ:
    case IX86_BUILTIN_EXTRQ:
      icode = (fcode == IX86_BUILTIN_EXTRQ
	       ? CODE_FOR_sse4a_extrq
	       : CODE_FOR_sse4a_insertq);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	op1 = copy_to_mode_reg (mode2, op1);
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
    case IX86_BUILTIN_EXTRQI:
      icode = CODE_FOR_sse4a_extrqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);

      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("index mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("length mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
    case IX86_BUILTIN_INSERTQI:
      icode = CODE_FOR_sse4a_insertqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);

      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	op1 = copy_to_mode_reg (mode2, op1);

      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("index mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
	{
	  error ("length mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_INFQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IX86_BUILTIN_FABSQ:
      return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);

    case IX86_BUILTIN_COPYSIGNQ:
      return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
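      /* The three cases above back the TFmode (__float128) helpers: INFQ
	 materializes an infinity through the constant pool, while FABSQ and
	 COPYSIGNQ simply map onto the abstf2 and copysigntf3 patterns.  */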
  for (i = 0, d = bdesc_sse_3arg;
       i < ARRAY_SIZE (bdesc_sse_3arg);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_4_operands_builtin (d->icode, exp,
						 target);

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, exp, target);

	return ix86_expand_binop_builtin (d->icode, exp, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, exp, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_ptest (d, exp, target);

  for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
    if (d->code == fcode)
      return ix86_expand_crc32 (d->icode, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
					    (enum multi_arg_type)d->flag,
					    d->comparison);

  gcc_unreachable ();
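  /* The bdesc_* tables scanned above pair each IX86_BUILTIN_* code with the
     insn pattern that implements it, so any builtin without an explicit case
     in the switch gets expanded generically here; falling off the end means a
     builtin was registered but never wired up, hence the gcc_unreachable.  */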
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return ix86_builtins[IX86_BUILTIN_SQRTPD];
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return ix86_builtins[IX86_BUILTIN_SQRTPS];
      break;

    case BUILT_IN_LRINT:
      if (out_mode == SImode && out_n == 4
	  && in_mode == DFmode && in_n == 2)
	return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
      break;

    case BUILT_IN_LRINTF:
      if (out_mode == SImode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return (*ix86_veclib_handler)(fn, type_out, type_in);

  return NULL_TREE;
}
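/* For example, a loop over doubles calling sqrt that the vectorizer widens to
   two DFmode lanes (out_mode == DFmode, out_n == 2) is handed the SQRTPD
   builtin above, and the analogous float loop gets SQRTPS.  Anything not
   matched here is deferred to the -mveclibabi handler, if one was selected.  */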
/* Handler for an ACML-style interface to a library with vectorized
   intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
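/* The generated name follows the ACML vector-libm convention: the ".." in
   "__vr.._" is filled in with the element kind and lane count, and the tail
   comes from the scalar builtin's name, so a 4-lane float sinf presumably
   resolves to "__vrs4_sinf" and a 2-lane double log2 to "__vrd2_log2".  */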
/* Returns a decl of a function that implements conversion of the
   input vector of type TYPE, or NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_conversion (unsigned int code, tree type)
{
  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL_TREE;

  switch (code)
    {
    case FLOAT_EXPR:
      switch (TYPE_MODE (type))
	{
	case V4SImode:
	  return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
	default:
	  return NULL_TREE;
	}

    case FIX_TRUNC_EXPR:
      switch (TYPE_MODE (type))
	{
	case V4SFmode:
	  return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
	default:
	  return NULL_TREE;
	}

    default:
      return NULL_TREE;
    }
}

/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
21400 /* Store OPERAND to the memory after reload is completed. This means
21401 that we can't easily use assign_stack_local. */
21403 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
21407 gcc_assert (reload_completed
);
21408 if (TARGET_RED_ZONE
)
21410 result
= gen_rtx_MEM (mode
,
21411 gen_rtx_PLUS (Pmode
,
21413 GEN_INT (-RED_ZONE_SIZE
)));
21414 emit_move_insn (result
, operand
);
21416 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
21422 operand
= gen_lowpart (DImode
, operand
);
21426 gen_rtx_SET (VOIDmode
,
21427 gen_rtx_MEM (DImode
,
21428 gen_rtx_PRE_DEC (DImode
,
21429 stack_pointer_rtx
)),
21433 gcc_unreachable ();
21435 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
21444 split_di (&operand
, 1, operands
, operands
+ 1);
21446 gen_rtx_SET (VOIDmode
,
21447 gen_rtx_MEM (SImode
,
21448 gen_rtx_PRE_DEC (Pmode
,
21449 stack_pointer_rtx
)),
21452 gen_rtx_SET (VOIDmode
,
21453 gen_rtx_MEM (SImode
,
21454 gen_rtx_PRE_DEC (Pmode
,
21455 stack_pointer_rtx
)),
21460 /* Store HImodes as SImodes. */
21461 operand
= gen_lowpart (SImode
, operand
);
21465 gen_rtx_SET (VOIDmode
,
21466 gen_rtx_MEM (GET_MODE (operand
),
21467 gen_rtx_PRE_DEC (SImode
,
21468 stack_pointer_rtx
)),
21472 gcc_unreachable ();
21474 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
21479 /* Free operand from the memory. */
21481 ix86_free_from_memory (enum machine_mode mode
)
21483 if (!TARGET_RED_ZONE
)
21487 if (mode
== DImode
|| TARGET_64BIT
)
21491 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21492 to pop or add instruction if registers are available. */
21493 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21494 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */

enum reg_class
ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline int
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
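/* For instance, copying a value between an SSE register and a general
   register on a target where inter-unit moves are disabled, or where the mode
   is wider than a word, is reported as needing secondary memory, so reload
   spills it through a stack slot instead of attempting a direct
   register-to-register move.  */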
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
21700 /* Return the cost of moving data of mode M between a
21701 register and memory. A value of 2 is the default; this cost is
21702 relative to those in `REGISTER_MOVE_COST'.
21704 This function is used extensively by register_move_cost that is used to
21705 build tables at startup. Make it inline in this case.
21706 When IN is 2, return maximum of in and out move cost.
21708 If moving between registers and memory is more expensive than
21709 between two registers, you should define this macro to express the
21712 Model also increased moving costs of QImode registers in non
21716 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
21720 if (FLOAT_CLASS_P (regclass
))
21738 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
21739 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
21741 if (SSE_CLASS_P (regclass
))
21744 switch (GET_MODE_SIZE (mode
))
21759 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
21760 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
21762 if (MMX_CLASS_P (regclass
))
21765 switch (GET_MODE_SIZE (mode
))
21777 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
21778 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
21780 switch (GET_MODE_SIZE (mode
))
21783 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
21786 return ix86_cost
->int_store
[0];
21787 if (TARGET_PARTIAL_REG_DEPENDENCY
&& !optimize_size
)
21788 cost
= ix86_cost
->movzbl_load
;
21790 cost
= ix86_cost
->int_load
[0];
21792 return MAX (cost
, ix86_cost
->int_store
[0]);
21798 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
21800 return ix86_cost
->movzbl_load
;
21802 return ix86_cost
->int_store
[0] + 4;
21807 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
21808 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
21810 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21811 if (mode
== TFmode
)
21814 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
21816 cost
= ix86_cost
->int_load
[2];
21818 cost
= ix86_cost
->int_store
[2];
21819 return (cost
* (((int) GET_MODE_SIZE (mode
)
21820 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
21825 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
, int in
)
21827 return inline_memory_move_cost (mode
, regclass
, in
);
21831 /* Return the cost of moving data from a register in class CLASS1 to
21832 one in class CLASS2.
21834 It is not required that the cost always equal 2 when FROM is the same as TO;
21835 on some machines it is expensive to move between registers if they are not
21836 general registers. */
21839 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
21840 enum reg_class class2
)
21842 /* In case we require secondary memory, compute cost of the store followed
21843 by load. In order to avoid bad register allocation choices, we need
21844 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21846 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
21850 cost
+= inline_memory_move_cost (mode
, class1
, 2);
21851 cost
+= inline_memory_move_cost (mode
, class2
, 2);
21853 /* In case of copying from general_purpose_register we may emit multiple
21854 stores followed by single load causing memory size mismatch stall.
21855 Count this as arbitrarily high cost of 20. */
21856 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
21859 /* In the case of FP/MMX moves, the registers actually overlap, and we
21860 have to switch modes in order to treat them differently. */
21861 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
21862 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
21868 /* Moves between SSE/MMX and integer unit are expensive. */
21869 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
21870 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
21872 /* ??? By keeping returned value relatively high, we limit the number
21873 of moves between integer and MMX/SSE registers for all targets.
21874 Additionally, high value prevents problem with x86_modes_tieable_p(),
21875 where integer modes in MMX/SSE registers are not tieable
21876 because of missing QImode and HImode moves to, from or between
21877 MMX/SSE registers. */
21878 return MAX (ix86_cost
->mmxsse_to_integer
, 8);
21880 if (MAYBE_FLOAT_CLASS_P (class1
))
21881 return ix86_cost
->fp_move
;
21882 if (MAYBE_SSE_CLASS_P (class1
))
21883 return ix86_cost
->sse_move
;
21884 if (MAYBE_MMX_CLASS_P (class1
))
21885 return ix86_cost
->mmx_move
;
21889 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
21892 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
21894 /* Flags and only flags can only hold CCmode values. */
21895 if (CC_REGNO_P (regno
))
21896 return GET_MODE_CLASS (mode
) == MODE_CC
;
21897 if (GET_MODE_CLASS (mode
) == MODE_CC
21898 || GET_MODE_CLASS (mode
) == MODE_RANDOM
21899 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
21901 if (FP_REGNO_P (regno
))
21902 return VALID_FP_MODE_P (mode
);
21903 if (SSE_REGNO_P (regno
))
21905 /* We implement the move patterns for all vector modes into and
21906 out of SSE registers, even when no operation instructions
21908 return (VALID_SSE_REG_MODE (mode
)
21909 || VALID_SSE2_REG_MODE (mode
)
21910 || VALID_MMX_REG_MODE (mode
)
21911 || VALID_MMX_REG_MODE_3DNOW (mode
));
21913 if (MMX_REGNO_P (regno
))
21915 /* We implement the move patterns for 3DNOW modes even in MMX mode,
21916 so if the register is available at all, then we can move data of
21917 the given mode into or out of it. */
21918 return (VALID_MMX_REG_MODE (mode
)
21919 || VALID_MMX_REG_MODE_3DNOW (mode
));
21922 if (mode
== QImode
)
21924 /* Take care for QImode values - they can be in non-QI regs,
21925 but then they do cause partial register stalls. */
21926 if (regno
< 4 || TARGET_64BIT
)
21928 if (!TARGET_PARTIAL_REG_STALL
)
21930 return reload_in_progress
|| reload_completed
;
21932 /* We handle both integer and floats in the general purpose registers. */
21933 else if (VALID_INT_MODE_P (mode
))
21935 else if (VALID_FP_MODE_P (mode
))
21937 else if (VALID_DFP_MODE_P (mode
))
21939 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
21940 on to use that value in smaller contexts, this can easily force a
21941 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
21942 supporting DImode, allow it. */
21943 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
21949 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21950 tieable integer mode. */
21953 ix86_tieable_integer_mode_p (enum machine_mode mode
)
21962 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
21965 return TARGET_64BIT
;
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
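/* Concretely: SImode ties with HImode when partial register stalls are not a
   concern, SFmode ties with anything that can hold DFmode or XFmode, and two
   16-byte vector modes such as V4SFmode and V2DImode tie because both live in
   SSE registers, while an 8-byte MMX mode does not tie with a 16-byte SSE
   mode.  */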
22014 /* Compute a (partial) cost for rtx X. Return true if the complete
22015 cost has been computed, and false if subexpressions should be
22016 scanned. In either case, *TOTAL contains the cost result. */
22019 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int *total
)
22021 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
22022 enum machine_mode mode
= GET_MODE (x
);
22030 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
22032 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
22034 else if (flag_pic
&& SYMBOLIC_CONST (x
)
22036 || (!GET_CODE (x
) != LABEL_REF
22037 && (GET_CODE (x
) != SYMBOL_REF
22038 || !SYMBOL_REF_LOCAL_P (x
)))))
22045 if (mode
== VOIDmode
)
22048 switch (standard_80387_constant_p (x
))
22053 default: /* Other constants */
22058 /* Start with (MEM (SYMBOL_REF)), since that's where
22059 it'll probably end up. Add a penalty for size. */
22060 *total
= (COSTS_N_INSNS (1)
22061 + (flag_pic
!= 0 && !TARGET_64BIT
)
22062 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
22068 /* The zero extensions is often completely free on x86_64, so make
22069 it as cheap as possible. */
22070 if (TARGET_64BIT
&& mode
== DImode
22071 && GET_MODE (XEXP (x
, 0)) == SImode
)
22073 else if (TARGET_ZERO_EXTEND_WITH_AND
)
22074 *total
= ix86_cost
->add
;
22076 *total
= ix86_cost
->movzx
;
22080 *total
= ix86_cost
->movsx
;
22084 if (CONST_INT_P (XEXP (x
, 1))
22085 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
22087 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
22090 *total
= ix86_cost
->add
;
22093 if ((value
== 2 || value
== 3)
22094 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
22096 *total
= ix86_cost
->lea
;
22106 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
22108 if (CONST_INT_P (XEXP (x
, 1)))
22110 if (INTVAL (XEXP (x
, 1)) > 32)
22111 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
22113 *total
= ix86_cost
->shift_const
* 2;
22117 if (GET_CODE (XEXP (x
, 1)) == AND
)
22118 *total
= ix86_cost
->shift_var
* 2;
22120 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
22125 if (CONST_INT_P (XEXP (x
, 1)))
22126 *total
= ix86_cost
->shift_const
;
22128 *total
= ix86_cost
->shift_var
;
22133 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22135 /* ??? SSE scalar cost should be used here. */
22136 *total
= ix86_cost
->fmul
;
22139 else if (X87_FLOAT_MODE_P (mode
))
22141 *total
= ix86_cost
->fmul
;
22144 else if (FLOAT_MODE_P (mode
))
22146 /* ??? SSE vector cost should be used here. */
22147 *total
= ix86_cost
->fmul
;
22152 rtx op0
= XEXP (x
, 0);
22153 rtx op1
= XEXP (x
, 1);
22155 if (CONST_INT_P (XEXP (x
, 1)))
22157 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
22158 for (nbits
= 0; value
!= 0; value
&= value
- 1)
22162 /* This is arbitrary. */
22165 /* Compute costs correctly for widening multiplication. */
22166 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
22167 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
22168 == GET_MODE_SIZE (mode
))
22170 int is_mulwiden
= 0;
22171 enum machine_mode inner_mode
= GET_MODE (op0
);
22173 if (GET_CODE (op0
) == GET_CODE (op1
))
22174 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
22175 else if (CONST_INT_P (op1
))
22177 if (GET_CODE (op0
) == SIGN_EXTEND
)
22178 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
22181 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
22185 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
22188 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
22189 + nbits
* ix86_cost
->mult_bit
22190 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
22199 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22200 /* ??? SSE cost should be used here. */
22201 *total
= ix86_cost
->fdiv
;
22202 else if (X87_FLOAT_MODE_P (mode
))
22203 *total
= ix86_cost
->fdiv
;
22204 else if (FLOAT_MODE_P (mode
))
22205 /* ??? SSE vector cost should be used here. */
22206 *total
= ix86_cost
->fdiv
;
22208 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
22212 if (GET_MODE_CLASS (mode
) == MODE_INT
22213 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
22215 if (GET_CODE (XEXP (x
, 0)) == PLUS
22216 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
22217 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
22218 && CONSTANT_P (XEXP (x
, 1)))
22220 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
22221 if (val
== 2 || val
== 4 || val
== 8)
22223 *total
= ix86_cost
->lea
;
22224 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
22225 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
22227 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
22231 else if (GET_CODE (XEXP (x
, 0)) == MULT
22232 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
22234 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
22235 if (val
== 2 || val
== 4 || val
== 8)
22237 *total
= ix86_cost
->lea
;
22238 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
22239 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
22243 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
22245 *total
= ix86_cost
->lea
;
22246 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
22247 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
22248 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
22255 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22257 /* ??? SSE cost should be used here. */
22258 *total
= ix86_cost
->fadd
;
22261 else if (X87_FLOAT_MODE_P (mode
))
22263 *total
= ix86_cost
->fadd
;
22266 else if (FLOAT_MODE_P (mode
))
22268 /* ??? SSE vector cost should be used here. */
22269 *total
= ix86_cost
->fadd
;
22277 if (!TARGET_64BIT
&& mode
== DImode
)
22279 *total
= (ix86_cost
->add
* 2
22280 + (rtx_cost (XEXP (x
, 0), outer_code
)
22281 << (GET_MODE (XEXP (x
, 0)) != DImode
))
22282 + (rtx_cost (XEXP (x
, 1), outer_code
)
22283 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
22289 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22291 /* ??? SSE cost should be used here. */
22292 *total
= ix86_cost
->fchs
;
22295 else if (X87_FLOAT_MODE_P (mode
))
22297 *total
= ix86_cost
->fchs
;
22300 else if (FLOAT_MODE_P (mode
))
22302 /* ??? SSE vector cost should be used here. */
22303 *total
= ix86_cost
->fchs
;
22309 if (!TARGET_64BIT
&& mode
== DImode
)
22310 *total
= ix86_cost
->add
* 2;
22312 *total
= ix86_cost
->add
;
22316 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
22317 && XEXP (XEXP (x
, 0), 1) == const1_rtx
22318 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
22319 && XEXP (x
, 1) == const0_rtx
)
22321 /* This kind of construct is implemented using test[bwl].
22322 Treat it as if we had an AND. */
22323 *total
= (ix86_cost
->add
22324 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
22325 + rtx_cost (const1_rtx
, outer_code
));
22331 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
22336 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22337 /* ??? SSE cost should be used here. */
22338 *total
= ix86_cost
->fabs
;
22339 else if (X87_FLOAT_MODE_P (mode
))
22340 *total
= ix86_cost
->fabs
;
22341 else if (FLOAT_MODE_P (mode
))
22342 /* ??? SSE vector cost should be used here. */
22343 *total
= ix86_cost
->fabs
;
22347 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22348 /* ??? SSE cost should be used here. */
22349 *total
= ix86_cost
->fsqrt
;
22350 else if (X87_FLOAT_MODE_P (mode
))
22351 *total
= ix86_cost
->fsqrt
;
22352 else if (FLOAT_MODE_P (mode
))
22353 /* ??? SSE vector cost should be used here. */
22354 *total
= ix86_cost
->fsqrt
;
22358 if (XINT (x
, 1) == UNSPEC_TP
)
22369 static int current_machopic_label_num
;
22371 /* Given a symbol name and its associated stub, write out the
22372 definition of the stub. */
22375 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
22377 unsigned int length
;
22378 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
22379 int label
= ++current_machopic_label_num
;
22381 /* For 64-bit we shouldn't get here. */
22382 gcc_assert (!TARGET_64BIT
);
22384 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22385 symb
= (*targetm
.strip_name_encoding
) (symb
);
22387 length
= strlen (stub
);
22388 binder_name
= alloca (length
+ 32);
22389 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
22391 length
= strlen (symb
);
22392 symbol_name
= alloca (length
+ 32);
22393 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
22395 sprintf (lazy_ptr_name
, "L%d$lz", label
);
22398 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
22400 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
22402 fprintf (file
, "%s:\n", stub
);
22403 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
22407 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
22408 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
22409 fprintf (file
, "\tjmp\t*%%edx\n");
22412 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
22414 fprintf (file
, "%s:\n", binder_name
);
22418 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
22419 fprintf (file
, "\tpushl\t%%eax\n");
22422 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
22424 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
22426 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
22427 fprintf (file
, "%s:\n", lazy_ptr_name
);
22428 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
22429 fprintf (file
, "\t.long %s\n", binder_name
);
22433 darwin_x86_file_end (void)
22435 darwin_file_end ();
22438 #endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
22484 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22485 struct attribute_spec.handler. */
22487 ix86_handle_struct_attribute (tree
*node
, tree name
,
22488 tree args ATTRIBUTE_UNUSED
,
22489 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
22492 if (DECL_P (*node
))
22494 if (TREE_CODE (*node
) == TYPE_DECL
)
22495 type
= &TREE_TYPE (*node
);
22500 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
22501 || TREE_CODE (*type
) == UNION_TYPE
)))
22503 warning (OPT_Wattributes
, "%qs attribute ignored",
22504 IDENTIFIER_POINTER (name
));
22505 *no_add_attrs
= true;
22508 else if ((is_attribute_p ("ms_struct", name
)
22509 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
22510 || ((is_attribute_p ("gcc_struct", name
)
22511 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
22513 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
22514 IDENTIFIER_POINTER (name
));
22515 *no_add_attrs
= true;
22522 ix86_ms_bitfield_layout_p (const_tree record_type
)
22524 return (TARGET_MS_BITFIELD_LAYOUT
&&
22525 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
22526 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
22529 /* Returns an expression indicating where the this parameter is
22530 located on entry to the FUNCTION. */
22533 x86_this_parameter (tree function
)
22535 tree type
= TREE_TYPE (function
);
22536 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
22540 const int *parm_regs
;
22542 if (TARGET_64BIT_MS_ABI
)
22543 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
22545 parm_regs
= x86_64_int_parameter_registers
;
22546 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
22549 if (ix86_function_regparm (type
, function
) > 0 && !stdarg_p (type
))
22552 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
22554 return gen_rtx_REG (SImode
, regno
);
22557 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
22560 /* Determine whether x86_output_mi_thunk can succeed. */
22563 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
22564 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
22565 HOST_WIDE_INT vcall_offset
, const_tree function
)
22567 /* 64-bit can handle anything. */
22571 /* For 32-bit, everything's fine if we have one free register. */
22572 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
22575 /* Need a free register for vcall_offset. */
22579 /* Need a free register for GOT references. */
22580 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
22583 /* Otherwise ok. */
22587 /* Output the assembler code for a thunk function. THUNK_DECL is the
22588 declaration for the thunk function itself, FUNCTION is the decl for
22589 the target function. DELTA is an immediate constant offset to be
22590 added to THIS. If VCALL_OFFSET is nonzero, the word at
22591 *(*this + vcall_offset) should be added to THIS. */
22594 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
22595 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
22596 HOST_WIDE_INT vcall_offset
, tree function
)
22599 rtx this_param
= x86_this_parameter (function
);
22602 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22603 pull it in now and let DELTA benefit. */
22604 if (REG_P (this_param
))
22605 this_reg
= this_param
;
22606 else if (vcall_offset
)
22608 /* Put the this parameter into %eax. */
22609 xops
[0] = this_param
;
22610 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
22611 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
22614 this_reg
= NULL_RTX
;
22616 /* Adjust the this parameter by a fixed constant. */
22619 xops
[0] = GEN_INT (delta
);
22620 xops
[1] = this_reg
? this_reg
: this_param
;
22623 if (!x86_64_general_operand (xops
[0], DImode
))
22625 tmp
= gen_rtx_REG (DImode
, R10_REG
);
22627 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
22629 xops
[1] = this_param
;
22631 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
22634 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
22637 /* Adjust the this parameter by a value stored in the vtable. */
22641 tmp
= gen_rtx_REG (DImode
, R10_REG
);
22644 int tmp_regno
= 2 /* ECX */;
22645 if (lookup_attribute ("fastcall",
22646 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
22647 tmp_regno
= 0 /* EAX */;
22648 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
22651 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
22654 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
22656 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
22658 /* Adjust the this parameter. */
22659 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
22660 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
22662 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
22663 xops
[0] = GEN_INT (vcall_offset
);
22665 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
22666 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
22668 xops
[1] = this_reg
;
22670 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
22672 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
22675 /* If necessary, drop THIS back to its stack slot. */
22676 if (this_reg
&& this_reg
!= this_param
)
22678 xops
[0] = this_reg
;
22679 xops
[1] = this_param
;
22680 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
22683 xops
[0] = XEXP (DECL_RTL (function
), 0);
22686 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
22687 output_asm_insn ("jmp\t%P0", xops
);
22688 /* All thunks should be in the same object as their target,
22689 and thus binds_local_p should be true. */
22690 else if (TARGET_64BIT_MS_ABI
)
22691 gcc_unreachable ();
22694 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
22695 tmp
= gen_rtx_CONST (Pmode
, tmp
);
22696 tmp
= gen_rtx_MEM (QImode
, tmp
);
22698 output_asm_insn ("jmp\t%A0", xops
);
22703 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
22704 output_asm_insn ("jmp\t%P0", xops
);
22709 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
22710 tmp
= (gen_rtx_SYMBOL_REF
22712 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
22713 tmp
= gen_rtx_MEM (QImode
, tmp
);
22715 output_asm_insn ("jmp\t%0", xops
);
22718 #endif /* TARGET_MACHO */
22720 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
22721 output_set_got (tmp
, NULL_RTX
);
22724 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
22725 output_asm_insn ("jmp\t{*}%1", xops
);
22731 x86_file_start (void)
22733 default_file_start ();
22735 darwin_file_start ();
22737 if (X86_FILE_START_VERSION_DIRECTIVE
)
22738 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
22739 if (X86_FILE_START_FLTUSED
)
22740 fputs ("\t.global\t__fltused\n", asm_out_file
);
22741 if (ix86_asm_dialect
== ASM_INTEL
)
22742 fputs ("\t.intel_syntax\n", asm_out_file
);
22746 x86_field_alignment (tree field
, int computed
)
22748 enum machine_mode mode
;
22749 tree type
= TREE_TYPE (field
);
22751 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
22753 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
22754 ? get_inner_array_type (type
) : type
);
22755 if (mode
== DFmode
|| mode
== DCmode
22756 || GET_MODE_CLASS (mode
) == MODE_INT
22757 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
22758 return MIN (32, computed
);
22762 /* Output assembler code to FILE to increment profiler label # LABELNO
22763 for profiling a function entry. */
22765 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
22769 #ifndef NO_PROFILE_COUNTERS
22770 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
22773 if (!TARGET_64BIT_MS_ABI
&& flag_pic
)
22774 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
22776 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
22780 #ifndef NO_PROFILE_COUNTERS
22781 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22782 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
22784 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
22788 #ifndef NO_PROFILE_COUNTERS
22789 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
22790 PROFILE_COUNT_REGISTER
);
22792 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
22796 /* We don't have exact information about the insn sizes, but we may assume
22797 quite safely that we are informed about all 1 byte insns and memory
22798 address sizes. This is enough to eliminate unnecessary padding in
22802 min_insn_size (rtx insn
)
22806 if (!INSN_P (insn
) || !active_insn_p (insn
))
22809 /* Discard alignments we've emit and jump instructions. */
22810 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
22811 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
22814 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
22815 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
22818 /* Important case - calls are always 5 bytes.
22819 It is common to have many calls in the row. */
22821 && symbolic_reference_mentioned_p (PATTERN (insn
))
22822 && !SIBLING_CALL_P (insn
))
22824 if (get_attr_length (insn
) <= 1)
22827 /* For normal instructions we may rely on the sizes of addresses
22828 and the presence of symbol to require 4 bytes of encoding.
22829 This is not the case for jumps where references are PC relative. */
22830 if (!JUMP_P (insn
))
22832 l
= get_attr_length_address (insn
);
22833 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      nbytes += min_insn_size (insn);
      if (dump_file)
	fprintf(dump_file, "Insn %i estimated to %i bytes\n",
		INSN_UID (insn), min_insn_size (insn));
      if (JUMP_P (insn)
	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
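/* Worked example: suppose INSN is the fourth jump seen and the three
   preceding jumps plus the instructions between them total nbytes == 12 with
   min_insn_size (insn) == 2.  All four jumps could then land in one 16-byte
   fetch window, so an alignment of padsize = 15 - 12 + 2 = 5 bytes is emitted
   before INSN to keep it out of that window.  */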
22906 /* AMD Athlon works faster
22907 when RET is not destination of conditional jump or directly preceded
22908 by other jump instruction. We avoid the penalty by inserting NOP just
22909 before the RET instructions in such cases. */
22911 ix86_pad_returns (void)
22916 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
22918 basic_block bb
= e
->src
;
22919 rtx ret
= BB_END (bb
);
22921 bool replace
= false;
22923 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
22924 || !maybe_hot_bb_p (bb
))
22926 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
22927 if (active_insn_p (prev
) || LABEL_P (prev
))
22929 if (prev
&& LABEL_P (prev
))
22934 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
22935 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
22936 && !(e
->flags
& EDGE_FALLTHRU
))
22941 prev
= prev_active_insn (ret
);
22943 && ((JUMP_P (prev
) && any_condjump_p (prev
))
22946 /* Empty functions get branch mispredict even when the jump destination
22947 is not visible to us. */
22948 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
22953 emit_insn_before (gen_return_internal_long (), ret
);
22959 /* Implement machine specific optimizations. We implement padding of returns
22960 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
22964 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
22965 ix86_pad_returns ();
22966 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
22967 ix86_avoid_jump_misspredicts ();
22970 /* Return nonzero when QImode register that must be represented via REX prefix
22973 x86_extended_QIreg_mentioned_p (rtx insn
)
22976 extract_insn_cached (insn
);
22977 for (i
= 0; i
< recog_data
.n_operands
; i
++)
22978 if (REG_P (recog_data
.operand
[i
])
22979 && REGNO (recog_data
.operand
[i
]) >= 4)
22984 /* Return nonzero when P points to register encoded via REX prefix.
22985 Called via for_each_rtx. */
22987 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
22989 unsigned int regno
;
22992 regno
= REGNO (*p
);
22993 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
22996 /* Return true when INSN mentions register that must be encoded using REX
22999 x86_extended_reg_mentioned_p (rtx insn
)
23001 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
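/* The negative path above relies on the identity generic optabs use for
   unsigned-to-float conversion.  A rough C sketch of what is emitted
   (illustrative only, shown for a hypothetical uint64_t input):

	if ((int64_t) u >= 0)
	  result = (double) (int64_t) u;
	else
	  {
	    uint64_t half = (u >> 1) | (u & 1);   /* halve, keep sticky bit */
	    result = (double) (int64_t) half;     /* now representable      */
	    result = result + result;            /* scale back up          */
	  }

   OR-ing the low bit back in keeps the rounding of the final doubling
   correct when the shifted-out bit would otherwise be lost.  */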
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	}
	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V8HImode vector.  */
	  tmp1 = gen_reg_rtx (V8HImode);
	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
	  /* Duplicate the low short through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
	  /* Cast the V8HImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23134 /* Extend QImode to SImode using a paradoxical SUBREG. */
23135 tmp1
= gen_reg_rtx (SImode
);
23136 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
23137 /* Insert the SImode value as low element of V4SImode vector. */
23138 tmp2
= gen_reg_rtx (V4SImode
);
23139 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
23140 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
23141 CONST0_RTX (V4SImode
),
23143 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
23144 /* Cast the V4SImode vector back to a V16QImode vector. */
23145 tmp1
= gen_reg_rtx (V16QImode
);
23146 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
23147 /* Duplicate the low byte through the whole low SImode word. */
23148 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
23149 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
23150 /* Cast the V16QImode vector back to a V4SImode vector. */
23151 tmp2
= gen_reg_rtx (V4SImode
);
23152 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
23153 /* Replicate the low element of the V4SImode vector. */
23154 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
23155 /* Cast the V2SImode back to V16QImode, and store in target. */
23156 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
23182 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23183 whose ONE_VAR element is VAR, and other elements are zero. Return true
23187 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
23188 rtx target
, rtx var
, int one_var
)
23190 enum machine_mode vsimode
;
23206 var
= force_reg (GET_MODE_INNER (mode
), var
);
23207 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
23208 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
23213 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
23214 new_target
= gen_reg_rtx (mode
);
23216 new_target
= target
;
23217 var
= force_reg (GET_MODE_INNER (mode
), var
);
23218 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
23219 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
23220 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
23223 /* We need to shuffle the value to the correct position, so
23224 create a new pseudo to store the intermediate result. */
23226 /* With SSE2, we can use the integer shuffle insns. */
23227 if (mode
!= V4SFmode
&& TARGET_SSE2
)
23229 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
23231 GEN_INT (one_var
== 1 ? 0 : 1),
23232 GEN_INT (one_var
== 2 ? 0 : 1),
23233 GEN_INT (one_var
== 3 ? 0 : 1)));
23234 if (target
!= new_target
)
23235 emit_move_insn (target
, new_target
);
23239 /* Otherwise convert the intermediate result to V4SFmode and
23240 use the SSE1 shuffle instructions. */
23241 if (mode
!= V4SFmode
)
23243 tmp
= gen_reg_rtx (V4SFmode
);
23244 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
23249 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
23251 GEN_INT (one_var
== 1 ? 0 : 1),
23252 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
23253 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
23255 if (mode
!= V4SFmode
)
23256 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
23257 else if (tmp
!= target
)
23258 emit_move_insn (target
, tmp
);
23260 else if (target
!= new_target
)
23261 emit_move_insn (target
, new_target
);
23266 vsimode
= V4SImode
;
23272 vsimode
= V2SImode
;
23278 /* Zero extend the variable element to SImode and recurse. */
23279 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
23281 x
= gen_reg_rtx (vsimode
);
23282 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
23284 gcc_unreachable ();
23286 emit_move_insn (target
, gen_lowpart (mode
, x
));
23294 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23295 consisting of the values in VALS. It is known that all elements
23296 except ONE_VAR are constants. Return true if successful. */
23299 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
23300 rtx target
, rtx vals
, int one_var
)
23302 rtx var
= XVECEXP (vals
, 0, one_var
);
23303 enum machine_mode wmode
;
23306 const_vec
= copy_rtx (vals
);
23307 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
23308 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
23316 /* For the two element vectors, it's just as easy to use
23317 the general case. */
23333 /* There's no way to set one QImode entry easily. Combine
23334 the variable value with its adjacent constant value, and
23335 promote to an HImode set. */
23336 x
= XVECEXP (vals
, 0, one_var
^ 1);
23339 var
= convert_modes (HImode
, QImode
, var
, true);
23340 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
23341 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23342 x
= GEN_INT (INTVAL (x
) & 0xff);
23346 var
= convert_modes (HImode
, QImode
, var
, true);
23347 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
23349 if (x
!= const0_rtx
)
23350 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
23351 1, OPTAB_LIB_WIDEN
);
23353 x
= gen_reg_rtx (wmode
);
23354 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
23355 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
23357 emit_move_insn (target
, gen_lowpart (mode
, x
));
23364 emit_move_insn (target
, const_vec
);
23365 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
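/* Worked example of the QImode pairing above (illustrative only): setting
   element 5 of a V16QImode vector with variable VAR combines VAR with the
   constant in element 4, so the pair is written as one HImode element
   (index 5 >> 1 == 2).  Element 5 is the odd (high) byte of that
   half-word, so VAR is shifted left by 8 and the adjacent constant
   supplies the low byte:

	half = ((unsigned short) var << 8) | (adjacent_const & 0xff);

   and element 2 of the V8HImode view of the constant-pool copy is then
   overwritten with HALF via ix86_expand_vector_set.  */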
23369 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23370 all values variable, and none identical. */
23373 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
23374 rtx target
, rtx vals
)
23376 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
23377 rtx op0
= NULL
, op1
= NULL
;
23378 bool use_vec_concat
= false;
23384 if (!mmx_ok
&& !TARGET_SSE
)
23390 /* For the two element vectors, we always implement VEC_CONCAT. */
23391 op0
= XVECEXP (vals
, 0, 0);
23392 op1
= XVECEXP (vals
, 0, 1);
23393 use_vec_concat
= true;
23397 half_mode
= V2SFmode
;
23400 half_mode
= V2SImode
;
23406 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23407 Recurse to load the two halves. */
23409 op0
= gen_reg_rtx (half_mode
);
23410 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
23411 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
23413 op1
= gen_reg_rtx (half_mode
);
23414 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
23415 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
23417 use_vec_concat
= true;
23428 gcc_unreachable ();
23431 if (use_vec_concat
)
23433 if (!register_operand (op0
, half_mode
))
23434 op0
= force_reg (half_mode
, op0
);
23435 if (!register_operand (op1
, half_mode
))
23436 op1
= force_reg (half_mode
, op1
);
23438 emit_insn (gen_rtx_SET (VOIDmode
, target
,
23439 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
23443 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
23444 enum machine_mode inner_mode
;
23445 rtx words
[4], shift
;
23447 inner_mode
= GET_MODE_INNER (mode
);
23448 n_elts
= GET_MODE_NUNITS (mode
);
23449 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
23450 n_elt_per_word
= n_elts
/ n_words
;
23451 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
23453 for (i
= 0; i
< n_words
; ++i
)
23455 rtx word
= NULL_RTX
;
23457 for (j
= 0; j
< n_elt_per_word
; ++j
)
23459 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
23460 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
23466 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
23467 word
, 1, OPTAB_LIB_WIDEN
);
23468 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
23469 word
, 1, OPTAB_LIB_WIDEN
);
23477 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
23478 else if (n_words
== 2)
23480 rtx tmp
= gen_reg_rtx (mode
);
23481 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
23482 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
23483 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
23484 emit_move_insn (target
, tmp
);
23486 else if (n_words
== 4)
23488 rtx tmp
= gen_reg_rtx (V4SImode
);
23489 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
23490 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
23491 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
23494 gcc_unreachable ();
23498 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23499 instructions unless MMX_OK is true. */
23502 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
23504 enum machine_mode mode
= GET_MODE (target
);
23505 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
23506 int n_elts
= GET_MODE_NUNITS (mode
);
23507 int n_var
= 0, one_var
= -1;
23508 bool all_same
= true, all_const_zero
= true;
23512 for (i
= 0; i
< n_elts
; ++i
)
23514 x
= XVECEXP (vals
, 0, i
);
23515 if (!CONSTANT_P (x
))
23516 n_var
++, one_var
= i
;
23517 else if (x
!= CONST0_RTX (inner_mode
))
23518 all_const_zero
= false;
23519 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
23523 /* Constants are best loaded from the constant pool. */
23526 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
23530 /* If all values are identical, broadcast the value. */
23532 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
23533 XVECEXP (vals
, 0, 0)))
23536 /* Values where only one field is non-constant are best loaded from
23537 the pool and overwritten via move later. */
23541 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
23542 XVECEXP (vals
, 0, one_var
),
23546 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
23550 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
23554 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
23556 enum machine_mode mode
= GET_MODE (target
);
23557 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
23558 bool use_vec_merge
= false;
23567 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
23568 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
23570 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
23572 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
23573 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23579 use_vec_merge
= TARGET_SSE4_1
;
23587 /* For the two element vectors, we implement a VEC_CONCAT with
23588 the extraction of the other element. */
23590 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
23591 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
23594 op0
= val
, op1
= tmp
;
23596 op0
= tmp
, op1
= val
;
23598 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
23599 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23604 use_vec_merge
= TARGET_SSE4_1
;
23611 use_vec_merge
= true;
23615 /* tmp = target = A B C D */
23616 tmp
= copy_to_reg (target
);
23617 /* target = A A B B */
23618 emit_insn (gen_sse_unpcklps (target
, target
, target
));
23619 /* target = X A B B */
23620 ix86_expand_vector_set (false, target
, val
, 0);
23621 /* target = A X C D */
23622 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
23623 GEN_INT (1), GEN_INT (0),
23624 GEN_INT (2+4), GEN_INT (3+4)));
23628 /* tmp = target = A B C D */
23629 tmp
= copy_to_reg (target
);
23630 /* tmp = X B C D */
23631 ix86_expand_vector_set (false, tmp
, val
, 0);
23632 /* target = A B X D */
23633 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
23634 GEN_INT (0), GEN_INT (1),
23635 GEN_INT (0+4), GEN_INT (3+4)));
23639 /* tmp = target = A B C D */
23640 tmp
= copy_to_reg (target
);
23641 /* tmp = X B C D */
23642 ix86_expand_vector_set (false, tmp
, val
, 0);
23643 /* target = A B X D */
23644 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
23645 GEN_INT (0), GEN_INT (1),
23646 GEN_INT (2+4), GEN_INT (0+4)));
23650 gcc_unreachable ();
23655 use_vec_merge
= TARGET_SSE4_1
;
23659 /* Element 0 handled by vec_merge below. */
23662 use_vec_merge
= true;
23668 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23669 store into element 0, then shuffle them back. */
23673 order
[0] = GEN_INT (elt
);
23674 order
[1] = const1_rtx
;
23675 order
[2] = const2_rtx
;
23676 order
[3] = GEN_INT (3);
23677 order
[elt
] = const0_rtx
;
23679 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
23680 order
[1], order
[2], order
[3]));
23682 ix86_expand_vector_set (false, target
, val
, 0);
23684 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
23685 order
[1], order
[2], order
[3]));
23689 /* For SSE1, we have to reuse the V4SF code. */
23690 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
23691 gen_lowpart (SFmode
, val
), elt
);
23696 use_vec_merge
= TARGET_SSE2
;
23699 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
23703 use_vec_merge
= TARGET_SSE4_1
;
23713 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
23714 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
23715 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23719 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
23721 emit_move_insn (mem
, target
);
23723 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
23724 emit_move_insn (tmp
, val
);
23726 emit_move_insn (target
, mem
);
23731 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
23733 enum machine_mode mode
= GET_MODE (vec
);
23734 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
23735 bool use_vec_extr
= false;
23748 use_vec_extr
= true;
23752 use_vec_extr
= TARGET_SSE4_1
;
23764 tmp
= gen_reg_rtx (mode
);
23765 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
23766 GEN_INT (elt
), GEN_INT (elt
),
23767 GEN_INT (elt
+4), GEN_INT (elt
+4)));
23771 tmp
= gen_reg_rtx (mode
);
23772 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
23776 gcc_unreachable ();
23779 use_vec_extr
= true;
23784 use_vec_extr
= TARGET_SSE4_1
;
23798 tmp
= gen_reg_rtx (mode
);
23799 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
23800 GEN_INT (elt
), GEN_INT (elt
),
23801 GEN_INT (elt
), GEN_INT (elt
)));
23805 tmp
= gen_reg_rtx (mode
);
23806 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
23810 gcc_unreachable ();
23813 use_vec_extr
= true;
23818 /* For SSE1, we have to reuse the V4SF code. */
23819 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
23820 gen_lowpart (V4SFmode
, vec
), elt
);
23826 use_vec_extr
= TARGET_SSE2
;
23829 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
23833 use_vec_extr
= TARGET_SSE4_1
;
23837 /* ??? Could extract the appropriate HImode element and shift. */
23844 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
23845 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
23847 /* Let the rtl optimizers know about the zero extension performed. */
23848 if (inner_mode
== QImode
|| inner_mode
== HImode
)
23850 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
23851 target
= gen_lowpart (SImode
, target
);
23854 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23858 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
23860 emit_move_insn (mem
, vec
);
23862 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
23863 emit_move_insn (target
, tmp
);
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  else if (mode == TFmode)
    return TARGET_64BIT;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
23915 /* Target hook for c_mode_for_suffix. */
23916 static enum machine_mode
23917 ix86_c_mode_for_suffix (char suffix
)
23919 if (TARGET_64BIT
&& suffix
== 'q')
23921 if (TARGET_MMX
&& suffix
== 'w')
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  This
   is not used by netware.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */
enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%z1\t%y1";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";

      /* There is no non-popping store to memory for XFmode.
	 So if we need one, follow the store with a load.  */
      if (GET_MODE (operands[0]) == XFmode)
	return "fstp%z0\t%y0\n\tfld%z0\t%y0";

      return "fst%z0\t%y0";
    }
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
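/* On the fallback (non-SAHF) path above, the FPU status word fetched by
   fnstsw sits in a 16-bit register with the condition bits in its high
   byte: C0 is bit 8, C1 bit 9, C2 bit 10 and C3 bit 14 of the full word.
   Testing the high byte against 0x04 therefore isolates C2, which
   fcom/fucom set for unordered operands.  Illustrative assembly only
   (the real insns come from the named patterns used above):

	fnstsw	%ax
	testb	$0x04, %ah
	jne	label  */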
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
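/* The constant compared against |op1| above is 1 - sqrt(2)/2
   ~= 0.2928932188134524...  fyl2xp1 computes y * log2(x + 1) but is only
   specified for |x| below that bound, so small arguments use it directly
   (better accuracy near zero) and larger ones fall back to fyl2x on
   1 + op1.  In both branches the y operand is loaded with ln(2) (fldln2),
   giving log1p (x) = ln(2) * log2 (1 + x).  */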
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1, two;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);

  if (VECTOR_MODE_P (mode))
    two = ix86_build_const_vector (SFmode, true, two);

  two = force_reg (mode, two);

  /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */

  /* x0 = 1./b estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));
  /* e1 = 2. - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MINUS (mode, two, e0)));
  /* x1 = x0 * e1 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MULT (mode, x0, e1)));
  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
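/* A sketch of the algebra behind the sequence above (illustrative only).
   If the hardware estimate carries a relative error e, i.e.
   x0 = (1 - e) / b, then one Newton-Raphson step for the reciprocal is

	x1 = x0 * (2 - b * x0) = (1 - e*e) / b,

   so the roughly 12-bit accurate rcpss/rcpps estimate is refined to about
   twice that precision, and the quotient is then formed as a * x1 instead
   of paying for a full-latency divss/divps.  */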
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, three, half, zero, mask;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);

  mask = gen_reg_rtx (mode);

  if (VECTOR_MODE_P (mode))
    {
      three = ix86_build_const_vector (SFmode, true, three);
      half = ix86_build_const_vector (SFmode, true, half);
    }

  three = force_reg (mode, three);
  half = force_reg (mode, half);

  zero = force_reg (mode, CONST0_RTX (mode));

  /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
     1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */

  /* Compare a to zero.  */
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_NE (mode, a, zero)));

  /* x0 = 1./sqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));
  /* Filter out infinity.  */
  if (VECTOR_MODE_P (mode))
    emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
			    gen_rtx_AND (V4SFmode,
					 gen_lowpart (V4SFmode, x0),
					 gen_lowpart (V4SFmode, mask))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, x0,
			    gen_rtx_AND (mode, x0, mask)));

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));
  /* e2 = 3. - e1 */
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_MINUS (mode, three, e1)));
  if (recip)
    /* e3 = .5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, half, x0)));
  else
    /* e3 = .5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, half, e0)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}
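/* A sketch of the algebra behind the sequence above (illustrative only).
   With x0 = rsqrt(a) ~= 1/sqrt(a), one Newton-Raphson step for the
   reciprocal square root is

	x1 = 0.5 * x0 * (3 - a * x0 * x0),

   which is exactly e2 * e3 as computed above: e3 is 0.5*x0 in the
   reciprocal case and 0.5*(a*x0) in the square-root case, using
   sqrt(a) = a * rsqrt(a).  The a != 0 mask applied to x0 forces the
   estimate to zero when a is zero, so sqrt(0) does not evaluate
   0 * inf = NaN from the rsqrt estimate.  */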
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void ATTRIBUTE_UNUSED
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);

  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
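/* The two SETs above implement the usual bitwise copysign identity
   (illustrative only):

	result = abs_value | (sign & SIGNBIT_MASK);

   ABS_VALUE is assumed to already have a clear sign bit, so OR-ing in the
   isolated sign bit of SIGN transfers the sign without touching the
   magnitude.  When MASK is supplied by a preceding fabs expansion it is
   the complement of the sign-bit mask, hence the NOT before the AND.  */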
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
				    gen_rtx_fmt_ee (code, mode, op0, op1)));
  else
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
				   gen_rtx_fmt_ee (code, mode, op0, op1)));

  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
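/* The constant built here enables the classic add-and-subtract rounding
   trick used by the expanders below.  A rough sketch for DFmode
   (illustrative only): for any |x| < 2**52,

	fp_result = (x + 2**52) - 2**52

   rounds x to an integer in the current (round-to-nearest) mode, because
   x + 2**52 has no fractional bits left in a 53-bit significand.  E.g.
   3.7 + 2**52 rounds to 4503599627370500.0 and subtracting 2**52 gives
   4.0.  Values with |x| >= 2**52 are already integral, which is why every
   user first branches away on !isless (xa, TWO52).  */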
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long) tmp
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
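/* Why nextafter (0.5, 0.0) rather than 0.5 in the expansion above
   (illustrative only): lround must round halfway cases away from zero,
   but adding exactly 0.5 before truncating misbehaves for values just
   below a half.  With doubles, 0.49999999999999994 + 0.5 rounds up to
   exactly 1.0 and would then convert to 1 instead of 0.  Adding the
   largest value strictly below 0.5 (copysigned to the operand) avoids
   that while still pushing true halfway cases over the boundary.  */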
24423 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24426 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
24428 /* C code for the stuff we're doing below (for do_floor):
24430 xi -= (double)xi > op1 ? 1 : 0;
24433 enum machine_mode fmode
= GET_MODE (op1
);
24434 enum machine_mode imode
= GET_MODE (op0
);
24435 rtx ireg
, freg
, label
, tmp
;
24437 /* reg = (long)op1 */
24438 ireg
= gen_reg_rtx (imode
);
24439 expand_fix (ireg
, op1
, 0);
24441 /* freg = (double)reg */
24442 freg
= gen_reg_rtx (fmode
);
24443 expand_float (freg
, ireg
, 0);
24445 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24446 label
= ix86_expand_sse_compare_and_jump (UNLE
,
24447 freg
, op1
, !do_floor
);
24448 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
24449 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
24450 emit_move_insn (ireg
, tmp
);
24452 emit_label (label
);
24453 LABEL_NUSES (label
) = 1;
24455 emit_move_insn (op0
, ireg
);
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
24494 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24497 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
24499 /* C code for the stuff we expand below.
24500 double xa = fabs (x), x2;
24501 if (!isless (xa, TWO52))
24503 xa = xa + TWO52 - TWO52;
24504 x2 = copysign (xa, x);
24513 enum machine_mode mode
= GET_MODE (operand0
);
24514 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
24516 TWO52
= ix86_gen_TWO52 (mode
);
24518 /* Temporary for holding the result, initialized to the input
24519 operand to ease control flow. */
24520 res
= gen_reg_rtx (mode
);
24521 emit_move_insn (res
, operand1
);
24523 /* xa = abs (operand1) */
24524 xa
= ix86_expand_sse_fabs (res
, &mask
);
24526 /* if (!isless (xa, TWO52)) goto label; */
24527 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24529 /* xa = xa + TWO52 - TWO52; */
24530 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24531 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
24533 /* xa = copysign (xa, operand1) */
24534 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
24536 /* generate 1.0 or -1.0 */
24537 one
= force_reg (mode
,
24538 const_double_from_real_value (do_floor
24539 ? dconst1
: dconstm1
, mode
));
24541 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24542 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
24543 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24544 gen_rtx_AND (mode
, one
, tmp
)));
24545 /* We always need to subtract here to preserve signed zero. */
24546 tmp
= expand_simple_binop (mode
, MINUS
,
24547 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24548 emit_move_insn (res
, tmp
);
24550 emit_label (label
);
24551 LABEL_NUSES (label
) = 1;
24553 emit_move_insn (operand0
, res
);
24556 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24559 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
24561 /* C code for the stuff we expand below.
24562 double xa = fabs (x), x2;
24563 if (!isless (xa, TWO52))
24565 x2 = (double)(long)x;
24572 if (HONOR_SIGNED_ZEROS (mode))
24573 return copysign (x2, x);
24576 enum machine_mode mode
= GET_MODE (operand0
);
24577 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
24579 TWO52
= ix86_gen_TWO52 (mode
);
24581 /* Temporary for holding the result, initialized to the input
24582 operand to ease control flow. */
24583 res
= gen_reg_rtx (mode
);
24584 emit_move_insn (res
, operand1
);
24586 /* xa = abs (operand1) */
24587 xa
= ix86_expand_sse_fabs (res
, &mask
);
24589 /* if (!isless (xa, TWO52)) goto label; */
24590 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24592 /* xa = (double)(long)x */
24593 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
24594 expand_fix (xi
, res
, 0);
24595 expand_float (xa
, xi
, 0);
24598 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
24600 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24601 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
24602 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24603 gen_rtx_AND (mode
, one
, tmp
)));
24604 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
24605 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24606 emit_move_insn (res
, tmp
);
24608 if (HONOR_SIGNED_ZEROS (mode
))
24609 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
24611 emit_label (label
);
24612 LABEL_NUSES (label
) = 1;
24614 emit_move_insn (operand0
, res
);
24617 /* Expand SSE sequence for computing round from OPERAND1 storing
24618 into OPERAND0. Sequence that works without relying on DImode truncation
24619 via cvttsd2siq that is only available on 64bit targets. */
24621 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
24623 /* C code for the stuff we expand below.
24624 double xa = fabs (x), xa2, x2;
24625 if (!isless (xa, TWO52))
24627 Using the absolute value and copying back sign makes
24628 -0.0 -> -0.0 correct.
24629 xa2 = xa + TWO52 - TWO52;
24634 else if (dxa > 0.5)
24636 x2 = copysign (xa2, x);
24639 enum machine_mode mode
= GET_MODE (operand0
);
24640 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
24642 TWO52
= ix86_gen_TWO52 (mode
);
24644 /* Temporary for holding the result, initialized to the input
24645 operand to ease control flow. */
24646 res
= gen_reg_rtx (mode
);
24647 emit_move_insn (res
, operand1
);
24649 /* xa = abs (operand1) */
24650 xa
= ix86_expand_sse_fabs (res
, &mask
);
24652 /* if (!isless (xa, TWO52)) goto label; */
24653 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24655 /* xa2 = xa + TWO52 - TWO52; */
24656 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24657 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
24659 /* dxa = xa2 - xa; */
24660 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
24662 /* generate 0.5, 1.0 and -0.5 */
24663 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
24664 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
24665 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
24669 tmp
= gen_reg_rtx (mode
);
24670 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24671 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
24672 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24673 gen_rtx_AND (mode
, one
, tmp
)));
24674 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24675 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24676 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
24677 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24678 gen_rtx_AND (mode
, one
, tmp
)));
24679 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24681 /* res = copysign (xa2, operand1) */
24682 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
24684 emit_label (label
);
24685 LABEL_NUSES (label
) = 1;
24687 emit_move_insn (operand0
, res
);
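/* A worked example of the compensation above (illustrative only), for
   x = 2.5 with round-to-nearest-even in effect:

	xa  = 2.5
	xa2 = (2.5 + TWO52) - TWO52 = 2.0   (tie rounded to even)
	dxa = xa2 - xa = -0.5

   dxa > 0.5 is false and dxa <= -0.5 is true, so 1.0 is added back and
   xa2 becomes 3.0; halfway cases are thus rounded away from zero as
   round() requires, even though the hardware rounded to even.  */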
24690 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24693 ix86_expand_trunc (rtx operand0
, rtx operand1
)
24695 /* C code for SSE variant we expand below.
24696 double xa = fabs (x), x2;
24697 if (!isless (xa, TWO52))
24699 x2 = (double)(long)x;
24700 if (HONOR_SIGNED_ZEROS (mode))
24701 return copysign (x2, x);
24704 enum machine_mode mode
= GET_MODE (operand0
);
24705 rtx xa
, xi
, TWO52
, label
, res
, mask
;
24707 TWO52
= ix86_gen_TWO52 (mode
);
24709 /* Temporary for holding the result, initialized to the input
24710 operand to ease control flow. */
24711 res
= gen_reg_rtx (mode
);
24712 emit_move_insn (res
, operand1
);
24714 /* xa = abs (operand1) */
24715 xa
= ix86_expand_sse_fabs (res
, &mask
);
24717 /* if (!isless (xa, TWO52)) goto label; */
24718 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24720 /* x = (double)(long)x */
24721 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
24722 expand_fix (xi
, res
, 0);
24723 expand_float (res
, xi
, 0);
24725 if (HONOR_SIGNED_ZEROS (mode
))
24726 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
24728 emit_label (label
);
24729 LABEL_NUSES (label
) = 1;
24731 emit_move_insn (operand0
, res
);
24734 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24737 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
24739 enum machine_mode mode
= GET_MODE (operand0
);
24740 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
24742 /* C code for SSE variant we expand below.
24743 double xa = fabs (x), x2;
24744 if (!isless (xa, TWO52))
24746 xa2 = xa + TWO52 - TWO52;
24750 x2 = copysign (xa2, x);
24754 TWO52
= ix86_gen_TWO52 (mode
);
24756 /* Temporary for holding the result, initialized to the input
24757 operand to ease control flow. */
24758 res
= gen_reg_rtx (mode
);
24759 emit_move_insn (res
, operand1
);
24761 /* xa = abs (operand1) */
24762 xa
= ix86_expand_sse_fabs (res
, &smask
);
24764 /* if (!isless (xa, TWO52)) goto label; */
24765 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24767 /* res = xa + TWO52 - TWO52; */
24768 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24769 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
24770 emit_move_insn (res
, tmp
);
24773 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
24775 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
24776 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
24777 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
24778 gen_rtx_AND (mode
, mask
, one
)));
24779 tmp
= expand_simple_binop (mode
, MINUS
,
24780 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
24781 emit_move_insn (res
, tmp
);
24783 /* res = copysign (res, operand1) */
24784 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
24786 emit_label (label
);
24787 LABEL_NUSES (label
) = 1;
24789 emit_move_insn (operand0
, res
);
24792 /* Expand SSE sequence for computing round from OPERAND1 storing
24795 ix86_expand_round (rtx operand0
, rtx operand1
)
24797 /* C code for the stuff we're doing below:
24798 double xa = fabs (x);
24799 if (!isless (xa, TWO52))
24801 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24802 return copysign (xa, x);
24804 enum machine_mode mode
= GET_MODE (operand0
);
24805 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
24806 const struct real_format
*fmt
;
24807 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
24809 /* Temporary for holding the result, initialized to the input
24810 operand to ease control flow. */
24811 res
= gen_reg_rtx (mode
);
24812 emit_move_insn (res
, operand1
);
24814 TWO52
= ix86_gen_TWO52 (mode
);
24815 xa
= ix86_expand_sse_fabs (res
, &mask
);
24816 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24818 /* load nextafter (0.5, 0.0) */
24819 fmt
= REAL_MODE_FORMAT (mode
);
24820 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
24821 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
24823 /* xa = xa + 0.5 */
24824 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
24825 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
24827 /* xa = (double)(int64_t)xa */
24828 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
24829 expand_fix (xi
, xa
, 0);
24830 expand_float (xa
, xi
, 0);
24832 /* res = copysign (xa, operand1) */
24833 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
24835 emit_label (label
);
24836 LABEL_NUSES (label
) = 1;
24838 emit_move_insn (operand0
, res
);
/* Validate whether an SSE5 instruction is valid or not.
   OPERANDS is the array of operands.
   NUM is the number of operands.
   USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
   NUM_MEMORY is the maximum number of memory operands to accept.  */
24847 bool ix86_sse5_valid_op_p (rtx operands
[], rtx insn
, int num
, bool uses_oc0
, int num_memory
)
24853 /* Count the number of memory arguments */
24856 for (i
= 0; i
< num
; i
++)
24858 enum machine_mode mode
= GET_MODE (operands
[i
]);
24859 if (register_operand (operands
[i
], mode
))
24862 else if (memory_operand (operands
[i
], mode
))
24864 mem_mask
|= (1 << i
);
24870 rtx pattern
= PATTERN (insn
);
24872 /* allow 0 for pcmov */
24873 if (GET_CODE (pattern
) != SET
24874 || GET_CODE (SET_SRC (pattern
)) != IF_THEN_ELSE
24876 || operands
[i
] != CONST0_RTX (mode
))
24881 /* If there were no memory operations, allow the insn */
24885 /* Do not allow the destination register to be a memory operand. */
24886 else if (mem_mask
& (1 << 0))
24889 /* If there are too many memory operations, disallow the instruction. While
24890 the hardware only allows 1 memory reference, before register allocation
24891 for some insns, we allow two memory operations sometimes in order to allow
24892 code like the following to be optimized:
24894 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24896 or similar cases that are vectorized into using the fmaddss
24898 else if (mem_count
> num_memory
)
24901 /* Don't allow more than one memory operation if not optimizing. */
24902 else if (mem_count
> 1 && !optimize
)
24905 else if (num
== 4 && mem_count
== 1)
24907 /* formats (destination is the first argument), example fmaddss:
24908 xmm1, xmm1, xmm2, xmm3/mem
24909 xmm1, xmm1, xmm2/mem, xmm3
24910 xmm1, xmm2, xmm3/mem, xmm1
24911 xmm1, xmm2/mem, xmm3, xmm1 */
24913 return ((mem_mask
== (1 << 1))
24914 || (mem_mask
== (1 << 2))
24915 || (mem_mask
== (1 << 3)));
24917 /* format, example pmacsdd:
24918 xmm1, xmm2, xmm3/mem, xmm1 */
24920 return (mem_mask
== (1 << 2));
24923 else if (num
== 4 && num_memory
== 2)
      /* If there are two memory operations, we can load one of the memory ops
	 into the destination register.  This is for optimizing the
	 multiply/add ops, for which the combiner has given both the multiply
	 and the add insns a memory operand.  We have to be careful that the
	 destination doesn't overlap with the inputs.  */
24930 rtx op0
= operands
[0];
24932 if (reg_mentioned_p (op0
, operands
[1])
24933 || reg_mentioned_p (op0
, operands
[2])
24934 || reg_mentioned_p (op0
, operands
[3]))
24937 /* formats (destination is the first argument), example fmaddss:
24938 xmm1, xmm1, xmm2, xmm3/mem
24939 xmm1, xmm1, xmm2/mem, xmm3
24940 xmm1, xmm2, xmm3/mem, xmm1
24941 xmm1, xmm2/mem, xmm3, xmm1
24943 For the oc0 case, we will load either operands[1] or operands[3] into
24944 operands[0], so any combination of 2 memory operands is ok. */
24948 /* format, example pmacsdd:
24949 xmm1, xmm2, xmm3/mem, xmm1
24951 For the integer multiply/add instructions be more restrictive and
24952 require operands[2] and operands[3] to be the memory operands. */
24954 return (mem_mask
== ((1 << 2) | (1 << 3)));
24957 else if (num
== 3 && num_memory
== 1)
24959 /* formats, example protb:
24960 xmm1, xmm2, xmm3/mem
24961 xmm1, xmm2/mem, xmm3 */
24963 return ((mem_mask
== (1 << 1)) || (mem_mask
== (1 << 2)));
24965 /* format, example comeq:
24966 xmm1, xmm2, xmm3/mem */
24968 return (mem_mask
== (1 << 2));
24972 gcc_unreachable ();
24978 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
24979 hardware will allow by using the destination register to load one of the
24980 memory operations. Presently this is used by the multiply/add routines to
24981 allow 2 memory references. */
24984 ix86_expand_sse5_multiple_memory (rtx operands
[],
24986 enum machine_mode mode
)
24988 rtx op0
= operands
[0];
24990 || memory_operand (op0
, mode
)
24991 || reg_mentioned_p (op0
, operands
[1])
24992 || reg_mentioned_p (op0
, operands
[2])
24993 || reg_mentioned_p (op0
, operands
[3]))
24994 gcc_unreachable ();
24996 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
24997 the destination register. */
24998 if (memory_operand (operands
[1], mode
))
25000 emit_move_insn (op0
, operands
[1]);
25003 else if (memory_operand (operands
[3], mode
))
25005 emit_move_insn (op0
, operands
[3]);
25009 gcc_unreachable ();
25015 /* Table of valid machine attributes. */
25016 static const struct attribute_spec ix86_attribute_table
[] =
25018 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25019 /* Stdcall attribute says callee is responsible for popping arguments
25020 if they are not variable. */
25021 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
25022 /* Fastcall attribute says callee is responsible for popping arguments
25023 if they are not variable. */
25024 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
25025 /* Cdecl attribute says the callee is a normal C declaration */
25026 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
25027 /* Regparm attribute specifies how many integer arguments are to be
25028 passed in registers. */
25029 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
25030 /* Sseregparm attribute says we are using x86_64 calling conventions
25031 for FP arguments. */
25032 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
25033 /* force_align_arg_pointer says this function realigns the stack at entry. */
25034 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
25035 false, true, true, ix86_handle_cconv_attribute
},
25036 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25037 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
25038 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
25039 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
25041 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
25042 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
25043 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25044 SUBTARGET_ATTRIBUTE_TABLE
,
25046 { NULL
, 0, 0, false, false, false, NULL
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
x86_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.

     TODO: The values in individual target tables have to be tuned or new
     fields may be needed.  For eg. on K8, the default branch path is the
     not-taken path.  If the taken path is predicted correctly, the minimum
     penalty of going down the taken-path is 1 cycle.  If the taken-path is
     not predicted correctly, then the minimum penalty is 10 cycles.  */

  if (runtime_test)
    return (-(ix86_cost->cond_taken_branch_cost));
  else
    return 0;
}
25073 /* Initialize the GCC target structure. */
25074 #undef TARGET_ATTRIBUTE_TABLE
25075 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25076 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25077 # undef TARGET_MERGE_DECL_ATTRIBUTES
25078 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25081 #undef TARGET_COMP_TYPE_ATTRIBUTES
25082 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25084 #undef TARGET_INIT_BUILTINS
25085 #define TARGET_INIT_BUILTINS ix86_init_builtins
25086 #undef TARGET_EXPAND_BUILTIN
25087 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25089 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25090 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25091 ix86_builtin_vectorized_function
25093 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25094 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25096 #undef TARGET_BUILTIN_RECIPROCAL
25097 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25099 #undef TARGET_ASM_FUNCTION_EPILOGUE
25100 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25102 #undef TARGET_ENCODE_SECTION_INFO
25103 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25104 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25106 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25109 #undef TARGET_ASM_OPEN_PAREN
25110 #define TARGET_ASM_OPEN_PAREN ""
25111 #undef TARGET_ASM_CLOSE_PAREN
25112 #define TARGET_ASM_CLOSE_PAREN ""
25114 #undef TARGET_ASM_ALIGNED_HI_OP
25115 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25116 #undef TARGET_ASM_ALIGNED_SI_OP
25117 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25119 #undef TARGET_ASM_ALIGNED_DI_OP
25120 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25123 #undef TARGET_ASM_UNALIGNED_HI_OP
25124 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25125 #undef TARGET_ASM_UNALIGNED_SI_OP
25126 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25127 #undef TARGET_ASM_UNALIGNED_DI_OP
25128 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25130 #undef TARGET_SCHED_ADJUST_COST
25131 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25132 #undef TARGET_SCHED_ISSUE_RATE
25133 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25134 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25135 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25136 ia32_multipass_dfa_lookahead
25138 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25139 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25142 #undef TARGET_HAVE_TLS
25143 #define TARGET_HAVE_TLS true
25145 #undef TARGET_CANNOT_FORCE_CONST_MEM
25146 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25147 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25148 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25150 #undef TARGET_DELEGITIMIZE_ADDRESS
25151 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25153 #undef TARGET_MS_BITFIELD_LAYOUT_P
25154 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25157 #undef TARGET_BINDS_LOCAL_P
25158 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25160 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25161 #undef TARGET_BINDS_LOCAL_P
25162 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25165 #undef TARGET_ASM_OUTPUT_MI_THUNK
25166 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25167 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25168 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25170 #undef TARGET_ASM_FILE_START
25171 #define TARGET_ASM_FILE_START x86_file_start
25173 #undef TARGET_DEFAULT_TARGET_FLAGS
25174 #define TARGET_DEFAULT_TARGET_FLAGS \
25176 | TARGET_SUBTARGET_DEFAULT \
25177 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25179 #undef TARGET_HANDLE_OPTION
25180 #define TARGET_HANDLE_OPTION ix86_handle_option
25182 #undef TARGET_RTX_COSTS
25183 #define TARGET_RTX_COSTS ix86_rtx_costs
25184 #undef TARGET_ADDRESS_COST
25185 #define TARGET_ADDRESS_COST ix86_address_cost
25187 #undef TARGET_FIXED_CONDITION_CODE_REGS
25188 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25189 #undef TARGET_CC_MODES_COMPATIBLE
25190 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25192 #undef TARGET_MACHINE_DEPENDENT_REORG
25193 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25195 #undef TARGET_BUILD_BUILTIN_VA_LIST
25196 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25198 #undef TARGET_MD_ASM_CLOBBERS
25199 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25201 #undef TARGET_PROMOTE_PROTOTYPES
25202 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25203 #undef TARGET_STRUCT_VALUE_RTX
25204 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25205 #undef TARGET_SETUP_INCOMING_VARARGS
25206 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25207 #undef TARGET_MUST_PASS_IN_STACK
25208 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25209 #undef TARGET_PASS_BY_REFERENCE
25210 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25211 #undef TARGET_INTERNAL_ARG_POINTER
25212 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25213 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25214 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25215 #undef TARGET_STRICT_ARGUMENT_NAMING
25216 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25218 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25219 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25221 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25222 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25224 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25225 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25227 #undef TARGET_C_MODE_FOR_SUFFIX
25228 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25231 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25232 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25235 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25236 #undef TARGET_INSERT_ATTRIBUTES
25237 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25240 #undef TARGET_MANGLE_TYPE
25241 #define TARGET_MANGLE_TYPE ix86_mangle_type
25243 #undef TARGET_STACK_PROTECT_FAIL
25244 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25246 #undef TARGET_FUNCTION_VALUE
25247 #define TARGET_FUNCTION_VALUE ix86_function_value
25249 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25250 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25252 struct gcc_target targetm
= TARGET_INITIALIZER
;
25254 #include "gt-i386.h"