gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
56 static int x86_builtin_vectorization_cost (bool);
57 static rtx legitimize_dllimport_symbol (rtx, bool);
59 #ifndef CHECK_STACK_LIMIT
60 #define CHECK_STACK_LIMIT (-1)
61 #endif
63 /* Return index of given mode in mult and division cost tables. */
64 #define MODE_INDEX(mode) \
65 ((mode) == QImode ? 0 \
66 : (mode) == HImode ? 1 \
67 : (mode) == SImode ? 2 \
68 : (mode) == DImode ? 3 \
69 : 4)
71 /* Processor costs (relative to an add) */
72 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
73 #define COSTS_N_BYTES(N) ((N) * 2)
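/* Illustrative sketch (not part of the original file): the MODE_INDEX macro
   above selects the row of the per-mode cost arrays for a given integer
   mode (QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, other -> 4),
   and the active cost table is reached through ix86_cost.  A lookup would
   look roughly like the following; the field name mult_init follows the
   processor_costs layout in i386.h and is shown only for illustration.

     static inline int
     example_mult_cost (enum machine_mode mode)
     {
       return ix86_cost->mult_init[MODE_INDEX (mode)];
     }

   When compiling with -Os the size_cost table below is selected, so the
   same lookup returns COSTS_N_BYTES values (approximate bytes) rather than
   COSTS_N_INSNS values (cycles relative to an add).  */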
75 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
77 static const
78 struct processor_costs size_cost = { /* costs for tuning for size */
79 COSTS_N_BYTES (2), /* cost of an add instruction */
80 COSTS_N_BYTES (3), /* cost of a lea instruction */
81 COSTS_N_BYTES (2), /* variable shift costs */
82 COSTS_N_BYTES (3), /* constant shift costs */
83 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
84 COSTS_N_BYTES (3), /* HI */
85 COSTS_N_BYTES (3), /* SI */
86 COSTS_N_BYTES (3), /* DI */
87 COSTS_N_BYTES (5)}, /* other */
88 0, /* cost of multiply per each bit set */
89 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
90 COSTS_N_BYTES (3), /* HI */
91 COSTS_N_BYTES (3), /* SI */
92 COSTS_N_BYTES (3), /* DI */
93 COSTS_N_BYTES (5)}, /* other */
94 COSTS_N_BYTES (3), /* cost of movsx */
95 COSTS_N_BYTES (3), /* cost of movzx */
96 0, /* "large" insn */
97 2, /* MOVE_RATIO */
98 2, /* cost for loading QImode using movzbl */
99 {2, 2, 2}, /* cost of loading integer registers
100 in QImode, HImode and SImode.
101 Relative to reg-reg move (2). */
102 {2, 2, 2}, /* cost of storing integer registers */
103 2, /* cost of reg,reg fld/fst */
104 {2, 2, 2}, /* cost of loading fp registers
105 in SFmode, DFmode and XFmode */
106 {2, 2, 2}, /* cost of storing fp registers
107 in SFmode, DFmode and XFmode */
108 3, /* cost of moving MMX register */
109 {3, 3}, /* cost of loading MMX registers
110 in SImode and DImode */
111 {3, 3}, /* cost of storing MMX registers
112 in SImode and DImode */
113 3, /* cost of moving SSE register */
114 {3, 3, 3}, /* cost of loading SSE registers
115 in SImode, DImode and TImode */
116 {3, 3, 3}, /* cost of storing SSE registers
117 in SImode, DImode and TImode */
118 3, /* MMX or SSE register to integer */
119 0, /* size of l1 cache */
120 0, /* size of l2 cache */
121 0, /* size of prefetch block */
122 0, /* number of parallel prefetches */
123 2, /* Branch cost */
124 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
125 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
126 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
127 COSTS_N_BYTES (2), /* cost of FABS instruction. */
128 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
129 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
130 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 1, /* scalar_stmt_cost. */
135 1, /* scalar load_cost. */
136 1, /* scalar_store_cost. */
137 1, /* vec_stmt_cost. */
138 1, /* vec_to_scalar_cost. */
139 1, /* scalar_to_vec_cost. */
140 1, /* vec_align_load_cost. */
141 1, /* vec_unalign_load_cost. */
142 1, /* vec_store_cost. */
143 1, /* cond_taken_branch_cost. */
144 1, /* cond_not_taken_branch_cost. */
145 };
147 /* Processor costs (relative to an add) */
148 static const
149 struct processor_costs i386_cost = { /* 386 specific costs */
150 COSTS_N_INSNS (1), /* cost of an add instruction */
151 COSTS_N_INSNS (1), /* cost of a lea instruction */
152 COSTS_N_INSNS (3), /* variable shift costs */
153 COSTS_N_INSNS (2), /* constant shift costs */
154 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
155 COSTS_N_INSNS (6), /* HI */
156 COSTS_N_INSNS (6), /* SI */
157 COSTS_N_INSNS (6), /* DI */
158 COSTS_N_INSNS (6)}, /* other */
159 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
160 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
161 COSTS_N_INSNS (23), /* HI */
162 COSTS_N_INSNS (23), /* SI */
163 COSTS_N_INSNS (23), /* DI */
164 COSTS_N_INSNS (23)}, /* other */
165 COSTS_N_INSNS (3), /* cost of movsx */
166 COSTS_N_INSNS (2), /* cost of movzx */
167 15, /* "large" insn */
168 3, /* MOVE_RATIO */
169 4, /* cost for loading QImode using movzbl */
170 {2, 4, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 4, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {8, 8, 8}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {8, 8, 8}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 2, /* cost of moving MMX register */
180 {4, 8}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {4, 8}, /* cost of storing MMX registers
183 in SImode and DImode */
184 2, /* cost of moving SSE register */
185 {4, 8, 16}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {4, 8, 16}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
194 1, /* Branch cost */
195 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
196 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
197 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
198 COSTS_N_INSNS (22), /* cost of FABS instruction. */
199 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
200 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
201 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
202 DUMMY_STRINGOP_ALGS},
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 1, /* scalar_stmt_cost. */
206 1, /* scalar load_cost. */
207 1, /* scalar_store_cost. */
208 1, /* vec_stmt_cost. */
209 1, /* vec_to_scalar_cost. */
210 1, /* scalar_to_vec_cost. */
211 1, /* vec_align_load_cost. */
212 2, /* vec_unalign_load_cost. */
213 1, /* vec_store_cost. */
214 3, /* cond_taken_branch_cost. */
215 1, /* cond_not_taken_branch_cost. */
216 };
218 static const
219 struct processor_costs i486_cost = { /* 486 specific costs */
220 COSTS_N_INSNS (1), /* cost of an add instruction */
221 COSTS_N_INSNS (1), /* cost of a lea instruction */
222 COSTS_N_INSNS (3), /* variable shift costs */
223 COSTS_N_INSNS (2), /* constant shift costs */
224 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
225 COSTS_N_INSNS (12), /* HI */
226 COSTS_N_INSNS (12), /* SI */
227 COSTS_N_INSNS (12), /* DI */
228 COSTS_N_INSNS (12)}, /* other */
229 1, /* cost of multiply per each bit set */
230 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
231 COSTS_N_INSNS (40), /* HI */
232 COSTS_N_INSNS (40), /* SI */
233 COSTS_N_INSNS (40), /* DI */
234 COSTS_N_INSNS (40)}, /* other */
235 COSTS_N_INSNS (3), /* cost of movsx */
236 COSTS_N_INSNS (2), /* cost of movzx */
237 15, /* "large" insn */
238 3, /* MOVE_RATIO */
239 4, /* cost for loading QImode using movzbl */
240 {2, 4, 2}, /* cost of loading integer registers
241 in QImode, HImode and SImode.
242 Relative to reg-reg move (2). */
243 {2, 4, 2}, /* cost of storing integer registers */
244 2, /* cost of reg,reg fld/fst */
245 {8, 8, 8}, /* cost of loading fp registers
246 in SFmode, DFmode and XFmode */
247 {8, 8, 8}, /* cost of storing fp registers
248 in SFmode, DFmode and XFmode */
249 2, /* cost of moving MMX register */
250 {4, 8}, /* cost of loading MMX registers
251 in SImode and DImode */
252 {4, 8}, /* cost of storing MMX registers
253 in SImode and DImode */
254 2, /* cost of moving SSE register */
255 {4, 8, 16}, /* cost of loading SSE registers
256 in SImode, DImode and TImode */
257 {4, 8, 16}, /* cost of storing SSE registers
258 in SImode, DImode and TImode */
259 3, /* MMX or SSE register to integer */
260 4, /* size of l1 cache. 486 has 8kB cache
261 shared for code and data, so 4kB is
262 not really precise. */
263 4, /* size of l2 cache */
264 0, /* size of prefetch block */
265 0, /* number of parallel prefetches */
266 1, /* Branch cost */
267 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
268 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
269 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
270 COSTS_N_INSNS (3), /* cost of FABS instruction. */
271 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
272 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
273 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
274 DUMMY_STRINGOP_ALGS},
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 1, /* scalar_stmt_cost. */
278 1, /* scalar load_cost. */
279 1, /* scalar_store_cost. */
280 1, /* vec_stmt_cost. */
281 1, /* vec_to_scalar_cost. */
282 1, /* scalar_to_vec_cost. */
283 1, /* vec_align_load_cost. */
284 2, /* vec_unalign_load_cost. */
285 1, /* vec_store_cost. */
286 3, /* cond_taken_branch_cost. */
287 1, /* cond_not_taken_branch_cost. */
288 };
290 static const
291 struct processor_costs pentium_cost = {
292 COSTS_N_INSNS (1), /* cost of an add instruction */
293 COSTS_N_INSNS (1), /* cost of a lea instruction */
294 COSTS_N_INSNS (4), /* variable shift costs */
295 COSTS_N_INSNS (1), /* constant shift costs */
296 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
297 COSTS_N_INSNS (11), /* HI */
298 COSTS_N_INSNS (11), /* SI */
299 COSTS_N_INSNS (11), /* DI */
300 COSTS_N_INSNS (11)}, /* other */
301 0, /* cost of multiply per each bit set */
302 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
303 COSTS_N_INSNS (25), /* HI */
304 COSTS_N_INSNS (25), /* SI */
305 COSTS_N_INSNS (25), /* DI */
306 COSTS_N_INSNS (25)}, /* other */
307 COSTS_N_INSNS (3), /* cost of movsx */
308 COSTS_N_INSNS (2), /* cost of movzx */
309 8, /* "large" insn */
310 6, /* MOVE_RATIO */
311 6, /* cost for loading QImode using movzbl */
312 {2, 4, 2}, /* cost of loading integer registers
313 in QImode, HImode and SImode.
314 Relative to reg-reg move (2). */
315 {2, 4, 2}, /* cost of storing integer registers */
316 2, /* cost of reg,reg fld/fst */
317 {2, 2, 6}, /* cost of loading fp registers
318 in SFmode, DFmode and XFmode */
319 {4, 4, 6}, /* cost of storing fp registers
320 in SFmode, DFmode and XFmode */
321 8, /* cost of moving MMX register */
322 {8, 8}, /* cost of loading MMX registers
323 in SImode and DImode */
324 {8, 8}, /* cost of storing MMX registers
325 in SImode and DImode */
326 2, /* cost of moving SSE register */
327 {4, 8, 16}, /* cost of loading SSE registers
328 in SImode, DImode and TImode */
329 {4, 8, 16}, /* cost of storing SSE registers
330 in SImode, DImode and TImode */
331 3, /* MMX or SSE register to integer */
332 8, /* size of l1 cache. */
333 8, /* size of l2 cache */
334 0, /* size of prefetch block */
335 0, /* number of parallel prefetches */
336 2, /* Branch cost */
337 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
338 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
339 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
340 COSTS_N_INSNS (1), /* cost of FABS instruction. */
341 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
342 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
343 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
344 DUMMY_STRINGOP_ALGS},
345 {{libcall, {{-1, rep_prefix_4_byte}}},
346 DUMMY_STRINGOP_ALGS},
347 1, /* scalar_stmt_cost. */
348 1, /* scalar load_cost. */
349 1, /* scalar_store_cost. */
350 1, /* vec_stmt_cost. */
351 1, /* vec_to_scalar_cost. */
352 1, /* scalar_to_vec_cost. */
353 1, /* vec_align_load_cost. */
354 2, /* vec_unalign_load_cost. */
355 1, /* vec_store_cost. */
356 3, /* cond_taken_branch_cost. */
357 1, /* cond_not_taken_branch_cost. */
358 };
360 static const
361 struct processor_costs pentiumpro_cost = {
362 COSTS_N_INSNS (1), /* cost of an add instruction */
363 COSTS_N_INSNS (1), /* cost of a lea instruction */
364 COSTS_N_INSNS (1), /* variable shift costs */
365 COSTS_N_INSNS (1), /* constant shift costs */
366 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
367 COSTS_N_INSNS (4), /* HI */
368 COSTS_N_INSNS (4), /* SI */
369 COSTS_N_INSNS (4), /* DI */
370 COSTS_N_INSNS (4)}, /* other */
371 0, /* cost of multiply per each bit set */
372 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
373 COSTS_N_INSNS (17), /* HI */
374 COSTS_N_INSNS (17), /* SI */
375 COSTS_N_INSNS (17), /* DI */
376 COSTS_N_INSNS (17)}, /* other */
377 COSTS_N_INSNS (1), /* cost of movsx */
378 COSTS_N_INSNS (1), /* cost of movzx */
379 8, /* "large" insn */
380 6, /* MOVE_RATIO */
381 2, /* cost for loading QImode using movzbl */
382 {4, 4, 4}, /* cost of loading integer registers
383 in QImode, HImode and SImode.
384 Relative to reg-reg move (2). */
385 {2, 2, 2}, /* cost of storing integer registers */
386 2, /* cost of reg,reg fld/fst */
387 {2, 2, 6}, /* cost of loading fp registers
388 in SFmode, DFmode and XFmode */
389 {4, 4, 6}, /* cost of storing fp registers
390 in SFmode, DFmode and XFmode */
391 2, /* cost of moving MMX register */
392 {2, 2}, /* cost of loading MMX registers
393 in SImode and DImode */
394 {2, 2}, /* cost of storing MMX registers
395 in SImode and DImode */
396 2, /* cost of moving SSE register */
397 {2, 2, 8}, /* cost of loading SSE registers
398 in SImode, DImode and TImode */
399 {2, 2, 8}, /* cost of storing SSE registers
400 in SImode, DImode and TImode */
401 3, /* MMX or SSE register to integer */
402 8, /* size of l1 cache. */
403 256, /* size of l2 cache */
404 32, /* size of prefetch block */
405 6, /* number of parallel prefetches */
406 2, /* Branch cost */
407 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
408 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
409 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
410 COSTS_N_INSNS (2), /* cost of FABS instruction. */
411 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
412 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
414 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
415 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a more
416 expensive startup time in the CPU, but after 4K the difference is down in the noise.
417 */
418 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
419 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
420 DUMMY_STRINGOP_ALGS},
421 {{rep_prefix_4_byte, {{1024, unrolled_loop},
422 {8192, rep_prefix_4_byte}, {-1, libcall}}},
423 DUMMY_STRINGOP_ALGS},
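/* Illustrative note (not part of the original file): each memcpy/memset
   field above is a pair of stringop_algs descriptors, the first used when
   generating 32-bit code and the second for 64-bit code (hence
   DUMMY_STRINGOP_ALGS for CPUs never tuned for in 64-bit mode).  A
   descriptor names the algorithm for copies of unknown size, followed by
   {max_size, algorithm} steps scanned in order.  Reading the PentiumPro
   memcpy entry: a copy known to be at most 128 bytes uses an inline loop,
   up to 1024 bytes an unrolled loop, up to 8192 bytes rep movsl, anything
   larger rep movsb, and a copy of unknown size uses rep movsl.  */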
424 1, /* scalar_stmt_cost. */
425 1, /* scalar load_cost. */
426 1, /* scalar_store_cost. */
427 1, /* vec_stmt_cost. */
428 1, /* vec_to_scalar_cost. */
429 1, /* scalar_to_vec_cost. */
430 1, /* vec_align_load_cost. */
431 2, /* vec_unalign_load_cost. */
432 1, /* vec_store_cost. */
433 3, /* cond_taken_branch_cost. */
434 1, /* cond_not_taken_branch_cost. */
435 };
437 static const
438 struct processor_costs geode_cost = {
439 COSTS_N_INSNS (1), /* cost of an add instruction */
440 COSTS_N_INSNS (1), /* cost of a lea instruction */
441 COSTS_N_INSNS (2), /* variable shift costs */
442 COSTS_N_INSNS (1), /* constant shift costs */
443 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
444 COSTS_N_INSNS (4), /* HI */
445 COSTS_N_INSNS (7), /* SI */
446 COSTS_N_INSNS (7), /* DI */
447 COSTS_N_INSNS (7)}, /* other */
448 0, /* cost of multiply per each bit set */
449 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
450 COSTS_N_INSNS (23), /* HI */
451 COSTS_N_INSNS (39), /* SI */
452 COSTS_N_INSNS (39), /* DI */
453 COSTS_N_INSNS (39)}, /* other */
454 COSTS_N_INSNS (1), /* cost of movsx */
455 COSTS_N_INSNS (1), /* cost of movzx */
456 8, /* "large" insn */
457 4, /* MOVE_RATIO */
458 1, /* cost for loading QImode using movzbl */
459 {1, 1, 1}, /* cost of loading integer registers
460 in QImode, HImode and SImode.
461 Relative to reg-reg move (2). */
462 {1, 1, 1}, /* cost of storing integer registers */
463 1, /* cost of reg,reg fld/fst */
464 {1, 1, 1}, /* cost of loading fp registers
465 in SFmode, DFmode and XFmode */
466 {4, 6, 6}, /* cost of storing fp registers
467 in SFmode, DFmode and XFmode */
469 1, /* cost of moving MMX register */
470 {1, 1}, /* cost of loading MMX registers
471 in SImode and DImode */
472 {1, 1}, /* cost of storing MMX registers
473 in SImode and DImode */
474 1, /* cost of moving SSE register */
475 {1, 1, 1}, /* cost of loading SSE registers
476 in SImode, DImode and TImode */
477 {1, 1, 1}, /* cost of storing SSE registers
478 in SImode, DImode and TImode */
479 1, /* MMX or SSE register to integer */
480 64, /* size of l1 cache. */
481 128, /* size of l2 cache. */
482 32, /* size of prefetch block */
483 1, /* number of parallel prefetches */
484 1, /* Branch cost */
485 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
486 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
487 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
488 COSTS_N_INSNS (1), /* cost of FABS instruction. */
489 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
490 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
491 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
492 DUMMY_STRINGOP_ALGS},
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 1, /* scalar_stmt_cost. */
496 1, /* scalar load_cost. */
497 1, /* scalar_store_cost. */
498 1, /* vec_stmt_cost. */
499 1, /* vec_to_scalar_cost. */
500 1, /* scalar_to_vec_cost. */
501 1, /* vec_align_load_cost. */
502 2, /* vec_unalign_load_cost. */
503 1, /* vec_store_cost. */
504 3, /* cond_taken_branch_cost. */
505 1, /* cond_not_taken_branch_cost. */
506 };
508 static const
509 struct processor_costs k6_cost = {
510 COSTS_N_INSNS (1), /* cost of an add instruction */
511 COSTS_N_INSNS (2), /* cost of a lea instruction */
512 COSTS_N_INSNS (1), /* variable shift costs */
513 COSTS_N_INSNS (1), /* constant shift costs */
514 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
515 COSTS_N_INSNS (3), /* HI */
516 COSTS_N_INSNS (3), /* SI */
517 COSTS_N_INSNS (3), /* DI */
518 COSTS_N_INSNS (3)}, /* other */
519 0, /* cost of multiply per each bit set */
520 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
521 COSTS_N_INSNS (18), /* HI */
522 COSTS_N_INSNS (18), /* SI */
523 COSTS_N_INSNS (18), /* DI */
524 COSTS_N_INSNS (18)}, /* other */
525 COSTS_N_INSNS (2), /* cost of movsx */
526 COSTS_N_INSNS (2), /* cost of movzx */
527 8, /* "large" insn */
528 4, /* MOVE_RATIO */
529 3, /* cost for loading QImode using movzbl */
530 {4, 5, 4}, /* cost of loading integer registers
531 in QImode, HImode and SImode.
532 Relative to reg-reg move (2). */
533 {2, 3, 2}, /* cost of storing integer registers */
534 4, /* cost of reg,reg fld/fst */
535 {6, 6, 6}, /* cost of loading fp registers
536 in SFmode, DFmode and XFmode */
537 {4, 4, 4}, /* cost of storing fp registers
538 in SFmode, DFmode and XFmode */
539 2, /* cost of moving MMX register */
540 {2, 2}, /* cost of loading MMX registers
541 in SImode and DImode */
542 {2, 2}, /* cost of storing MMX registers
543 in SImode and DImode */
544 2, /* cost of moving SSE register */
545 {2, 2, 8}, /* cost of loading SSE registers
546 in SImode, DImode and TImode */
547 {2, 2, 8}, /* cost of storing SSE registers
548 in SImode, DImode and TImode */
549 6, /* MMX or SSE register to integer */
550 32, /* size of l1 cache. */
551 32, /* size of l2 cache. Some models
552 have integrated l2 cache, but
553 optimizing for k6 is not important
554 enough to worry about that. */
555 32, /* size of prefetch block */
556 1, /* number of parallel prefetches */
557 1, /* Branch cost */
558 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
560 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
561 COSTS_N_INSNS (2), /* cost of FABS instruction. */
562 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
563 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
564 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
565 DUMMY_STRINGOP_ALGS},
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 1, /* scalar_stmt_cost. */
569 1, /* scalar load_cost. */
570 1, /* scalar_store_cost. */
571 1, /* vec_stmt_cost. */
572 1, /* vec_to_scalar_cost. */
573 1, /* scalar_to_vec_cost. */
574 1, /* vec_align_load_cost. */
575 2, /* vec_unalign_load_cost. */
576 1, /* vec_store_cost. */
577 3, /* cond_taken_branch_cost. */
578 1, /* cond_not_taken_branch_cost. */
579 };
581 static const
582 struct processor_costs athlon_cost = {
583 COSTS_N_INSNS (1), /* cost of an add instruction */
584 COSTS_N_INSNS (2), /* cost of a lea instruction */
585 COSTS_N_INSNS (1), /* variable shift costs */
586 COSTS_N_INSNS (1), /* constant shift costs */
587 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
588 COSTS_N_INSNS (5), /* HI */
589 COSTS_N_INSNS (5), /* SI */
590 COSTS_N_INSNS (5), /* DI */
591 COSTS_N_INSNS (5)}, /* other */
592 0, /* cost of multiply per each bit set */
593 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
594 COSTS_N_INSNS (26), /* HI */
595 COSTS_N_INSNS (42), /* SI */
596 COSTS_N_INSNS (74), /* DI */
597 COSTS_N_INSNS (74)}, /* other */
598 COSTS_N_INSNS (1), /* cost of movsx */
599 COSTS_N_INSNS (1), /* cost of movzx */
600 8, /* "large" insn */
601 9, /* MOVE_RATIO */
602 4, /* cost for loading QImode using movzbl */
603 {3, 4, 3}, /* cost of loading integer registers
604 in QImode, HImode and SImode.
605 Relative to reg-reg move (2). */
606 {3, 4, 3}, /* cost of storing integer registers */
607 4, /* cost of reg,reg fld/fst */
608 {4, 4, 12}, /* cost of loading fp registers
609 in SFmode, DFmode and XFmode */
610 {6, 6, 8}, /* cost of storing fp registers
611 in SFmode, DFmode and XFmode */
612 2, /* cost of moving MMX register */
613 {4, 4}, /* cost of loading MMX registers
614 in SImode and DImode */
615 {4, 4}, /* cost of storing MMX registers
616 in SImode and DImode */
617 2, /* cost of moving SSE register */
618 {4, 4, 6}, /* cost of loading SSE registers
619 in SImode, DImode and TImode */
620 {4, 4, 5}, /* cost of storing SSE registers
621 in SImode, DImode and TImode */
622 5, /* MMX or SSE register to integer */
623 64, /* size of l1 cache. */
624 256, /* size of l2 cache. */
625 64, /* size of prefetch block */
626 6, /* number of parallel prefetches */
627 5, /* Branch cost */
628 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
629 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
630 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
631 COSTS_N_INSNS (2), /* cost of FABS instruction. */
632 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
633 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
634 /* For some reason, Athlon deals better with REP prefix (relative to loops)
635 compared to K8. Alignment becomes important after 8 bytes for memcpy and
636 128 bytes for memset. */
637 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
638 DUMMY_STRINGOP_ALGS},
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 1, /* scalar_stmt_cost. */
642 1, /* scalar load_cost. */
643 1, /* scalar_store_cost. */
644 1, /* vec_stmt_cost. */
645 1, /* vec_to_scalar_cost. */
646 1, /* scalar_to_vec_cost. */
647 1, /* vec_align_load_cost. */
648 2, /* vec_unalign_load_cost. */
649 1, /* vec_store_cost. */
650 3, /* cond_taken_branch_cost. */
651 1, /* cond_not_taken_branch_cost. */
652 };
654 static const
655 struct processor_costs k8_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (4), /* HI */
662 COSTS_N_INSNS (3), /* SI */
663 COSTS_N_INSNS (4), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
674 9, /* MOVE_RATIO */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {3, 3}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 3, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 512, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 /* New AMD processors never drop prefetches; if they cannot be performed
700 immediately, they are queued. We set number of simultaneous prefetches
701 to a large constant to reflect this (it probably is not a good idea not
702 to limit number of prefetches at all, as their execution also takes some
703 time). */
704 100, /* number of parallel prefetches */
705 3, /* Branch cost */
706 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
707 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
708 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
709 COSTS_N_INSNS (2), /* cost of FABS instruction. */
710 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
711 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
712 /* K8 has optimized REP instruction for medium sized blocks, but for very small
713 blocks it is better to use a loop. For large blocks, libcall can do
714 nontemporal accesses and beat inline considerably. */
715 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
716 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
717 {{libcall, {{8, loop}, {24, unrolled_loop},
718 {2048, rep_prefix_4_byte}, {-1, libcall}}},
719 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
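/* Illustrative sketch (not part of the original file): how descriptors such
   as the memcpy/memset tables above can be scanned to choose an algorithm
   for a block of known size.  The real selection logic later in this file
   also weighs alignment and optimize_size; the names example_pick_alg and
   expected_size are hypothetical, while the unknown_size/size[].max/.alg
   fields follow the stringop_algs layout in i386.h.

     static enum stringop_alg
     example_pick_alg (const struct stringop_algs *algs,
                       HOST_WIDE_INT expected_size)
     {
       unsigned int i;
       if (expected_size < 0)
         return algs->unknown_size;
       for (i = 0; i < MAX_STRINGOP_ALGS; i++)
         if (algs->size[i].max == -1
             || expected_size <= algs->size[i].max)
           return algs->size[i].alg;
       return libcall;
     }
 */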
720 4, /* scalar_stmt_cost. */
721 2, /* scalar load_cost. */
722 2, /* scalar_store_cost. */
723 5, /* vec_stmt_cost. */
724 0, /* vec_to_scalar_cost. */
725 2, /* scalar_to_vec_cost. */
726 2, /* vec_align_load_cost. */
727 3, /* vec_unalign_load_cost. */
728 3, /* vec_store_cost. */
729 3, /* cond_taken_branch_cost. */
730 2, /* cond_not_taken_branch_cost. */
731 };
733 struct processor_costs amdfam10_cost = {
734 COSTS_N_INSNS (1), /* cost of an add instruction */
735 COSTS_N_INSNS (2), /* cost of a lea instruction */
736 COSTS_N_INSNS (1), /* variable shift costs */
737 COSTS_N_INSNS (1), /* constant shift costs */
738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
739 COSTS_N_INSNS (4), /* HI */
740 COSTS_N_INSNS (3), /* SI */
741 COSTS_N_INSNS (4), /* DI */
742 COSTS_N_INSNS (5)}, /* other */
743 0, /* cost of multiply per each bit set */
744 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
745 COSTS_N_INSNS (35), /* HI */
746 COSTS_N_INSNS (51), /* SI */
747 COSTS_N_INSNS (83), /* DI */
748 COSTS_N_INSNS (83)}, /* other */
749 COSTS_N_INSNS (1), /* cost of movsx */
750 COSTS_N_INSNS (1), /* cost of movzx */
751 8, /* "large" insn */
752 9, /* MOVE_RATIO */
753 4, /* cost for loading QImode using movzbl */
754 {3, 4, 3}, /* cost of loading integer registers
755 in QImode, HImode and SImode.
756 Relative to reg-reg move (2). */
757 {3, 4, 3}, /* cost of storing integer registers */
758 4, /* cost of reg,reg fld/fst */
759 {4, 4, 12}, /* cost of loading fp registers
760 in SFmode, DFmode and XFmode */
761 {6, 6, 8}, /* cost of storing fp registers
762 in SFmode, DFmode and XFmode */
763 2, /* cost of moving MMX register */
764 {3, 3}, /* cost of loading MMX registers
765 in SImode and DImode */
766 {4, 4}, /* cost of storing MMX registers
767 in SImode and DImode */
768 2, /* cost of moving SSE register */
769 {4, 4, 3}, /* cost of loading SSE registers
770 in SImode, DImode and TImode */
771 {4, 4, 5}, /* cost of storing SSE registers
772 in SImode, DImode and TImode */
773 3, /* MMX or SSE register to integer */
774 /* On K8
775 MOVD reg64, xmmreg Double FSTORE 4
776 MOVD reg32, xmmreg Double FSTORE 4
777 On AMDFAM10
778 MOVD reg64, xmmreg Double FADD 3
779 1/1 1/1
780 MOVD reg32, xmmreg Double FADD 3
781 1/1 1/1 */
782 64, /* size of l1 cache. */
783 512, /* size of l2 cache. */
784 64, /* size of prefetch block */
785 /* New AMD processors never drop prefetches; if they cannot be performed
786 immediately, they are queued. We set number of simultaneous prefetches
787 to a large constant to reflect this (it probably is not a good idea not
788 to limit number of prefetches at all, as their execution also takes some
789 time). */
790 100, /* number of parallel prefetches */
791 2, /* Branch cost */
792 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
793 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
794 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
795 COSTS_N_INSNS (2), /* cost of FABS instruction. */
796 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
797 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
799 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
800 very small blocks it is better to use a loop. For large blocks, libcall can
801 do nontemporal accesses and beat inline considerably. */
802 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
803 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
804 {{libcall, {{8, loop}, {24, unrolled_loop},
805 {2048, rep_prefix_4_byte}, {-1, libcall}}},
806 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
807 4, /* scalar_stmt_cost. */
808 2, /* scalar load_cost. */
809 2, /* scalar_store_cost. */
810 6, /* vec_stmt_cost. */
811 0, /* vec_to_scalar_cost. */
812 2, /* scalar_to_vec_cost. */
813 2, /* vec_align_load_cost. */
814 2, /* vec_unalign_load_cost. */
815 2, /* vec_store_cost. */
816 2, /* cond_taken_branch_cost. */
817 1, /* cond_not_taken_branch_cost. */
818 };
820 static const
821 struct processor_costs pentium4_cost = {
822 COSTS_N_INSNS (1), /* cost of an add instruction */
823 COSTS_N_INSNS (3), /* cost of a lea instruction */
824 COSTS_N_INSNS (4), /* variable shift costs */
825 COSTS_N_INSNS (4), /* constant shift costs */
826 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
827 COSTS_N_INSNS (15), /* HI */
828 COSTS_N_INSNS (15), /* SI */
829 COSTS_N_INSNS (15), /* DI */
830 COSTS_N_INSNS (15)}, /* other */
831 0, /* cost of multiply per each bit set */
832 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
833 COSTS_N_INSNS (56), /* HI */
834 COSTS_N_INSNS (56), /* SI */
835 COSTS_N_INSNS (56), /* DI */
836 COSTS_N_INSNS (56)}, /* other */
837 COSTS_N_INSNS (1), /* cost of movsx */
838 COSTS_N_INSNS (1), /* cost of movzx */
839 16, /* "large" insn */
840 6, /* MOVE_RATIO */
841 2, /* cost for loading QImode using movzbl */
842 {4, 5, 4}, /* cost of loading integer registers
843 in QImode, HImode and SImode.
844 Relative to reg-reg move (2). */
845 {2, 3, 2}, /* cost of storing integer registers */
846 2, /* cost of reg,reg fld/fst */
847 {2, 2, 6}, /* cost of loading fp registers
848 in SFmode, DFmode and XFmode */
849 {4, 4, 6}, /* cost of storing fp registers
850 in SFmode, DFmode and XFmode */
851 2, /* cost of moving MMX register */
852 {2, 2}, /* cost of loading MMX registers
853 in SImode and DImode */
854 {2, 2}, /* cost of storing MMX registers
855 in SImode and DImode */
856 12, /* cost of moving SSE register */
857 {12, 12, 12}, /* cost of loading SSE registers
858 in SImode, DImode and TImode */
859 {2, 2, 8}, /* cost of storing SSE registers
860 in SImode, DImode and TImode */
861 10, /* MMX or SSE register to integer */
862 8, /* size of l1 cache. */
863 256, /* size of l2 cache. */
864 64, /* size of prefetch block */
865 6, /* number of parallel prefetches */
866 2, /* Branch cost */
867 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
868 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
869 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
870 COSTS_N_INSNS (2), /* cost of FABS instruction. */
871 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
872 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
873 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
874 DUMMY_STRINGOP_ALGS},
875 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
876 {-1, libcall}}},
877 DUMMY_STRINGOP_ALGS},
878 1, /* scalar_stmt_cost. */
879 1, /* scalar load_cost. */
880 1, /* scalar_store_cost. */
881 1, /* vec_stmt_cost. */
882 1, /* vec_to_scalar_cost. */
883 1, /* scalar_to_vec_cost. */
884 1, /* vec_align_load_cost. */
885 2, /* vec_unalign_load_cost. */
886 1, /* vec_store_cost. */
887 3, /* cond_taken_branch_cost. */
888 1, /* cond_not_taken_branch_cost. */
889 };
891 static const
892 struct processor_costs nocona_cost = {
893 COSTS_N_INSNS (1), /* cost of an add instruction */
894 COSTS_N_INSNS (1), /* cost of a lea instruction */
895 COSTS_N_INSNS (1), /* variable shift costs */
896 COSTS_N_INSNS (1), /* constant shift costs */
897 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
898 COSTS_N_INSNS (10), /* HI */
899 COSTS_N_INSNS (10), /* SI */
900 COSTS_N_INSNS (10), /* DI */
901 COSTS_N_INSNS (10)}, /* other */
902 0, /* cost of multiply per each bit set */
903 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
904 COSTS_N_INSNS (66), /* HI */
905 COSTS_N_INSNS (66), /* SI */
906 COSTS_N_INSNS (66), /* DI */
907 COSTS_N_INSNS (66)}, /* other */
908 COSTS_N_INSNS (1), /* cost of movsx */
909 COSTS_N_INSNS (1), /* cost of movzx */
910 16, /* "large" insn */
911 17, /* MOVE_RATIO */
912 4, /* cost for loading QImode using movzbl */
913 {4, 4, 4}, /* cost of loading integer registers
914 in QImode, HImode and SImode.
915 Relative to reg-reg move (2). */
916 {4, 4, 4}, /* cost of storing integer registers */
917 3, /* cost of reg,reg fld/fst */
918 {12, 12, 12}, /* cost of loading fp registers
919 in SFmode, DFmode and XFmode */
920 {4, 4, 4}, /* cost of storing fp registers
921 in SFmode, DFmode and XFmode */
922 6, /* cost of moving MMX register */
923 {12, 12}, /* cost of loading MMX registers
924 in SImode and DImode */
925 {12, 12}, /* cost of storing MMX registers
926 in SImode and DImode */
927 6, /* cost of moving SSE register */
928 {12, 12, 12}, /* cost of loading SSE registers
929 in SImode, DImode and TImode */
930 {12, 12, 12}, /* cost of storing SSE registers
931 in SImode, DImode and TImode */
932 8, /* MMX or SSE register to integer */
933 8, /* size of l1 cache. */
934 1024, /* size of l2 cache. */
935 128, /* size of prefetch block */
936 8, /* number of parallel prefetches */
937 1, /* Branch cost */
938 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
939 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
940 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
941 COSTS_N_INSNS (3), /* cost of FABS instruction. */
942 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
943 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
944 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
945 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
946 {100000, unrolled_loop}, {-1, libcall}}}},
947 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
948 {-1, libcall}}},
949 {libcall, {{24, loop}, {64, unrolled_loop},
950 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
951 1, /* scalar_stmt_cost. */
952 1, /* scalar load_cost. */
953 1, /* scalar_store_cost. */
954 1, /* vec_stmt_cost. */
955 1, /* vec_to_scalar_cost. */
956 1, /* scalar_to_vec_cost. */
957 1, /* vec_align_load_cost. */
958 2, /* vec_unalign_load_cost. */
959 1, /* vec_store_cost. */
960 3, /* cond_taken_branch_cost. */
961 1, /* cond_not_taken_branch_cost. */
962 };
964 static const
965 struct processor_costs core2_cost = {
966 COSTS_N_INSNS (1), /* cost of an add instruction */
967 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
968 COSTS_N_INSNS (1), /* variable shift costs */
969 COSTS_N_INSNS (1), /* constant shift costs */
970 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
971 COSTS_N_INSNS (3), /* HI */
972 COSTS_N_INSNS (3), /* SI */
973 COSTS_N_INSNS (3), /* DI */
974 COSTS_N_INSNS (3)}, /* other */
975 0, /* cost of multiply per each bit set */
976 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
977 COSTS_N_INSNS (22), /* HI */
978 COSTS_N_INSNS (22), /* SI */
979 COSTS_N_INSNS (22), /* DI */
980 COSTS_N_INSNS (22)}, /* other */
981 COSTS_N_INSNS (1), /* cost of movsx */
982 COSTS_N_INSNS (1), /* cost of movzx */
983 8, /* "large" insn */
984 16, /* MOVE_RATIO */
985 2, /* cost for loading QImode using movzbl */
986 {6, 6, 6}, /* cost of loading integer registers
987 in QImode, HImode and SImode.
988 Relative to reg-reg move (2). */
989 {4, 4, 4}, /* cost of storing integer registers */
990 2, /* cost of reg,reg fld/fst */
991 {6, 6, 6}, /* cost of loading fp registers
992 in SFmode, DFmode and XFmode */
993 {4, 4, 4}, /* cost of loading integer registers */
994 2, /* cost of moving MMX register */
995 {6, 6}, /* cost of loading MMX registers
996 in SImode and DImode */
997 {4, 4}, /* cost of storing MMX registers
998 in SImode and DImode */
999 2, /* cost of moving SSE register */
1000 {6, 6, 6}, /* cost of loading SSE registers
1001 in SImode, DImode and TImode */
1002 {4, 4, 4}, /* cost of storing SSE registers
1003 in SImode, DImode and TImode */
1004 2, /* MMX or SSE register to integer */
1005 32, /* size of l1 cache. */
1006 2048, /* size of l2 cache. */
1007 128, /* size of prefetch block */
1008 8, /* number of parallel prefetches */
1009 3, /* Branch cost */
1010 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1016 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1017 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1018 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1019 {{libcall, {{8, loop}, {15, unrolled_loop},
1020 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1021 {libcall, {{24, loop}, {32, unrolled_loop},
1022 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1023 1, /* scalar_stmt_cost. */
1024 1, /* scalar load_cost. */
1025 1, /* scalar_store_cost. */
1026 1, /* vec_stmt_cost. */
1027 1, /* vec_to_scalar_cost. */
1028 1, /* scalar_to_vec_cost. */
1029 1, /* vec_align_load_cost. */
1030 2, /* vec_unalign_load_cost. */
1031 1, /* vec_store_cost. */
1032 3, /* cond_taken_branch_cost. */
1033 1, /* cond_not_taken_branch_cost. */
1034 };
1036 /* Generic64 should produce code tuned for Nocona and K8. */
1037 static const
1038 struct processor_costs generic64_cost = {
1039 COSTS_N_INSNS (1), /* cost of an add instruction */
1040 /* On all chips taken into consideration, lea is 2 cycles or more. With
1041 this cost, however, our current implementation of synth_mult results in
1042 the use of unnecessary temporary registers, causing regressions on several
1043 SPECfp benchmarks. */
1044 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1045 COSTS_N_INSNS (1), /* variable shift costs */
1046 COSTS_N_INSNS (1), /* constant shift costs */
1047 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1048 COSTS_N_INSNS (4), /* HI */
1049 COSTS_N_INSNS (3), /* SI */
1050 COSTS_N_INSNS (4), /* DI */
1051 COSTS_N_INSNS (2)}, /* other */
1052 0, /* cost of multiply per each bit set */
1053 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1054 COSTS_N_INSNS (26), /* HI */
1055 COSTS_N_INSNS (42), /* SI */
1056 COSTS_N_INSNS (74), /* DI */
1057 COSTS_N_INSNS (74)}, /* other */
1058 COSTS_N_INSNS (1), /* cost of movsx */
1059 COSTS_N_INSNS (1), /* cost of movzx */
1060 8, /* "large" insn */
1061 17, /* MOVE_RATIO */
1062 4, /* cost for loading QImode using movzbl */
1063 {4, 4, 4}, /* cost of loading integer registers
1064 in QImode, HImode and SImode.
1065 Relative to reg-reg move (2). */
1066 {4, 4, 4}, /* cost of storing integer registers */
1067 4, /* cost of reg,reg fld/fst */
1068 {12, 12, 12}, /* cost of loading fp registers
1069 in SFmode, DFmode and XFmode */
1070 {6, 6, 8}, /* cost of storing fp registers
1071 in SFmode, DFmode and XFmode */
1072 2, /* cost of moving MMX register */
1073 {8, 8}, /* cost of loading MMX registers
1074 in SImode and DImode */
1075 {8, 8}, /* cost of storing MMX registers
1076 in SImode and DImode */
1077 2, /* cost of moving SSE register */
1078 {8, 8, 8}, /* cost of loading SSE registers
1079 in SImode, DImode and TImode */
1080 {8, 8, 8}, /* cost of storing SSE registers
1081 in SImode, DImode and TImode */
1082 5, /* MMX or SSE register to integer */
1083 32, /* size of l1 cache. */
1084 512, /* size of l2 cache. */
1085 64, /* size of prefetch block */
1086 6, /* number of parallel prefetches */
1087 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
1088 is increased to the perhaps more appropriate value of 5. */
1089 3, /* Branch cost */
1090 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1091 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1092 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1093 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1094 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1095 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1096 {DUMMY_STRINGOP_ALGS,
1097 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 {DUMMY_STRINGOP_ALGS,
1099 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1100 1, /* scalar_stmt_cost. */
1101 1, /* scalar load_cost. */
1102 1, /* scalar_store_cost. */
1103 1, /* vec_stmt_cost. */
1104 1, /* vec_to_scalar_cost. */
1105 1, /* scalar_to_vec_cost. */
1106 1, /* vec_align_load_cost. */
1107 2, /* vec_unalign_load_cost. */
1108 1, /* vec_store_cost. */
1109 3, /* cond_taken_branch_cost. */
1110 1, /* cond_not_taken_branch_cost. */
1111 };
1113 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1114 static const
1115 struct processor_costs generic32_cost = {
1116 COSTS_N_INSNS (1), /* cost of an add instruction */
1117 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1118 COSTS_N_INSNS (1), /* variable shift costs */
1119 COSTS_N_INSNS (1), /* constant shift costs */
1120 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1121 COSTS_N_INSNS (4), /* HI */
1122 COSTS_N_INSNS (3), /* SI */
1123 COSTS_N_INSNS (4), /* DI */
1124 COSTS_N_INSNS (2)}, /* other */
1125 0, /* cost of multiply per each bit set */
1126 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1127 COSTS_N_INSNS (26), /* HI */
1128 COSTS_N_INSNS (42), /* SI */
1129 COSTS_N_INSNS (74), /* DI */
1130 COSTS_N_INSNS (74)}, /* other */
1131 COSTS_N_INSNS (1), /* cost of movsx */
1132 COSTS_N_INSNS (1), /* cost of movzx */
1133 8, /* "large" insn */
1134 17, /* MOVE_RATIO */
1135 4, /* cost for loading QImode using movzbl */
1136 {4, 4, 4}, /* cost of loading integer registers
1137 in QImode, HImode and SImode.
1138 Relative to reg-reg move (2). */
1139 {4, 4, 4}, /* cost of storing integer registers */
1140 4, /* cost of reg,reg fld/fst */
1141 {12, 12, 12}, /* cost of loading fp registers
1142 in SFmode, DFmode and XFmode */
1143 {6, 6, 8}, /* cost of storing fp registers
1144 in SFmode, DFmode and XFmode */
1145 2, /* cost of moving MMX register */
1146 {8, 8}, /* cost of loading MMX registers
1147 in SImode and DImode */
1148 {8, 8}, /* cost of storing MMX registers
1149 in SImode and DImode */
1150 2, /* cost of moving SSE register */
1151 {8, 8, 8}, /* cost of loading SSE registers
1152 in SImode, DImode and TImode */
1153 {8, 8, 8}, /* cost of storing SSE registers
1154 in SImode, DImode and TImode */
1155 5, /* MMX or SSE register to integer */
1156 32, /* size of l1 cache. */
1157 256, /* size of l2 cache. */
1158 64, /* size of prefetch block */
1159 6, /* number of parallel prefetches */
1160 3, /* Branch cost */
1161 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1162 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1163 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1164 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1165 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1166 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1167 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1168 DUMMY_STRINGOP_ALGS},
1169 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1170 DUMMY_STRINGOP_ALGS},
1171 1, /* scalar_stmt_cost. */
1172 1, /* scalar load_cost. */
1173 1, /* scalar_store_cost. */
1174 1, /* vec_stmt_cost. */
1175 1, /* vec_to_scalar_cost. */
1176 1, /* scalar_to_vec_cost. */
1177 1, /* vec_align_load_cost. */
1178 2, /* vec_unalign_load_cost. */
1179 1, /* vec_store_cost. */
1180 3, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1184 const struct processor_costs *ix86_cost = &pentium_cost;
1186 /* Processor feature/optimization bitmasks. */
1187 #define m_386 (1<<PROCESSOR_I386)
1188 #define m_486 (1<<PROCESSOR_I486)
1189 #define m_PENT (1<<PROCESSOR_PENTIUM)
1190 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1191 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1192 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1193 #define m_CORE2 (1<<PROCESSOR_CORE2)
1195 #define m_GEODE (1<<PROCESSOR_GEODE)
1196 #define m_K6 (1<<PROCESSOR_K6)
1197 #define m_K6_GEODE (m_K6 | m_GEODE)
1198 #define m_K8 (1<<PROCESSOR_K8)
1199 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1200 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1201 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1202 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1204 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1205 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1207 /* Generic instruction choice should be a common subset of supported CPUs
1208 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1209 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
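/* Illustrative sketch (not part of the original file): each entry in the
   tuning table below is a bitmask over the processors defined above, and a
   tuning flag is considered "on" when the bit of the processor currently
   being tuned for is set.  Conceptually:

     #define EXAMPLE_TUNE_P(FEATURE) \
       ((ix86_tune_features[(FEATURE)] & (1U << ix86_tune)) != 0)

   EXAMPLE_TUNE_P is a hypothetical name used only to show the test; the
   real TARGET_* convenience macros live in i386.h.  */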
1211 /* Feature tests against the various tunings. */
1212 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1213 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1214 negatively, so enabling for Generic64 seems like good code size
1215 tradeoff. We can't enable it for 32bit generic because it does not
1216 work well with PPro base chips. */
1217 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1219 /* X86_TUNE_PUSH_MEMORY */
1220 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1221 | m_NOCONA | m_CORE2 | m_GENERIC,
1223 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1224 m_486 | m_PENT,
1226 /* X86_TUNE_USE_BIT_TEST */
1227 m_386,
1229 /* X86_TUNE_UNROLL_STRLEN */
1230 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1232 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1233 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1235 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1236 on simulation results. But after P4 was made, no performance benefit
1237 was observed with branch hints. It also increases the code size.
1238 As a result, icc never generates branch hints. */
1239 0,
1241 /* X86_TUNE_DOUBLE_WITH_ADD */
1242 ~m_386,
1244 /* X86_TUNE_USE_SAHF */
1245 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1246 | m_NOCONA | m_CORE2 | m_GENERIC,
1248 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1249 partial dependencies. */
1250 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1251 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1253 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1254 register stalls on Generic32 compilation setting as well. However
1255 in current implementation the partial register stalls are not eliminated
1256 very well - they can be introduced via subregs synthesized by combine
1257 and can happen in caller/callee saving sequences. Because this option
1258 pays back little on PPro based chips and is in conflict with partial reg
1259 dependencies used by Athlon/P4 based chips, it is better to leave it off
1260 for generic32 for now. */
1261 m_PPRO,
1263 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1264 m_CORE2 | m_GENERIC,
1266 /* X86_TUNE_USE_HIMODE_FIOP */
1267 m_386 | m_486 | m_K6_GEODE,
1269 /* X86_TUNE_USE_SIMODE_FIOP */
1270 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1272 /* X86_TUNE_USE_MOV0 */
1273 m_K6,
1275 /* X86_TUNE_USE_CLTD */
1276 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1278 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1279 m_PENT4,
1281 /* X86_TUNE_SPLIT_LONG_MOVES */
1282 m_PPRO,
1284 /* X86_TUNE_READ_MODIFY_WRITE */
1285 ~m_PENT,
1287 /* X86_TUNE_READ_MODIFY */
1288 ~(m_PENT | m_PPRO),
1290 /* X86_TUNE_PROMOTE_QIMODE */
1291 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1292 | m_GENERIC /* | m_PENT4 ? */,
1294 /* X86_TUNE_FAST_PREFIX */
1295 ~(m_PENT | m_486 | m_386),
1297 /* X86_TUNE_SINGLE_STRINGOP */
1298 m_386 | m_PENT4 | m_NOCONA,
1300 /* X86_TUNE_QIMODE_MATH */
1301 ~0,
1303 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1304 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1305 might be considered for Generic32 if our scheme for avoiding partial
1306 stalls was more effective. */
1307 ~m_PPRO,
1309 /* X86_TUNE_PROMOTE_QI_REGS */
1310 0,
1312 /* X86_TUNE_PROMOTE_HI_REGS */
1313 m_PPRO,
1315 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1316 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1318 /* X86_TUNE_ADD_ESP_8 */
1319 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1320 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322 /* X86_TUNE_SUB_ESP_4 */
1323 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325 /* X86_TUNE_SUB_ESP_8 */
1326 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1327 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1330 for DFmode copies */
1331 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1332 | m_GENERIC | m_GEODE),
1334 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1335 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1337 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1338 conflict here in between PPro/Pentium4 based chips that treat 128bit
1339 SSE registers as single units versus K8 based chips that divide SSE
1340 registers into two 64bit halves. This knob promotes all store destinations
1341 to be 128bit to allow register renaming on 128bit SSE units, but usually
1342 results in one extra microop on 64bit SSE units. Experimental results
1343 show that disabling this option on P4 brings over 20% SPECfp regression,
1344 while enabling it on K8 brings roughly 2.4% regression that can be partly
1345 masked by careful scheduling of moves. */
1346 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1348 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1349 m_AMDFAM10,
1351 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1352 are resolved on SSE register parts instead of whole registers, so we may
1353 maintain just the lower part of scalar values in the proper format, leaving
1354 the upper part undefined. */
1355 m_ATHLON_K8,
1357 /* X86_TUNE_SSE_TYPELESS_STORES */
1358 m_AMD_MULTIPLE,
1360 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1361 m_PPRO | m_PENT4 | m_NOCONA,
1363 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1364 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1366 /* X86_TUNE_PROLOGUE_USING_MOVE */
1367 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369 /* X86_TUNE_EPILOGUE_USING_MOVE */
1370 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372 /* X86_TUNE_SHIFT1 */
1373 ~m_486,
1375 /* X86_TUNE_USE_FFREEP */
1376 m_AMD_MULTIPLE,
1378 /* X86_TUNE_INTER_UNIT_MOVES */
1379 ~(m_AMD_MULTIPLE | m_GENERIC),
1381 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1382 ~(m_AMDFAM10),
1384 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1385 than 4 branch instructions in the 16 byte window. */
1386 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1388 /* X86_TUNE_SCHEDULE */
1389 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1391 /* X86_TUNE_USE_BT */
1392 m_AMD_MULTIPLE,
1394 /* X86_TUNE_USE_INCDEC */
1395 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1397 /* X86_TUNE_PAD_RETURNS */
1398 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_EXT_80387_CONSTANTS */
1401 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1403 /* X86_TUNE_SHORTEN_X87_SSE */
1404 ~m_K8,
1406 /* X86_TUNE_AVOID_VECTOR_DECODE */
1407 m_K8 | m_GENERIC64,
1409 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1410 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1411 ~(m_386 | m_486),
1413 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1414 vector path on AMD machines. */
1415 m_K8 | m_GENERIC64 | m_AMDFAM10,
1417 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1418 machines. */
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1422 than a MOV. */
1423 m_PENT,
1425 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1426 but one byte longer. */
1427 m_PENT,
1429 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1430 operand that cannot be represented using a modRM byte. The XOR
1431 replacement is long decoded, so this split helps here as well. */
1432 m_K6,
1434 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1435 from integer to FP. */
1436 m_AMDFAM10,
1437 };
1439 /* Feature tests against the various architecture variations. */
1440 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1441 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1442 ~(m_386 | m_486 | m_PENT | m_K6),
1444 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1445 ~m_386,
1447 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1448 ~(m_386 | m_486),
1450 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1451 ~m_386,
1453 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1454 ~m_386,
1455 };
1457 static const unsigned int x86_accumulate_outgoing_args
1458 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1460 static const unsigned int x86_arch_always_fancy_math_387
1461 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1462 | m_NOCONA | m_CORE2 | m_GENERIC;
1464 static enum stringop_alg stringop_alg = no_stringop;
1466 /* In case the average insn count for a single function invocation is
1467 lower than this constant, emit fast (but longer) prologue and
1468 epilogue code. */
1469 #define FAST_PROLOGUE_INSN_COUNT 20
1471 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1472 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1473 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1474 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1476 /* Array of the smallest class containing reg number REGNO, indexed by
1477 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1479 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1481 /* ax, dx, cx, bx */
1482 AREG, DREG, CREG, BREG,
1483 /* si, di, bp, sp */
1484 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1485 /* FP registers */
1486 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1487 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1488 /* arg pointer */
1489 NON_Q_REGS,
1490 /* flags, fpsr, fpcr, frame */
1491 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1492 /* SSE registers */
1493 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1494 SSE_REGS, SSE_REGS,
1495 /* MMX registers */
1496 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1497 MMX_REGS, MMX_REGS,
1498 /* REX registers */
1499 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1500 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1501 /* SSE REX registers */
1502 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1503 SSE_REGS, SSE_REGS,
1506 /* The "default" register map used in 32bit mode. */
1508 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1510 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1511 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1512 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1513 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1514 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1515 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1516 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1519 static int const x86_64_int_parameter_registers[6] =
1521 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1522 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1525 static int const x86_64_ms_abi_int_parameter_registers[4] =
1527 2 /*RCX*/, 1 /*RDX*/,
1528 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1531 static int const x86_64_int_return_registers[4] =
1533 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1536 /* The "default" register map used in 64bit mode. */
1537 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1539 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1540 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1541 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1542 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1543 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1544 8,9,10,11,12,13,14,15, /* extended integer registers */
1545 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1548 /* Define the register numbers to be used in Dwarf debugging information.
1549 The SVR4 reference port C compiler uses the following register numbers
1550 in its Dwarf output code:
1551 0 for %eax (gcc regno = 0)
1552 1 for %ecx (gcc regno = 2)
1553 2 for %edx (gcc regno = 1)
1554 3 for %ebx (gcc regno = 3)
1555 4 for %esp (gcc regno = 7)
1556 5 for %ebp (gcc regno = 6)
1557 6 for %esi (gcc regno = 4)
1558 7 for %edi (gcc regno = 5)
1559 The following three DWARF register numbers are never generated by
1560 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1561 believes these numbers have these meanings.
1562 8 for %eip (no gcc equivalent)
1563 9 for %eflags (gcc regno = 17)
1564 10 for %trapno (no gcc equivalent)
1565 It is not at all clear how we should number the FP stack registers
1566 for the x86 architecture. If the version of SDB on x86/svr4 were
1567 a bit less brain dead with respect to floating-point then we would
1568 have a precedent to follow with respect to DWARF register numbers
1569 for x86 FP registers, but the SDB on x86/svr4 is so completely
1570 broken with respect to FP registers that it is hardly worth thinking
1571 of it as something to strive for compatibility with.
1572 The version of x86/svr4 SDB I have at the moment does (partially)
1573 seem to believe that DWARF register number 11 is associated with
1574 the x86 register %st(0), but that's about all. Higher DWARF
1575 register numbers don't seem to be associated with anything in
1576 particular, and even for DWARF regno 11, SDB only seems to under-
1577 stand that it should say that a variable lives in %st(0) (when
1578 asked via an `=' command) if we said it was in DWARF regno 11,
1579 but SDB still prints garbage when asked for the value of the
1580 variable in question (via a `/' command).
1581 (Also note that the labels SDB prints for various FP stack regs
1582 when doing an `x' command are all wrong.)
1583 Note that these problems generally don't affect the native SVR4
1584 C compiler because it doesn't allow the use of -O with -g and
1585 because when it is *not* optimizing, it allocates a memory
1586 location for each floating-point variable, and the memory
1587 location is what gets described in the DWARF AT_location
1588 attribute for the variable in question.
1589 Regardless of the severe mental illness of the x86/svr4 SDB, we
1590 do something sensible here and we use the following DWARF
1591 register numbers. Note that these are all stack-top-relative
1592 numbers.
1593 11 for %st(0) (gcc regno = 8)
1594 12 for %st(1) (gcc regno = 9)
1595 13 for %st(2) (gcc regno = 10)
1596 14 for %st(3) (gcc regno = 11)
1597 15 for %st(4) (gcc regno = 12)
1598 16 for %st(5) (gcc regno = 13)
1599 17 for %st(6) (gcc regno = 14)
1600 18 for %st(7) (gcc regno = 15)
1602 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1604 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1605 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1606 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1607 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1608 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1609 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
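/* Reading the table above, for instance (purely illustrative):

     svr4_dbx_register_map[0] == 0     gcc %eax   -> DWARF 0
     svr4_dbx_register_map[4] == 6     gcc %esi   -> DWARF 6
     svr4_dbx_register_map[8] == 11    gcc %st(0) -> DWARF 11

   matching the numbering described in the comment before the table.  */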
1613 /* Test and compare insns in i386.md store the information needed to
1614 generate branch and scc insns here. */
1616 rtx ix86_compare_op0 = NULL_RTX;
1617 rtx ix86_compare_op1 = NULL_RTX;
1618 rtx ix86_compare_emitted = NULL_RTX;
1620 /* Size of the register save area. */
1621 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
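/* A quick worked example of the definition above, assuming the usual
   x86-64 values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and
   SSE_REGPARM_MAX == 8 (the common defaults, not something this file
   guarantees):

     X86_64_VARARGS_SIZE = 6 * 8 + 8 * 16 = 48 + 128 = 176 bytes

   i.e. room for the six integer argument registers plus the eight SSE
   argument registers in the varargs register save area.  */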
1623 /* Define the structure for the machine field in struct function. */
1625 struct stack_local_entry GTY(())
1627 unsigned short mode;
1628 unsigned short n;
1629 rtx rtl;
1630 struct stack_local_entry *next;
1633 /* Structure describing stack frame layout.
1634 Stack grows downward:
1636 [arguments]
1637 <- ARG_POINTER
1638 saved pc
1640 saved frame pointer if frame_pointer_needed
1641 <- HARD_FRAME_POINTER
1642 [saved regs]
1644 [padding1] \
1646 [va_arg registers] (
1647 > to_allocate <- FRAME_POINTER
1648 [frame] (
1650 [padding2] /
1652 struct ix86_frame
1654 int nregs;
1655 int padding1;
1656 int va_arg_size;
1657 HOST_WIDE_INT frame;
1658 int padding2;
1659 int outgoing_arguments_size;
1660 int red_zone_size;
1662 HOST_WIDE_INT to_allocate;
1663 /* The offsets relative to ARG_POINTER. */
1664 HOST_WIDE_INT frame_pointer_offset;
1665 HOST_WIDE_INT hard_frame_pointer_offset;
1666 HOST_WIDE_INT stack_pointer_offset;
1668 /* When save_regs_using_mov is set, emit prologue using
1669 move instead of push instructions. */
1670 bool save_regs_using_mov;
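/* Following the diagram above, the fields relate roughly as (a sketch;
   the authoritative computation is ix86_compute_frame_layout below):

     to_allocate = padding1 + va_arg_size + frame + padding2

   i.e. the region below the saved registers that the prologue still has
   to carve out of the stack after the pushes are done.  */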
1673 /* Code model option. */
1674 enum cmodel ix86_cmodel;
1675 /* Asm dialect. */
1676 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1677 /* TLS dialects. */
1678 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1680 /* Which unit we are generating floating point math for. */
1681 enum fpmath_unit ix86_fpmath;
1683 /* Which cpu are we scheduling for. */
1684 enum processor_type ix86_tune;
1686 /* Which instruction set architecture to use. */
1687 enum processor_type ix86_arch;
1689 /* true if sse prefetch instruction is not NOOP. */
1690 int x86_prefetch_sse;
1692 /* ix86_regparm_string as a number */
1693 static int ix86_regparm;
1695 /* -mstackrealign option */
1696 extern int ix86_force_align_arg_pointer;
1697 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1699 /* Preferred alignment for stack boundary in bits. */
1700 unsigned int ix86_preferred_stack_boundary;
1702 /* Values 1-5: see jump.c */
1703 int ix86_branch_cost;
1705 /* Variables which are this size or smaller are put in the data/bss
1706 or ldata/lbss sections. */
1708 int ix86_section_threshold = 65536;
1710 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1711 char internal_label_prefix[16];
1712 int internal_label_prefix_len;
1714 /* Fence to use after loop using movnt. */
1715 tree x86_mfence;
1717 /* Register class used for passing a given 64-bit part of the argument.
1718 These represent classes as documented by the PS ABI, with the exception
1719 of the SSESF and SSEDF classes, which are basically SSE class, except that gcc will
1720 use an SF or DFmode move instead of DImode to avoid reformatting penalties.
1722 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1723 whenever possible (the upper half then contains only padding). */
1724 enum x86_64_reg_class
1726 X86_64_NO_CLASS,
1727 X86_64_INTEGER_CLASS,
1728 X86_64_INTEGERSI_CLASS,
1729 X86_64_SSE_CLASS,
1730 X86_64_SSESF_CLASS,
1731 X86_64_SSEDF_CLASS,
1732 X86_64_SSEUP_CLASS,
1733 X86_64_X87_CLASS,
1734 X86_64_X87UP_CLASS,
1735 X86_64_COMPLEX_X87_CLASS,
1736 X86_64_MEMORY_CLASS
1738 static const char * const x86_64_reg_class_name[] =
1740 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1741 "sseup", "x87", "x87up", "cplx87", "no"
1744 #define MAX_CLASSES 4
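/* A rough illustration of how these classes are meant to come out (a
   sketch, not taken from this file): a 16-byte aggregate such as

     struct s { double d; int i; };

   would classify its first eightbyte as X86_64_SSEDF_CLASS (the double)
   and its second eightbyte as X86_64_INTEGERSI_CLASS (the int plus four
   bytes of padding), so the value travels in one SSE register and one
   integer register.  */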
1746 /* Table of constants used by fldpi, fldln2, etc.... */
1747 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1748 static bool ext_80387_constants_init = 0;
1751 static struct machine_function * ix86_init_machine_status (void);
1752 static rtx ix86_function_value (const_tree, const_tree, bool);
1753 static int ix86_function_regparm (const_tree, const_tree);
1754 static void ix86_compute_frame_layout (struct ix86_frame *);
1755 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1756 rtx, rtx, int);
1759 /* The svr4 ABI for the i386 says that records and unions are returned
1760 in memory. */
1761 #ifndef DEFAULT_PCC_STRUCT_RETURN
1762 #define DEFAULT_PCC_STRUCT_RETURN 1
1763 #endif
1765 /* Bit flags that specify the ISA we are compiling for. */
1766 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1768 /* A mask of ix86_isa_flags that includes bit X if X
1769 was set or cleared on the command line. */
1770 static int ix86_isa_flags_explicit;
1772 /* Define a set of ISAs which are available when a given ISA is
1773 enabled. MMX and SSE ISAs are handled separately. */
1775 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1776 #define OPTION_MASK_ISA_3DNOW_SET \
1777 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1779 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1780 #define OPTION_MASK_ISA_SSE2_SET \
1781 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1782 #define OPTION_MASK_ISA_SSE3_SET \
1783 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1784 #define OPTION_MASK_ISA_SSSE3_SET \
1785 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1786 #define OPTION_MASK_ISA_SSE4_1_SET \
1787 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1788 #define OPTION_MASK_ISA_SSE4_2_SET \
1789 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1791 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1792 as -msse4.2. */
1793 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1795 #define OPTION_MASK_ISA_SSE4A_SET \
1796 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1797 #define OPTION_MASK_ISA_SSE5_SET \
1798 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1800 /* Define a set of ISAs which aren't available when a given ISA is
1801 disabled. MMX and SSE ISAs are handled separately. */
1803 #define OPTION_MASK_ISA_MMX_UNSET \
1804 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1805 #define OPTION_MASK_ISA_3DNOW_UNSET \
1806 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1807 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1809 #define OPTION_MASK_ISA_SSE_UNSET \
1810 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1811 #define OPTION_MASK_ISA_SSE2_UNSET \
1812 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1813 #define OPTION_MASK_ISA_SSE3_UNSET \
1814 (OPTION_MASK_ISA_SSE3 \
1815 | OPTION_MASK_ISA_SSSE3_UNSET \
1816 | OPTION_MASK_ISA_SSE4A_UNSET )
1817 #define OPTION_MASK_ISA_SSSE3_UNSET \
1818 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1819 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1820 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1821 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1823 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1824 as -mno-sse4.1. */
1825 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1827 #define OPTION_MASK_ISA_SSE4A_UNSET \
1828 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1830 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
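/* To see how the two macro families chain, consider -msse3 and
   -mno-sse3 (expansion sketched by hand):

     OPTION_MASK_ISA_SSE3_SET   == SSE3 | SSE2 | SSE
       (enabling SSE3 silently enables everything it depends on)

     OPTION_MASK_ISA_SSE3_UNSET == SSE3 | SSSE3 | SSE4.1 | SSE4.2 | SSE4A | SSE5
       (disabling SSE3 also disables everything that depends on it)

   ix86_handle_option below ORs the _SET mask in for -mfoo and ANDs out
   the _UNSET mask for -mno-foo.  */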
1832 /* Vectorization library interface and handlers. */
1833 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1834 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1835 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1837 /* Implement TARGET_HANDLE_OPTION. */
1839 static bool
1840 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1842 switch (code)
1844 case OPT_mmmx:
1845 if (value)
1847 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1848 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1850 else
1852 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1853 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1855 return true;
1857 case OPT_m3dnow:
1858 if (value)
1860 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1861 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1863 else
1865 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1866 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1868 return true;
1870 case OPT_m3dnowa:
1871 return false;
1873 case OPT_msse:
1874 if (value)
1876 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1877 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1879 else
1881 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1882 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1884 return true;
1886 case OPT_msse2:
1887 if (value)
1889 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1890 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1892 else
1894 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1895 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1897 return true;
1899 case OPT_msse3:
1900 if (value)
1902 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1903 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1905 else
1907 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1908 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1910 return true;
1912 case OPT_mssse3:
1913 if (value)
1915 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1916 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1918 else
1920 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1921 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1923 return true;
1925 case OPT_msse4_1:
1926 if (value)
1928 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1929 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1931 else
1933 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1934 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1936 return true;
1938 case OPT_msse4_2:
1939 if (value)
1941 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1942 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1944 else
1946 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1947 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1949 return true;
1951 case OPT_msse4:
1952 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1953 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1954 return true;
1956 case OPT_mno_sse4:
1957 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1958 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1959 return true;
1961 case OPT_msse4a:
1962 if (value)
1964 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1965 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1967 else
1969 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1970 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1972 return true;
1974 case OPT_msse5:
1975 if (value)
1977 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1978 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1980 else
1982 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1983 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1985 return true;
1987 default:
1988 return true;
1992 /* Sometimes certain combinations of command options do not make
1993 sense on a particular target machine. You can define a macro
1994 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1995 defined, is executed once just after all the command options have
1996 been parsed.
1998 Don't use this macro to turn on various extra optimizations for
1999 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2001 void
2002 override_options (void)
2004 int i;
2005 int ix86_tune_defaulted = 0;
2006 int ix86_arch_specified = 0;
2007 unsigned int ix86_arch_mask, ix86_tune_mask;
2009 /* Comes from final.c -- no real reason to change it. */
2010 #define MAX_CODE_ALIGN 16
2012 static struct ptt
2014 const struct processor_costs *cost; /* Processor costs */
2015 const int align_loop; /* Default alignments. */
2016 const int align_loop_max_skip;
2017 const int align_jump;
2018 const int align_jump_max_skip;
2019 const int align_func;
2021 const processor_target_table[PROCESSOR_max] =
2023 {&i386_cost, 4, 3, 4, 3, 4},
2024 {&i486_cost, 16, 15, 16, 15, 16},
2025 {&pentium_cost, 16, 7, 16, 7, 16},
2026 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2027 {&geode_cost, 0, 0, 0, 0, 0},
2028 {&k6_cost, 32, 7, 32, 7, 32},
2029 {&athlon_cost, 16, 7, 16, 7, 16},
2030 {&pentium4_cost, 0, 0, 0, 0, 0},
2031 {&k8_cost, 16, 7, 16, 7, 16},
2032 {&nocona_cost, 0, 0, 0, 0, 0},
2033 {&core2_cost, 16, 10, 16, 10, 16},
2034 {&generic32_cost, 16, 7, 16, 7, 16},
2035 {&generic64_cost, 16, 10, 16, 10, 16},
2036 {&amdfam10_cost, 32, 24, 32, 7, 32}
2039 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2041 "generic",
2042 "i386",
2043 "i486",
2044 "pentium",
2045 "pentium-mmx",
2046 "pentiumpro",
2047 "pentium2",
2048 "pentium3",
2049 "pentium4",
2050 "pentium-m",
2051 "prescott",
2052 "nocona",
2053 "core2",
2054 "geode",
2055 "k6",
2056 "k6-2",
2057 "k6-3",
2058 "athlon",
2059 "athlon-4",
2060 "k8",
2061 "amdfam10"
2064 enum pta_flags
2066 PTA_SSE = 1 << 0,
2067 PTA_SSE2 = 1 << 1,
2068 PTA_SSE3 = 1 << 2,
2069 PTA_MMX = 1 << 3,
2070 PTA_PREFETCH_SSE = 1 << 4,
2071 PTA_3DNOW = 1 << 5,
2072 PTA_3DNOW_A = 1 << 6,
2073 PTA_64BIT = 1 << 7,
2074 PTA_SSSE3 = 1 << 8,
2075 PTA_CX16 = 1 << 9,
2076 PTA_POPCNT = 1 << 10,
2077 PTA_ABM = 1 << 11,
2078 PTA_SSE4A = 1 << 12,
2079 PTA_NO_SAHF = 1 << 13,
2080 PTA_SSE4_1 = 1 << 14,
2081 PTA_SSE4_2 = 1 << 15,
2082 PTA_SSE5 = 1 << 16,
2083 PTA_AES = 1 << 17,
2084 PTA_PCLMUL = 1 << 18
2087 static struct pta
2089 const char *const name; /* processor name or nickname. */
2090 const enum processor_type processor;
2091 const unsigned /*enum pta_flags*/ flags;
2093 const processor_alias_table[] =
2095 {"i386", PROCESSOR_I386, 0},
2096 {"i486", PROCESSOR_I486, 0},
2097 {"i586", PROCESSOR_PENTIUM, 0},
2098 {"pentium", PROCESSOR_PENTIUM, 0},
2099 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2100 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2101 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2102 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2103 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2104 {"i686", PROCESSOR_PENTIUMPRO, 0},
2105 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2106 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2107 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2108 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2109 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2110 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2111 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2112 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2113 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2114 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2115 | PTA_CX16 | PTA_NO_SAHF)},
2116 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2117 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2118 | PTA_SSSE3
2119 | PTA_CX16)},
2120 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2121 |PTA_PREFETCH_SSE)},
2122 {"k6", PROCESSOR_K6, PTA_MMX},
2123 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2124 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2125 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2126 | PTA_PREFETCH_SSE)},
2127 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2128 | PTA_PREFETCH_SSE)},
2129 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2130 | PTA_SSE)},
2131 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2132 | PTA_SSE)},
2133 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2134 | PTA_SSE)},
2135 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2136 | PTA_MMX | PTA_SSE | PTA_SSE2
2137 | PTA_NO_SAHF)},
2138 {"k8", PROCESSOR_K8, (PTA_64BIT
2139 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2140 | PTA_SSE | PTA_SSE2
2141 | PTA_NO_SAHF)},
2142 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2143 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2144 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2145 | PTA_NO_SAHF)},
2146 {"opteron", PROCESSOR_K8, (PTA_64BIT
2147 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2148 | PTA_SSE | PTA_SSE2
2149 | PTA_NO_SAHF)},
2150 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2151 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2152 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2153 | PTA_NO_SAHF)},
2154 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2155 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2156 | PTA_SSE | PTA_SSE2
2157 | PTA_NO_SAHF)},
2158 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2159 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2160 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2161 | PTA_NO_SAHF)},
2162 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2163 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2164 | PTA_SSE | PTA_SSE2
2165 | PTA_NO_SAHF)},
2166 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2167 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2168 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2169 | PTA_SSE4A
2170 | PTA_CX16 | PTA_ABM)},
2171 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2172 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2173 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2174 | PTA_SSE4A
2175 | PTA_CX16 | PTA_ABM)},
2176 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2177 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2180 int const pta_size = ARRAY_SIZE (processor_alias_table);
2182 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2183 SUBTARGET_OVERRIDE_OPTIONS;
2184 #endif
2186 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2187 SUBSUBTARGET_OVERRIDE_OPTIONS;
2188 #endif
2190 /* -fPIC is the default for x86_64. */
2191 if (TARGET_MACHO && TARGET_64BIT)
2192 flag_pic = 2;
2194 /* Set the default values for switches whose default depends on TARGET_64BIT
2195 in case they weren't overwritten by command line options. */
2196 if (TARGET_64BIT)
2198 /* Mach-O doesn't support omitting the frame pointer for now. */
2199 if (flag_omit_frame_pointer == 2)
2200 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2201 if (flag_asynchronous_unwind_tables == 2)
2202 flag_asynchronous_unwind_tables = 1;
2203 if (flag_pcc_struct_return == 2)
2204 flag_pcc_struct_return = 0;
2206 else
2208 if (flag_omit_frame_pointer == 2)
2209 flag_omit_frame_pointer = 0;
2210 if (flag_asynchronous_unwind_tables == 2)
2211 flag_asynchronous_unwind_tables = 0;
2212 if (flag_pcc_struct_return == 2)
2213 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2216 /* Need to check -mtune=generic first. */
2217 if (ix86_tune_string)
2219 if (!strcmp (ix86_tune_string, "generic")
2220 || !strcmp (ix86_tune_string, "i686")
2221 /* As special support for cross compilers we read -mtune=native
2222 as -mtune=generic. With native compilers we won't see the
2223 -mtune=native, as it was changed by the driver. */
2224 || !strcmp (ix86_tune_string, "native"))
2226 if (TARGET_64BIT)
2227 ix86_tune_string = "generic64";
2228 else
2229 ix86_tune_string = "generic32";
2231 else if (!strncmp (ix86_tune_string, "generic", 7))
2232 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2234 else
2236 if (ix86_arch_string)
2237 ix86_tune_string = ix86_arch_string;
2238 if (!ix86_tune_string)
2240 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2241 ix86_tune_defaulted = 1;
2244 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2245 need to use a sensible tune option. */
2246 if (!strcmp (ix86_tune_string, "generic")
2247 || !strcmp (ix86_tune_string, "x86-64")
2248 || !strcmp (ix86_tune_string, "i686"))
2250 if (TARGET_64BIT)
2251 ix86_tune_string = "generic64";
2252 else
2253 ix86_tune_string = "generic32";
2256 if (ix86_stringop_string)
2258 if (!strcmp (ix86_stringop_string, "rep_byte"))
2259 stringop_alg = rep_prefix_1_byte;
2260 else if (!strcmp (ix86_stringop_string, "libcall"))
2261 stringop_alg = libcall;
2262 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2263 stringop_alg = rep_prefix_4_byte;
2264 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2265 stringop_alg = rep_prefix_8_byte;
2266 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2267 stringop_alg = loop_1_byte;
2268 else if (!strcmp (ix86_stringop_string, "loop"))
2269 stringop_alg = loop;
2270 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2271 stringop_alg = unrolled_loop;
2272 else
2273 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2275 if (!strcmp (ix86_tune_string, "x86-64"))
2276 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2277 "-mtune=generic instead as appropriate.");
2279 if (!ix86_arch_string)
2280 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2281 else
2282 ix86_arch_specified = 1;
2284 if (!strcmp (ix86_arch_string, "generic"))
2285 error ("generic CPU can be used only for -mtune= switch");
2286 if (!strncmp (ix86_arch_string, "generic", 7))
2287 error ("bad value (%s) for -march= switch", ix86_arch_string);
2289 if (ix86_cmodel_string != 0)
2291 if (!strcmp (ix86_cmodel_string, "small"))
2292 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2293 else if (!strcmp (ix86_cmodel_string, "medium"))
2294 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2295 else if (!strcmp (ix86_cmodel_string, "large"))
2296 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2297 else if (flag_pic)
2298 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2299 else if (!strcmp (ix86_cmodel_string, "32"))
2300 ix86_cmodel = CM_32;
2301 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2302 ix86_cmodel = CM_KERNEL;
2303 else
2304 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2306 else
2308 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2309 use of rip-relative addressing. This eliminates fixups that
2310 would otherwise be needed if this object is to be placed in a
2311 DLL, and is essentially just as efficient as direct addressing. */
2312 if (TARGET_64BIT_MS_ABI)
2313 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2314 else if (TARGET_64BIT)
2315 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2316 else
2317 ix86_cmodel = CM_32;
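/* Effect of the defaults chosen above (sketch): without an explicit
   -mcmodel=, a 64-bit MS-ABI target gets CM_SMALL_PIC (with PIC forced
   on), any other 64-bit target gets CM_SMALL or CM_SMALL_PIC depending
   on -fPIC, and a 32-bit compilation defaults to CM_32.  */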
2319 if (ix86_asm_string != 0)
2321 if (! TARGET_MACHO
2322 && !strcmp (ix86_asm_string, "intel"))
2323 ix86_asm_dialect = ASM_INTEL;
2324 else if (!strcmp (ix86_asm_string, "att"))
2325 ix86_asm_dialect = ASM_ATT;
2326 else
2327 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2329 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2330 error ("code model %qs not supported in the %s bit mode",
2331 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2332 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2333 sorry ("%i-bit mode not compiled in",
2334 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2336 for (i = 0; i < pta_size; i++)
2337 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2339 ix86_arch = processor_alias_table[i].processor;
2340 /* Default cpu tuning to the architecture. */
2341 ix86_tune = ix86_arch;
2343 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2344 error ("CPU you selected does not support x86-64 "
2345 "instruction set");
2347 if (processor_alias_table[i].flags & PTA_MMX
2348 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2349 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2350 if (processor_alias_table[i].flags & PTA_3DNOW
2351 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2352 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2353 if (processor_alias_table[i].flags & PTA_3DNOW_A
2354 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2355 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2356 if (processor_alias_table[i].flags & PTA_SSE
2357 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2358 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2359 if (processor_alias_table[i].flags & PTA_SSE2
2360 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2361 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2362 if (processor_alias_table[i].flags & PTA_SSE3
2363 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2364 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2365 if (processor_alias_table[i].flags & PTA_SSSE3
2366 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2367 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2368 if (processor_alias_table[i].flags & PTA_SSE4_1
2369 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2370 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2371 if (processor_alias_table[i].flags & PTA_SSE4_2
2372 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2373 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2374 if (processor_alias_table[i].flags & PTA_SSE4A
2375 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2376 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2377 if (processor_alias_table[i].flags & PTA_SSE5
2378 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2379 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2381 if (processor_alias_table[i].flags & PTA_ABM)
2382 x86_abm = true;
2383 if (processor_alias_table[i].flags & PTA_CX16)
2384 x86_cmpxchg16b = true;
2385 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2386 x86_popcnt = true;
2387 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2388 x86_prefetch_sse = true;
2389 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2390 x86_sahf = true;
2391 if (processor_alias_table[i].flags & PTA_AES)
2392 x86_aes = true;
2393 if (processor_alias_table[i].flags & PTA_PCLMUL)
2394 x86_pclmul = true;
2396 break;
2399 if (i == pta_size)
2400 error ("bad value (%s) for -march= switch", ix86_arch_string);
2402 ix86_arch_mask = 1u << ix86_arch;
2403 for (i = 0; i < X86_ARCH_LAST; ++i)
2404 ix86_arch_features[i] &= ix86_arch_mask;
2406 for (i = 0; i < pta_size; i++)
2407 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2409 ix86_tune = processor_alias_table[i].processor;
2410 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2412 if (ix86_tune_defaulted)
2414 ix86_tune_string = "x86-64";
2415 for (i = 0; i < pta_size; i++)
2416 if (! strcmp (ix86_tune_string,
2417 processor_alias_table[i].name))
2418 break;
2419 ix86_tune = processor_alias_table[i].processor;
2421 else
2422 error ("CPU you selected does not support x86-64 "
2423 "instruction set");
2425 /* Intel CPUs have always interpreted SSE prefetch instructions as
2426 NOPs; so, we can enable SSE prefetch instructions even when
2427 -mtune (rather than -march) points us to a processor that has them.
2428 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2429 higher processors. */
2430 if (TARGET_CMOVE
2431 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2432 x86_prefetch_sse = true;
2433 break;
2435 if (i == pta_size)
2436 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2438 /* Enable SSE2 if AES or PCLMUL is enabled. */
2439 if ((x86_aes || x86_pclmul)
2440 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2442 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2443 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2446 ix86_tune_mask = 1u << ix86_tune;
2447 for (i = 0; i < X86_TUNE_LAST; ++i)
2448 ix86_tune_features[i] &= ix86_tune_mask;
2450 if (optimize_size)
2451 ix86_cost = &size_cost;
2452 else
2453 ix86_cost = processor_target_table[ix86_tune].cost;
2455 /* Arrange to set up i386_stack_locals for all functions. */
2456 init_machine_status = ix86_init_machine_status;
2458 /* Validate -mregparm= value. */
2459 if (ix86_regparm_string)
2461 if (TARGET_64BIT)
2462 warning (0, "-mregparm is ignored in 64-bit mode");
2463 i = atoi (ix86_regparm_string);
2464 if (i < 0 || i > REGPARM_MAX)
2465 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2466 else
2467 ix86_regparm = i;
2469 if (TARGET_64BIT)
2470 ix86_regparm = REGPARM_MAX;
2472 /* If the user has provided any of the -malign-* options,
2473 warn and use that value only if -falign-* is not set.
2474 Remove this code in GCC 3.2 or later. */
2475 if (ix86_align_loops_string)
2477 warning (0, "-malign-loops is obsolete, use -falign-loops");
2478 if (align_loops == 0)
2480 i = atoi (ix86_align_loops_string);
2481 if (i < 0 || i > MAX_CODE_ALIGN)
2482 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2483 else
2484 align_loops = 1 << i;
2488 if (ix86_align_jumps_string)
2490 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2491 if (align_jumps == 0)
2493 i = atoi (ix86_align_jumps_string);
2494 if (i < 0 || i > MAX_CODE_ALIGN)
2495 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2496 else
2497 align_jumps = 1 << i;
2501 if (ix86_align_funcs_string)
2503 warning (0, "-malign-functions is obsolete, use -falign-functions");
2504 if (align_functions == 0)
2506 i = atoi (ix86_align_funcs_string);
2507 if (i < 0 || i > MAX_CODE_ALIGN)
2508 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2509 else
2510 align_functions = 1 << i;
2514 /* Default align_* from the processor table. */
2515 if (align_loops == 0)
2517 align_loops = processor_target_table[ix86_tune].align_loop;
2518 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2520 if (align_jumps == 0)
2522 align_jumps = processor_target_table[ix86_tune].align_jump;
2523 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2525 if (align_functions == 0)
2527 align_functions = processor_target_table[ix86_tune].align_func;
2530 /* Validate -mbranch-cost= value, or provide default. */
2531 ix86_branch_cost = ix86_cost->branch_cost;
2532 if (ix86_branch_cost_string)
2534 i = atoi (ix86_branch_cost_string);
2535 if (i < 0 || i > 5)
2536 error ("-mbranch-cost=%d is not between 0 and 5", i);
2537 else
2538 ix86_branch_cost = i;
2540 if (ix86_section_threshold_string)
2542 i = atoi (ix86_section_threshold_string);
2543 if (i < 0)
2544 error ("-mlarge-data-threshold=%d is negative", i);
2545 else
2546 ix86_section_threshold = i;
2549 if (ix86_tls_dialect_string)
2551 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2552 ix86_tls_dialect = TLS_DIALECT_GNU;
2553 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2554 ix86_tls_dialect = TLS_DIALECT_GNU2;
2555 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2556 ix86_tls_dialect = TLS_DIALECT_SUN;
2557 else
2558 error ("bad value (%s) for -mtls-dialect= switch",
2559 ix86_tls_dialect_string);
2562 if (ix87_precision_string)
2564 i = atoi (ix87_precision_string);
2565 if (i != 32 && i != 64 && i != 80)
2566 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
2569 if (TARGET_64BIT)
2571 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2573 /* Enable by default the SSE and MMX builtins. Do allow the user to
2574 explicitly disable any of these. In particular, disabling SSE and
2575 MMX for kernel code is extremely useful. */
2576 if (!ix86_arch_specified)
2577 ix86_isa_flags
2578 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2579 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2581 if (TARGET_RTD)
2582 warning (0, "-mrtd is ignored in 64bit mode");
2584 else
2586 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2588 if (!ix86_arch_specified)
2589 ix86_isa_flags
2590 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2592 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2593 when the programmer takes care to keep the stack from being destroyed. */
2594 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2595 target_flags |= MASK_NO_RED_ZONE;
2598 /* Keep nonleaf frame pointers. */
2599 if (flag_omit_frame_pointer)
2600 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2601 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2602 flag_omit_frame_pointer = 1;
2604 /* If we're doing fast math, we don't care about comparison order
2605 wrt NaNs. This lets us use a shorter comparison sequence. */
2606 if (flag_finite_math_only)
2607 target_flags &= ~MASK_IEEE_FP;
2609 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2610 since the insns won't need emulation. */
2611 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2612 target_flags &= ~MASK_NO_FANCY_MATH_387;
2614 /* Likewise, if the target doesn't have a 387, or we've specified
2615 software floating point, don't use 387 inline intrinsics. */
2616 if (!TARGET_80387)
2617 target_flags |= MASK_NO_FANCY_MATH_387;
2619 /* Turn on MMX builtins for -msse. */
2620 if (TARGET_SSE)
2622 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2623 x86_prefetch_sse = true;
2626 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2627 if (TARGET_SSE4_2 || TARGET_ABM)
2628 x86_popcnt = true;
2630 /* Validate -mpreferred-stack-boundary= value, or provide default.
2631 The default of 128 bits is for Pentium III's SSE __m128. We can't
2632 change it because of optimize_size. Otherwise, we can't mix object
2633 files compiled with -Os and -On. */
2634 ix86_preferred_stack_boundary = 128;
2635 if (ix86_preferred_stack_boundary_string)
2637 i = atoi (ix86_preferred_stack_boundary_string);
2638 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2639 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2640 TARGET_64BIT ? 4 : 2);
2641 else
2642 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
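/* Worked example of the conversion above: -mpreferred-stack-boundary=4
   gives i == 4, so ix86_preferred_stack_boundary = (1 << 4) * BITS_PER_UNIT
   = 16 * 8 = 128 bits, i.e. a 16-byte aligned stack -- the same value as
   the default set just before the option is parsed.  */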
2645 /* Accept -msseregparm only if at least SSE support is enabled. */
2646 if (TARGET_SSEREGPARM
2647 && ! TARGET_SSE)
2648 error ("-msseregparm used without SSE enabled");
2650 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2651 if (ix86_fpmath_string != 0)
2653 if (! strcmp (ix86_fpmath_string, "387"))
2654 ix86_fpmath = FPMATH_387;
2655 else if (! strcmp (ix86_fpmath_string, "sse"))
2657 if (!TARGET_SSE)
2659 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2660 ix86_fpmath = FPMATH_387;
2662 else
2663 ix86_fpmath = FPMATH_SSE;
2665 else if (! strcmp (ix86_fpmath_string, "387,sse")
2666 || ! strcmp (ix86_fpmath_string, "sse,387"))
2668 if (!TARGET_SSE)
2670 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2671 ix86_fpmath = FPMATH_387;
2673 else if (!TARGET_80387)
2675 warning (0, "387 instruction set disabled, using SSE arithmetics");
2676 ix86_fpmath = FPMATH_SSE;
2678 else
2679 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2681 else
2682 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
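/* Examples of the parsing above: -mfpmath=387 selects FPMATH_387,
   -mfpmath=sse selects FPMATH_SSE when SSE is enabled (otherwise it
   warns and falls back to the 387), and -mfpmath=sse,387 (or 387,sse)
   selects both units when both instruction sets are available.  */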
2685 /* If the i387 is disabled, then do not return values in it. */
2686 if (!TARGET_80387)
2687 target_flags &= ~MASK_FLOAT_RETURNS;
2689 /* Use external vectorized library in vectorizing intrinsics. */
2690 if (ix86_veclibabi_string)
2692 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2693 ix86_veclib_handler = ix86_veclibabi_svml;
2694 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2695 ix86_veclib_handler = ix86_veclibabi_acml;
2696 else
2697 error ("unknown vectorization library ABI type (%s) for "
2698 "-mveclibabi= switch", ix86_veclibabi_string);
2701 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2702 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2703 && !optimize_size)
2704 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2706 /* ??? Unwind info is not correct around the CFG unless either a frame
2707 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2708 unwind info generation to be aware of the CFG and propagating states
2709 around edges. */
2710 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2711 || flag_exceptions || flag_non_call_exceptions)
2712 && flag_omit_frame_pointer
2713 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2715 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2716 warning (0, "unwind tables currently require either a frame pointer "
2717 "or -maccumulate-outgoing-args for correctness");
2718 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2721 /* If stack probes are required, the space used for large function
2722 arguments on the stack must also be probed, so enable
2723 -maccumulate-outgoing-args so this happens in the prologue. */
2724 if (TARGET_STACK_PROBE
2725 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2727 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2728 warning (0, "stack probing requires -maccumulate-outgoing-args "
2729 "for correctness");
2730 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2733 /* For sane SSE instruction set generation we need the fcomi instruction.
2734 It is safe to enable all CMOVE instructions. */
2735 if (TARGET_SSE)
2736 TARGET_CMOVE = 1;
2738 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2740 char *p;
2741 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2742 p = strchr (internal_label_prefix, 'X');
2743 internal_label_prefix_len = p - internal_label_prefix;
2744 *p = '\0';
2747 /* When scheduling description is not available, disable scheduler pass
2748 so it won't slow down the compilation and make x87 code slower. */
2749 if (!TARGET_SCHEDULE)
2750 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2752 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2753 set_param_value ("simultaneous-prefetches",
2754 ix86_cost->simultaneous_prefetches);
2755 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2756 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2757 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2758 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2759 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2760 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2762 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2763 can be optimized to ap = __builtin_next_arg (0). */
2764 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2765 targetm.expand_builtin_va_start = NULL;
2768 /* Return true if this goes in large data/bss. */
2770 static bool
2771 ix86_in_large_data_p (tree exp)
2773 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2774 return false;
2776 /* Functions are never large data. */
2777 if (TREE_CODE (exp) == FUNCTION_DECL)
2778 return false;
2780 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2782 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2783 if (strcmp (section, ".ldata") == 0
2784 || strcmp (section, ".lbss") == 0)
2785 return true;
2786 return false;
2788 else
2790 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2792 /* If this is an incomplete type with size 0, then we can't put it
2793 in data because it might be too big when completed. */
2794 if (!size || size > ix86_section_threshold)
2795 return true;
2798 return false;
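/* Illustrative only: under -mcmodel=medium with the default
   -mlarge-data-threshold of 65536, a definition such as

     static char big_table[1 << 20];   (1 MiB, above the threshold)

   is treated as "large data" by the predicate above and ends up in
   .ldata/.lbss through the section hooks that follow, while an ordinary
   small scalar stays in the usual .data/.bss sections.  */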
2801 /* Switch to the appropriate section for output of DECL.
2802 DECL is either a `VAR_DECL' node or a constant of some sort.
2803 RELOC indicates whether forming the initial value of DECL requires
2804 link-time relocations. */
2806 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2807 ATTRIBUTE_UNUSED;
2809 static section *
2810 x86_64_elf_select_section (tree decl, int reloc,
2811 unsigned HOST_WIDE_INT align)
2813 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2814 && ix86_in_large_data_p (decl))
2816 const char *sname = NULL;
2817 unsigned int flags = SECTION_WRITE;
2818 switch (categorize_decl_for_section (decl, reloc))
2820 case SECCAT_DATA:
2821 sname = ".ldata";
2822 break;
2823 case SECCAT_DATA_REL:
2824 sname = ".ldata.rel";
2825 break;
2826 case SECCAT_DATA_REL_LOCAL:
2827 sname = ".ldata.rel.local";
2828 break;
2829 case SECCAT_DATA_REL_RO:
2830 sname = ".ldata.rel.ro";
2831 break;
2832 case SECCAT_DATA_REL_RO_LOCAL:
2833 sname = ".ldata.rel.ro.local";
2834 break;
2835 case SECCAT_BSS:
2836 sname = ".lbss";
2837 flags |= SECTION_BSS;
2838 break;
2839 case SECCAT_RODATA:
2840 case SECCAT_RODATA_MERGE_STR:
2841 case SECCAT_RODATA_MERGE_STR_INIT:
2842 case SECCAT_RODATA_MERGE_CONST:
2843 sname = ".lrodata";
2844 flags = 0;
2845 break;
2846 case SECCAT_SRODATA:
2847 case SECCAT_SDATA:
2848 case SECCAT_SBSS:
2849 gcc_unreachable ();
2850 case SECCAT_TEXT:
2851 case SECCAT_TDATA:
2852 case SECCAT_TBSS:
2853 /* We don't split these for medium model. Place them into
2854 default sections and hope for the best. */
2855 break;
2857 if (sname)
2859 /* We might get called with string constants, but get_named_section
2860 doesn't like them as they are not DECLs. Also, we need to set
2861 flags in that case. */
2862 if (!DECL_P (decl))
2863 return get_section (sname, flags, NULL);
2864 return get_named_section (decl, sname, reloc);
2867 return default_elf_select_section (decl, reloc, align);
2870 /* Build up a unique section name, expressed as a
2871 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2872 RELOC indicates whether the initial value of EXP requires
2873 link-time relocations. */
2875 static void ATTRIBUTE_UNUSED
2876 x86_64_elf_unique_section (tree decl, int reloc)
2878 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2879 && ix86_in_large_data_p (decl))
2881 const char *prefix = NULL;
2882 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2883 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2885 switch (categorize_decl_for_section (decl, reloc))
2887 case SECCAT_DATA:
2888 case SECCAT_DATA_REL:
2889 case SECCAT_DATA_REL_LOCAL:
2890 case SECCAT_DATA_REL_RO:
2891 case SECCAT_DATA_REL_RO_LOCAL:
2892 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2893 break;
2894 case SECCAT_BSS:
2895 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2896 break;
2897 case SECCAT_RODATA:
2898 case SECCAT_RODATA_MERGE_STR:
2899 case SECCAT_RODATA_MERGE_STR_INIT:
2900 case SECCAT_RODATA_MERGE_CONST:
2901 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2902 break;
2903 case SECCAT_SRODATA:
2904 case SECCAT_SDATA:
2905 case SECCAT_SBSS:
2906 gcc_unreachable ();
2907 case SECCAT_TEXT:
2908 case SECCAT_TDATA:
2909 case SECCAT_TBSS:
2910 /* We don't split these for medium model. Place them into
2911 default sections and hope for the best. */
2912 break;
2914 if (prefix)
2916 const char *name;
2917 size_t nlen, plen;
2918 char *string;
2919 plen = strlen (prefix);
2921 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2922 name = targetm.strip_name_encoding (name);
2923 nlen = strlen (name);
2925 string = (char *) alloca (nlen + plen + 1);
2926 memcpy (string, prefix, plen);
2927 memcpy (string + plen, name, nlen + 1);
2929 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2930 return;
2933 default_unique_section (decl, reloc);
2936 #ifdef COMMON_ASM_OP
2937 /* This says how to output assembler code to declare an
2938 uninitialized external linkage data object.
2940 For medium model x86-64 we need to use the .largecomm directive for
2941 large objects. */
2942 void
2943 x86_elf_aligned_common (FILE *file,
2944 const char *name, unsigned HOST_WIDE_INT size,
2945 int align)
2947 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2948 && size > (unsigned int)ix86_section_threshold)
2949 fprintf (file, ".largecomm\t");
2950 else
2951 fprintf (file, "%s", COMMON_ASM_OP);
2952 assemble_name (file, name);
2953 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2954 size, align / BITS_PER_UNIT);
2956 #endif
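/* Sketch of the output produced above: for a common object "buf" of
   100000 bytes with 256-bit alignment under -mcmodel=medium (the size is
   above the 65536 default threshold), this emits roughly

     .largecomm	buf,100000,32

   while a small object goes through COMMON_ASM_OP (".comm" on ELF); the
   last field is align / BITS_PER_UNIT, i.e. the alignment in bytes.  */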
2958 /* Utility function for targets to use in implementing
2959 ASM_OUTPUT_ALIGNED_BSS. */
2961 void
2962 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2963 const char *name, unsigned HOST_WIDE_INT size,
2964 int align)
2966 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2967 && size > (unsigned int)ix86_section_threshold)
2968 switch_to_section (get_named_section (decl, ".lbss", 0));
2969 else
2970 switch_to_section (bss_section);
2971 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2972 #ifdef ASM_DECLARE_OBJECT_NAME
2973 last_assemble_variable_decl = decl;
2974 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2975 #else
2976 /* The standard thing is just to output a label for the object. */
2977 ASM_OUTPUT_LABEL (file, name);
2978 #endif /* ASM_DECLARE_OBJECT_NAME */
2979 ASM_OUTPUT_SKIP (file, size ? size : 1);
2982 void
2983 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2985 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2986 make the problem with not enough registers even worse. */
2987 #ifdef INSN_SCHEDULING
2988 if (level > 1)
2989 flag_schedule_insns = 0;
2990 #endif
2992 if (TARGET_MACHO)
2993 /* The Darwin libraries never set errno, so we might as well
2994 avoid calling them when that's the only reason we would. */
2995 flag_errno_math = 0;
2997 /* The default values of these switches depend on the TARGET_64BIT
2998 that is not known at this moment. Mark these values with 2 and
2999 let the user override them. In case there is no command line option
3000 specifying them, we will set the defaults in override_options. */
3001 if (optimize >= 1)
3002 flag_omit_frame_pointer = 2;
3003 flag_pcc_struct_return = 2;
3004 flag_asynchronous_unwind_tables = 2;
3005 flag_vect_cost_model = 1;
3006 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3007 SUBTARGET_OPTIMIZATION_OPTIONS;
3008 #endif
3011 /* Decide whether we can make a sibling call to a function. DECL is the
3012 declaration of the function being targeted by the call and EXP is the
3013 CALL_EXPR representing the call. */
3015 static bool
3016 ix86_function_ok_for_sibcall (tree decl, tree exp)
3018 tree func;
3019 rtx a, b;
3021 /* If we are generating position-independent code, we cannot sibcall
3022 optimize any indirect call, or a direct call to a global function,
3023 as the PLT requires %ebx be live. */
3024 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3025 return false;
3027 if (decl)
3028 func = decl;
3029 else
3031 func = TREE_TYPE (CALL_EXPR_FN (exp));
3032 if (POINTER_TYPE_P (func))
3033 func = TREE_TYPE (func);
3036 /* Check that the return value locations are the same. For example,
3037 if we are returning floats on the 80387 register stack, we cannot
3038 make a sibcall from a function that doesn't return a float to a
3039 function that does or, conversely, from a function that does return
3040 a float to a function that doesn't; the necessary stack adjustment
3041 would not be executed. This is also the place we notice
3042 differences in the return value ABI. Note that it is ok for one
3043 of the functions to have void return type as long as the return
3044 value of the other is passed in a register. */
3045 a = ix86_function_value (TREE_TYPE (exp), func, false);
3046 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3047 cfun->decl, false);
3048 if (STACK_REG_P (a) || STACK_REG_P (b))
3050 if (!rtx_equal_p (a, b))
3051 return false;
3053 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3055 else if (!rtx_equal_p (a, b))
3056 return false;
3058 /* If this call is indirect, we'll need to be able to use a call-clobbered
3059 register for the address of the target function. Make sure that all
3060 such registers are not used for passing parameters. */
3061 if (!decl && !TARGET_64BIT)
3063 tree type;
3065 /* We're looking at the CALL_EXPR, we need the type of the function. */
3066 type = CALL_EXPR_FN (exp); /* pointer expression */
3067 type = TREE_TYPE (type); /* pointer type */
3068 type = TREE_TYPE (type); /* function type */
3070 if (ix86_function_regparm (type, NULL) >= 3)
3072 /* ??? Need to count the actual number of registers to be used,
3073 not the possible number of registers. Fix later. */
3074 return false;
3078 /* Dllimport'd functions are also called indirectly. */
3079 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3080 && decl && DECL_DLLIMPORT_P (decl)
3081 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3082 return false;
3084 /* If we force-aligned the stack, then sibcalling would unalign the
3085 stack, which may break the called function. */
3086 if (cfun->machine->force_align_arg_pointer)
3087 return false;
3089 /* Otherwise okay. That also includes certain types of indirect calls. */
3090 return true;
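/* A concrete case the predicate above rejects (hypothetical example,
   not from this file): in 32-bit PIC code,

     extern int bar (int);
     int foo (int x) { return bar (x); }

   cannot become a sibcall, because the call to the global bar goes
   through the PLT and therefore needs %ebx to hold the GOT pointer,
   which a sibcall could not guarantee.  */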
3093 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3094 calling convention attributes;
3095 arguments as in struct attribute_spec.handler. */
3097 static tree
3098 ix86_handle_cconv_attribute (tree *node, tree name,
3099 tree args,
3100 int flags ATTRIBUTE_UNUSED,
3101 bool *no_add_attrs)
3103 if (TREE_CODE (*node) != FUNCTION_TYPE
3104 && TREE_CODE (*node) != METHOD_TYPE
3105 && TREE_CODE (*node) != FIELD_DECL
3106 && TREE_CODE (*node) != TYPE_DECL)
3108 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3109 IDENTIFIER_POINTER (name));
3110 *no_add_attrs = true;
3111 return NULL_TREE;
3114 /* Can combine regparm with all attributes but fastcall. */
3115 if (is_attribute_p ("regparm", name))
3117 tree cst;
3119 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3121 error ("fastcall and regparm attributes are not compatible");
3124 cst = TREE_VALUE (args);
3125 if (TREE_CODE (cst) != INTEGER_CST)
3127 warning (OPT_Wattributes,
3128 "%qs attribute requires an integer constant argument",
3129 IDENTIFIER_POINTER (name));
3130 *no_add_attrs = true;
3132 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3134 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3135 IDENTIFIER_POINTER (name), REGPARM_MAX);
3136 *no_add_attrs = true;
3139 if (!TARGET_64BIT
3140 && lookup_attribute (ix86_force_align_arg_pointer_string,
3141 TYPE_ATTRIBUTES (*node))
3142 && compare_tree_int (cst, REGPARM_MAX-1))
3144 error ("%s functions limited to %d register parameters",
3145 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3148 return NULL_TREE;
3151 if (TARGET_64BIT)
3153 /* Do not warn when emulating the MS ABI. */
3154 if (!TARGET_64BIT_MS_ABI)
3155 warning (OPT_Wattributes, "%qs attribute ignored",
3156 IDENTIFIER_POINTER (name));
3157 *no_add_attrs = true;
3158 return NULL_TREE;
3161 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3162 if (is_attribute_p ("fastcall", name))
3164 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3166 error ("fastcall and cdecl attributes are not compatible");
3168 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3170 error ("fastcall and stdcall attributes are not compatible");
3172 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3174 error ("fastcall and regparm attributes are not compatible");
3178 /* Can combine stdcall with fastcall (redundant), regparm and
3179 sseregparm. */
3180 else if (is_attribute_p ("stdcall", name))
3182 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3184 error ("stdcall and cdecl attributes are not compatible");
3186 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3188 error ("stdcall and fastcall attributes are not compatible");
3192 /* Can combine cdecl with regparm and sseregparm. */
3193 else if (is_attribute_p ("cdecl", name))
3195 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3197 error ("stdcall and cdecl attributes are not compatible");
3199 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3201 error ("fastcall and cdecl attributes are not compatible");
3205 /* Can combine sseregparm with all attributes. */
3207 return NULL_TREE;
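/* Illustrative summary of how the attributes validated above appear in
   user code (hypothetical declarations, not part of this file):

     int __attribute__((stdcall))     f1 (int a, int b);        callee pops its args
     int __attribute__((fastcall))    f2 (int a, int b);        a in %ecx, b in %edx
     int __attribute__((regparm (3))) f3 (int a, int b, int c); a/b/c in %eax/%edx/%ecx
     int __attribute__((fastcall, regparm (2))) f4 (int a);     rejected: "fastcall and
                                                                regparm attributes are
                                                                not compatible"

   sseregparm may be combined with any of the above; it only changes how
   SFmode/DFmode values are passed and returned.  */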
3210 /* Return 0 if the attributes for two types are incompatible, 1 if they
3211 are compatible, and 2 if they are nearly compatible (which causes a
3212 warning to be generated). */
3214 static int
3215 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3217 /* Check for mismatch of non-default calling convention. */
3218 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3220 if (TREE_CODE (type1) != FUNCTION_TYPE
3221 && TREE_CODE (type1) != METHOD_TYPE)
3222 return 1;
3224 /* Check for mismatched fastcall/regparm types. */
3225 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3226 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3227 || (ix86_function_regparm (type1, NULL)
3228 != ix86_function_regparm (type2, NULL)))
3229 return 0;
3231 /* Check for mismatched sseregparm types. */
3232 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3233 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3234 return 0;
3236 /* Check for mismatched return types (cdecl vs stdcall). */
3237 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3238 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3239 return 0;
3241 return 1;
3244 /* Return the regparm value for a function with the indicated TYPE and DECL.
3245 DECL may be NULL when calling function indirectly
3246 or considering a libcall. */
3248 static int
3249 ix86_function_regparm (const_tree type, const_tree decl)
3251 tree attr;
3252 int regparm = ix86_regparm;
3254 static bool error_issued;
3256 if (TARGET_64BIT)
3257 return regparm;
3259 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3260 if (attr)
3262 regparm
3263 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3265 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3267 /* We can't use regparm(3) for nested functions because
3268 these pass the static chain pointer in the %ecx register. */
3269 if (!error_issued && regparm == 3
3270 && decl_function_context (decl)
3271 && !DECL_NO_STATIC_CHAIN (decl))
3273 error ("nested functions are limited to 2 register parameters");
3274 error_issued = true;
3275 return 0;
3279 return regparm;
3282 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3283 return 2;
3285 /* Use register calling convention for local functions when possible. */
3286 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3287 && flag_unit_at_a_time && !profile_flag)
3289 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3290 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3291 if (i && i->local)
3293 int local_regparm, globals = 0, regno;
3294 struct function *f;
3296 /* Make sure no regparm register is taken by a
3297 fixed register variable. */
3298 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3299 if (fixed_regs[local_regparm])
3300 break;
3302 /* We can't use regparm(3) for nested functions as these pass
3303 the static chain pointer in the third argument register (%ecx). */
3304 if (local_regparm == 3
3305 && (decl_function_context (decl)
3306 || ix86_force_align_arg_pointer)
3307 && !DECL_NO_STATIC_CHAIN (decl))
3308 local_regparm = 2;
3310 /* If the function realigns its stack pointer, the prologue will
3311 clobber %ecx. If we've already generated code for the callee,
3312 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3313 scanning the attributes for the self-realigning property. */
3314 f = DECL_STRUCT_FUNCTION (decl);
3315 if (local_regparm == 3
3316 && (f ? !!f->machine->force_align_arg_pointer
3317 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3318 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3319 local_regparm = 2;
3321 /* Each fixed register usage increases register pressure,
3322 so fewer registers should be used for argument passing.
3323 This functionality can be overridden by an explicit
3324 regparm value. */
3325 for (regno = 0; regno <= DI_REG; regno++)
3326 if (fixed_regs[regno])
3327 globals++;
3329 local_regparm
3330 = globals < local_regparm ? local_regparm - globals : 0;
3332 if (local_regparm > regparm)
3333 regparm = local_regparm;
3337 return regparm;
3340 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3341 DFmode (2) arguments in SSE registers for a function with the
3342 indicated TYPE and DECL. DECL may be NULL when calling function
3343 indirectly or considering a libcall. Otherwise return 0. */
3345 static int
3346 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3348 gcc_assert (!TARGET_64BIT);
3350 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3351 by the sseregparm attribute. */
3352 if (TARGET_SSEREGPARM
3353 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3355 if (!TARGET_SSE)
3357 if (warn)
3359 if (decl)
3360 error ("Calling %qD with attribute sseregparm without "
3361 "SSE/SSE2 enabled", decl);
3362 else
3363 error ("Calling %qT with attribute sseregparm without "
3364 "SSE/SSE2 enabled", type);
3366 return 0;
3369 return 2;
3372 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3373 (and DFmode for SSE2) arguments in SSE registers. */
3374 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3376 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3377 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3378 if (i && i->local)
3379 return TARGET_SSE2 ? 2 : 1;
3382 return 0;
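/* A rough illustration of the value computed above (hypothetical user
   code, not from this file): for

     double __attribute__((sseregparm)) scale (double x, double y);

   compiled with -m32 -msse2, the function returns 2, so both SFmode and
   DFmode arguments (and, via function_value_32 below, the return value)
   use %xmm registers; without -msse the error above is emitted instead.
   Local functions built with -mfpmath=sse get the same treatment
   automatically: 2 with SSE2, 1 with plain SSE.  */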
3385 /* Return true if EAX is live at the start of the function. Used by
3386 ix86_expand_prologue to determine if we need special help before
3387 calling allocate_stack_worker. */
3389 static bool
3390 ix86_eax_live_at_start_p (void)
3392 /* Cheat. Don't bother working forward from ix86_function_regparm
3393 to the function type to whether an actual argument is located in
3394 eax. Instead just look at cfg info, which is still close enough
3395 to correct at this point. This gives false positives for broken
3396 functions that might use uninitialized data that happens to be
3397 allocated in eax, but who cares? */
3398 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3401 /* Value is the number of bytes of arguments automatically
3402 popped when returning from a subroutine call.
3403 FUNDECL is the declaration node of the function (as a tree),
3404 FUNTYPE is the data type of the function (as a tree),
3405 or for a library call it is an identifier node for the subroutine name.
3406 SIZE is the number of bytes of arguments passed on the stack.
3408 On the 80386, the RTD insn may be used to pop them if the number
3409 of args is fixed, but if the number is variable then the caller
3410 must pop them all. RTD can't be used for library calls now
3411 because the library is compiled with the Unix compiler.
3412 Use of RTD is a selectable option, since it is incompatible with
3413 standard Unix calling sequences. If the option is not selected,
3414 the caller must always pop the args.
3416 The attribute stdcall is equivalent to RTD on a per module basis. */
3419 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3421 int rtd;
3423 /* None of the 64-bit ABIs pop arguments. */
3424 if (TARGET_64BIT)
3425 return 0;
3427 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3429 /* Cdecl functions override -mrtd, and never pop the stack. */
3430 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3432 /* Stdcall and fastcall functions will pop the stack if not
3433 variable args. */
3434 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3435 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3436 rtd = 1;
3438 if (rtd && ! stdarg_p (funtype))
3439 return size;
3442 /* Lose any fake structure return argument if it is passed on the stack. */
3443 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3444 && !KEEP_AGGREGATE_RETURN_POINTER)
3446 int nregs = ix86_function_regparm (funtype, fundecl);
3447 if (nregs == 0)
3448 return GET_MODE_SIZE (Pmode);
3451 return 0;
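/* Rough examples of the callee-pop rule implemented above (hypothetical
   declarations, not from this file):

     void __attribute__((stdcall)) f (int a, int b);   f returns with "ret $8"
     void __attribute__((stdcall)) g (int a, ...);     variadic, so the caller pops
     void h (int a, int b);                            caller pops, unless -mrtd
                                                       is in effect

   With -mrtd every fixed-argument function behaves like stdcall, which is
   why the cdecl attribute is checked explicitly to override it.  */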
3454 /* Argument support functions. */
3456 /* Return true when register may be used to pass function parameters. */
3457 bool
3458 ix86_function_arg_regno_p (int regno)
3460 int i;
3461 const int *parm_regs;
3463 if (!TARGET_64BIT)
3465 if (TARGET_MACHO)
3466 return (regno < REGPARM_MAX
3467 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3468 else
3469 return (regno < REGPARM_MAX
3470 || (TARGET_MMX && MMX_REGNO_P (regno)
3471 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3472 || (TARGET_SSE && SSE_REGNO_P (regno)
3473 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3476 if (TARGET_MACHO)
3478 if (SSE_REGNO_P (regno) && TARGET_SSE)
3479 return true;
3481 else
3483 if (TARGET_SSE && SSE_REGNO_P (regno)
3484 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3485 return true;
3488 /* RAX is used as hidden argument to va_arg functions. */
3489 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
3490 return true;
3492 if (TARGET_64BIT_MS_ABI)
3493 parm_regs = x86_64_ms_abi_int_parameter_registers;
3494 else
3495 parm_regs = x86_64_int_parameter_registers;
3496 for (i = 0; i < REGPARM_MAX; i++)
3497 if (regno == parm_regs[i])
3498 return true;
3499 return false;
3502 /* Return true if we do not know how to pass TYPE solely in registers. */
3504 static bool
3505 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3507 if (must_pass_in_stack_var_size_or_pad (mode, type))
3508 return true;
3510 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3511 The layout_type routine is crafty and tries to trick us into passing
3512 currently unsupported vector types on the stack by using TImode. */
3513 return (!TARGET_64BIT && mode == TImode
3514 && type && TREE_CODE (type) != VECTOR_TYPE);
3517 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3518 for a call to a function whose data type is FNTYPE.
3519 For a library call, FNTYPE is 0. */
3521 void
3522 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3523 tree fntype, /* tree ptr for function decl */
3524 rtx libname, /* SYMBOL_REF of library name or 0 */
3525 tree fndecl)
3527 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3528 memset (cum, 0, sizeof (*cum));
3530 /* Set up the number of registers to use for passing arguments. */
3531 cum->nregs = ix86_regparm;
3532 if (TARGET_SSE)
3533 cum->sse_nregs = SSE_REGPARM_MAX;
3534 if (TARGET_MMX)
3535 cum->mmx_nregs = MMX_REGPARM_MAX;
3536 cum->warn_sse = true;
3537 cum->warn_mmx = true;
3539 /* Because the type might mismatch between caller and callee, we need to
3540 use the actual type of the function for local calls.
3541 FIXME: cgraph_analyze can be told to actually record whether a function
3542 uses va_start, so for local functions maybe_vaarg can be made more
3543 aggressive, helping K&R code.
3544 FIXME: once the type system is fixed, we won't need this code anymore. */
3545 if (i && i->local)
3546 fntype = TREE_TYPE (fndecl);
3547 cum->maybe_vaarg = (fntype
3548 ? (!prototype_p (fntype) || stdarg_p (fntype))
3549 : !libname);
3551 if (!TARGET_64BIT)
3553 /* If there are variable arguments, then we won't pass anything
3554 in registers in 32-bit mode. */
3555 if (stdarg_p (fntype))
3557 cum->nregs = 0;
3558 cum->sse_nregs = 0;
3559 cum->mmx_nregs = 0;
3560 cum->warn_sse = 0;
3561 cum->warn_mmx = 0;
3562 return;
3565 /* Use ecx and edx registers if function has fastcall attribute,
3566 else look for regparm information. */
3567 if (fntype)
3569 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3571 cum->nregs = 2;
3572 cum->fastcall = 1;
3574 else
3575 cum->nregs = ix86_function_regparm (fntype, fndecl);
3578 /* Set up the number of SSE registers used for passing SFmode
3579 and DFmode arguments. Warn for mismatching ABI. */
3580 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3584 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3585 But in the case of vector types, it is some vector mode.
3587 When we have only some of our vector isa extensions enabled, then there
3588 are some modes for which vector_mode_supported_p is false. For these
3589 modes, the generic vector support in gcc will choose some non-vector mode
3590 in order to implement the type. By computing the natural mode, we'll
3591 select the proper ABI location for the operand and not depend on whatever
3592 the middle-end decides to do with these vector types. */
3594 static enum machine_mode
3595 type_natural_mode (const_tree type)
3597 enum machine_mode mode = TYPE_MODE (type);
3599 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3601 HOST_WIDE_INT size = int_size_in_bytes (type);
3602 if ((size == 8 || size == 16)
3603 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3604 && TYPE_VECTOR_SUBPARTS (type) > 1)
3606 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3608 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3609 mode = MIN_MODE_VECTOR_FLOAT;
3610 else
3611 mode = MIN_MODE_VECTOR_INT;
3613 /* Get the mode which has this inner mode and number of units. */
3614 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3615 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3616 && GET_MODE_INNER (mode) == innermode)
3617 return mode;
3619 gcc_unreachable ();
3623 return mode;
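/* For illustration (hypothetical type, not from this file): given

     typedef int v4si __attribute__((vector_size (16)));

   without -msse, V4SImode is not supported, so the middle-end implements
   the type with some non-vector mode; type_natural_mode still returns
   V4SImode (size 16, four SImode units), so the ABI decision below does
   not depend on that substitution.  */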
3626 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3627 this may not agree with the mode that the type system has chosen for the
3628 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3629 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3631 static rtx
3632 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3633 unsigned int regno)
3635 rtx tmp;
3637 if (orig_mode != BLKmode)
3638 tmp = gen_rtx_REG (orig_mode, regno);
3639 else
3641 tmp = gen_rtx_REG (mode, regno);
3642 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3643 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3646 return tmp;
3649 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3650 The goal of this code is to classify each 8-byte chunk of an incoming
3651 argument by register class and assign registers accordingly. */
3653 /* Return the union class of CLASS1 and CLASS2.
3654 See the x86-64 PS ABI for details. */
3656 static enum x86_64_reg_class
3657 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3659 /* Rule #1: If both classes are equal, this is the resulting class. */
3660 if (class1 == class2)
3661 return class1;
3663 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3664 the other class. */
3665 if (class1 == X86_64_NO_CLASS)
3666 return class2;
3667 if (class2 == X86_64_NO_CLASS)
3668 return class1;
3670 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3671 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3672 return X86_64_MEMORY_CLASS;
3674 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3675 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3676 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3677 return X86_64_INTEGERSI_CLASS;
3678 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3679 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3680 return X86_64_INTEGER_CLASS;
3682 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3683 MEMORY is used. */
3684 if (class1 == X86_64_X87_CLASS
3685 || class1 == X86_64_X87UP_CLASS
3686 || class1 == X86_64_COMPLEX_X87_CLASS
3687 || class2 == X86_64_X87_CLASS
3688 || class2 == X86_64_X87UP_CLASS
3689 || class2 == X86_64_COMPLEX_X87_CLASS)
3690 return X86_64_MEMORY_CLASS;
3692 /* Rule #6: Otherwise class SSE is used. */
3693 return X86_64_SSE_CLASS;
3696 /* Classify the argument of type TYPE and mode MODE.
3697 CLASSES will be filled by the register class used to pass each word
3698 of the operand. The number of words is returned. In case the parameter
3699 should be passed in memory, 0 is returned. As a special case for zero
3700 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3702 BIT_OFFSET is used internally for handling records and specifies the
3703 offset in bits modulo 256, to avoid overflow cases.
3705 See the x86-64 PS ABI for details.
3708 static int
3709 classify_argument (enum machine_mode mode, const_tree type,
3710 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3712 HOST_WIDE_INT bytes =
3713 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3714 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3716 /* Variable sized entities are always passed/returned in memory. */
3717 if (bytes < 0)
3718 return 0;
3720 if (mode != VOIDmode
3721 && targetm.calls.must_pass_in_stack (mode, type))
3722 return 0;
3724 if (type && AGGREGATE_TYPE_P (type))
3726 int i;
3727 tree field;
3728 enum x86_64_reg_class subclasses[MAX_CLASSES];
3730 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3731 if (bytes > 16)
3732 return 0;
3734 for (i = 0; i < words; i++)
3735 classes[i] = X86_64_NO_CLASS;
3737 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
3738 signal the memory class, so handle this as a special case. */
3739 if (!words)
3741 classes[0] = X86_64_NO_CLASS;
3742 return 1;
3745 /* Classify each field of record and merge classes. */
3746 switch (TREE_CODE (type))
3748 case RECORD_TYPE:
3749 /* And now merge the fields of structure. */
3750 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3752 if (TREE_CODE (field) == FIELD_DECL)
3754 int num;
3756 if (TREE_TYPE (field) == error_mark_node)
3757 continue;
3759 /* Bitfields are always classified as integer. Handle them
3760 early, since later code would consider them to be
3761 misaligned integers. */
3762 if (DECL_BIT_FIELD (field))
3764 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3765 i < ((int_bit_position (field) + (bit_offset % 64))
3766 + tree_low_cst (DECL_SIZE (field), 0)
3767 + 63) / 8 / 8; i++)
3768 classes[i] =
3769 merge_classes (X86_64_INTEGER_CLASS,
3770 classes[i]);
3772 else
3774 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3775 TREE_TYPE (field), subclasses,
3776 (int_bit_position (field)
3777 + bit_offset) % 256);
3778 if (!num)
3779 return 0;
3780 for (i = 0; i < num; i++)
3782 int pos =
3783 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3784 classes[i + pos] =
3785 merge_classes (subclasses[i], classes[i + pos]);
3790 break;
3792 case ARRAY_TYPE:
3793 /* Arrays are handled as small records. */
3795 int num;
3796 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3797 TREE_TYPE (type), subclasses, bit_offset);
3798 if (!num)
3799 return 0;
3801 /* The partial classes are now full classes. */
3802 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3803 subclasses[0] = X86_64_SSE_CLASS;
3804 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3805 subclasses[0] = X86_64_INTEGER_CLASS;
3807 for (i = 0; i < words; i++)
3808 classes[i] = subclasses[i % num];
3810 break;
3812 case UNION_TYPE:
3813 case QUAL_UNION_TYPE:
3814 /* Unions are similar to RECORD_TYPE but offset is always 0.
3816 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3818 if (TREE_CODE (field) == FIELD_DECL)
3820 int num;
3822 if (TREE_TYPE (field) == error_mark_node)
3823 continue;
3825 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3826 TREE_TYPE (field), subclasses,
3827 bit_offset);
3828 if (!num)
3829 return 0;
3830 for (i = 0; i < num; i++)
3831 classes[i] = merge_classes (subclasses[i], classes[i]);
3834 break;
3836 default:
3837 gcc_unreachable ();
3840 /* Final merger cleanup. */
3841 for (i = 0; i < words; i++)
3843 /* If one class is MEMORY, everything should be passed in
3844 memory. */
3845 if (classes[i] == X86_64_MEMORY_CLASS)
3846 return 0;
3848 /* The X86_64_SSEUP_CLASS should be always preceded by
3849 X86_64_SSE_CLASS. */
3850 if (classes[i] == X86_64_SSEUP_CLASS
3851 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3852 classes[i] = X86_64_SSE_CLASS;
3854 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3855 if (classes[i] == X86_64_X87UP_CLASS
3856 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3857 classes[i] = X86_64_SSE_CLASS;
3859 return words;
3862 /* Compute the alignment needed. We align all types to their natural
3863 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3864 if (mode != VOIDmode && mode != BLKmode)
3866 int mode_alignment = GET_MODE_BITSIZE (mode);
3868 if (mode == XFmode)
3869 mode_alignment = 128;
3870 else if (mode == XCmode)
3871 mode_alignment = 256;
3872 if (COMPLEX_MODE_P (mode))
3873 mode_alignment /= 2;
3874 /* Misaligned fields are always returned in memory. */
3875 if (bit_offset % mode_alignment)
3876 return 0;
3879 /* For V1xx modes, just use the base mode. */
3880 if (VECTOR_MODE_P (mode) && mode != V1DImode
3881 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3882 mode = GET_MODE_INNER (mode);
3884 /* Classification of atomic types. */
3885 switch (mode)
3887 case SDmode:
3888 case DDmode:
3889 classes[0] = X86_64_SSE_CLASS;
3890 return 1;
3891 case TDmode:
3892 classes[0] = X86_64_SSE_CLASS;
3893 classes[1] = X86_64_SSEUP_CLASS;
3894 return 2;
3895 case DImode:
3896 case SImode:
3897 case HImode:
3898 case QImode:
3899 case CSImode:
3900 case CHImode:
3901 case CQImode:
3902 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3903 classes[0] = X86_64_INTEGERSI_CLASS;
3904 else
3905 classes[0] = X86_64_INTEGER_CLASS;
3906 return 1;
3907 case CDImode:
3908 case TImode:
3909 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3910 return 2;
3911 case CTImode:
3912 return 0;
3913 case SFmode:
3914 if (!(bit_offset % 64))
3915 classes[0] = X86_64_SSESF_CLASS;
3916 else
3917 classes[0] = X86_64_SSE_CLASS;
3918 return 1;
3919 case DFmode:
3920 classes[0] = X86_64_SSEDF_CLASS;
3921 return 1;
3922 case XFmode:
3923 classes[0] = X86_64_X87_CLASS;
3924 classes[1] = X86_64_X87UP_CLASS;
3925 return 2;
3926 case TFmode:
3927 classes[0] = X86_64_SSE_CLASS;
3928 classes[1] = X86_64_SSEUP_CLASS;
3929 return 2;
3930 case SCmode:
3931 classes[0] = X86_64_SSE_CLASS;
3932 return 1;
3933 case DCmode:
3934 classes[0] = X86_64_SSEDF_CLASS;
3935 classes[1] = X86_64_SSEDF_CLASS;
3936 return 2;
3937 case XCmode:
3938 classes[0] = X86_64_COMPLEX_X87_CLASS;
3939 return 1;
3940 case TCmode:
3941 /* This mode is larger than 16 bytes. */
3942 return 0;
3943 case V4SFmode:
3944 case V4SImode:
3945 case V16QImode:
3946 case V8HImode:
3947 case V2DFmode:
3948 case V2DImode:
3949 classes[0] = X86_64_SSE_CLASS;
3950 classes[1] = X86_64_SSEUP_CLASS;
3951 return 2;
3952 case V1DImode:
3953 case V2SFmode:
3954 case V2SImode:
3955 case V4HImode:
3956 case V8QImode:
3957 classes[0] = X86_64_SSE_CLASS;
3958 return 1;
3959 case BLKmode:
3960 case VOIDmode:
3961 return 0;
3962 default:
3963 gcc_assert (VECTOR_MODE_P (mode));
3965 if (bytes > 16)
3966 return 0;
3968 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3970 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3971 classes[0] = X86_64_INTEGERSI_CLASS;
3972 else
3973 classes[0] = X86_64_INTEGER_CLASS;
3974 classes[1] = X86_64_INTEGER_CLASS;
3975 return 1 + (bytes > 8);
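/* A worked example of the classification above (hypothetical struct, not
   from this file):

     struct s { double d; long l; };   16 bytes, i.e. two eightbytes

   The first eightbyte contains only the double and is classified SSEDF;
   the second contains only the long and is classified INTEGER.  When such
   a struct is the first argument of a call, construct_container below
   builds a PARALLEL placing d in %xmm0 and l in %rdi.  */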
3979 /* Examine the argument and return set number of register required in each
3980 class. Return 0 iff parameter should be passed in memory. */
3981 static int
3982 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3983 int *int_nregs, int *sse_nregs)
3985 enum x86_64_reg_class regclass[MAX_CLASSES];
3986 int n = classify_argument (mode, type, regclass, 0);
3988 *int_nregs = 0;
3989 *sse_nregs = 0;
3990 if (!n)
3991 return 0;
3992 for (n--; n >= 0; n--)
3993 switch (regclass[n])
3995 case X86_64_INTEGER_CLASS:
3996 case X86_64_INTEGERSI_CLASS:
3997 (*int_nregs)++;
3998 break;
3999 case X86_64_SSE_CLASS:
4000 case X86_64_SSESF_CLASS:
4001 case X86_64_SSEDF_CLASS:
4002 (*sse_nregs)++;
4003 break;
4004 case X86_64_NO_CLASS:
4005 case X86_64_SSEUP_CLASS:
4006 break;
4007 case X86_64_X87_CLASS:
4008 case X86_64_X87UP_CLASS:
4009 if (!in_return)
4010 return 0;
4011 break;
4012 case X86_64_COMPLEX_X87_CLASS:
4013 return in_return ? 2 : 0;
4014 case X86_64_MEMORY_CLASS:
4015 gcc_unreachable ();
4017 return 1;
4020 /* Construct container for the argument used by GCC interface. See
4021 FUNCTION_ARG for the detailed description. */
4023 static rtx
4024 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4025 const_tree type, int in_return, int nintregs, int nsseregs,
4026 const int *intreg, int sse_regno)
4028 /* The following variables hold the static issued_error state. */
4029 static bool issued_sse_arg_error;
4030 static bool issued_sse_ret_error;
4031 static bool issued_x87_ret_error;
4033 enum machine_mode tmpmode;
4034 int bytes =
4035 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4036 enum x86_64_reg_class regclass[MAX_CLASSES];
4037 int n;
4038 int i;
4039 int nexps = 0;
4040 int needed_sseregs, needed_intregs;
4041 rtx exp[MAX_CLASSES];
4042 rtx ret;
4044 n = classify_argument (mode, type, regclass, 0);
4045 if (!n)
4046 return NULL;
4047 if (!examine_argument (mode, type, in_return, &needed_intregs,
4048 &needed_sseregs))
4049 return NULL;
4050 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4051 return NULL;
4053 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4054 some less clueful developer tries to use floating-point anyway. */
4055 if (needed_sseregs && !TARGET_SSE)
4057 if (in_return)
4059 if (!issued_sse_ret_error)
4061 error ("SSE register return with SSE disabled");
4062 issued_sse_ret_error = true;
4065 else if (!issued_sse_arg_error)
4067 error ("SSE register argument with SSE disabled");
4068 issued_sse_arg_error = true;
4070 return NULL;
4073 /* Likewise, error if the ABI requires us to return values in the
4074 x87 registers and the user specified -mno-80387. */
4075 if (!TARGET_80387 && in_return)
4076 for (i = 0; i < n; i++)
4077 if (regclass[i] == X86_64_X87_CLASS
4078 || regclass[i] == X86_64_X87UP_CLASS
4079 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4081 if (!issued_x87_ret_error)
4083 error ("x87 register return with x87 disabled");
4084 issued_x87_ret_error = true;
4086 return NULL;
4089 /* First construct the simple cases. Avoid SCmode, since we want to use
4090 a single register to pass this type. */
4091 if (n == 1 && mode != SCmode)
4092 switch (regclass[0])
4094 case X86_64_INTEGER_CLASS:
4095 case X86_64_INTEGERSI_CLASS:
4096 return gen_rtx_REG (mode, intreg[0]);
4097 case X86_64_SSE_CLASS:
4098 case X86_64_SSESF_CLASS:
4099 case X86_64_SSEDF_CLASS:
4100 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4101 case X86_64_X87_CLASS:
4102 case X86_64_COMPLEX_X87_CLASS:
4103 return gen_rtx_REG (mode, FIRST_STACK_REG);
4104 case X86_64_NO_CLASS:
4105 /* Zero sized array, struct or class. */
4106 return NULL;
4107 default:
4108 gcc_unreachable ();
4110 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4111 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4112 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4114 if (n == 2
4115 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4116 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4117 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4118 && regclass[1] == X86_64_INTEGER_CLASS
4119 && (mode == CDImode || mode == TImode || mode == TFmode)
4120 && intreg[0] + 1 == intreg[1])
4121 return gen_rtx_REG (mode, intreg[0]);
4123 /* Otherwise figure out the entries of the PARALLEL. */
4124 for (i = 0; i < n; i++)
4126 switch (regclass[i])
4128 case X86_64_NO_CLASS:
4129 break;
4130 case X86_64_INTEGER_CLASS:
4131 case X86_64_INTEGERSI_CLASS:
4132 /* Merge TImodes on aligned occasions here too. */
4133 if (i * 8 + 8 > bytes)
4134 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4135 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4136 tmpmode = SImode;
4137 else
4138 tmpmode = DImode;
4139 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4140 if (tmpmode == BLKmode)
4141 tmpmode = DImode;
4142 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4143 gen_rtx_REG (tmpmode, *intreg),
4144 GEN_INT (i*8));
4145 intreg++;
4146 break;
4147 case X86_64_SSESF_CLASS:
4148 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4149 gen_rtx_REG (SFmode,
4150 SSE_REGNO (sse_regno)),
4151 GEN_INT (i*8));
4152 sse_regno++;
4153 break;
4154 case X86_64_SSEDF_CLASS:
4155 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4156 gen_rtx_REG (DFmode,
4157 SSE_REGNO (sse_regno)),
4158 GEN_INT (i*8));
4159 sse_regno++;
4160 break;
4161 case X86_64_SSE_CLASS:
4162 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4163 tmpmode = TImode;
4164 else
4165 tmpmode = DImode;
4166 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4167 gen_rtx_REG (tmpmode,
4168 SSE_REGNO (sse_regno)),
4169 GEN_INT (i*8));
4170 if (tmpmode == TImode)
4171 i++;
4172 sse_regno++;
4173 break;
4174 default:
4175 gcc_unreachable ();
4179 /* Empty aligned struct, union or class. */
4180 if (nexps == 0)
4181 return NULL;
4183 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4184 for (i = 0; i < nexps; i++)
4185 XVECEXP (ret, 0, i) = exp [i];
4186 return ret;
4189 /* Update the data in CUM to advance over an argument of mode MODE
4190 and data type TYPE. (TYPE is null for libcalls where that information
4191 may not be available.) */
4193 static void
4194 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4195 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4197 switch (mode)
4199 default:
4200 break;
4202 case BLKmode:
4203 if (bytes < 0)
4204 break;
4205 /* FALLTHRU */
4207 case DImode:
4208 case SImode:
4209 case HImode:
4210 case QImode:
4211 cum->words += words;
4212 cum->nregs -= words;
4213 cum->regno += words;
4215 if (cum->nregs <= 0)
4217 cum->nregs = 0;
4218 cum->regno = 0;
4220 break;
4222 case DFmode:
4223 if (cum->float_in_sse < 2)
4224 break;
4225 case SFmode:
4226 if (cum->float_in_sse < 1)
4227 break;
4228 /* FALLTHRU */
4230 case TImode:
4231 case V16QImode:
4232 case V8HImode:
4233 case V4SImode:
4234 case V2DImode:
4235 case V4SFmode:
4236 case V2DFmode:
4237 if (!type || !AGGREGATE_TYPE_P (type))
4239 cum->sse_words += words;
4240 cum->sse_nregs -= 1;
4241 cum->sse_regno += 1;
4242 if (cum->sse_nregs <= 0)
4244 cum->sse_nregs = 0;
4245 cum->sse_regno = 0;
4248 break;
4250 case V8QImode:
4251 case V4HImode:
4252 case V2SImode:
4253 case V2SFmode:
4254 case V1DImode:
4255 if (!type || !AGGREGATE_TYPE_P (type))
4257 cum->mmx_words += words;
4258 cum->mmx_nregs -= 1;
4259 cum->mmx_regno += 1;
4260 if (cum->mmx_nregs <= 0)
4262 cum->mmx_nregs = 0;
4263 cum->mmx_regno = 0;
4266 break;
4270 static void
4271 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4272 tree type, HOST_WIDE_INT words)
4274 int int_nregs, sse_nregs;
4276 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4277 cum->words += words;
4278 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4280 cum->nregs -= int_nregs;
4281 cum->sse_nregs -= sse_nregs;
4282 cum->regno += int_nregs;
4283 cum->sse_regno += sse_nregs;
4285 else
4286 cum->words += words;
4289 static void
4290 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4291 HOST_WIDE_INT words)
4293 /* Otherwise, this should be passed indirectly. */
4294 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4296 cum->words += words;
4297 if (cum->nregs > 0)
4299 cum->nregs -= 1;
4300 cum->regno += 1;
4304 void
4305 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4306 tree type, int named ATTRIBUTE_UNUSED)
4308 HOST_WIDE_INT bytes, words;
4310 if (mode == BLKmode)
4311 bytes = int_size_in_bytes (type);
4312 else
4313 bytes = GET_MODE_SIZE (mode);
4314 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4316 if (type)
4317 mode = type_natural_mode (type);
4319 if (TARGET_64BIT_MS_ABI)
4320 function_arg_advance_ms_64 (cum, bytes, words);
4321 else if (TARGET_64BIT)
4322 function_arg_advance_64 (cum, mode, type, words);
4323 else
4324 function_arg_advance_32 (cum, mode, type, bytes, words);
4327 /* Define where to put the arguments to a function.
4328 Value is zero to push the argument on the stack,
4329 or a hard register in which to store the argument.
4331 MODE is the argument's machine mode.
4332 TYPE is the data type of the argument (as a tree).
4333 This is null for libcalls where that information may
4334 not be available.
4335 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4336 the preceding args and about the function being called.
4337 NAMED is nonzero if this argument is a named parameter
4338 (otherwise it is an extra parameter matching an ellipsis). */
4340 static rtx
4341 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4342 enum machine_mode orig_mode, tree type,
4343 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4345 static bool warnedsse, warnedmmx;
4347 /* Avoid the AL settings for the Unix64 ABI. */
4348 if (mode == VOIDmode)
4349 return constm1_rtx;
4351 switch (mode)
4353 default:
4354 break;
4356 case BLKmode:
4357 if (bytes < 0)
4358 break;
4359 /* FALLTHRU */
4360 case DImode:
4361 case SImode:
4362 case HImode:
4363 case QImode:
4364 if (words <= cum->nregs)
4366 int regno = cum->regno;
4368 /* Fastcall allocates the first two DWORD (SImode) or
4369 smaller arguments to ECX and EDX, unless the argument
4370 is an aggregate type. */
4371 if (cum->fastcall)
4373 if (mode == BLKmode
4374 || mode == DImode
4375 || (type && AGGREGATE_TYPE_P (type)))
4376 break;
4378 /* ECX, not EAX, is the first allocated register. */
4379 if (regno == AX_REG)
4380 regno = CX_REG;
4382 return gen_rtx_REG (mode, regno);
4384 break;
4386 case DFmode:
4387 if (cum->float_in_sse < 2)
4388 break;
4389 case SFmode:
4390 if (cum->float_in_sse < 1)
4391 break;
4392 /* FALLTHRU */
4393 case TImode:
4394 case V16QImode:
4395 case V8HImode:
4396 case V4SImode:
4397 case V2DImode:
4398 case V4SFmode:
4399 case V2DFmode:
4400 if (!type || !AGGREGATE_TYPE_P (type))
4402 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4404 warnedsse = true;
4405 warning (0, "SSE vector argument without SSE enabled "
4406 "changes the ABI");
4408 if (cum->sse_nregs)
4409 return gen_reg_or_parallel (mode, orig_mode,
4410 cum->sse_regno + FIRST_SSE_REG);
4412 break;
4414 case V8QImode:
4415 case V4HImode:
4416 case V2SImode:
4417 case V2SFmode:
4418 case V1DImode:
4419 if (!type || !AGGREGATE_TYPE_P (type))
4421 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4423 warnedmmx = true;
4424 warning (0, "MMX vector argument without MMX enabled "
4425 "changes the ABI");
4427 if (cum->mmx_nregs)
4428 return gen_reg_or_parallel (mode, orig_mode,
4429 cum->mmx_regno + FIRST_MMX_REG);
4431 break;
4434 return NULL_RTX;
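/* Sketch of the 32-bit register assignment above (hypothetical
   declaration, not from this file): for

     void __attribute__((fastcall)) f (int a, char b, long long c);

   a is passed in %ecx and b in %edx (only SImode or smaller qualifies),
   while c, being DImode, is excluded from the fastcall registers and
   goes on the stack.  */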
4437 static rtx
4438 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4439 enum machine_mode orig_mode, tree type)
4441 /* Handle a hidden AL argument containing number of registers
4442 for varargs x86-64 functions. */
4443 if (mode == VOIDmode)
4444 return GEN_INT (cum->maybe_vaarg
4445 ? (cum->sse_nregs < 0
4446 ? SSE_REGPARM_MAX
4447 : cum->sse_regno)
4448 : -1);
4450 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4451 cum->sse_nregs,
4452 &x86_64_int_parameter_registers [cum->regno],
4453 cum->sse_regno);
4456 static rtx
4457 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4458 enum machine_mode orig_mode, int named,
4459 HOST_WIDE_INT bytes)
4461 unsigned int regno;
4463 /* Avoid the AL settings for the Unix64 ABI. */
4464 if (mode == VOIDmode)
4465 return constm1_rtx;
4467 /* If we've run out of registers, it goes on the stack. */
4468 if (cum->nregs == 0)
4469 return NULL_RTX;
4471 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4473 /* Only floating point modes are passed in anything but integer regs. */
4474 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4476 if (named)
4477 regno = cum->regno + FIRST_SSE_REG;
4478 else
4480 rtx t1, t2;
4482 /* Unnamed floating parameters are passed in both the
4483 SSE and integer registers. */
4484 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4485 t2 = gen_rtx_REG (mode, regno);
4486 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4487 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4488 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4491 /* Handle aggregate types passed in registers. */
4492 if (orig_mode == BLKmode)
4494 if (bytes > 0 && bytes <= 8)
4495 mode = (bytes > 4 ? DImode : SImode);
4496 if (mode == BLKmode)
4497 mode = DImode;
4500 return gen_reg_or_parallel (mode, orig_mode, regno);
4504 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4505 tree type, int named)
4507 enum machine_mode mode = omode;
4508 HOST_WIDE_INT bytes, words;
4510 if (mode == BLKmode)
4511 bytes = int_size_in_bytes (type);
4512 else
4513 bytes = GET_MODE_SIZE (mode);
4514 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4516 /* To simplify the code below, represent vector types with a vector mode
4517 even if MMX/SSE are not active. */
4518 if (type && TREE_CODE (type) == VECTOR_TYPE)
4519 mode = type_natural_mode (type);
4521 if (TARGET_64BIT_MS_ABI)
4522 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4523 else if (TARGET_64BIT)
4524 return function_arg_64 (cum, mode, omode, type);
4525 else
4526 return function_arg_32 (cum, mode, omode, type, bytes, words);
4529 /* A C expression that indicates when an argument must be passed by
4530 reference. If nonzero for an argument, a copy of that argument is
4531 made in memory and a pointer to the argument is passed instead of
4532 the argument itself. The pointer is passed in whatever way is
4533 appropriate for passing a pointer to that type. */
4535 static bool
4536 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4537 enum machine_mode mode ATTRIBUTE_UNUSED,
4538 const_tree type, bool named ATTRIBUTE_UNUSED)
4540 /* See Windows x64 Software Convention. */
4541 if (TARGET_64BIT_MS_ABI)
4543 int msize = (int) GET_MODE_SIZE (mode);
4544 if (type)
4546 /* Arrays are passed by reference. */
4547 if (TREE_CODE (type) == ARRAY_TYPE)
4548 return true;
4550 if (AGGREGATE_TYPE_P (type))
4552 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4553 are passed by reference. */
4554 msize = int_size_in_bytes (type);
4558 /* __m128 is passed by reference. */
4559 switch (msize) {
4560 case 1: case 2: case 4: case 8:
4561 break;
4562 default:
4563 return true;
4566 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4567 return 1;
4569 return 0;
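/* Rough illustration of the Win64 rule above (hypothetical types, not
   from this file):

     struct a { char c[8];  };    8 bytes  -> passed by value in a register
     struct b { char c[12]; };   12 bytes  -> a copy is made, pointer passed
     arrays and __m128 values              -> always passed by reference

   Only objects of exactly 1, 2, 4 or 8 bytes travel directly in a
   register or stack slot; everything else goes by reference.  */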
4572 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4573 ABI. Only called if TARGET_SSE. */
4574 static bool
4575 contains_128bit_aligned_vector_p (tree type)
4577 enum machine_mode mode = TYPE_MODE (type);
4578 if (SSE_REG_MODE_P (mode)
4579 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4580 return true;
4581 if (TYPE_ALIGN (type) < 128)
4582 return false;
4584 if (AGGREGATE_TYPE_P (type))
4586 /* Walk the aggregates recursively. */
4587 switch (TREE_CODE (type))
4589 case RECORD_TYPE:
4590 case UNION_TYPE:
4591 case QUAL_UNION_TYPE:
4593 tree field;
4595 /* Walk all the structure fields. */
4596 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4598 if (TREE_CODE (field) == FIELD_DECL
4599 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4600 return true;
4602 break;
4605 case ARRAY_TYPE:
4606 /* Just in case some language passes arrays by value. */
4607 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4608 return true;
4609 break;
4611 default:
4612 gcc_unreachable ();
4615 return false;
4618 /* Gives the alignment boundary, in bits, of an argument with the
4619 specified mode and type. */
4622 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4624 int align;
4625 if (type)
4626 align = TYPE_ALIGN (type);
4627 else
4628 align = GET_MODE_ALIGNMENT (mode);
4629 if (align < PARM_BOUNDARY)
4630 align = PARM_BOUNDARY;
4631 /* Decimal floating point is aligned to its natural boundary. */
4632 if (!TARGET_64BIT && !VALID_DFP_MODE_P (mode))
4634 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
4635 make an exception for SSE modes since these require 128-bit
4636 alignment.
4638 The handling here differs from field_alignment. ICC aligns MMX
4639 arguments to 4 byte boundaries, while structure fields are aligned
4640 to 8 byte boundaries. */
4641 if (!TARGET_SSE)
4642 align = PARM_BOUNDARY;
4643 else if (!type)
4645 if (!SSE_REG_MODE_P (mode))
4646 align = PARM_BOUNDARY;
4648 else
4650 if (!contains_128bit_aligned_vector_p (type))
4651 align = PARM_BOUNDARY;
4654 if (align > BIGGEST_ALIGNMENT)
4655 align = BIGGEST_ALIGNMENT;
4656 return align;
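/* Example boundaries produced above for a 32-bit target with SSE enabled
   (hypothetical arguments, not from this file):

     double d;                          ->  32 bits (PARM_BOUNDARY)
     __m128 v;                          -> 128 bits
     struct s { __m128 v; int i; } x;   -> 128 bits (contains a 128-bit
                                           aligned vector)

   Decimal floating point arguments keep their natural alignment even on
   32-bit targets, as the VALID_DFP_MODE_P check above shows.  */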
4659 /* Return true if N is a possible register number of function value. */
4661 bool
4662 ix86_function_value_regno_p (int regno)
4664 switch (regno)
4666 case 0:
4667 return true;
4669 case FIRST_FLOAT_REG:
4670 if (TARGET_64BIT_MS_ABI)
4671 return false;
4672 return TARGET_FLOAT_RETURNS_IN_80387;
4674 case FIRST_SSE_REG:
4675 return TARGET_SSE;
4677 case FIRST_MMX_REG:
4678 if (TARGET_MACHO || TARGET_64BIT)
4679 return false;
4680 return TARGET_MMX;
4683 return false;
4686 /* Define how to find the value returned by a function.
4687 VALTYPE is the data type of the value (as a tree).
4688 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4689 otherwise, FUNC is 0. */
4691 static rtx
4692 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4693 const_tree fntype, const_tree fn)
4695 unsigned int regno;
4697 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4698 we normally prevent this case when mmx is not available. However
4699 some ABIs may require the result to be returned like DImode. */
4700 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4701 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4703 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4704 we prevent this case when sse is not available. However some ABIs
4705 may require the result to be returned like integer TImode. */
4706 else if (mode == TImode
4707 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4708 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4710 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4711 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4712 regno = FIRST_FLOAT_REG;
4713 else
4714 /* Most things go in %eax. */
4715 regno = AX_REG;
4717 /* Override FP return register with %xmm0 for local functions when
4718 SSE math is enabled or for functions with sseregparm attribute. */
4719 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4721 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4722 if ((sse_level >= 1 && mode == SFmode)
4723 || (sse_level == 2 && mode == DFmode))
4724 regno = FIRST_SSE_REG;
4727 return gen_rtx_REG (orig_mode, regno);
4730 static rtx
4731 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4732 const_tree valtype)
4734 rtx ret;
4736 /* Handle libcalls, which don't provide a type node. */
4737 if (valtype == NULL)
4739 switch (mode)
4741 case SFmode:
4742 case SCmode:
4743 case DFmode:
4744 case DCmode:
4745 case TFmode:
4746 case SDmode:
4747 case DDmode:
4748 case TDmode:
4749 return gen_rtx_REG (mode, FIRST_SSE_REG);
4750 case XFmode:
4751 case XCmode:
4752 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4753 case TCmode:
4754 return NULL;
4755 default:
4756 return gen_rtx_REG (mode, AX_REG);
4760 ret = construct_container (mode, orig_mode, valtype, 1,
4761 REGPARM_MAX, SSE_REGPARM_MAX,
4762 x86_64_int_return_registers, 0);
4764 /* For zero sized structures, construct_container returns NULL, but we
4765 need to keep rest of compiler happy by returning meaningful value. */
4766 if (!ret)
4767 ret = gen_rtx_REG (orig_mode, AX_REG);
4769 return ret;
4772 static rtx
4773 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4775 unsigned int regno = AX_REG;
4777 if (TARGET_SSE)
4779 switch (GET_MODE_SIZE (mode))
4781 case 16:
4782 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4783 && !COMPLEX_MODE_P (mode))
4784 regno = FIRST_SSE_REG;
4785 break;
4786 case 8:
4787 case 4:
4788 if (mode == SFmode || mode == DFmode)
4789 regno = FIRST_SSE_REG;
4790 break;
4791 default:
4792 break;
4795 return gen_rtx_REG (orig_mode, regno);
4798 static rtx
4799 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4800 enum machine_mode orig_mode, enum machine_mode mode)
4802 const_tree fn, fntype;
4804 fn = NULL_TREE;
4805 if (fntype_or_decl && DECL_P (fntype_or_decl))
4806 fn = fntype_or_decl;
4807 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4809 if (TARGET_64BIT_MS_ABI)
4810 return function_value_ms_64 (orig_mode, mode);
4811 else if (TARGET_64BIT)
4812 return function_value_64 (orig_mode, mode, valtype);
4813 else
4814 return function_value_32 (orig_mode, mode, fntype, fn);
4817 static rtx
4818 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4819 bool outgoing ATTRIBUTE_UNUSED)
4821 enum machine_mode mode, orig_mode;
4823 orig_mode = TYPE_MODE (valtype);
4824 mode = type_natural_mode (valtype);
4825 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4829 ix86_libcall_value (enum machine_mode mode)
4831 return ix86_function_value_1 (NULL, NULL, mode, mode);
4834 /* Return true iff type is returned in memory. */
4836 static int
4837 return_in_memory_32 (const_tree type, enum machine_mode mode)
4839 HOST_WIDE_INT size;
4841 if (mode == BLKmode)
4842 return 1;
4844 size = int_size_in_bytes (type);
4846 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4847 return 0;
4849 if (VECTOR_MODE_P (mode) || mode == TImode)
4851 /* User-created vectors small enough to fit in EAX. */
4852 if (size < 8)
4853 return 0;
4855 /* MMX/3dNow values are returned in MM0,
4856 except when it doesn't exist. */
4857 if (size == 8)
4858 return (TARGET_MMX ? 0 : 1);
4860 /* SSE values are returned in XMM0, except when it doesn't exist. */
4861 if (size == 16)
4862 return (TARGET_SSE ? 0 : 1);
4865 if (mode == XFmode)
4866 return 0;
4868 if (mode == TDmode)
4869 return 1;
4871 if (size > 12)
4872 return 1;
4873 return 0;
4876 static int
4877 return_in_memory_64 (const_tree type, enum machine_mode mode)
4879 int needed_intregs, needed_sseregs;
4880 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4883 static int
4884 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4886 HOST_WIDE_INT size = int_size_in_bytes (type);
4888 /* __m128 is returned in xmm0. */
4889 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4890 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
4891 return 0;
4893 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4894 return (size != 1 && size != 2 && size != 4 && size != 8);
4898 ix86_return_in_memory (const_tree type)
4900 const enum machine_mode mode = type_natural_mode (type);
4902 if (TARGET_64BIT_MS_ABI)
4903 return return_in_memory_ms_64 (type, mode);
4904 else if (TARGET_64BIT)
4905 return return_in_memory_64 (type, mode);
4906 else
4907 return return_in_memory_32 (type, mode);
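/* Illustration of the 32-bit rules above (hypothetical types, not from
   this file):

     struct p { int x, y; };   8 bytes: memory by default, but registers
                               where MS_AGGREGATE_RETURN holds
     long double ld;           XFmode: always returned in %st(0)
     __m64 v8;                 returned in %mm0 only when MMX is enabled
     __m128 v16;               returned in %xmm0 only when SSE is enabled

   Anything else larger than 12 bytes goes to memory.  */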
4910 /* Return false iff TYPE is returned in memory. This version is used
4911 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4912 but differs notably in that when MMX is available, 8-byte vectors
4913 are returned in memory, rather than in MMX registers. */
4916 ix86_sol10_return_in_memory (const_tree type)
4918 int size;
4919 enum machine_mode mode = type_natural_mode (type);
4921 if (TARGET_64BIT)
4922 return return_in_memory_64 (type, mode);
4924 if (mode == BLKmode)
4925 return 1;
4927 size = int_size_in_bytes (type);
4929 if (VECTOR_MODE_P (mode))
4931 /* Return in memory only if MMX registers *are* available. This
4932 seems backwards, but it is consistent with the existing
4933 Solaris x86 ABI. */
4934 if (size == 8)
4935 return TARGET_MMX;
4936 if (size == 16)
4937 return !TARGET_SSE;
4939 else if (mode == TImode)
4940 return !TARGET_SSE;
4941 else if (mode == XFmode)
4942 return 0;
4944 return size > 12;
4947 /* When returning SSE vector types, we have a choice of either
4948 (1) being ABI incompatible with a -march switch, or
4949 (2) generating an error.
4950 Given no good solution, I think the safest thing is one warning.
4951 The user won't be able to use -Werror, but....
4953 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4954 called in response to actually generating a caller or callee that
4955 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4956 via aggregate_value_p for general type probing from tree-ssa. */
4958 static rtx
4959 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4961 static bool warnedsse, warnedmmx;
4963 if (!TARGET_64BIT && type)
4965 /* Look at the return type of the function, not the function type. */
4966 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4968 if (!TARGET_SSE && !warnedsse)
4970 if (mode == TImode
4971 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4973 warnedsse = true;
4974 warning (0, "SSE vector return without SSE enabled "
4975 "changes the ABI");
4979 if (!TARGET_MMX && !warnedmmx)
4981 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4983 warnedmmx = true;
4984 warning (0, "MMX vector return without MMX enabled "
4985 "changes the ABI");
4990 return NULL;
4994 /* Create the va_list data type. */
4996 static tree
4997 ix86_build_builtin_va_list (void)
4999 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5001 /* For i386 we use a plain pointer to the argument area. */
5002 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5003 return build_pointer_type (char_type_node);
5005 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5006 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5008 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5009 unsigned_type_node);
5010 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5011 unsigned_type_node);
5012 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5013 ptr_type_node);
5014 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5015 ptr_type_node);
5017 va_list_gpr_counter_field = f_gpr;
5018 va_list_fpr_counter_field = f_fpr;
5020 DECL_FIELD_CONTEXT (f_gpr) = record;
5021 DECL_FIELD_CONTEXT (f_fpr) = record;
5022 DECL_FIELD_CONTEXT (f_ovf) = record;
5023 DECL_FIELD_CONTEXT (f_sav) = record;
5025 TREE_CHAIN (record) = type_decl;
5026 TYPE_NAME (record) = type_decl;
5027 TYPE_FIELDS (record) = f_gpr;
5028 TREE_CHAIN (f_gpr) = f_fpr;
5029 TREE_CHAIN (f_fpr) = f_ovf;
5030 TREE_CHAIN (f_ovf) = f_sav;
5032 layout_type (record);
5034 /* The correct type is an array type of one element. */
5035 return build_array_type (record, build_index_type (size_zero_node));
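/* The record built above is the familiar x86-64 va_list; expressed as
   plain C it is roughly (illustrative, not part of this file):

     typedef struct __va_list_tag {
       unsigned int gp_offset;        offset into reg_save_area for GPR args
       unsigned int fp_offset;        offset into reg_save_area for SSE args
       void *overflow_arg_area;       next argument passed on the stack
       void *reg_save_area;           save area filled by the prologue
     } __builtin_va_list[1];

   The one-element array makes va_list decay to a pointer when passed to
   another function, as the psABI requires.  */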
5038 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
5040 static void
5041 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5043 rtx save_area, mem;
5044 rtx label;
5045 rtx label_ref;
5046 rtx tmp_reg;
5047 rtx nsse_reg;
5048 alias_set_type set;
5049 int i;
5051 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5052 return;
5054 /* Indicate to allocate space on the stack for varargs save area. */
5055 ix86_save_varrargs_registers = 1;
5056 /* We need 16-byte stack alignment to save SSE registers. If the user
5057 asked for a lower preferred_stack_boundary, let's just hope that he knows
5058 what he is doing and won't pass SSE values to varargs functions.
5060 We may also end up assuming that only 64-bit values are stored in SSE
5061 registers, to let some floating point programs work. */
5062 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5063 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5065 save_area = frame_pointer_rtx;
5066 set = get_varargs_alias_set ();
5068 for (i = cum->regno;
5069 i < ix86_regparm
5070 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5071 i++)
5073 mem = gen_rtx_MEM (Pmode,
5074 plus_constant (save_area, i * UNITS_PER_WORD));
5075 MEM_NOTRAP_P (mem) = 1;
5076 set_mem_alias_set (mem, set);
5077 emit_move_insn (mem, gen_rtx_REG (Pmode,
5078 x86_64_int_parameter_registers[i]));
5081 if (cum->sse_nregs && cfun->va_list_fpr_size)
5083 /* Now emit code to save the SSE registers. The AX parameter contains the
5084 number of SSE parameter registers used to call this function. We use the
5085 sse_prologue_save insn template, which produces a computed jump across
5086 the SSE saves. We need some preparation work to get this working. */
5088 label = gen_label_rtx ();
5089 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5091 /* Compute the address to jump to:
5092 label - eax*4 + nnamed_sse_arguments*4 (each save insn is 4 bytes). */
5093 tmp_reg = gen_reg_rtx (Pmode);
5094 nsse_reg = gen_reg_rtx (Pmode);
5095 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5096 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5097 gen_rtx_MULT (Pmode, nsse_reg,
5098 GEN_INT (4))));
5099 if (cum->sse_regno)
5100 emit_move_insn
5101 (nsse_reg,
5102 gen_rtx_CONST (DImode,
5103 gen_rtx_PLUS (DImode,
5104 label_ref,
5105 GEN_INT (cum->sse_regno * 4))));
5106 else
5107 emit_move_insn (nsse_reg, label_ref);
5108 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5110 /* Compute the address of the memory block we save into. We always use a
5111 pointer pointing 127 bytes past the first byte to store - this is needed
5112 to keep the instruction size limited to 4 bytes. */
5113 tmp_reg = gen_reg_rtx (Pmode);
5114 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5115 plus_constant (save_area,
5116 8 * REGPARM_MAX + 127)));
5117 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5118 MEM_NOTRAP_P (mem) = 1;
5119 set_mem_alias_set (mem, set);
5120 set_mem_align (mem, BITS_PER_WORD);
5122 /* And finally do the dirty job! */
5123 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5124 GEN_INT (cum->sse_regno), label));
5128 static void
5129 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5131 alias_set_type set = get_varargs_alias_set ();
5132 int i;
5134 for (i = cum->regno; i < REGPARM_MAX; i++)
5136 rtx reg, mem;
5138 mem = gen_rtx_MEM (Pmode,
5139 plus_constant (virtual_incoming_args_rtx,
5140 i * UNITS_PER_WORD));
5141 MEM_NOTRAP_P (mem) = 1;
5142 set_mem_alias_set (mem, set);
5144 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5145 emit_move_insn (mem, reg);
5149 static void
5150 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5151 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5152 int no_rtl)
5154 CUMULATIVE_ARGS next_cum;
5155 tree fntype;
5157 /* This argument doesn't appear to be used anymore. Which is good,
5158 because the old code here didn't suppress rtl generation. */
5159 gcc_assert (!no_rtl);
5161 if (!TARGET_64BIT)
5162 return;
5164 fntype = TREE_TYPE (current_function_decl);
5166 /* For varargs, we do not want to skip the dummy va_dcl argument.
5167 For stdargs, we do want to skip the last named argument. */
5168 next_cum = *cum;
5169 if (stdarg_p (fntype))
5170 function_arg_advance (&next_cum, mode, type, 1);
5172 if (TARGET_64BIT_MS_ABI)
5173 setup_incoming_varargs_ms_64 (&next_cum);
5174 else
5175 setup_incoming_varargs_64 (&next_cum);
5178 /* Implement va_start. */
5180 static void
5181 ix86_va_start (tree valist, rtx nextarg)
5183 HOST_WIDE_INT words, n_gpr, n_fpr;
5184 tree f_gpr, f_fpr, f_ovf, f_sav;
5185 tree gpr, fpr, ovf, sav, t;
5186 tree type;
5188 /* Only 64-bit targets need something special. */
5189 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5191 std_expand_builtin_va_start (valist, nextarg);
5192 return;
5195 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5196 f_fpr = TREE_CHAIN (f_gpr);
5197 f_ovf = TREE_CHAIN (f_fpr);
5198 f_sav = TREE_CHAIN (f_ovf);
5200 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5201 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5202 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5203 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5204 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5206 /* Count number of gp and fp argument registers used. */
5207 words = crtl->args.info.words;
5208 n_gpr = crtl->args.info.regno;
5209 n_fpr = crtl->args.info.sse_regno;
5211 if (cfun->va_list_gpr_size)
5213 type = TREE_TYPE (gpr);
5214 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5215 build_int_cst (type, n_gpr * 8));
5216 TREE_SIDE_EFFECTS (t) = 1;
5217 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5220 if (cfun->va_list_fpr_size)
5222 type = TREE_TYPE (fpr);
5223 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5224 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5225 TREE_SIDE_EFFECTS (t) = 1;
5226 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
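/* The values stored above follow the register save area layout: the six
   general argument registers occupy 8-byte slots at offsets 0..47, so
   gp_offset is n_gpr * 8, while the SSE registers occupy 16-byte slots
   starting at offset 8 * REGPARM_MAX (48), so fp_offset is 48 + n_fpr * 16.  */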
5229 /* Find the overflow area. */
5230 type = TREE_TYPE (ovf);
5231 t = make_tree (type, virtual_incoming_args_rtx);
5232 if (words != 0)
5233 t = build2 (POINTER_PLUS_EXPR, type, t,
5234 size_int (words * UNITS_PER_WORD));
5235 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5236 TREE_SIDE_EFFECTS (t) = 1;
5237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5239 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5241 /* Find the register save area.
5242 The prologue of the function saves it right above the stack frame. */
5243 type = TREE_TYPE (sav);
5244 t = make_tree (type, frame_pointer_rtx);
5245 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5246 TREE_SIDE_EFFECTS (t) = 1;
5247 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5251 /* Implement va_arg. */
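/* As a rough sketch, for a plain int argument the trees built below
   correspond to:

     if (ap->gp_offset >= 48)
       goto lab_false;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto lab_over;
   lab_false:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   lab_over:
     result = *(int *) addr;

   Aggregates spread over integer and SSE registers, and types that need a
   temporary, take the longer paths handled below.  */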
5253 static tree
5254 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5256 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5257 tree f_gpr, f_fpr, f_ovf, f_sav;
5258 tree gpr, fpr, ovf, sav, t;
5259 int size, rsize;
5260 tree lab_false, lab_over = NULL_TREE;
5261 tree addr, t2;
5262 rtx container;
5263 int indirect_p = 0;
5264 tree ptrtype;
5265 enum machine_mode nat_mode;
5267 /* Only 64-bit targets need something special. */
5268 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5269 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5271 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5272 f_fpr = TREE_CHAIN (f_gpr);
5273 f_ovf = TREE_CHAIN (f_fpr);
5274 f_sav = TREE_CHAIN (f_ovf);
5276 valist = build_va_arg_indirect_ref (valist);
5277 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5278 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5279 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5280 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5282 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5283 if (indirect_p)
5284 type = build_pointer_type (type);
5285 size = int_size_in_bytes (type);
5286 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5288 nat_mode = type_natural_mode (type);
5289 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5290 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5292 /* Pull the value out of the saved registers. */
5294 addr = create_tmp_var (ptr_type_node, "addr");
5295 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5297 if (container)
5299 int needed_intregs, needed_sseregs;
5300 bool need_temp;
5301 tree int_addr, sse_addr;
5303 lab_false = create_artificial_label ();
5304 lab_over = create_artificial_label ();
5306 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5308 need_temp = (!REG_P (container)
5309 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5310 || TYPE_ALIGN (type) > 128));
5312 /* In case we are passing a structure, verify that it is a consecutive block
5313 in the register save area.  If not, we need to do moves. */
5314 if (!need_temp && !REG_P (container))
5316 /* Verify that all registers are strictly consecutive */
5317 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5319 int i;
5321 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5323 rtx slot = XVECEXP (container, 0, i);
5324 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5325 || INTVAL (XEXP (slot, 1)) != i * 16)
5326 need_temp = 1;
5329 else
5331 int i;
5333 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5335 rtx slot = XVECEXP (container, 0, i);
5336 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5337 || INTVAL (XEXP (slot, 1)) != i * 8)
5338 need_temp = 1;
5342 if (!need_temp)
5344 int_addr = addr;
5345 sse_addr = addr;
5347 else
5349 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5350 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5351 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5352 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5355 /* First ensure that we fit completely in registers. */
5356 if (needed_intregs)
5358 t = build_int_cst (TREE_TYPE (gpr),
5359 (REGPARM_MAX - needed_intregs + 1) * 8);
5360 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5361 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5362 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5363 gimplify_and_add (t, pre_p);
5365 if (needed_sseregs)
5367 t = build_int_cst (TREE_TYPE (fpr),
5368 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5369 + REGPARM_MAX * 8);
5370 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5371 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5372 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5373 gimplify_and_add (t, pre_p);
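/* The tests emitted above branch to lab_false (the overflow-area path)
   unless the register save area still has needed_intregs free 8-byte
   general-register slots (gp_offset <= 48 - 8 * needed_intregs) and
   needed_sseregs free 16-byte SSE slots after them.  */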
5376 /* Compute index to start of area used for integer regs. */
5377 if (needed_intregs)
5379 /* int_addr = gpr + sav; */
5380 t = fold_convert (sizetype, gpr);
5381 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5382 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5383 gimplify_and_add (t, pre_p);
5385 if (needed_sseregs)
5387 /* sse_addr = fpr + sav; */
5388 t = fold_convert (sizetype, fpr);
5389 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5390 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5391 gimplify_and_add (t, pre_p);
5393 if (need_temp)
5395 int i;
5396 tree temp = create_tmp_var (type, "va_arg_tmp");
5398 /* addr = &temp; */
5399 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5400 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5401 gimplify_and_add (t, pre_p);
5403 for (i = 0; i < XVECLEN (container, 0); i++)
5405 rtx slot = XVECEXP (container, 0, i);
5406 rtx reg = XEXP (slot, 0);
5407 enum machine_mode mode = GET_MODE (reg);
5408 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5409 tree addr_type = build_pointer_type (piece_type);
5410 tree src_addr, src;
5411 int src_offset;
5412 tree dest_addr, dest;
5414 if (SSE_REGNO_P (REGNO (reg)))
5416 src_addr = sse_addr;
5417 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5419 else
5421 src_addr = int_addr;
5422 src_offset = REGNO (reg) * 8;
5424 src_addr = fold_convert (addr_type, src_addr);
5425 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5426 size_int (src_offset));
5427 src = build_va_arg_indirect_ref (src_addr);
5429 dest_addr = fold_convert (addr_type, addr);
5430 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5431 size_int (INTVAL (XEXP (slot, 1))));
5432 dest = build_va_arg_indirect_ref (dest_addr);
5434 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5435 gimplify_and_add (t, pre_p);
5439 if (needed_intregs)
5441 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5442 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5443 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5444 gimplify_and_add (t, pre_p);
5446 if (needed_sseregs)
5448 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5449 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5450 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5451 gimplify_and_add (t, pre_p);
5454 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5455 gimplify_and_add (t, pre_p);
5457 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5458 append_to_statement_list (t, pre_p);
5461 /* ... otherwise out of the overflow area. */
5463 /* Care for on-stack alignment if needed. */
5464 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5465 || integer_zerop (TYPE_SIZE (type)))
5466 t = ovf;
5467 else
5469 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5470 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5471 size_int (align - 1));
5472 t = fold_convert (sizetype, t);
5473 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5474 size_int (-align));
5475 t = fold_convert (TREE_TYPE (ovf), t);
5477 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5479 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5480 gimplify_and_add (t2, pre_p);
5482 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5483 size_int (rsize * UNITS_PER_WORD));
5484 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5485 gimplify_and_add (t, pre_p);
5487 if (container)
5489 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5490 append_to_statement_list (t, pre_p);
5493 ptrtype = build_pointer_type (type);
5494 addr = fold_convert (ptrtype, addr);
5496 if (indirect_p)
5497 addr = build_va_arg_indirect_ref (addr);
5498 return build_va_arg_indirect_ref (addr);
5501 /* Return nonzero if OPNUM's MEM should be matched
5502 in movabs* patterns. */
5505 ix86_check_movabs (rtx insn, int opnum)
5507 rtx set, mem;
5509 set = PATTERN (insn);
5510 if (GET_CODE (set) == PARALLEL)
5511 set = XVECEXP (set, 0, 0);
5512 gcc_assert (GET_CODE (set) == SET);
5513 mem = XEXP (set, opnum);
5514 while (GET_CODE (mem) == SUBREG)
5515 mem = SUBREG_REG (mem);
5516 gcc_assert (MEM_P (mem));
5517 return (volatile_ok || !MEM_VOLATILE_P (mem));
5520 /* Initialize the table of extra 80387 mathematical constants. */
5522 static void
5523 init_ext_80387_constants (void)
5525 static const char * cst[5] =
5527 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5528 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5529 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5530 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5531 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5533 int i;
5535 for (i = 0; i < 5; i++)
5537 real_from_string (&ext_80387_constants_table[i], cst[i]);
5538 /* Ensure each constant is rounded to XFmode precision. */
5539 real_convert (&ext_80387_constants_table[i],
5540 XFmode, &ext_80387_constants_table[i]);
5543 ext_80387_constants_init = 1;
5546 /* Return a positive code if the constant X can be loaded with a special 80387
5547 instruction (see standard_80387_constant_opcode), 0 if it cannot, or -1 if X is not an 80387 floating-point constant at all. */
5550 standard_80387_constant_p (rtx x)
5552 enum machine_mode mode = GET_MODE (x);
5554 REAL_VALUE_TYPE r;
5556 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5557 return -1;
5559 if (x == CONST0_RTX (mode))
5560 return 1;
5561 if (x == CONST1_RTX (mode))
5562 return 2;
5564 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5566 /* For XFmode constants, try to find a special 80387 instruction when
5567 optimizing for size or on those CPUs that benefit from them. */
5568 if (mode == XFmode
5569 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5571 int i;
5573 if (! ext_80387_constants_init)
5574 init_ext_80387_constants ();
5576 for (i = 0; i < 5; i++)
5577 if (real_identical (&r, &ext_80387_constants_table[i]))
5578 return i + 3;
5581 /* A load of the constant -0.0 or -1.0 will be split into an
5582 fldz;fchs or fld1;fchs sequence. */
5583 if (real_isnegzero (&r))
5584 return 8;
5585 if (real_identical (&r, &dconstm1))
5586 return 9;
5588 return 0;
5591 /* Return the opcode of the special instruction to be used to load
5592 the constant X. */
5594 const char *
5595 standard_80387_constant_opcode (rtx x)
5597 switch (standard_80387_constant_p (x))
5599 case 1:
5600 return "fldz";
5601 case 2:
5602 return "fld1";
5603 case 3:
5604 return "fldlg2";
5605 case 4:
5606 return "fldln2";
5607 case 5:
5608 return "fldl2e";
5609 case 6:
5610 return "fldl2t";
5611 case 7:
5612 return "fldpi";
5613 case 8:
5614 case 9:
5615 return "#";
5616 default:
5617 gcc_unreachable ();
5621 /* Return the CONST_DOUBLE representing the 80387 constant that is
5622 loaded by the specified special instruction. The argument IDX
5623 matches the return value from standard_80387_constant_p. */
5626 standard_80387_constant_rtx (int idx)
5628 int i;
5630 if (! ext_80387_constants_init)
5631 init_ext_80387_constants ();
5633 switch (idx)
5635 case 3:
5636 case 4:
5637 case 5:
5638 case 6:
5639 case 7:
5640 i = idx - 3;
5641 break;
5643 default:
5644 gcc_unreachable ();
5647 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5648 XFmode);
5651 /* Return 1 if MODE is a valid mode for SSE. */
5652 static int
5653 standard_sse_mode_p (enum machine_mode mode)
5655 switch (mode)
5657 case V16QImode:
5658 case V8HImode:
5659 case V4SImode:
5660 case V2DImode:
5661 case V4SFmode:
5662 case V2DFmode:
5663 return 1;
5665 default:
5666 return 0;
5670 /* Return nonzero if X is an FP constant we can load into an SSE register without using memory. */
5673 standard_sse_constant_p (rtx x)
5675 enum machine_mode mode = GET_MODE (x);
5677 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5678 return 1;
5679 if (vector_all_ones_operand (x, mode)
5680 && standard_sse_mode_p (mode))
5681 return TARGET_SSE2 ? 2 : -1;
5683 return 0;
5686 /* Return the opcode of the special instruction to be used to load
5687 the constant X. */
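/* For case 1 below, the zeroing idiom is chosen so that its execution
   domain matches the mode of the insn (xorps for single-float, xorpd for
   double, pxor for integer), which avoids a cross-domain bypass penalty on
   implementations that distinguish the domains.  */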
5689 const char *
5690 standard_sse_constant_opcode (rtx insn, rtx x)
5692 switch (standard_sse_constant_p (x))
5694 case 1:
5695 if (get_attr_mode (insn) == MODE_V4SF)
5696 return "xorps\t%0, %0";
5697 else if (get_attr_mode (insn) == MODE_V2DF)
5698 return "xorpd\t%0, %0";
5699 else
5700 return "pxor\t%0, %0";
5701 case 2:
5702 return "pcmpeqd\t%0, %0";
5704 gcc_unreachable ();
5707 /* Returns 1 if OP contains a symbol reference */
5710 symbolic_reference_mentioned_p (rtx op)
5712 const char *fmt;
5713 int i;
5715 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5716 return 1;
5718 fmt = GET_RTX_FORMAT (GET_CODE (op));
5719 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5721 if (fmt[i] == 'E')
5723 int j;
5725 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5726 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5727 return 1;
5730 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5731 return 1;
5734 return 0;
5737 /* Return 1 if it is appropriate to emit `ret' instructions in the
5738 body of a function. Do this only if the epilogue is simple, needing a
5739 couple of insns. Prior to reloading, we can't tell how many registers
5740 must be saved, so return 0 then. Return 0 if there is no frame
5741 marker to de-allocate. */
5744 ix86_can_use_return_insn_p (void)
5746 struct ix86_frame frame;
5748 if (! reload_completed || frame_pointer_needed)
5749 return 0;
5751 /* Don't allow more than 32K bytes of popped args, since that's all we can
5752 do with one instruction. */
5753 if (crtl->args.pops_args
5754 && crtl->args.size >= 32768)
5755 return 0;
5757 ix86_compute_frame_layout (&frame);
5758 return frame.to_allocate == 0 && frame.nregs == 0;
5761 /* Value should be nonzero if functions must have frame pointers.
5762 Zero means the frame pointer need not be set up (and parms may
5763 be accessed via the stack pointer) in functions that seem suitable. */
5766 ix86_frame_pointer_required (void)
5768 /* If we accessed previous frames, then the generated code expects
5769 to be able to access the saved ebp value in our frame. */
5770 if (cfun->machine->accesses_prev_frame)
5771 return 1;
5773 /* Several x86 OSes need a frame pointer for other reasons,
5774 usually pertaining to setjmp. */
5775 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5776 return 1;
5778 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5779 the frame pointer by default.  Turn it back on now if this is not
5780 a leaf function. */
5781 if (TARGET_OMIT_LEAF_FRAME_POINTER
5782 && (!current_function_is_leaf
5783 || ix86_current_function_calls_tls_descriptor))
5784 return 1;
5786 if (crtl->profile)
5787 return 1;
5789 return 0;
5792 /* Record that the current function accesses previous call frames. */
5794 void
5795 ix86_setup_frame_addresses (void)
5797 cfun->machine->accesses_prev_frame = 1;
5800 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5801 # define USE_HIDDEN_LINKONCE 1
5802 #else
5803 # define USE_HIDDEN_LINKONCE 0
5804 #endif
5806 static int pic_labels_used;
5808 /* Fills in the label name that should be used for a pc thunk for
5809 the given register. */
5811 static void
5812 get_pc_thunk_name (char name[32], unsigned int regno)
5814 gcc_assert (!TARGET_64BIT);
5816 if (USE_HIDDEN_LINKONCE)
5817 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5818 else
5819 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5823 /* At the end of the file, emit any -fpic pc thunks that were used: each
5824 thunk loads its register with the return address of its caller and then returns. */
5826 void
5827 ix86_file_end (void)
5829 rtx xops[2];
5830 int regno;
5832 for (regno = 0; regno < 8; ++regno)
5834 char name[32];
5836 if (! ((pic_labels_used >> regno) & 1))
5837 continue;
5839 get_pc_thunk_name (name, regno);
5841 #if TARGET_MACHO
5842 if (TARGET_MACHO)
5844 switch_to_section (darwin_sections[text_coal_section]);
5845 fputs ("\t.weak_definition\t", asm_out_file);
5846 assemble_name (asm_out_file, name);
5847 fputs ("\n\t.private_extern\t", asm_out_file);
5848 assemble_name (asm_out_file, name);
5849 fputs ("\n", asm_out_file);
5850 ASM_OUTPUT_LABEL (asm_out_file, name);
5852 else
5853 #endif
5854 if (USE_HIDDEN_LINKONCE)
5856 tree decl;
5858 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5859 error_mark_node);
5860 TREE_PUBLIC (decl) = 1;
5861 TREE_STATIC (decl) = 1;
5862 DECL_ONE_ONLY (decl) = 1;
5864 (*targetm.asm_out.unique_section) (decl, 0);
5865 switch_to_section (get_named_section (decl, NULL, 0));
5867 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5868 fputs ("\t.hidden\t", asm_out_file);
5869 assemble_name (asm_out_file, name);
5870 fputc ('\n', asm_out_file);
5871 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5873 else
5875 switch_to_section (text_section);
5876 ASM_OUTPUT_LABEL (asm_out_file, name);
5878 if (TARGET_64BIT_MS_ABI)
5880 xops[0] = gen_rtx_REG (Pmode, regno);
5881 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5882 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
5883 output_asm_insn ("ret", xops);
5885 else
5887 xops[0] = gen_rtx_REG (SImode, regno);
5888 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5889 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5890 output_asm_insn ("ret", xops);
5894 if (NEED_INDICATE_EXEC_STACK)
5895 file_end_indicate_exec_stack ();
5898 /* Emit code for the SET_GOT patterns. */
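/* For the usual ELF -fpic case the sequence emitted below is, depending on
   TARGET_DEEP_BRANCH_PREDICTION, roughly either

     call  __i686.get_pc_thunk.bx
     addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   or

     call  1f
   1: popl  %ebx
     addl  $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   with %ebx standing for whatever register DEST is.  */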
5900 const char *
5901 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5903 rtx xops[3];
5905 xops[0] = dest;
5907 if (TARGET_VXWORKS_RTP && flag_pic)
5909 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5910 xops[2] = gen_rtx_MEM (Pmode,
5911 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5912 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5914 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5915 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5916 an unadorned address. */
5917 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5918 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5919 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5920 return "";
5923 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5925 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5927 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5929 if (!flag_pic)
5930 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5931 else
5932 output_asm_insn ("call\t%a2", xops);
5934 #if TARGET_MACHO
5935 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5936 is what will be referenced by the Mach-O PIC subsystem. */
5937 if (!label)
5938 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5939 #endif
5941 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5942 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5944 if (flag_pic)
5945 output_asm_insn ("pop{l}\t%0", xops);
5947 else
5949 char name[32];
5950 get_pc_thunk_name (name, REGNO (dest));
5951 pic_labels_used |= 1 << REGNO (dest);
5953 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5954 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5955 output_asm_insn ("call\t%X2", xops);
5956 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5957 is what will be referenced by the Mach-O PIC subsystem. */
5958 #if TARGET_MACHO
5959 if (!label)
5960 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5961 else
5962 targetm.asm_out.internal_label (asm_out_file, "L",
5963 CODE_LABEL_NUMBER (label));
5964 #endif
5967 if (TARGET_MACHO)
5968 return "";
5970 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5971 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5972 else
5973 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5975 return "";
5978 /* Generate a "push" pattern for input ARG. */
5980 static rtx
5981 gen_push (rtx arg)
5983 return gen_rtx_SET (VOIDmode,
5984 gen_rtx_MEM (Pmode,
5985 gen_rtx_PRE_DEC (Pmode,
5986 stack_pointer_rtx)),
5987 arg);
5990 /* Return the number of an unused call-clobbered register if one is available
5991 for the entire function, otherwise INVALID_REGNUM. */
5993 static unsigned int
5994 ix86_select_alt_pic_regnum (void)
5996 if (current_function_is_leaf && !crtl->profile
5997 && !ix86_current_function_calls_tls_descriptor)
5999 int i;
6000 for (i = 2; i >= 0; --i)
6001 if (!df_regs_ever_live_p (i))
6002 return i;
6005 return INVALID_REGNUM;
6008 /* Return 1 if we need to save REGNO. */
6009 static int
6010 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6012 if (pic_offset_table_rtx
6013 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6014 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6015 || crtl->profile
6016 || crtl->calls_eh_return
6017 || crtl->uses_const_pool))
6019 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6020 return 0;
6021 return 1;
6024 if (crtl->calls_eh_return && maybe_eh_return)
6026 unsigned i;
6027 for (i = 0; ; i++)
6029 unsigned test = EH_RETURN_DATA_REGNO (i);
6030 if (test == INVALID_REGNUM)
6031 break;
6032 if (test == regno)
6033 return 1;
6037 if (cfun->machine->force_align_arg_pointer
6038 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6039 return 1;
6041 return (df_regs_ever_live_p (regno)
6042 && !call_used_regs[regno]
6043 && !fixed_regs[regno]
6044 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6047 /* Return number of registers to be saved on the stack. */
6049 static int
6050 ix86_nsaved_regs (void)
6052 int nregs = 0;
6053 int regno;
6055 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6056 if (ix86_save_reg (regno, true))
6057 nregs++;
6058 return nregs;
6061 /* Return the offset between two registers, one to be eliminated, and the other
6062 its replacement, at the start of a routine. */
6064 HOST_WIDE_INT
6065 ix86_initial_elimination_offset (int from, int to)
6067 struct ix86_frame frame;
6068 ix86_compute_frame_layout (&frame);
6070 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6071 return frame.hard_frame_pointer_offset;
6072 else if (from == FRAME_POINTER_REGNUM
6073 && to == HARD_FRAME_POINTER_REGNUM)
6074 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6075 else
6077 gcc_assert (to == STACK_POINTER_REGNUM);
6079 if (from == ARG_POINTER_REGNUM)
6080 return frame.stack_pointer_offset;
6082 gcc_assert (from == FRAME_POINTER_REGNUM);
6083 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6087 /* Fill in the ix86_frame structure describing the frame of the currently compiled function. */
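/* The layout computed below is, roughly, from higher to lower addresses:

     return address (and saved %ebp when a frame pointer is used)
       <- hard_frame_pointer_offset
     saved general registers (nregs * UNITS_PER_WORD)
     va_arg register save area (when ix86_save_varrargs_registers)
     padding1 (aligns the locals to stack_alignment_needed)
       <- frame_pointer_offset
     local variables (get_frame_size ())
     outgoing argument area (with ACCUMULATE_OUTGOING_ARGS)
     padding2 (aligns to preferred_alignment)
       <- stack_pointer_offset

   to_allocate is everything below the register save area that the prologue
   still has to subtract from the stack pointer; on x86-64 the red zone may
   reduce it.  */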
6089 static void
6090 ix86_compute_frame_layout (struct ix86_frame *frame)
6092 HOST_WIDE_INT total_size;
6093 unsigned int stack_alignment_needed;
6094 HOST_WIDE_INT offset;
6095 unsigned int preferred_alignment;
6096 HOST_WIDE_INT size = get_frame_size ();
6098 frame->nregs = ix86_nsaved_regs ();
6099 total_size = size;
6101 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6102 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6104 /* During reload iterations the number of registers saved can change.
6105 Recompute the value as needed.  Do not recompute when the number of registers
6106 didn't change, as reload calls this function multiple times and does not
6107 expect the decision to change within a single iteration. */
6108 if (!optimize_size
6109 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6111 int count = frame->nregs;
6113 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6114 /* The fast prologue uses move instead of push to save registers.  This
6115 is significantly longer, but also executes faster, as modern hardware
6116 can execute the moves in parallel but can't do that for push/pop.
6118 Be careful about choosing which prologue to emit: when the function takes
6119 many instructions to execute we may use the slow version, as well as when
6120 the function is known to be outside a hot spot (this is known with
6121 feedback only).  Weight the size of the function by the number of registers
6122 to save, as it is cheap to use one or two push instructions but very
6123 slow to use many of them. */
6124 if (count)
6125 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6126 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6127 || (flag_branch_probabilities
6128 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6129 cfun->machine->use_fast_prologue_epilogue = false;
6130 else
6131 cfun->machine->use_fast_prologue_epilogue
6132 = !expensive_function_p (count);
6134 if (TARGET_PROLOGUE_USING_MOVE
6135 && cfun->machine->use_fast_prologue_epilogue)
6136 frame->save_regs_using_mov = true;
6137 else
6138 frame->save_regs_using_mov = false;
6141 /* Skip return address and saved base pointer. */
6142 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6144 frame->hard_frame_pointer_offset = offset;
6146 /* Do some sanity checking of stack_alignment_needed and
6147 preferred_alignment, since the i386 port is the only one using these
6148 features, so they may break easily. */
6150 gcc_assert (!size || stack_alignment_needed);
6151 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6152 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6153 gcc_assert (stack_alignment_needed
6154 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6156 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6157 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6159 /* Register save area */
6160 offset += frame->nregs * UNITS_PER_WORD;
6162 /* Va-arg area */
6163 if (ix86_save_varrargs_registers)
6165 offset += X86_64_VARARGS_SIZE;
6166 frame->va_arg_size = X86_64_VARARGS_SIZE;
6168 else
6169 frame->va_arg_size = 0;
6171 /* Align start of frame for local function. */
6172 frame->padding1 = ((offset + stack_alignment_needed - 1)
6173 & -stack_alignment_needed) - offset;
6175 offset += frame->padding1;
6177 /* Frame pointer points here. */
6178 frame->frame_pointer_offset = offset;
6180 offset += size;
6182 /* Add the outgoing arguments area.  This can be skipped if we eliminated
6183 all the function calls as dead code.
6184 Skipping is however impossible when the function calls alloca, since the
6185 alloca expander assumes that the last crtl->outgoing_args_size bytes
6186 of the stack frame are unused. */
6187 if (ACCUMULATE_OUTGOING_ARGS
6188 && (!current_function_is_leaf || cfun->calls_alloca
6189 || ix86_current_function_calls_tls_descriptor))
6191 offset += crtl->outgoing_args_size;
6192 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6194 else
6195 frame->outgoing_arguments_size = 0;
6197 /* Align stack boundary. Only needed if we're calling another function
6198 or using alloca. */
6199 if (!current_function_is_leaf || cfun->calls_alloca
6200 || ix86_current_function_calls_tls_descriptor)
6201 frame->padding2 = ((offset + preferred_alignment - 1)
6202 & -preferred_alignment) - offset;
6203 else
6204 frame->padding2 = 0;
6206 offset += frame->padding2;
6208 /* We've reached end of stack frame. */
6209 frame->stack_pointer_offset = offset;
6211 /* Size the prologue needs to allocate. */
6212 frame->to_allocate =
6213 (size + frame->padding1 + frame->padding2
6214 + frame->outgoing_arguments_size + frame->va_arg_size);
6216 if ((!frame->to_allocate && frame->nregs <= 1)
6217 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6218 frame->save_regs_using_mov = false;
6220 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6221 && current_function_is_leaf
6222 && !ix86_current_function_calls_tls_descriptor)
6224 frame->red_zone_size = frame->to_allocate;
6225 if (frame->save_regs_using_mov)
6226 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6227 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6228 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6230 else
6231 frame->red_zone_size = 0;
6232 frame->to_allocate -= frame->red_zone_size;
6233 frame->stack_pointer_offset -= frame->red_zone_size;
6234 #if 0
6235 fprintf (stderr, "\n");
6236 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6237 fprintf (stderr, "size: %ld\n", (long)size);
6238 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6239 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6240 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6241 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6242 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6243 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6244 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6245 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6246 (long)frame->hard_frame_pointer_offset);
6247 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6248 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6249 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6250 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6251 #endif
6254 /* Emit code to save registers in the prologue. */
6256 static void
6257 ix86_emit_save_regs (void)
6259 unsigned int regno;
6260 rtx insn;
6262 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6263 if (ix86_save_reg (regno, true))
6265 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6266 RTX_FRAME_RELATED_P (insn) = 1;
6270 /* Emit code to save registers using MOV insns.  The first register
6271 is saved at POINTER + OFFSET. */
6272 static void
6273 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6275 unsigned int regno;
6276 rtx insn;
6278 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6279 if (ix86_save_reg (regno, true))
6281 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6282 Pmode, offset),
6283 gen_rtx_REG (Pmode, regno));
6284 RTX_FRAME_RELATED_P (insn) = 1;
6285 offset += UNITS_PER_WORD;
6289 /* Expand a prologue or epilogue stack adjustment.
6290 The pattern exists to put a dependency on all ebp-based memory accesses.
6291 STYLE should be negative if instructions should be marked as frame related,
6292 zero if the %r11 register is live and cannot be freely used, and positive
6293 otherwise. */
6295 static void
6296 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6298 rtx insn;
6300 if (! TARGET_64BIT)
6301 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6302 else if (x86_64_immediate_operand (offset, DImode))
6303 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6304 else
6306 rtx r11;
6307 /* r11 is used by indirect sibcall return as well, set before the
6308 epilogue and used after the epilogue. ATM indirect sibcall
6309 shouldn't be used together with huge frame sizes in one
6310 function because of the frame_size check in sibcall.c. */
6311 gcc_assert (style);
6312 r11 = gen_rtx_REG (DImode, R11_REG);
6313 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6314 if (style < 0)
6315 RTX_FRAME_RELATED_P (insn) = 1;
6316 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6317 offset));
6319 if (style < 0)
6320 RTX_FRAME_RELATED_P (insn) = 1;
6323 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6325 static rtx
6326 ix86_internal_arg_pointer (void)
6328 bool has_force_align_arg_pointer =
6329 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6330 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6331 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6332 && DECL_NAME (current_function_decl)
6333 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6334 && DECL_FILE_SCOPE_P (current_function_decl))
6335 || ix86_force_align_arg_pointer
6336 || has_force_align_arg_pointer)
6338 /* Nested functions can't realign the stack due to a register
6339 conflict. */
6340 if (DECL_CONTEXT (current_function_decl)
6341 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6343 if (ix86_force_align_arg_pointer)
6344 warning (0, "-mstackrealign ignored for nested functions");
6345 if (has_force_align_arg_pointer)
6346 error ("%s not supported for nested functions",
6347 ix86_force_align_arg_pointer_string);
6348 return virtual_incoming_args_rtx;
6350 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6351 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6353 else
6354 return virtual_incoming_args_rtx;
6357 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6358 This is called from dwarf2out.c to emit call frame instructions
6359 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6360 static void
6361 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6363 rtx unspec = SET_SRC (pattern);
6364 gcc_assert (GET_CODE (unspec) == UNSPEC);
6366 switch (index)
6368 case UNSPEC_REG_SAVE:
6369 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6370 SET_DEST (pattern));
6371 break;
6372 case UNSPEC_DEF_CFA:
6373 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6374 INTVAL (XVECEXP (unspec, 0, 0)));
6375 break;
6376 default:
6377 gcc_unreachable ();
6381 /* Expand the prologue into a bunch of separate insns. */
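/* In outline, the prologue emitted below:
   - optionally realigns the stack when force_align_arg_pointer is in use,
     re-pushing the return address so the unwind info stays consistent;
   - pushes %ebp and copies %esp into it when a frame pointer is needed;
   - saves call-saved registers either with pushes or, for the fast
     prologue, with moves into the already-allocated frame;
   - allocates frame.to_allocate bytes, using the stack-probing
     allocate_stack_worker when TARGET_STACK_PROBE requires it;
   - loads the PIC register when the function needs one;
   - emits a blockage when profiling so that no call is scheduled before
     the mcount call.  */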
6383 void
6384 ix86_expand_prologue (void)
6386 rtx insn;
6387 bool pic_reg_used;
6388 struct ix86_frame frame;
6389 HOST_WIDE_INT allocate;
6391 ix86_compute_frame_layout (&frame);
6393 if (cfun->machine->force_align_arg_pointer)
6395 rtx x, y;
6397 /* Grab the argument pointer. */
6398 x = plus_constant (stack_pointer_rtx, 4);
6399 y = cfun->machine->force_align_arg_pointer;
6400 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6401 RTX_FRAME_RELATED_P (insn) = 1;
6403 /* The unwind info consists of two parts: install the fafp as the cfa,
6404 and record the fafp as the "save register" of the stack pointer.
6405 The latter is there so that the unwinder can see where it
6406 should restore the stack pointer across the and insn. */
6407 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6408 x = gen_rtx_SET (VOIDmode, y, x);
6409 RTX_FRAME_RELATED_P (x) = 1;
6410 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6411 UNSPEC_REG_SAVE);
6412 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6413 RTX_FRAME_RELATED_P (y) = 1;
6414 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6415 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6416 REG_NOTES (insn) = x;
6418 /* Align the stack. */
6419 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6420 GEN_INT (-16)));
6422 /* And here we cheat like madmen with the unwind info.  We force the
6423 cfa register back to sp+4, which is exactly what it was at the
6424 start of the function.  Re-pushing the return address results in
6425 the return address being at the same spot relative to the cfa, and thus is
6426 correct wrt the unwind info. */
6427 x = cfun->machine->force_align_arg_pointer;
6428 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6429 insn = emit_insn (gen_push (x));
6430 RTX_FRAME_RELATED_P (insn) = 1;
6432 x = GEN_INT (4);
6433 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6434 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6435 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6436 REG_NOTES (insn) = x;
6439 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6440 slower on all targets. Also sdb doesn't like it. */
6442 if (frame_pointer_needed)
6444 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6445 RTX_FRAME_RELATED_P (insn) = 1;
6447 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6448 RTX_FRAME_RELATED_P (insn) = 1;
6451 allocate = frame.to_allocate;
6453 if (!frame.save_regs_using_mov)
6454 ix86_emit_save_regs ();
6455 else
6456 allocate += frame.nregs * UNITS_PER_WORD;
6458 /* When using the red zone we may start register saving before allocating
6459 the stack frame, saving one cycle of the prologue.  However, avoid doing
6460 this if we are going to have to probe the stack, since
6461 at least on x86_64 the stack probe can turn into a call that clobbers
6462 a red zone location. */
6463 if (TARGET_RED_ZONE && frame.save_regs_using_mov
6464 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6465 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6466 : stack_pointer_rtx,
6467 -frame.nregs * UNITS_PER_WORD);
6469 if (allocate == 0)
6471 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6472 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6473 GEN_INT (-allocate), -1);
6474 else
6476 /* Only valid for Windows targets (32-bit, or 64-bit with the MS ABI). */
6477 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6478 bool eax_live;
6479 rtx t;
6481 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6483 if (TARGET_64BIT_MS_ABI)
6484 eax_live = false;
6485 else
6486 eax_live = ix86_eax_live_at_start_p ();
6488 if (eax_live)
6490 emit_insn (gen_push (eax));
6491 allocate -= UNITS_PER_WORD;
6494 emit_move_insn (eax, GEN_INT (allocate));
6496 if (TARGET_64BIT)
6497 insn = gen_allocate_stack_worker_64 (eax);
6498 else
6499 insn = gen_allocate_stack_worker_32 (eax);
6500 insn = emit_insn (insn);
6501 RTX_FRAME_RELATED_P (insn) = 1;
6502 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6503 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6504 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6505 t, REG_NOTES (insn));
6507 if (eax_live)
6509 if (frame_pointer_needed)
6510 t = plus_constant (hard_frame_pointer_rtx,
6511 allocate
6512 - frame.to_allocate
6513 - frame.nregs * UNITS_PER_WORD);
6514 else
6515 t = plus_constant (stack_pointer_rtx, allocate);
6516 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6520 if (frame.save_regs_using_mov
6521 && !(TARGET_RED_ZONE
6522 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6524 if (!frame_pointer_needed || !frame.to_allocate)
6525 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6526 else
6527 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6528 -frame.nregs * UNITS_PER_WORD);
6531 pic_reg_used = false;
6532 if (pic_offset_table_rtx
6533 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6534 || crtl->profile))
6536 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6538 if (alt_pic_reg_used != INVALID_REGNUM)
6539 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6541 pic_reg_used = true;
6544 if (pic_reg_used)
6546 if (TARGET_64BIT)
6548 if (ix86_cmodel == CM_LARGE_PIC)
6550 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6551 rtx label = gen_label_rtx ();
6552 emit_label (label);
6553 LABEL_PRESERVE_P (label) = 1;
6554 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6555 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6556 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6557 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6558 pic_offset_table_rtx, tmp_reg));
6560 else
6561 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6563 else
6564 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6567 /* Prevent function calls from being scheduled before the call to mcount.
6568 In the pic_reg_used case, make sure that the got load isn't deleted. */
6569 if (crtl->profile)
6571 if (pic_reg_used)
6572 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6573 emit_insn (gen_blockage ());
6577 /* Emit code to restore saved registers using MOV insns. First register
6578 is restored from POINTER + OFFSET. */
6579 static void
6580 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6581 int maybe_eh_return)
6583 int regno;
6584 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6586 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6587 if (ix86_save_reg (regno, maybe_eh_return))
6589 /* Ensure that adjust_address won't be forced to produce pointer
6590 out of range allowed by x86-64 instruction set. */
6591 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6593 rtx r11;
6595 r11 = gen_rtx_REG (DImode, R11_REG);
6596 emit_move_insn (r11, GEN_INT (offset));
6597 emit_insn (gen_adddi3 (r11, r11, pointer));
6598 base_address = gen_rtx_MEM (Pmode, r11);
6599 offset = 0;
6601 emit_move_insn (gen_rtx_REG (Pmode, regno),
6602 adjust_address (base_address, Pmode, offset));
6603 offset += UNITS_PER_WORD;
6607 /* Restore function stack, frame, and registers. */
6609 void
6610 ix86_expand_epilogue (int style)
6612 int regno;
6613 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6614 struct ix86_frame frame;
6615 HOST_WIDE_INT offset;
6617 ix86_compute_frame_layout (&frame);
6619 /* Calculate start of saved registers relative to ebp. Special care
6620 must be taken for the normal return case of a function using
6621 eh_return: the eax and edx registers are marked as saved, but not
6622 restored along this path. */
6623 offset = frame.nregs;
6624 if (crtl->calls_eh_return && style != 2)
6625 offset -= 2;
6626 offset *= -UNITS_PER_WORD;
6628 /* If we're only restoring one register and sp is not valid, then
6629 use a move instruction to restore the register, since it's
6630 less work than reloading sp and popping the register.
6632 The default code results in a stack adjustment using an add/lea instruction,
6633 while this code results in a LEAVE instruction (or discrete equivalent),
6634 so it is profitable in some other cases as well, especially when there
6635 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
6636 is set and there is exactly one register to pop.  This heuristic may need
6637 some tuning in the future. */
6638 if ((!sp_valid && frame.nregs <= 1)
6639 || (TARGET_EPILOGUE_USING_MOVE
6640 && cfun->machine->use_fast_prologue_epilogue
6641 && (frame.nregs > 1 || frame.to_allocate))
6642 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6643 || (frame_pointer_needed && TARGET_USE_LEAVE
6644 && cfun->machine->use_fast_prologue_epilogue
6645 && frame.nregs == 1)
6646 || crtl->calls_eh_return)
6648 /* Restore registers.  We can use ebp or esp to address the memory
6649 locations.  If both are available, default to ebp, since offsets
6650 are known to be small.  The only exception is esp pointing directly to
6651 the end of the block of saved registers, where we may simplify the
6652 addressing mode. */
6654 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6655 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6656 frame.to_allocate, style == 2);
6657 else
6658 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6659 offset, style == 2);
6661 /* eh_return epilogues need %ecx added to the stack pointer. */
6662 if (style == 2)
6664 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6666 if (frame_pointer_needed)
6668 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6669 tmp = plus_constant (tmp, UNITS_PER_WORD);
6670 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6672 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6673 emit_move_insn (hard_frame_pointer_rtx, tmp);
6675 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6676 const0_rtx, style);
6678 else
6680 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6681 tmp = plus_constant (tmp, (frame.to_allocate
6682 + frame.nregs * UNITS_PER_WORD));
6683 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6686 else if (!frame_pointer_needed)
6687 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6688 GEN_INT (frame.to_allocate
6689 + frame.nregs * UNITS_PER_WORD),
6690 style);
6691 /* If not an i386, mov & pop is faster than "leave". */
6692 else if (TARGET_USE_LEAVE || optimize_size
6693 || !cfun->machine->use_fast_prologue_epilogue)
6694 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6695 else
6697 pro_epilogue_adjust_stack (stack_pointer_rtx,
6698 hard_frame_pointer_rtx,
6699 const0_rtx, style);
6700 if (TARGET_64BIT)
6701 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6702 else
6703 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6706 else
6708 /* First step is to deallocate the stack frame so that we can
6709 pop the registers. */
6710 if (!sp_valid)
6712 gcc_assert (frame_pointer_needed);
6713 pro_epilogue_adjust_stack (stack_pointer_rtx,
6714 hard_frame_pointer_rtx,
6715 GEN_INT (offset), style);
6717 else if (frame.to_allocate)
6718 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6719 GEN_INT (frame.to_allocate), style);
6721 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6722 if (ix86_save_reg (regno, false))
6724 if (TARGET_64BIT)
6725 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6726 else
6727 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6729 if (frame_pointer_needed)
6731 /* Leave results in shorter dependency chains on CPUs that are
6732 able to grok it fast. */
6733 if (TARGET_USE_LEAVE)
6734 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6735 else if (TARGET_64BIT)
6736 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6737 else
6738 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6742 if (cfun->machine->force_align_arg_pointer)
6744 emit_insn (gen_addsi3 (stack_pointer_rtx,
6745 cfun->machine->force_align_arg_pointer,
6746 GEN_INT (-4)));
6749 /* Sibcall epilogues don't want a return instruction. */
6750 if (style == 0)
6751 return;
6753 if (crtl->args.pops_args && crtl->args.size)
6755 rtx popc = GEN_INT (crtl->args.pops_args);
6757 /* i386 can only pop 64K bytes. If asked to pop more, pop
6758 return address, do explicit add, and jump indirectly to the
6759 caller. */
6761 if (crtl->args.pops_args >= 65536)
6763 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6765 /* There is no "pascal" calling convention in any 64bit ABI. */
6766 gcc_assert (!TARGET_64BIT);
6768 emit_insn (gen_popsi1 (ecx));
6769 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6770 emit_jump_insn (gen_return_indirect_internal (ecx));
6772 else
6773 emit_jump_insn (gen_return_pop_internal (popc));
6775 else
6776 emit_jump_insn (gen_return_internal ());
6779 /* Reset from the function's potential modifications. */
6781 static void
6782 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6783 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6785 if (pic_offset_table_rtx)
6786 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6787 #if TARGET_MACHO
6788 /* Mach-O doesn't support labels at the end of objects, so if
6789 it looks like we might want one, insert a NOP. */
6791 rtx insn = get_last_insn ();
6792 while (insn
6793 && NOTE_P (insn)
6794 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6795 insn = PREV_INSN (insn);
6796 if (insn
6797 && (LABEL_P (insn)
6798 || (NOTE_P (insn)
6799 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6800 fputs ("\tnop\n", file);
6802 #endif
6806 /* Extract the parts of an RTL expression that is a valid memory address
6807 for an instruction.  Return 0 if the structure of the address is
6808 grossly off.  Return -1 if the address contains ASHIFT, so it is not
6809 strictly valid, but is still used for computing the length of a lea instruction. */
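/* The extracted parts correspond to the general i386 effective address
   base + index*scale + disp, optionally with a %fs/%gs segment for TLS.
   For example, (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))
   decomposes into base = B, index = A, scale = 4, disp = 8.  */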
6812 ix86_decompose_address (rtx addr, struct ix86_address *out)
6814 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6815 rtx base_reg, index_reg;
6816 HOST_WIDE_INT scale = 1;
6817 rtx scale_rtx = NULL_RTX;
6818 int retval = 1;
6819 enum ix86_address_seg seg = SEG_DEFAULT;
6821 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6822 base = addr;
6823 else if (GET_CODE (addr) == PLUS)
6825 rtx addends[4], op;
6826 int n = 0, i;
6828 op = addr;
6831 if (n >= 4)
6832 return 0;
6833 addends[n++] = XEXP (op, 1);
6834 op = XEXP (op, 0);
6836 while (GET_CODE (op) == PLUS);
6837 if (n >= 4)
6838 return 0;
6839 addends[n] = op;
6841 for (i = n; i >= 0; --i)
6843 op = addends[i];
6844 switch (GET_CODE (op))
6846 case MULT:
6847 if (index)
6848 return 0;
6849 index = XEXP (op, 0);
6850 scale_rtx = XEXP (op, 1);
6851 break;
6853 case UNSPEC:
6854 if (XINT (op, 1) == UNSPEC_TP
6855 && TARGET_TLS_DIRECT_SEG_REFS
6856 && seg == SEG_DEFAULT)
6857 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6858 else
6859 return 0;
6860 break;
6862 case REG:
6863 case SUBREG:
6864 if (!base)
6865 base = op;
6866 else if (!index)
6867 index = op;
6868 else
6869 return 0;
6870 break;
6872 case CONST:
6873 case CONST_INT:
6874 case SYMBOL_REF:
6875 case LABEL_REF:
6876 if (disp)
6877 return 0;
6878 disp = op;
6879 break;
6881 default:
6882 return 0;
6886 else if (GET_CODE (addr) == MULT)
6888 index = XEXP (addr, 0); /* index*scale */
6889 scale_rtx = XEXP (addr, 1);
6891 else if (GET_CODE (addr) == ASHIFT)
6893 rtx tmp;
6895 /* We're called for lea too, which implements ashift on occasion. */
6896 index = XEXP (addr, 0);
6897 tmp = XEXP (addr, 1);
6898 if (!CONST_INT_P (tmp))
6899 return 0;
6900 scale = INTVAL (tmp);
6901 if ((unsigned HOST_WIDE_INT) scale > 3)
6902 return 0;
6903 scale = 1 << scale;
6904 retval = -1;
6906 else
6907 disp = addr; /* displacement */
6909 /* Extract the integral value of scale. */
6910 if (scale_rtx)
6912 if (!CONST_INT_P (scale_rtx))
6913 return 0;
6914 scale = INTVAL (scale_rtx);
6917 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6918 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6920 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
6921 if (base_reg && index_reg && scale == 1
6922 && (index_reg == arg_pointer_rtx
6923 || index_reg == frame_pointer_rtx
6924 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6926 rtx tmp;
6927 tmp = base, base = index, index = tmp;
6928 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6931 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6932 if ((base_reg == hard_frame_pointer_rtx
6933 || base_reg == frame_pointer_rtx
6934 || base_reg == arg_pointer_rtx) && !disp)
6935 disp = const0_rtx;
6937 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6938 Avoid this by transforming it to [%esi+0]. */
6939 if (TARGET_K6 && !optimize_size
6940 && base_reg && !index_reg && !disp
6941 && REG_P (base_reg)
6942 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6943 disp = const0_rtx;
6945 /* Special case: encode reg+reg instead of reg*2. */
6946 if (!base && index && scale && scale == 2)
6947 base = index, base_reg = index_reg, scale = 1;
6949 /* Special case: scaling cannot be encoded without base or displacement. */
6950 if (!base && !disp && index && scale != 1)
6951 disp = const0_rtx;
6953 out->base = base;
6954 out->index = index;
6955 out->disp = disp;
6956 out->scale = scale;
6957 out->seg = seg;
6959 return retval;
6962 /* Return the cost of the memory address X.
6963 For i386, it is better to use a complex address than let gcc copy
6964 the address into a reg and make a new pseudo.  But not if the address
6965 requires two regs - that would mean more pseudos with longer
6966 lifetimes. */
6967 static int
6968 ix86_address_cost (rtx x)
6970 struct ix86_address parts;
6971 int cost = 1;
6972 int ok = ix86_decompose_address (x, &parts);
6974 gcc_assert (ok);
6976 if (parts.base && GET_CODE (parts.base) == SUBREG)
6977 parts.base = SUBREG_REG (parts.base);
6978 if (parts.index && GET_CODE (parts.index) == SUBREG)
6979 parts.index = SUBREG_REG (parts.index);
6981 /* Attempt to minimize number of registers in the address. */
6982 if ((parts.base
6983 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6984 || (parts.index
6985 && (!REG_P (parts.index)
6986 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6987 cost++;
6989 if (parts.base
6990 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6991 && parts.index
6992 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6993 && parts.base != parts.index)
6994 cost++;
6996 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6997 since its predecode logic can't detect the length of such instructions
6998 and decoding degenerates to the vector decoder.  Increase the cost of such
6999 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
7000 to split such addresses or even refuse them entirely.
7002 The following addressing modes are affected:
7003 [base+scale*index]
7004 [scale*index+disp]
7005 [base+index]
7007 The first and last case may be avoidable by explicitly coding a zero
7008 displacement in the memory address, but I don't have an AMD-K6 machine
7009 handy to check this theory. */
7011 if (TARGET_K6
7012 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7013 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7014 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7015 cost += 10;
7017 return cost;
7020 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
7021 this is used to form addresses of local data when -fPIC is in
7022 use. */
7024 static bool
7025 darwin_local_data_pic (rtx disp)
7027 if (GET_CODE (disp) == MINUS)
7029 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7030 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7031 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7033 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7034 if (! strcmp (sym_name, "<pic base>"))
7035 return true;
7039 return false;
7042 /* Determine if a given RTX is a valid constant. We already know this
7043 satisfies CONSTANT_P. */
7045 bool
7046 legitimate_constant_p (rtx x)
7048 switch (GET_CODE (x))
7050 case CONST:
7051 x = XEXP (x, 0);
7053 if (GET_CODE (x) == PLUS)
7055 if (!CONST_INT_P (XEXP (x, 1)))
7056 return false;
7057 x = XEXP (x, 0);
7060 if (TARGET_MACHO && darwin_local_data_pic (x))
7061 return true;
7063 /* Only some unspecs are valid as "constants". */
7064 if (GET_CODE (x) == UNSPEC)
7065 switch (XINT (x, 1))
7067 case UNSPEC_GOT:
7068 case UNSPEC_GOTOFF:
7069 case UNSPEC_PLTOFF:
7070 return TARGET_64BIT;
7071 case UNSPEC_TPOFF:
7072 case UNSPEC_NTPOFF:
7073 x = XVECEXP (x, 0, 0);
7074 return (GET_CODE (x) == SYMBOL_REF
7075 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7076 case UNSPEC_DTPOFF:
7077 x = XVECEXP (x, 0, 0);
7078 return (GET_CODE (x) == SYMBOL_REF
7079 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7080 default:
7081 return false;
7084 /* We must have drilled down to a symbol. */
7085 if (GET_CODE (x) == LABEL_REF)
7086 return true;
7087 if (GET_CODE (x) != SYMBOL_REF)
7088 return false;
7089 /* FALLTHRU */
7091 case SYMBOL_REF:
7092 /* TLS symbols are never valid. */
7093 if (SYMBOL_REF_TLS_MODEL (x))
7094 return false;
7096 /* DLLIMPORT symbols are never valid. */
7097 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7098 && SYMBOL_REF_DLLIMPORT_P (x))
7099 return false;
7100 break;
7102 case CONST_DOUBLE:
7103 if (GET_MODE (x) == TImode
7104 && x != CONST0_RTX (TImode)
7105 && !TARGET_64BIT)
7106 return false;
7107 break;
7109 case CONST_VECTOR:
7110 if (x == CONST0_RTX (GET_MODE (x)))
7111 return true;
7112 return false;
7114 default:
7115 break;
7118 /* Otherwise we handle everything else in the move patterns. */
7119 return true;
7122 /* Determine if it's legal to put X into the constant pool. This
7123 is not possible for the address of thread-local symbols, which
7124 is checked above. */
7126 static bool
7127 ix86_cannot_force_const_mem (rtx x)
7129 /* We can always put integral constants and vectors in memory. */
7130 switch (GET_CODE (x))
7132 case CONST_INT:
7133 case CONST_DOUBLE:
7134 case CONST_VECTOR:
7135 return false;
7137 default:
7138 break;
7140 return !legitimate_constant_p (x);
7143 /* Determine if a given RTX is a valid constant address. */
7145 bool
7146 constant_address_p (rtx x)
7148 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7151 /* Nonzero if the constant value X is a legitimate general operand
7152 when generating PIC code. It is given that flag_pic is on and
7153 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7155 bool
7156 legitimate_pic_operand_p (rtx x)
7158 rtx inner;
7160 switch (GET_CODE (x))
7162 case CONST:
7163 inner = XEXP (x, 0);
7164 if (GET_CODE (inner) == PLUS
7165 && CONST_INT_P (XEXP (inner, 1)))
7166 inner = XEXP (inner, 0);
7168 /* Only some unspecs are valid as "constants". */
7169 if (GET_CODE (inner) == UNSPEC)
7170 switch (XINT (inner, 1))
7172 case UNSPEC_GOT:
7173 case UNSPEC_GOTOFF:
7174 case UNSPEC_PLTOFF:
7175 return TARGET_64BIT;
7176 case UNSPEC_TPOFF:
7177 x = XVECEXP (inner, 0, 0);
7178 return (GET_CODE (x) == SYMBOL_REF
7179 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7180 default:
7181 return false;
7183 /* FALLTHRU */
7185 case SYMBOL_REF:
7186 case LABEL_REF:
7187 return legitimate_pic_address_disp_p (x);
7189 default:
7190 return true;
7194 /* Determine if a given CONST RTX is a valid memory displacement
7195 in PIC mode. */
7198 legitimate_pic_address_disp_p (rtx disp)
7200 bool saw_plus;
7202 /* In 64bit mode we can allow direct addresses of symbols and labels
7203 when they are not dynamic symbols. */
7204 if (TARGET_64BIT)
7206 rtx op0 = disp, op1;
7208 switch (GET_CODE (disp))
7210 case LABEL_REF:
7211 return true;
7213 case CONST:
7214 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7215 break;
7216 op0 = XEXP (XEXP (disp, 0), 0);
7217 op1 = XEXP (XEXP (disp, 0), 1);
7218 if (!CONST_INT_P (op1)
7219 || INTVAL (op1) >= 16*1024*1024
7220 || INTVAL (op1) < -16*1024*1024)
7221 break;
7222 if (GET_CODE (op0) == LABEL_REF)
7223 return true;
7224 if (GET_CODE (op0) != SYMBOL_REF)
7225 break;
7226 /* FALLTHRU */
7228 case SYMBOL_REF:
7229 /* TLS references should always be enclosed in UNSPEC. */
7230 if (SYMBOL_REF_TLS_MODEL (op0))
7231 return false;
7232 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7233 && ix86_cmodel != CM_LARGE_PIC)
7234 return true;
7235 break;
7237 default:
7238 break;
7241 if (GET_CODE (disp) != CONST)
7242 return 0;
7243 disp = XEXP (disp, 0);
7245 if (TARGET_64BIT)
7247 /* It is unsafe to allow PLUS expressions here; that would limit the allowed
7248 distance of GOT table references. We should not need these anyway. */
7249 if (GET_CODE (disp) != UNSPEC
7250 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7251 && XINT (disp, 1) != UNSPEC_GOTOFF
7252 && XINT (disp, 1) != UNSPEC_PLTOFF))
7253 return 0;
7255 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7256 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7257 return 0;
7258 return 1;
7261 saw_plus = false;
7262 if (GET_CODE (disp) == PLUS)
7264 if (!CONST_INT_P (XEXP (disp, 1)))
7265 return 0;
7266 disp = XEXP (disp, 0);
7267 saw_plus = true;
7270 if (TARGET_MACHO && darwin_local_data_pic (disp))
7271 return 1;
7273 if (GET_CODE (disp) != UNSPEC)
7274 return 0;
7276 switch (XINT (disp, 1))
7278 case UNSPEC_GOT:
7279 if (saw_plus)
7280 return false;
7281 /* We need to check for both symbols and labels because VxWorks loads
7282 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7283 details. */
7284 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7285 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7286 case UNSPEC_GOTOFF:
7287 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7288 While the ABI also specifies a 32bit relocation, we don't produce it in
7289 the small PIC model at all. */
7290 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7291 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7292 && !TARGET_64BIT)
7293 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7294 return false;
7295 case UNSPEC_GOTTPOFF:
7296 case UNSPEC_GOTNTPOFF:
7297 case UNSPEC_INDNTPOFF:
7298 if (saw_plus)
7299 return false;
7300 disp = XVECEXP (disp, 0, 0);
7301 return (GET_CODE (disp) == SYMBOL_REF
7302 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7303 case UNSPEC_NTPOFF:
7304 disp = XVECEXP (disp, 0, 0);
7305 return (GET_CODE (disp) == SYMBOL_REF
7306 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7307 case UNSPEC_DTPOFF:
7308 disp = XVECEXP (disp, 0, 0);
7309 return (GET_CODE (disp) == SYMBOL_REF
7310 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7313 return 0;
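/* For illustration (the symbol name "foo" is hypothetical): the
   displacements accepted here are the UNSPEC-wrapped forms generated
   elsewhere in this file, which end up printed as assembler operands
   such as foo@GOTOFF, foo@GOT, foo@GOTTPOFF or foo@NTPOFF.  */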
7316 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7317 memory address for an instruction. The MODE argument is the machine mode
7318 for the MEM expression that wants to use this address.
7320 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
7321 convert common non-canonical forms to canonical form so that they will
7322 be recognized. */
7325 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7326 rtx addr, int strict)
7328 struct ix86_address parts;
7329 rtx base, index, disp;
7330 HOST_WIDE_INT scale;
7331 const char *reason = NULL;
7332 rtx reason_rtx = NULL_RTX;
7334 if (ix86_decompose_address (addr, &parts) <= 0)
7336 reason = "decomposition failed";
7337 goto report_error;
7340 base = parts.base;
7341 index = parts.index;
7342 disp = parts.disp;
7343 scale = parts.scale;
7345 /* Validate base register.
7347 Don't allow SUBREG's that span more than a word here. It can lead to spill
7348 failures when the base is one word out of a two word structure, which is
7349 represented internally as a DImode int. */
7351 if (base)
7353 rtx reg;
7354 reason_rtx = base;
7356 if (REG_P (base))
7357 reg = base;
7358 else if (GET_CODE (base) == SUBREG
7359 && REG_P (SUBREG_REG (base))
7360 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7361 <= UNITS_PER_WORD)
7362 reg = SUBREG_REG (base);
7363 else
7365 reason = "base is not a register";
7366 goto report_error;
7369 if (GET_MODE (base) != Pmode)
7371 reason = "base is not in Pmode";
7372 goto report_error;
7375 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7376 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7378 reason = "base is not valid";
7379 goto report_error;
7383 /* Validate index register.
7385 Don't allow SUBREG's that span more than a word here -- same as above. */
7387 if (index)
7389 rtx reg;
7390 reason_rtx = index;
7392 if (REG_P (index))
7393 reg = index;
7394 else if (GET_CODE (index) == SUBREG
7395 && REG_P (SUBREG_REG (index))
7396 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7397 <= UNITS_PER_WORD)
7398 reg = SUBREG_REG (index);
7399 else
7401 reason = "index is not a register";
7402 goto report_error;
7405 if (GET_MODE (index) != Pmode)
7407 reason = "index is not in Pmode";
7408 goto report_error;
7411 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7412 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7414 reason = "index is not valid";
7415 goto report_error;
7419 /* Validate scale factor. */
7420 if (scale != 1)
7422 reason_rtx = GEN_INT (scale);
7423 if (!index)
7425 reason = "scale without index";
7426 goto report_error;
7429 if (scale != 2 && scale != 4 && scale != 8)
7431 reason = "scale is not a valid multiplier";
7432 goto report_error;
7436 /* Validate displacement. */
7437 if (disp)
7439 reason_rtx = disp;
7441 if (GET_CODE (disp) == CONST
7442 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7443 switch (XINT (XEXP (disp, 0), 1))
7445 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7446 used. While the ABI also specifies 32bit relocations, we don't produce
7447 them at all and use IP-relative addressing instead. */
7448 case UNSPEC_GOT:
7449 case UNSPEC_GOTOFF:
7450 gcc_assert (flag_pic);
7451 if (!TARGET_64BIT)
7452 goto is_legitimate_pic;
7453 reason = "64bit address unspec";
7454 goto report_error;
7456 case UNSPEC_GOTPCREL:
7457 gcc_assert (flag_pic);
7458 goto is_legitimate_pic;
7460 case UNSPEC_GOTTPOFF:
7461 case UNSPEC_GOTNTPOFF:
7462 case UNSPEC_INDNTPOFF:
7463 case UNSPEC_NTPOFF:
7464 case UNSPEC_DTPOFF:
7465 break;
7467 default:
7468 reason = "invalid address unspec";
7469 goto report_error;
7472 else if (SYMBOLIC_CONST (disp)
7473 && (flag_pic
7474 || (TARGET_MACHO
7475 #if TARGET_MACHO
7476 && MACHOPIC_INDIRECT
7477 && !machopic_operand_p (disp)
7478 #endif
7482 is_legitimate_pic:
7483 if (TARGET_64BIT && (index || base))
7485 /* foo@dtpoff(%rX) is ok. */
7486 if (GET_CODE (disp) != CONST
7487 || GET_CODE (XEXP (disp, 0)) != PLUS
7488 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7489 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7490 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7491 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7493 reason = "non-constant pic memory reference";
7494 goto report_error;
7497 else if (! legitimate_pic_address_disp_p (disp))
7499 reason = "displacement is an invalid pic construct";
7500 goto report_error;
7503 /* This code used to verify that a symbolic pic displacement
7504 includes the pic_offset_table_rtx register.
7506 While this is a good idea, unfortunately these constructs may
7507 be created by the "adds using lea" optimization for incorrect
7508 code like:
7510 int a;
7511 int foo (int i)
7513 { return *(&a + i); }
7516 This code is nonsensical, but results in addressing the
7517 GOT table with a pic_offset_table_rtx base. We can't
7518 just refuse it easily, since it gets matched by the
7519 "addsi3" pattern, which later gets split into an lea when
7520 the output register differs from the input. While this
7521 could be handled by a separate addsi pattern for this case
7522 that never results in an lea, disabling this test seems to
7523 be the easier and correct fix for the crash. */
7525 else if (GET_CODE (disp) != LABEL_REF
7526 && !CONST_INT_P (disp)
7527 && (GET_CODE (disp) != CONST
7528 || !legitimate_constant_p (disp))
7529 && (GET_CODE (disp) != SYMBOL_REF
7530 || !legitimate_constant_p (disp)))
7532 reason = "displacement is not constant";
7533 goto report_error;
7535 else if (TARGET_64BIT
7536 && !x86_64_immediate_operand (disp, VOIDmode))
7538 reason = "displacement is out of range";
7539 goto report_error;
7543 /* Everything looks valid. */
7544 return TRUE;
7546 report_error:
7547 return FALSE;
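/* For illustration: an address such as
   (plus (reg) (mult (reg) (const_int 3))) fails the scale check above
   ("scale is not a valid multiplier"), since only scales of 1, 2, 4
   and 8 can be encoded in the SIB byte.  */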
7550 /* Return a unique alias set for the GOT. */
7552 static alias_set_type
7553 ix86_GOT_alias_set (void)
7555 static alias_set_type set = -1;
7556 if (set == -1)
7557 set = new_alias_set ();
7558 return set;
7561 /* Return a legitimate reference for ORIG (an address) using the
7562 register REG. If REG is 0, a new pseudo is generated.
7564 There are two types of references that must be handled:
7566 1. Global data references must load the address from the GOT, via
7567 the PIC reg. An insn is emitted to do this load, and the reg is
7568 returned.
7570 2. Static data references, constant pool addresses, and code labels
7571 compute the address as an offset from the GOT, whose base is in
7572 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7573 differentiate them from global data objects. The returned
7574 address is the PIC reg + an unspec constant.
7576 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7577 reg also appears in the address. */
7579 static rtx
7580 legitimize_pic_address (rtx orig, rtx reg)
7582 rtx addr = orig;
7583 rtx new_rtx = orig;
7584 rtx base;
7586 #if TARGET_MACHO
7587 if (TARGET_MACHO && !TARGET_64BIT)
7589 if (reg == 0)
7590 reg = gen_reg_rtx (Pmode);
7591 /* Use the generic Mach-O PIC machinery. */
7592 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7594 #endif
7596 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7597 new_rtx = addr;
7598 else if (TARGET_64BIT
7599 && ix86_cmodel != CM_SMALL_PIC
7600 && gotoff_operand (addr, Pmode))
7602 rtx tmpreg;
7603 /* This symbol may be referenced via a displacement from the PIC
7604 base address (@GOTOFF). */
7606 if (reload_in_progress)
7607 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7608 if (GET_CODE (addr) == CONST)
7609 addr = XEXP (addr, 0);
7610 if (GET_CODE (addr) == PLUS)
7612 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7613 UNSPEC_GOTOFF);
7614 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7616 else
7617 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7618 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7619 if (!reg)
7620 tmpreg = gen_reg_rtx (Pmode);
7621 else
7622 tmpreg = reg;
7623 emit_move_insn (tmpreg, new_rtx);
7625 if (reg != 0)
7627 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7628 tmpreg, 1, OPTAB_DIRECT);
7629 new_rtx = reg;
7631 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7633 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7635 /* This symbol may be referenced via a displacement from the PIC
7636 base address (@GOTOFF). */
7638 if (reload_in_progress)
7639 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7640 if (GET_CODE (addr) == CONST)
7641 addr = XEXP (addr, 0);
7642 if (GET_CODE (addr) == PLUS)
7644 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7645 UNSPEC_GOTOFF);
7646 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7648 else
7649 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7650 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7651 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7653 if (reg != 0)
7655 emit_move_insn (reg, new_rtx);
7656 new_rtx = reg;
7659 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7660 /* We can't use @GOTOFF for text labels on VxWorks;
7661 see gotoff_operand. */
7662 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7664 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7666 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7667 return legitimize_dllimport_symbol (addr, true);
7668 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7669 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7670 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7672 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7673 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7677 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7679 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7680 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7681 new_rtx = gen_const_mem (Pmode, new_rtx);
7682 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7684 if (reg == 0)
7685 reg = gen_reg_rtx (Pmode);
7686 /* Use gen_movsi directly; otherwise the address is loaded
7687 into a register for CSE. We don't want to CSE this address;
7688 instead we CSE addresses from the GOT table, so skip this. */
7689 emit_insn (gen_movsi (reg, new_rtx));
7690 new_rtx = reg;
7692 else
7694 /* This symbol must be referenced via a load from the
7695 Global Offset Table (@GOT). */
7697 if (reload_in_progress)
7698 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7699 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7700 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7701 if (TARGET_64BIT)
7702 new_rtx = force_reg (Pmode, new_rtx);
7703 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7704 new_rtx = gen_const_mem (Pmode, new_rtx);
7705 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7707 if (reg == 0)
7708 reg = gen_reg_rtx (Pmode);
7709 emit_move_insn (reg, new_rtx);
7710 new_rtx = reg;
7713 else
7715 if (CONST_INT_P (addr)
7716 && !x86_64_immediate_operand (addr, VOIDmode))
7718 if (reg)
7720 emit_move_insn (reg, addr);
7721 new_rtx = reg;
7723 else
7724 new_rtx = force_reg (Pmode, addr);
7726 else if (GET_CODE (addr) == CONST)
7728 addr = XEXP (addr, 0);
7730 /* We must match stuff we generated earlier. Assume the only
7731 unspecs that can get here are ours. Not that we could do
7732 anything with them anyway.... */
7733 if (GET_CODE (addr) == UNSPEC
7734 || (GET_CODE (addr) == PLUS
7735 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7736 return orig;
7737 gcc_assert (GET_CODE (addr) == PLUS);
7739 if (GET_CODE (addr) == PLUS)
7741 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7743 /* Check first to see if this is a constant offset from a @GOTOFF
7744 symbol reference. */
7745 if (gotoff_operand (op0, Pmode)
7746 && CONST_INT_P (op1))
7748 if (!TARGET_64BIT)
7750 if (reload_in_progress)
7751 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7752 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7753 UNSPEC_GOTOFF);
7754 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7755 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7756 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7758 if (reg != 0)
7760 emit_move_insn (reg, new_rtx);
7761 new_rtx = reg;
7764 else
7766 if (INTVAL (op1) < -16*1024*1024
7767 || INTVAL (op1) >= 16*1024*1024)
7769 if (!x86_64_immediate_operand (op1, Pmode))
7770 op1 = force_reg (Pmode, op1);
7771 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7775 else
7777 base = legitimize_pic_address (XEXP (addr, 0), reg);
7778 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7779 base == reg ? NULL_RTX : reg);
7781 if (CONST_INT_P (new_rtx))
7782 new_rtx = plus_constant (base, INTVAL (new_rtx));
7783 else
7785 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7787 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7788 new_rtx = XEXP (new_rtx, 1);
7790 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7795 return new_rtx;
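/* For illustration (the symbol name "foo" and register %ebx are
   examples): in 32-bit PIC a local symbol is rewritten above into
   (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOTOFF))),
   printed as foo@GOTOFF(%ebx), while a global symbol becomes a load
   from (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOT))),
   printed as foo@GOT(%ebx).  */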
7798 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7800 static rtx
7801 get_thread_pointer (int to_reg)
7803 rtx tp, reg, insn;
7805 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7806 if (!to_reg)
7807 return tp;
7809 reg = gen_reg_rtx (Pmode);
7810 insn = gen_rtx_SET (VOIDmode, reg, tp);
7811 insn = emit_insn (insn);
7813 return reg;
7816 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7817 false if we expect this to be used for a memory address and true if
7818 we expect to load the address into a register. */
7820 static rtx
7821 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7823 rtx dest, base, off, pic, tp;
7824 int type;
7826 switch (model)
7828 case TLS_MODEL_GLOBAL_DYNAMIC:
7829 dest = gen_reg_rtx (Pmode);
7830 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7832 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7834 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7836 start_sequence ();
7837 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7838 insns = get_insns ();
7839 end_sequence ();
7841 CONST_OR_PURE_CALL_P (insns) = 1;
7842 emit_libcall_block (insns, dest, rax, x);
7844 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7845 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7846 else
7847 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7849 if (TARGET_GNU2_TLS)
7851 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7853 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7855 break;
7857 case TLS_MODEL_LOCAL_DYNAMIC:
7858 base = gen_reg_rtx (Pmode);
7859 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7861 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7863 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7865 start_sequence ();
7866 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7867 insns = get_insns ();
7868 end_sequence ();
7870 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7871 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7872 CONST_OR_PURE_CALL_P (insns) = 1;
7873 emit_libcall_block (insns, base, rax, note);
7875 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7876 emit_insn (gen_tls_local_dynamic_base_64 (base));
7877 else
7878 emit_insn (gen_tls_local_dynamic_base_32 (base));
7880 if (TARGET_GNU2_TLS)
7882 rtx x = ix86_tls_module_base ();
7884 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7885 gen_rtx_MINUS (Pmode, x, tp));
7888 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7889 off = gen_rtx_CONST (Pmode, off);
7891 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7893 if (TARGET_GNU2_TLS)
7895 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7897 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7900 break;
7902 case TLS_MODEL_INITIAL_EXEC:
7903 if (TARGET_64BIT)
7905 pic = NULL;
7906 type = UNSPEC_GOTNTPOFF;
7908 else if (flag_pic)
7910 if (reload_in_progress)
7911 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7912 pic = pic_offset_table_rtx;
7913 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7915 else if (!TARGET_ANY_GNU_TLS)
7917 pic = gen_reg_rtx (Pmode);
7918 emit_insn (gen_set_got (pic));
7919 type = UNSPEC_GOTTPOFF;
7921 else
7923 pic = NULL;
7924 type = UNSPEC_INDNTPOFF;
7927 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7928 off = gen_rtx_CONST (Pmode, off);
7929 if (pic)
7930 off = gen_rtx_PLUS (Pmode, pic, off);
7931 off = gen_const_mem (Pmode, off);
7932 set_mem_alias_set (off, ix86_GOT_alias_set ());
7934 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7936 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7937 off = force_reg (Pmode, off);
7938 return gen_rtx_PLUS (Pmode, base, off);
7940 else
7942 base = get_thread_pointer (true);
7943 dest = gen_reg_rtx (Pmode);
7944 emit_insn (gen_subsi3 (dest, base, off));
7946 break;
7948 case TLS_MODEL_LOCAL_EXEC:
7949 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7950 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7951 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7952 off = gen_rtx_CONST (Pmode, off);
7954 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7956 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7957 return gen_rtx_PLUS (Pmode, base, off);
7959 else
7961 base = get_thread_pointer (true);
7962 dest = gen_reg_rtx (Pmode);
7963 emit_insn (gen_subsi3 (dest, base, off));
7965 break;
7967 default:
7968 gcc_unreachable ();
7971 return dest;
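/* For illustration ("counter" and "get" are example names): the model
   handled above is chosen from how the thread-local variable is
   declared, its visibility and -ftls-model; e.g. for

     __thread int counter;
     int get (void) { return counter; }

   a shared object typically ends up in the global- or local-dynamic
   cases, while code linked into the executable can use the initial- or
   local-exec cases, which address the variable relative to the thread
   pointer.  */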
7974 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7975 to symbol DECL. */
7977 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7978 htab_t dllimport_map;
7980 static tree
7981 get_dllimport_decl (tree decl)
7983 struct tree_map *h, in;
7984 void **loc;
7985 const char *name;
7986 const char *prefix;
7987 size_t namelen, prefixlen;
7988 char *imp_name;
7989 tree to;
7990 rtx rtl;
7992 if (!dllimport_map)
7993 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7995 in.hash = htab_hash_pointer (decl);
7996 in.base.from = decl;
7997 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7998 h = (struct tree_map *) *loc;
7999 if (h)
8000 return h->to;
8002 *loc = h = GGC_NEW (struct tree_map);
8003 h->hash = in.hash;
8004 h->base.from = decl;
8005 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8006 DECL_ARTIFICIAL (to) = 1;
8007 DECL_IGNORED_P (to) = 1;
8008 DECL_EXTERNAL (to) = 1;
8009 TREE_READONLY (to) = 1;
8011 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8012 name = targetm.strip_name_encoding (name);
8013 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8014 namelen = strlen (name);
8015 prefixlen = strlen (prefix);
8016 imp_name = (char *) alloca (namelen + prefixlen + 1);
8017 memcpy (imp_name, prefix, prefixlen);
8018 memcpy (imp_name + prefixlen, name, namelen + 1);
8020 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8021 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8022 SET_SYMBOL_REF_DECL (rtl, to);
8023 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8025 rtl = gen_const_mem (Pmode, rtl);
8026 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8028 SET_DECL_RTL (to, rtl);
8029 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8031 return to;
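/* For illustration ("bar" is an example name): given a declaration like

     __declspec(dllimport) int bar;

   the helper above builds an artificial pointer-typed VAR_DECL whose
   DECL_RTL is a load through an "__imp_"-prefixed import symbol, so
   that references to bar are made indirectly through the import
   table.  */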
8034 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8035 true if we require the result to be a register. */
8037 static rtx
8038 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8040 tree imp_decl;
8041 rtx x;
8043 gcc_assert (SYMBOL_REF_DECL (symbol));
8044 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8046 x = DECL_RTL (imp_decl);
8047 if (want_reg)
8048 x = force_reg (Pmode, x);
8049 return x;
8052 /* Try machine-dependent ways of modifying an illegitimate address
8053 to be legitimate. If we find one, return the new, valid address.
8054 This macro is used in only one place: `memory_address' in explow.c.
8056 OLDX is the address as it was before break_out_memory_refs was called.
8057 In some cases it is useful to look at this to decide what needs to be done.
8059 MODE and WIN are passed so that this macro can use
8060 GO_IF_LEGITIMATE_ADDRESS.
8062 It is always safe for this macro to do nothing. It exists to recognize
8063 opportunities to optimize the output.
8065 For the 80386, we handle X+REG by loading X into a register R and
8066 using R+REG. R will go in a general reg and indexing will be used.
8067 However, if REG is a broken-out memory address or multiplication,
8068 nothing needs to be done because REG can certainly go in a general reg.
8070 When -fpic is used, special handling is needed for symbolic references.
8071 See comments by legitimize_pic_address in i386.c for details. */
8074 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
8076 int changed = 0;
8077 unsigned log;
8079 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8080 if (log)
8081 return legitimize_tls_address (x, (enum tls_model) log, false);
8082 if (GET_CODE (x) == CONST
8083 && GET_CODE (XEXP (x, 0)) == PLUS
8084 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8085 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8087 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8088 (enum tls_model) log, false);
8089 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8092 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8094 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8095 return legitimize_dllimport_symbol (x, true);
8096 if (GET_CODE (x) == CONST
8097 && GET_CODE (XEXP (x, 0)) == PLUS
8098 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8099 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8101 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8102 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8106 if (flag_pic && SYMBOLIC_CONST (x))
8107 return legitimize_pic_address (x, 0);
8109 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8110 if (GET_CODE (x) == ASHIFT
8111 && CONST_INT_P (XEXP (x, 1))
8112 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8114 changed = 1;
8115 log = INTVAL (XEXP (x, 1));
8116 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8117 GEN_INT (1 << log));
8120 if (GET_CODE (x) == PLUS)
8122 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8124 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8125 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8126 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8128 changed = 1;
8129 log = INTVAL (XEXP (XEXP (x, 0), 1));
8130 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8131 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8132 GEN_INT (1 << log));
8135 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8136 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8137 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8139 changed = 1;
8140 log = INTVAL (XEXP (XEXP (x, 1), 1));
8141 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8142 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8143 GEN_INT (1 << log));
8146 /* Put multiply first if it isn't already. */
8147 if (GET_CODE (XEXP (x, 1)) == MULT)
8149 rtx tmp = XEXP (x, 0);
8150 XEXP (x, 0) = XEXP (x, 1);
8151 XEXP (x, 1) = tmp;
8152 changed = 1;
8155 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8156 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8157 created by virtual register instantiation, register elimination, and
8158 similar optimizations. */
8159 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8161 changed = 1;
8162 x = gen_rtx_PLUS (Pmode,
8163 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8164 XEXP (XEXP (x, 1), 0)),
8165 XEXP (XEXP (x, 1), 1));
8168 /* Canonicalize
8169 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8170 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8171 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8172 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8173 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8174 && CONSTANT_P (XEXP (x, 1)))
8176 rtx constant;
8177 rtx other = NULL_RTX;
8179 if (CONST_INT_P (XEXP (x, 1)))
8181 constant = XEXP (x, 1);
8182 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8184 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8186 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8187 other = XEXP (x, 1);
8189 else
8190 constant = 0;
8192 if (constant)
8194 changed = 1;
8195 x = gen_rtx_PLUS (Pmode,
8196 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8197 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8198 plus_constant (other, INTVAL (constant)));
8202 if (changed && legitimate_address_p (mode, x, FALSE))
8203 return x;
8205 if (GET_CODE (XEXP (x, 0)) == MULT)
8207 changed = 1;
8208 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8211 if (GET_CODE (XEXP (x, 1)) == MULT)
8213 changed = 1;
8214 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8217 if (changed
8218 && REG_P (XEXP (x, 1))
8219 && REG_P (XEXP (x, 0)))
8220 return x;
8222 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8224 changed = 1;
8225 x = legitimize_pic_address (x, 0);
8228 if (changed && legitimate_address_p (mode, x, FALSE))
8229 return x;
8231 if (REG_P (XEXP (x, 0)))
8233 rtx temp = gen_reg_rtx (Pmode);
8234 rtx val = force_operand (XEXP (x, 1), temp);
8235 if (val != temp)
8236 emit_move_insn (temp, val);
8238 XEXP (x, 1) = temp;
8239 return x;
8242 else if (REG_P (XEXP (x, 1)))
8244 rtx temp = gen_reg_rtx (Pmode);
8245 rtx val = force_operand (XEXP (x, 0), temp);
8246 if (val != temp)
8247 emit_move_insn (temp, val);
8249 XEXP (x, 0) = temp;
8250 return x;
8254 return x;
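/* For illustration: one effect of the canonicalization above is that an
   address computed as (plus (ashift (reg) (const_int 2)) (reg)) is
   rewritten into (plus (mult (reg) (const_int 4)) (reg)), which matches
   the base + index*scale form ix86_decompose_address expects.  */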
8257 /* Print an integer constant expression in assembler syntax. Addition
8258 and subtraction are the only arithmetic that may appear in these
8259 expressions. FILE is the stdio stream to write to, X is the rtx, and
8260 CODE is the operand print code from the output string. */
8262 static void
8263 output_pic_addr_const (FILE *file, rtx x, int code)
8265 char buf[256];
8267 switch (GET_CODE (x))
8269 case PC:
8270 gcc_assert (flag_pic);
8271 putc ('.', file);
8272 break;
8274 case SYMBOL_REF:
8275 if (! TARGET_MACHO || TARGET_64BIT)
8276 output_addr_const (file, x);
8277 else
8279 const char *name = XSTR (x, 0);
8281 /* Mark the decl as referenced so that cgraph will
8282 output the function. */
8283 if (SYMBOL_REF_DECL (x))
8284 mark_decl_referenced (SYMBOL_REF_DECL (x));
8286 #if TARGET_MACHO
8287 if (MACHOPIC_INDIRECT
8288 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8289 name = machopic_indirection_name (x, /*stub_p=*/true);
8290 #endif
8291 assemble_name (file, name);
8293 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8294 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8295 fputs ("@PLT", file);
8296 break;
8298 case LABEL_REF:
8299 x = XEXP (x, 0);
8300 /* FALLTHRU */
8301 case CODE_LABEL:
8302 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8303 assemble_name (asm_out_file, buf);
8304 break;
8306 case CONST_INT:
8307 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8308 break;
8310 case CONST:
8311 /* This used to output parentheses around the expression,
8312 but that does not work on the 386 (either ATT or BSD assembler). */
8313 output_pic_addr_const (file, XEXP (x, 0), code);
8314 break;
8316 case CONST_DOUBLE:
8317 if (GET_MODE (x) == VOIDmode)
8319 /* We can use %d if the number is <32 bits and positive. */
8320 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8321 fprintf (file, "0x%lx%08lx",
8322 (unsigned long) CONST_DOUBLE_HIGH (x),
8323 (unsigned long) CONST_DOUBLE_LOW (x));
8324 else
8325 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8327 else
8328 /* We can't handle floating point constants;
8329 PRINT_OPERAND must handle them. */
8330 output_operand_lossage ("floating constant misused");
8331 break;
8333 case PLUS:
8334 /* Some assemblers need integer constants to appear first. */
8335 if (CONST_INT_P (XEXP (x, 0)))
8337 output_pic_addr_const (file, XEXP (x, 0), code);
8338 putc ('+', file);
8339 output_pic_addr_const (file, XEXP (x, 1), code);
8341 else
8343 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8344 output_pic_addr_const (file, XEXP (x, 1), code);
8345 putc ('+', file);
8346 output_pic_addr_const (file, XEXP (x, 0), code);
8348 break;
8350 case MINUS:
8351 if (!TARGET_MACHO)
8352 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8353 output_pic_addr_const (file, XEXP (x, 0), code);
8354 putc ('-', file);
8355 output_pic_addr_const (file, XEXP (x, 1), code);
8356 if (!TARGET_MACHO)
8357 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8358 break;
8360 case UNSPEC:
8361 gcc_assert (XVECLEN (x, 0) == 1);
8362 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8363 switch (XINT (x, 1))
8365 case UNSPEC_GOT:
8366 fputs ("@GOT", file);
8367 break;
8368 case UNSPEC_GOTOFF:
8369 fputs ("@GOTOFF", file);
8370 break;
8371 case UNSPEC_PLTOFF:
8372 fputs ("@PLTOFF", file);
8373 break;
8374 case UNSPEC_GOTPCREL:
8375 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8376 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8377 break;
8378 case UNSPEC_GOTTPOFF:
8379 /* FIXME: This might be @TPOFF in Sun ld too. */
8380 fputs ("@GOTTPOFF", file);
8381 break;
8382 case UNSPEC_TPOFF:
8383 fputs ("@TPOFF", file);
8384 break;
8385 case UNSPEC_NTPOFF:
8386 if (TARGET_64BIT)
8387 fputs ("@TPOFF", file);
8388 else
8389 fputs ("@NTPOFF", file);
8390 break;
8391 case UNSPEC_DTPOFF:
8392 fputs ("@DTPOFF", file);
8393 break;
8394 case UNSPEC_GOTNTPOFF:
8395 if (TARGET_64BIT)
8396 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8397 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8398 else
8399 fputs ("@GOTNTPOFF", file);
8400 break;
8401 case UNSPEC_INDNTPOFF:
8402 fputs ("@INDNTPOFF", file);
8403 break;
8404 default:
8405 output_operand_lossage ("invalid UNSPEC as operand");
8406 break;
8408 break;
8410 default:
8411 output_operand_lossage ("invalid expression as operand");
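/* For illustration ("foo" is an example symbol): given
   (const (unspec [foo] UNSPEC_GOTOFF)) the routine above prints
   "foo@GOTOFF", and given UNSPEC_GOTPCREL it prints
   "foo@GOTPCREL(%rip)" in AT&T syntax or "foo@GOTPCREL[rip]" in Intel
   syntax.  */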
8415 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8416 We need to emit DTP-relative relocations. */
8418 static void ATTRIBUTE_UNUSED
8419 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8421 fputs (ASM_LONG, file);
8422 output_addr_const (file, x);
8423 fputs ("@DTPOFF", file);
8424 switch (size)
8426 case 4:
8427 break;
8428 case 8:
8429 fputs (", 0", file);
8430 break;
8431 default:
8432 gcc_unreachable ();
8436 /* In the name of slightly smaller debug output, and to cater to
8437 general assembler lossage, recognize PIC+GOTOFF and turn it back
8438 into a direct symbol reference.
8440 On Darwin, this is necessary to avoid a crash, because Darwin
8441 has a different PIC label for each routine but the DWARF debugging
8442 information is not associated with any particular routine, so it's
8443 necessary to remove references to the PIC label from RTL stored by
8444 the DWARF output code. */
8446 static rtx
8447 ix86_delegitimize_address (rtx orig_x)
8449 rtx x = orig_x;
8450 /* reg_addend is NULL or a multiple of some register. */
8451 rtx reg_addend = NULL_RTX;
8452 /* const_addend is NULL or a const_int. */
8453 rtx const_addend = NULL_RTX;
8454 /* This is the result, or NULL. */
8455 rtx result = NULL_RTX;
8457 if (MEM_P (x))
8458 x = XEXP (x, 0);
8460 if (TARGET_64BIT)
8462 if (GET_CODE (x) != CONST
8463 || GET_CODE (XEXP (x, 0)) != UNSPEC
8464 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8465 || !MEM_P (orig_x))
8466 return orig_x;
8467 return XVECEXP (XEXP (x, 0), 0, 0);
8470 if (GET_CODE (x) != PLUS
8471 || GET_CODE (XEXP (x, 1)) != CONST)
8472 return orig_x;
8474 if (REG_P (XEXP (x, 0))
8475 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8476 /* %ebx + GOT/GOTOFF */
8478 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8480 /* %ebx + %reg * scale + GOT/GOTOFF */
8481 reg_addend = XEXP (x, 0);
8482 if (REG_P (XEXP (reg_addend, 0))
8483 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8484 reg_addend = XEXP (reg_addend, 1);
8485 else if (REG_P (XEXP (reg_addend, 1))
8486 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8487 reg_addend = XEXP (reg_addend, 0);
8488 else
8489 return orig_x;
8490 if (!REG_P (reg_addend)
8491 && GET_CODE (reg_addend) != MULT
8492 && GET_CODE (reg_addend) != ASHIFT)
8493 return orig_x;
8495 else
8496 return orig_x;
8498 x = XEXP (XEXP (x, 1), 0);
8499 if (GET_CODE (x) == PLUS
8500 && CONST_INT_P (XEXP (x, 1)))
8502 const_addend = XEXP (x, 1);
8503 x = XEXP (x, 0);
8506 if (GET_CODE (x) == UNSPEC
8507 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8508 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8509 result = XVECEXP (x, 0, 0);
8511 if (TARGET_MACHO && darwin_local_data_pic (x)
8512 && !MEM_P (orig_x))
8513 result = XEXP (x, 0);
8515 if (! result)
8516 return orig_x;
8518 if (const_addend)
8519 result = gen_rtx_PLUS (Pmode, result, const_addend);
8520 if (reg_addend)
8521 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8522 return result;
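/* For illustration ("foo" is an example symbol): a 32-bit PIC address
   such as (plus (reg ebx) (const (unspec [foo] UNSPEC_GOTOFF))) is
   turned back into plain foo here (plus any constant or register
   addend that was attached), keeping the DWARF output free of the PIC
   register.  */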
8525 /* If X is a machine specific address (i.e. a symbol or label being
8526 referenced as a displacement from the GOT implemented using an
8527 UNSPEC), then return the base term. Otherwise return X. */
8530 ix86_find_base_term (rtx x)
8532 rtx term;
8534 if (TARGET_64BIT)
8536 if (GET_CODE (x) != CONST)
8537 return x;
8538 term = XEXP (x, 0);
8539 if (GET_CODE (term) == PLUS
8540 && (CONST_INT_P (XEXP (term, 1))
8541 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8542 term = XEXP (term, 0);
8543 if (GET_CODE (term) != UNSPEC
8544 || XINT (term, 1) != UNSPEC_GOTPCREL)
8545 return x;
8547 term = XVECEXP (term, 0, 0);
8549 if (GET_CODE (term) != SYMBOL_REF
8550 && GET_CODE (term) != LABEL_REF)
8551 return x;
8553 return term;
8556 term = ix86_delegitimize_address (x);
8558 if (GET_CODE (term) != SYMBOL_REF
8559 && GET_CODE (term) != LABEL_REF)
8560 return x;
8562 return term;
8565 static void
8566 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8567 int fp, FILE *file)
8569 const char *suffix;
8571 if (mode == CCFPmode || mode == CCFPUmode)
8573 enum rtx_code second_code, bypass_code;
8574 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8575 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8576 code = ix86_fp_compare_code_to_integer (code);
8577 mode = CCmode;
8579 if (reverse)
8580 code = reverse_condition (code);
8582 switch (code)
8584 case EQ:
8585 switch (mode)
8587 case CCAmode:
8588 suffix = "a";
8589 break;
8591 case CCCmode:
8592 suffix = "c";
8593 break;
8595 case CCOmode:
8596 suffix = "o";
8597 break;
8599 case CCSmode:
8600 suffix = "s";
8601 break;
8603 default:
8604 suffix = "e";
8606 break;
8607 case NE:
8608 switch (mode)
8610 case CCAmode:
8611 suffix = "na";
8612 break;
8614 case CCCmode:
8615 suffix = "nc";
8616 break;
8618 case CCOmode:
8619 suffix = "no";
8620 break;
8622 case CCSmode:
8623 suffix = "ns";
8624 break;
8626 default:
8627 suffix = "ne";
8629 break;
8630 case GT:
8631 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8632 suffix = "g";
8633 break;
8634 case GTU:
8635 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8636 Those same assemblers have the same but opposite lossage on cmov. */
8637 if (mode == CCmode)
8638 suffix = fp ? "nbe" : "a";
8639 else if (mode == CCCmode)
8640 suffix = "b";
8641 else
8642 gcc_unreachable ();
8643 break;
8644 case LT:
8645 switch (mode)
8647 case CCNOmode:
8648 case CCGOCmode:
8649 suffix = "s";
8650 break;
8652 case CCmode:
8653 case CCGCmode:
8654 suffix = "l";
8655 break;
8657 default:
8658 gcc_unreachable ();
8660 break;
8661 case LTU:
8662 gcc_assert (mode == CCmode || mode == CCCmode);
8663 suffix = "b";
8664 break;
8665 case GE:
8666 switch (mode)
8668 case CCNOmode:
8669 case CCGOCmode:
8670 suffix = "ns";
8671 break;
8673 case CCmode:
8674 case CCGCmode:
8675 suffix = "ge";
8676 break;
8678 default:
8679 gcc_unreachable ();
8681 break;
8682 case GEU:
8683 /* ??? As above. */
8684 gcc_assert (mode == CCmode || mode == CCCmode);
8685 suffix = fp ? "nb" : "ae";
8686 break;
8687 case LE:
8688 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8689 suffix = "le";
8690 break;
8691 case LEU:
8692 /* ??? As above. */
8693 if (mode == CCmode)
8694 suffix = "be";
8695 else if (mode == CCCmode)
8696 suffix = fp ? "nb" : "ae";
8697 else
8698 gcc_unreachable ();
8699 break;
8700 case UNORDERED:
8701 suffix = fp ? "u" : "p";
8702 break;
8703 case ORDERED:
8704 suffix = fp ? "nu" : "np";
8705 break;
8706 default:
8707 gcc_unreachable ();
8709 fputs (suffix, file);
8712 /* Print the name of register X to FILE based on its machine mode and number.
8713 If CODE is 'w', pretend the mode is HImode.
8714 If CODE is 'b', pretend the mode is QImode.
8715 If CODE is 'k', pretend the mode is SImode.
8716 If CODE is 'q', pretend the mode is DImode.
8717 If CODE is 'h', pretend the reg is the 'high' byte register.
8718 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8720 void
8721 print_reg (rtx x, int code, FILE *file)
8723 gcc_assert (x == pc_rtx
8724 || (REGNO (x) != ARG_POINTER_REGNUM
8725 && REGNO (x) != FRAME_POINTER_REGNUM
8726 && REGNO (x) != FLAGS_REG
8727 && REGNO (x) != FPSR_REG
8728 && REGNO (x) != FPCR_REG));
8730 if (ASSEMBLER_DIALECT == ASM_ATT)
8731 putc ('%', file);
8733 if (x == pc_rtx)
8735 gcc_assert (TARGET_64BIT);
8736 fputs ("rip", file);
8737 return;
8740 if (code == 'w' || MMX_REG_P (x))
8741 code = 2;
8742 else if (code == 'b')
8743 code = 1;
8744 else if (code == 'k')
8745 code = 4;
8746 else if (code == 'q')
8747 code = 8;
8748 else if (code == 'y')
8749 code = 3;
8750 else if (code == 'h')
8751 code = 0;
8752 else
8753 code = GET_MODE_SIZE (GET_MODE (x));
8755 /* Irritatingly, AMD extended registers use a different naming convention
8756 from the normal registers. */
8757 if (REX_INT_REG_P (x))
8759 gcc_assert (TARGET_64BIT);
8760 switch (code)
8762 case 0:
8763 error ("extended registers have no high halves");
8764 break;
8765 case 1:
8766 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8767 break;
8768 case 2:
8769 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8770 break;
8771 case 4:
8772 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8773 break;
8774 case 8:
8775 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8776 break;
8777 default:
8778 error ("unsupported operand size for extended register");
8779 break;
8781 return;
8783 switch (code)
8785 case 3:
8786 if (STACK_TOP_P (x))
8788 fputs ("st(0)", file);
8789 break;
8791 /* FALLTHRU */
8792 case 8:
8793 case 4:
8794 case 12:
8795 if (! ANY_FP_REG_P (x))
8796 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8797 /* FALLTHRU */
8798 case 16:
8799 case 2:
8800 normal:
8801 fputs (hi_reg_name[REGNO (x)], file);
8802 break;
8803 case 1:
8804 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8805 goto normal;
8806 fputs (qi_reg_name[REGNO (x)], file);
8807 break;
8808 case 0:
8809 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8810 goto normal;
8811 fputs (qi_high_reg_name[REGNO (x)], file);
8812 break;
8813 default:
8814 gcc_unreachable ();
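/* For illustration: for hard register ax, code 'b' prints "al", 'w'
   prints "ax", 'k' prints "eax" and 'q' prints "rax" (each preceded by
   '%' in AT&T syntax), while the first REX register printed with code
   'k' comes out as "r8d".  */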
8818 /* Locate some local-dynamic symbol still in use by this function
8819 so that we can print its name in some tls_local_dynamic_base
8820 pattern. */
8822 static int
8823 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8825 rtx x = *px;
8827 if (GET_CODE (x) == SYMBOL_REF
8828 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8830 cfun->machine->some_ld_name = XSTR (x, 0);
8831 return 1;
8834 return 0;
8837 static const char *
8838 get_some_local_dynamic_name (void)
8840 rtx insn;
8842 if (cfun->machine->some_ld_name)
8843 return cfun->machine->some_ld_name;
8845 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8846 if (INSN_P (insn)
8847 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8848 return cfun->machine->some_ld_name;
8850 gcc_unreachable ();
8853 /* Meaning of CODE:
8854 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8855 C -- print opcode suffix for set/cmov insn.
8856 c -- like C, but print reversed condition
8857 F,f -- likewise, but for floating-point.
8858 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8859 otherwise nothing
8860 R -- print the prefix for register names.
8861 z -- print the opcode suffix for the size of the current operand.
8862 * -- print a star (in certain assembler syntax)
8863 A -- print an absolute memory reference.
8864 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8865 s -- print a shift double count, followed by the assembler's argument
8866 delimiter.
8867 b -- print the QImode name of the register for the indicated operand.
8868 %b0 would print %al if operands[0] is reg 0.
8869 w -- likewise, print the HImode name of the register.
8870 k -- likewise, print the SImode name of the register.
8871 q -- likewise, print the DImode name of the register.
8872 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8873 y -- print "st(0)" instead of "st" as a register.
8874 D -- print condition for SSE cmp instruction.
8875 P -- if PIC, print an @PLT suffix.
8876 X -- don't print any sort of PIC '@' suffix for a symbol.
8877 & -- print some in-use local-dynamic symbol name.
8878 H -- print a memory address offset by 8; used for sse high-parts
8879 Y -- print condition for SSE5 com* instruction.
8880 + -- print a branch hint as 'cs' or 'ds' prefix
8881 ; -- print a semicolon (after prefixes due to bug in older gas).
8884 void
8885 print_operand (FILE *file, rtx x, int code)
8887 if (code)
8889 switch (code)
8891 case '*':
8892 if (ASSEMBLER_DIALECT == ASM_ATT)
8893 putc ('*', file);
8894 return;
8896 case '&':
8897 assemble_name (file, get_some_local_dynamic_name ());
8898 return;
8900 case 'A':
8901 switch (ASSEMBLER_DIALECT)
8903 case ASM_ATT:
8904 putc ('*', file);
8905 break;
8907 case ASM_INTEL:
8908 /* Intel syntax. For absolute addresses, registers should not
8909 be surrounded by brackets. */
8910 if (!REG_P (x))
8912 putc ('[', file);
8913 PRINT_OPERAND (file, x, 0);
8914 putc (']', file);
8915 return;
8917 break;
8919 default:
8920 gcc_unreachable ();
8923 PRINT_OPERAND (file, x, 0);
8924 return;
8927 case 'L':
8928 if (ASSEMBLER_DIALECT == ASM_ATT)
8929 putc ('l', file);
8930 return;
8932 case 'W':
8933 if (ASSEMBLER_DIALECT == ASM_ATT)
8934 putc ('w', file);
8935 return;
8937 case 'B':
8938 if (ASSEMBLER_DIALECT == ASM_ATT)
8939 putc ('b', file);
8940 return;
8942 case 'Q':
8943 if (ASSEMBLER_DIALECT == ASM_ATT)
8944 putc ('l', file);
8945 return;
8947 case 'S':
8948 if (ASSEMBLER_DIALECT == ASM_ATT)
8949 putc ('s', file);
8950 return;
8952 case 'T':
8953 if (ASSEMBLER_DIALECT == ASM_ATT)
8954 putc ('t', file);
8955 return;
8957 case 'z':
8958 /* 387 opcodes don't get size suffixes if the operands are
8959 registers. */
8960 if (STACK_REG_P (x))
8961 return;
8963 /* Likewise if using Intel opcodes. */
8964 if (ASSEMBLER_DIALECT == ASM_INTEL)
8965 return;
8967 /* Derive the size suffix from the size of the operand. */
8968 switch (GET_MODE_SIZE (GET_MODE (x)))
8970 case 1:
8971 putc ('b', file);
8972 return;
8974 case 2:
8975 if (MEM_P (x))
8977 #ifdef HAVE_GAS_FILDS_FISTS
8978 putc ('s', file);
8979 #endif
8980 return;
8982 else
8983 putc ('w', file);
8984 return;
8986 case 4:
8987 if (GET_MODE (x) == SFmode)
8989 putc ('s', file);
8990 return;
8992 else
8993 putc ('l', file);
8994 return;
8996 case 12:
8997 case 16:
8998 putc ('t', file);
8999 return;
9001 case 8:
9002 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9004 #ifdef GAS_MNEMONICS
9005 putc ('q', file);
9006 #else
9007 putc ('l', file);
9008 putc ('l', file);
9009 #endif
9011 else
9012 putc ('l', file);
9013 return;
9015 default:
9016 gcc_unreachable ();
9019 case 'b':
9020 case 'w':
9021 case 'k':
9022 case 'q':
9023 case 'h':
9024 case 'y':
9025 case 'X':
9026 case 'P':
9027 break;
9029 case 's':
9030 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9032 PRINT_OPERAND (file, x, 0);
9033 putc (',', file);
9035 return;
9037 case 'D':
9038 /* Little bit of braindamage here. The SSE compare instructions
9039 use completely different names for the comparisons than the
9040 fp conditional moves do. */
9041 switch (GET_CODE (x))
9043 case EQ:
9044 case UNEQ:
9045 fputs ("eq", file);
9046 break;
9047 case LT:
9048 case UNLT:
9049 fputs ("lt", file);
9050 break;
9051 case LE:
9052 case UNLE:
9053 fputs ("le", file);
9054 break;
9055 case UNORDERED:
9056 fputs ("unord", file);
9057 break;
9058 case NE:
9059 case LTGT:
9060 fputs ("neq", file);
9061 break;
9062 case UNGE:
9063 case GE:
9064 fputs ("nlt", file);
9065 break;
9066 case UNGT:
9067 case GT:
9068 fputs ("nle", file);
9069 break;
9070 case ORDERED:
9071 fputs ("ord", file);
9072 break;
9073 default:
9074 gcc_unreachable ();
9076 return;
9077 case 'O':
9078 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9079 if (ASSEMBLER_DIALECT == ASM_ATT)
9081 switch (GET_MODE (x))
9083 case HImode: putc ('w', file); break;
9084 case SImode:
9085 case SFmode: putc ('l', file); break;
9086 case DImode:
9087 case DFmode: putc ('q', file); break;
9088 default: gcc_unreachable ();
9090 putc ('.', file);
9092 #endif
9093 return;
9094 case 'C':
9095 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9096 return;
9097 case 'F':
9098 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9099 if (ASSEMBLER_DIALECT == ASM_ATT)
9100 putc ('.', file);
9101 #endif
9102 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9103 return;
9105 /* Like above, but reverse condition */
9106 case 'c':
9107 /* Check to see if argument to %c is really a constant
9108 and not a condition code which needs to be reversed. */
9109 if (!COMPARISON_P (x))
9111 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9112 return;
9114 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9115 return;
9116 case 'f':
9117 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9118 if (ASSEMBLER_DIALECT == ASM_ATT)
9119 putc ('.', file);
9120 #endif
9121 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9122 return;
9124 case 'H':
9125 /* It doesn't actually matter what mode we use here, as we're
9126 only going to use this for printing. */
9127 x = adjust_address_nv (x, DImode, 8);
9128 break;
9130 case '+':
9132 rtx x;
9134 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9135 return;
9137 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9138 if (x)
9140 int pred_val = INTVAL (XEXP (x, 0));
9142 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9143 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9145 int taken = pred_val > REG_BR_PROB_BASE / 2;
9146 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9148 /* Emit hints only in the case the default branch prediction
9149 heuristics would fail. */
9150 if (taken != cputaken)
9152 /* We use 3e (DS) prefix for taken branches and
9153 2e (CS) prefix for not taken branches. */
9154 if (taken)
9155 fputs ("ds ; ", file);
9156 else
9157 fputs ("cs ; ", file);
9161 return;
9164 case 'Y':
9165 switch (GET_CODE (x))
9167 case NE:
9168 fputs ("neq", file);
9169 break;
9170 case EQ:
9171 fputs ("eq", file);
9172 break;
9173 case GE:
9174 case GEU:
9175 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9176 break;
9177 case GT:
9178 case GTU:
9179 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9180 break;
9181 case LE:
9182 case LEU:
9183 fputs ("le", file);
9184 break;
9185 case LT:
9186 case LTU:
9187 fputs ("lt", file);
9188 break;
9189 case UNORDERED:
9190 fputs ("unord", file);
9191 break;
9192 case ORDERED:
9193 fputs ("ord", file);
9194 break;
9195 case UNEQ:
9196 fputs ("ueq", file);
9197 break;
9198 case UNGE:
9199 fputs ("nlt", file);
9200 break;
9201 case UNGT:
9202 fputs ("nle", file);
9203 break;
9204 case UNLE:
9205 fputs ("ule", file);
9206 break;
9207 case UNLT:
9208 fputs ("ult", file);
9209 break;
9210 case LTGT:
9211 fputs ("une", file);
9212 break;
9213 default:
9214 gcc_unreachable ();
9216 return;
9218 case ';':
9219 #if TARGET_MACHO
9220 fputs (" ; ", file);
9221 #else
9222 fputc (' ', file);
9223 #endif
9224 return;
9226 default:
9227 output_operand_lossage ("invalid operand code '%c'", code);
9231 if (REG_P (x))
9232 print_reg (x, code, file);
9234 else if (MEM_P (x))
9236 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9237 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9238 && GET_MODE (x) != BLKmode)
9240 const char * size;
9241 switch (GET_MODE_SIZE (GET_MODE (x)))
9243 case 1: size = "BYTE"; break;
9244 case 2: size = "WORD"; break;
9245 case 4: size = "DWORD"; break;
9246 case 8: size = "QWORD"; break;
9247 case 12: size = "XWORD"; break;
9248 case 16:
9249 if (GET_MODE (x) == XFmode)
9250 size = "XWORD";
9251 else
9252 size = "XMMWORD";
9253 break;
9254 default:
9255 gcc_unreachable ();
9258 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9259 if (code == 'b')
9260 size = "BYTE";
9261 else if (code == 'w')
9262 size = "WORD";
9263 else if (code == 'k')
9264 size = "DWORD";
9266 fputs (size, file);
9267 fputs (" PTR ", file);
9270 x = XEXP (x, 0);
9271 /* Avoid (%rip) for call operands. */
9272 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9273 && !CONST_INT_P (x))
9274 output_addr_const (file, x);
9275 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9276 output_operand_lossage ("invalid constraints for operand");
9277 else
9278 output_address (x);
9281 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9283 REAL_VALUE_TYPE r;
9284 long l;
9286 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9287 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9289 if (ASSEMBLER_DIALECT == ASM_ATT)
9290 putc ('$', file);
9291 fprintf (file, "0x%08lx", (long unsigned int) l);
9294 /* These float cases don't actually occur as immediate operands. */
9295 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9297 char dstr[30];
9299 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9300 fprintf (file, "%s", dstr);
9303 else if (GET_CODE (x) == CONST_DOUBLE
9304 && GET_MODE (x) == XFmode)
9306 char dstr[30];
9308 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9309 fprintf (file, "%s", dstr);
9312 else
9314 /* We have patterns that allow zero sets of memory, for instance.
9315 In 64-bit mode, we should probably support all 8-byte vectors,
9316 since we can in fact encode that into an immediate. */
9317 if (GET_CODE (x) == CONST_VECTOR)
9319 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9320 x = const0_rtx;
9323 if (code != 'P')
9325 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9327 if (ASSEMBLER_DIALECT == ASM_ATT)
9328 putc ('$', file);
9330 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9331 || GET_CODE (x) == LABEL_REF)
9333 if (ASSEMBLER_DIALECT == ASM_ATT)
9334 putc ('$', file);
9335 else
9336 fputs ("OFFSET FLAT:", file);
9339 if (CONST_INT_P (x))
9340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9341 else if (flag_pic)
9342 output_pic_addr_const (file, x, code);
9343 else
9344 output_addr_const (file, x);
9348 /* Print a memory operand whose address is ADDR. */
9350 void
9351 print_operand_address (FILE *file, rtx addr)
9353 struct ix86_address parts;
9354 rtx base, index, disp;
9355 int scale;
9356 int ok = ix86_decompose_address (addr, &parts);
9358 gcc_assert (ok);
9360 base = parts.base;
9361 index = parts.index;
9362 disp = parts.disp;
9363 scale = parts.scale;
9365 switch (parts.seg)
9367 case SEG_DEFAULT:
9368 break;
9369 case SEG_FS:
9370 case SEG_GS:
9371 if (ASSEMBLER_DIALECT == ASM_ATT)
9372 putc ('%', file);
9373 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9374 break;
9375 default:
9376 gcc_unreachable ();
9379 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9380 if (TARGET_64BIT && !base && !index)
9382 rtx symbol = disp;
9384 if (GET_CODE (disp) == CONST
9385 && GET_CODE (XEXP (disp, 0)) == PLUS
9386 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9387 symbol = XEXP (XEXP (disp, 0), 0);
9389 if (GET_CODE (symbol) == LABEL_REF
9390 || (GET_CODE (symbol) == SYMBOL_REF
9391 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9392 base = pc_rtx;
9394 if (!base && !index)
9396 /* Displacement only requires special attention. */
9398 if (CONST_INT_P (disp))
9400 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9401 fputs ("ds:", file);
9402 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9404 else if (flag_pic)
9405 output_pic_addr_const (file, disp, 0);
9406 else
9407 output_addr_const (file, disp);
9409 else
9411 if (ASSEMBLER_DIALECT == ASM_ATT)
9413 if (disp)
9415 if (flag_pic)
9416 output_pic_addr_const (file, disp, 0);
9417 else if (GET_CODE (disp) == LABEL_REF)
9418 output_asm_label (disp);
9419 else
9420 output_addr_const (file, disp);
9423 putc ('(', file);
9424 if (base)
9425 print_reg (base, 0, file);
9426 if (index)
9428 putc (',', file);
9429 print_reg (index, 0, file);
9430 if (scale != 1)
9431 fprintf (file, ",%d", scale);
9433 putc (')', file);
9435 else
9437 rtx offset = NULL_RTX;
9439 if (disp)
9441 /* Pull out the offset of a symbol; print any symbol itself. */
9442 if (GET_CODE (disp) == CONST
9443 && GET_CODE (XEXP (disp, 0)) == PLUS
9444 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9446 offset = XEXP (XEXP (disp, 0), 1);
9447 disp = gen_rtx_CONST (VOIDmode,
9448 XEXP (XEXP (disp, 0), 0));
9451 if (flag_pic)
9452 output_pic_addr_const (file, disp, 0);
9453 else if (GET_CODE (disp) == LABEL_REF)
9454 output_asm_label (disp);
9455 else if (CONST_INT_P (disp))
9456 offset = disp;
9457 else
9458 output_addr_const (file, disp);
9461 putc ('[', file);
9462 if (base)
9464 print_reg (base, 0, file);
9465 if (offset)
9467 if (INTVAL (offset) >= 0)
9468 putc ('+', file);
9469 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9472 else if (offset)
9473 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9474 else
9475 putc ('0', file);
9477 if (index)
9479 putc ('+', file);
9480 print_reg (index, 0, file);
9481 if (scale != 1)
9482 fprintf (file, "*%d", scale);
9484 putc (']', file);
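/* Illustrative example (hypothetical operand values): for base %eax,
   index %ebx, scale 4 and displacement 16, the code above prints
   "16(%eax,%ebx,4)" in the AT&T dialect and "[eax+16+ebx*4]" in the
   Intel dialect.  */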
9489 bool
9490 output_addr_const_extra (FILE *file, rtx x)
9492 rtx op;
9494 if (GET_CODE (x) != UNSPEC)
9495 return false;
9497 op = XVECEXP (x, 0, 0);
9498 switch (XINT (x, 1))
9500 case UNSPEC_GOTTPOFF:
9501 output_addr_const (file, op);
9502 /* FIXME: This might be @TPOFF in Sun ld. */
9503 fputs ("@GOTTPOFF", file);
9504 break;
9505 case UNSPEC_TPOFF:
9506 output_addr_const (file, op);
9507 fputs ("@TPOFF", file);
9508 break;
9509 case UNSPEC_NTPOFF:
9510 output_addr_const (file, op);
9511 if (TARGET_64BIT)
9512 fputs ("@TPOFF", file);
9513 else
9514 fputs ("@NTPOFF", file);
9515 break;
9516 case UNSPEC_DTPOFF:
9517 output_addr_const (file, op);
9518 fputs ("@DTPOFF", file);
9519 break;
9520 case UNSPEC_GOTNTPOFF:
9521 output_addr_const (file, op);
9522 if (TARGET_64BIT)
9523 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9524 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9525 else
9526 fputs ("@GOTNTPOFF", file);
9527 break;
9528 case UNSPEC_INDNTPOFF:
9529 output_addr_const (file, op);
9530 fputs ("@INDNTPOFF", file);
9531 break;
9533 default:
9534 return false;
9537 return true;
9540 /* Split one or more DImode RTL references into pairs of SImode
9541 references. The RTL can be REG, offsettable MEM, integer constant, or
9542 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9543 split and "num" is its length. lo_half and hi_half are output arrays
9544 that parallel "operands". */
9546 void
9547 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9549 while (num--)
9551 rtx op = operands[num];
9553 /* simplify_subreg refuses to split volatile memory addresses,
9554 but we still have to handle them. */
9555 if (MEM_P (op))
9557 lo_half[num] = adjust_address (op, SImode, 0);
9558 hi_half[num] = adjust_address (op, SImode, 4);
9560 else
9562 lo_half[num] = simplify_gen_subreg (SImode, op,
9563 GET_MODE (op) == VOIDmode
9564 ? DImode : GET_MODE (op), 0);
9565 hi_half[num] = simplify_gen_subreg (SImode, op,
9566 GET_MODE (op) == VOIDmode
9567 ? DImode : GET_MODE (op), 4);
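/* Example: a DImode memory operand (mem:DI addr) is split into
   lo_half = (mem:SI addr) and hi_half = (mem:SI addr+4); a DImode
   constant is likewise split into its low and high 32-bit words via
   subreg byte offsets 0 and 4.  */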
9571 /* Split one or more TImode RTL references into pairs of DImode
9572 references. The RTL can be REG, offsettable MEM, integer constant, or
9573 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9574 split and "num" is its length. lo_half and hi_half are output arrays
9575 that parallel "operands". */
9577 void
9578 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9580 while (num--)
9582 rtx op = operands[num];
9584 /* simplify_subreg refuses to split volatile memory addresses, but we
9585 still have to handle them. */
9586 if (MEM_P (op))
9588 lo_half[num] = adjust_address (op, DImode, 0);
9589 hi_half[num] = adjust_address (op, DImode, 8);
9591 else
9593 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9594 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9599 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9600 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9601 is the expression of the binary operation. The output may either be
9602 emitted here, or returned to the caller, like all output_* functions.
9604 There is no guarantee that the operands are the same mode, as they
9605 might be within FLOAT or FLOAT_EXTEND expressions. */
9607 #ifndef SYSV386_COMPAT
9608 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9609 wants to fix the assemblers because that causes incompatibility
9610 with gcc. No-one wants to fix gcc because that causes
9611 incompatibility with assemblers... You can use the option of
9612 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9613 #define SYSV386_COMPAT 1
9614 #endif
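/* Note that SYSV386_COMPAT only affects the non-commutative operations
   (MINUS and DIV) handled in output_387_binary_op below; the PLUS and
   MULT templates never test the macro, since reversing a commutative
   operation is harmless.  */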
9616 const char *
9617 output_387_binary_op (rtx insn, rtx *operands)
9619 static char buf[30];
9620 const char *p;
9621 const char *ssep;
9622 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9624 #ifdef ENABLE_CHECKING
9625 /* Even if we do not want to check the inputs, this documents the input
9626 constraints, which helps in understanding the following code. */
9627 if (STACK_REG_P (operands[0])
9628 && ((REG_P (operands[1])
9629 && REGNO (operands[0]) == REGNO (operands[1])
9630 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9631 || (REG_P (operands[2])
9632 && REGNO (operands[0]) == REGNO (operands[2])
9633 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9634 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9635 ; /* ok */
9636 else
9637 gcc_assert (is_sse);
9638 #endif
9640 switch (GET_CODE (operands[3]))
9642 case PLUS:
9643 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9644 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9645 p = "fiadd";
9646 else
9647 p = "fadd";
9648 ssep = "add";
9649 break;
9651 case MINUS:
9652 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9653 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9654 p = "fisub";
9655 else
9656 p = "fsub";
9657 ssep = "sub";
9658 break;
9660 case MULT:
9661 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9662 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9663 p = "fimul";
9664 else
9665 p = "fmul";
9666 ssep = "mul";
9667 break;
9669 case DIV:
9670 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9671 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9672 p = "fidiv";
9673 else
9674 p = "fdiv";
9675 ssep = "div";
9676 break;
9678 default:
9679 gcc_unreachable ();
9682 if (is_sse)
9684 strcpy (buf, ssep);
9685 if (GET_MODE (operands[0]) == SFmode)
9686 strcat (buf, "ss\t{%2, %0|%0, %2}");
9687 else
9688 strcat (buf, "sd\t{%2, %0|%0, %2}");
9689 return buf;
9691 strcpy (buf, p);
9693 switch (GET_CODE (operands[3]))
9695 case MULT:
9696 case PLUS:
9697 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9699 rtx temp = operands[2];
9700 operands[2] = operands[1];
9701 operands[1] = temp;
9704 /* We now know operands[0] == operands[1]. */
9706 if (MEM_P (operands[2]))
9708 p = "%z2\t%2";
9709 break;
9712 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9714 if (STACK_TOP_P (operands[0]))
9715 /* How is it that we are storing to a dead operand[2]?
9716 Well, presumably operands[1] is dead too. We can't
9717 store the result to st(0) as st(0) gets popped on this
9718 instruction. Instead store to operands[2] (which I
9719 think has to be st(1)). st(1) will be popped later.
9720 gcc <= 2.8.1 didn't have this check and generated
9721 assembly code that the Unixware assembler rejected. */
9722 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9723 else
9724 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9725 break;
9728 if (STACK_TOP_P (operands[0]))
9729 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9730 else
9731 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9732 break;
9734 case MINUS:
9735 case DIV:
9736 if (MEM_P (operands[1]))
9738 p = "r%z1\t%1";
9739 break;
9742 if (MEM_P (operands[2]))
9744 p = "%z2\t%2";
9745 break;
9748 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9750 #if SYSV386_COMPAT
9751 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9752 derived assemblers, confusingly reverse the direction of
9753 the operation for fsub{r} and fdiv{r} when the
9754 destination register is not st(0). The Intel assembler
9755 doesn't have this brain damage. Read !SYSV386_COMPAT to
9756 figure out what the hardware really does. */
9757 if (STACK_TOP_P (operands[0]))
9758 p = "{p\t%0, %2|rp\t%2, %0}";
9759 else
9760 p = "{rp\t%2, %0|p\t%0, %2}";
9761 #else
9762 if (STACK_TOP_P (operands[0]))
9763 /* As above for fmul/fadd, we can't store to st(0). */
9764 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9765 else
9766 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9767 #endif
9768 break;
9771 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9773 #if SYSV386_COMPAT
9774 if (STACK_TOP_P (operands[0]))
9775 p = "{rp\t%0, %1|p\t%1, %0}";
9776 else
9777 p = "{p\t%1, %0|rp\t%0, %1}";
9778 #else
9779 if (STACK_TOP_P (operands[0]))
9780 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9781 else
9782 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9783 #endif
9784 break;
9787 if (STACK_TOP_P (operands[0]))
9789 if (STACK_TOP_P (operands[1]))
9790 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9791 else
9792 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9793 break;
9795 else if (STACK_TOP_P (operands[1]))
9797 #if SYSV386_COMPAT
9798 p = "{\t%1, %0|r\t%0, %1}";
9799 #else
9800 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9801 #endif
9803 else
9805 #if SYSV386_COMPAT
9806 p = "{r\t%2, %0|\t%0, %2}";
9807 #else
9808 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9809 #endif
9811 break;
9813 default:
9814 gcc_unreachable ();
9817 strcat (buf, p);
9818 return buf;
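/* Worked example (illustrative operands): for an SFmode addition with
   all operands in SSE registers the routine above returns
   "addss\t{%2, %0|%0, %2}", while the x87 path with operands[2] in
   memory returns "fadd%z2\t%2".  */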
9821 /* Return needed mode for entity in optimize_mode_switching pass. */
9824 ix86_mode_needed (int entity, rtx insn)
9826 enum attr_i387_cw mode;
9828 /* The mode UNINITIALIZED is used to store the control word after a
9829 function call or ASM pattern. The mode ANY specifies that the function
9830 has no requirements on the control word and makes no changes in the
9831 bits we are interested in. */
9833 if (CALL_P (insn)
9834 || (NONJUMP_INSN_P (insn)
9835 && (asm_noperands (PATTERN (insn)) >= 0
9836 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9837 return I387_CW_UNINITIALIZED;
9839 if (recog_memoized (insn) < 0)
9840 return I387_CW_ANY;
9842 mode = get_attr_i387_cw (insn);
9844 switch (entity)
9846 case I387_TRUNC:
9847 if (mode == I387_CW_TRUNC)
9848 return mode;
9849 break;
9851 case I387_FLOOR:
9852 if (mode == I387_CW_FLOOR)
9853 return mode;
9854 break;
9856 case I387_CEIL:
9857 if (mode == I387_CW_CEIL)
9858 return mode;
9859 break;
9861 case I387_MASK_PM:
9862 if (mode == I387_CW_MASK_PM)
9863 return mode;
9864 break;
9866 default:
9867 gcc_unreachable ();
9870 return I387_CW_ANY;
9873 /* Output code to initialize control word copies used by trunc?f?i and
9874 rounding patterns. MODE selects the required variant; the current
9875 control word is saved and a modified copy is stored in its stack slot. */
9877 void
9878 emit_i387_cw_initialization (int mode)
9880 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9881 rtx new_mode;
9883 enum ix86_stack_slot slot;
9885 rtx reg = gen_reg_rtx (HImode);
9887 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9888 emit_move_insn (reg, copy_rtx (stored_mode));
9890 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9892 switch (mode)
9894 case I387_CW_TRUNC:
9895 /* round toward zero (truncate) */
9896 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9897 slot = SLOT_CW_TRUNC;
9898 break;
9900 case I387_CW_FLOOR:
9901 /* round down toward -oo */
9902 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9903 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9904 slot = SLOT_CW_FLOOR;
9905 break;
9907 case I387_CW_CEIL:
9908 /* round up toward +oo */
9909 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9910 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9911 slot = SLOT_CW_CEIL;
9912 break;
9914 case I387_CW_MASK_PM:
9915 /* mask precision exception for nearbyint() */
9916 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9917 slot = SLOT_CW_MASK_PM;
9918 break;
9920 default:
9921 gcc_unreachable ();
9924 else
9926 switch (mode)
9928 case I387_CW_TRUNC:
9929 /* round toward zero (truncate) */
9930 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9931 slot = SLOT_CW_TRUNC;
9932 break;
9934 case I387_CW_FLOOR:
9935 /* round down toward -oo */
9936 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9937 slot = SLOT_CW_FLOOR;
9938 break;
9940 case I387_CW_CEIL:
9941 /* round up toward +oo */
9942 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9943 slot = SLOT_CW_CEIL;
9944 break;
9946 case I387_CW_MASK_PM:
9947 /* mask precision exception for nearbyint() */
9948 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9949 slot = SLOT_CW_MASK_PM;
9950 break;
9952 default:
9953 gcc_unreachable ();
9957 gcc_assert (slot < MAX_386_STACK_LOCALS);
9959 new_mode = assign_386_stack_local (HImode, slot);
9960 emit_move_insn (new_mode, reg);
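/* Background for the bit twiddling above: in the x87 control word the
   rounding-control field is bits 10-11 (mask 0x0c00), where 0x0000 is
   round-to-nearest, 0x0400 rounds down, 0x0800 rounds up and 0x0c00
   truncates toward zero; bit 5 (0x0020) is the precision exception
   mask used for nearbyint.  */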
9963 /* Output code for INSN to convert a float to a signed int. OPERANDS
9964 are the insn operands. The output may be [HSD]Imode and the input
9965 operand may be [SDX]Fmode. */
9967 const char *
9968 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9970 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9971 int dimode_p = GET_MODE (operands[0]) == DImode;
9972 int round_mode = get_attr_i387_cw (insn);
9974 /* Jump through a hoop or two for DImode, since the hardware has no
9975 non-popping instruction. We used to do this a different way, but
9976 that was somewhat fragile and broke with post-reload splitters. */
9977 if ((dimode_p || fisttp) && !stack_top_dies)
9978 output_asm_insn ("fld\t%y1", operands);
9980 gcc_assert (STACK_TOP_P (operands[1]));
9981 gcc_assert (MEM_P (operands[0]));
9982 gcc_assert (GET_MODE (operands[1]) != TFmode);
9984 if (fisttp)
9985 output_asm_insn ("fisttp%z0\t%0", operands);
9986 else
9988 if (round_mode != I387_CW_ANY)
9989 output_asm_insn ("fldcw\t%3", operands);
9990 if (stack_top_dies || dimode_p)
9991 output_asm_insn ("fistp%z0\t%0", operands);
9992 else
9993 output_asm_insn ("fist%z0\t%0", operands);
9994 if (round_mode != I387_CW_ANY)
9995 output_asm_insn ("fldcw\t%2", operands);
9998 return "";
10001 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10002 have the values zero or one, indicates the ffreep insn's operand
10003 from the OPERANDS array. */
10005 static const char *
10006 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10008 if (TARGET_USE_FFREEP)
10009 #if HAVE_AS_IX86_FFREEP
10010 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
10011 #else
10013 static char retval[] = ".word\t0xc_df";
10014 int regno = REGNO (operands[opno]);
10016 gcc_assert (FP_REGNO_P (regno));
10018 retval[9] = '0' + (regno - FIRST_STACK_REG);
10019 return retval;
10021 #endif
10023 return opno ? "fstp\t%y1" : "fstp\t%y0";
10027 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10028 should be used. UNORDERED_P is true when fucom should be used. */
10030 const char *
10031 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10033 int stack_top_dies;
10034 rtx cmp_op0, cmp_op1;
10035 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
10037 if (eflags_p)
10039 cmp_op0 = operands[0];
10040 cmp_op1 = operands[1];
10042 else
10044 cmp_op0 = operands[1];
10045 cmp_op1 = operands[2];
10048 if (is_sse)
10050 if (GET_MODE (operands[0]) == SFmode)
10051 if (unordered_p)
10052 return "ucomiss\t{%1, %0|%0, %1}";
10053 else
10054 return "comiss\t{%1, %0|%0, %1}";
10055 else
10056 if (unordered_p)
10057 return "ucomisd\t{%1, %0|%0, %1}";
10058 else
10059 return "comisd\t{%1, %0|%0, %1}";
10062 gcc_assert (STACK_TOP_P (cmp_op0));
10064 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10066 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10068 if (stack_top_dies)
10070 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10071 return output_387_ffreep (operands, 1);
10073 else
10074 return "ftst\n\tfnstsw\t%0";
10077 if (STACK_REG_P (cmp_op1)
10078 && stack_top_dies
10079 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10080 && REGNO (cmp_op1) != FIRST_STACK_REG)
10082 /* If the top of the 387 stack dies and the other operand is also
10083 a stack register that dies, then this must be a `fcompp'
10084 float compare. */
10086 if (eflags_p)
10088 /* There is no double popping fcomi variant. Fortunately,
10089 eflags is immune from the fstp's cc clobbering. */
10090 if (unordered_p)
10091 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10092 else
10093 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10094 return output_387_ffreep (operands, 0);
10096 else
10098 if (unordered_p)
10099 return "fucompp\n\tfnstsw\t%0";
10100 else
10101 return "fcompp\n\tfnstsw\t%0";
10104 else
10106 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10108 static const char * const alt[16] =
10110 "fcom%z2\t%y2\n\tfnstsw\t%0",
10111 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10112 "fucom%z2\t%y2\n\tfnstsw\t%0",
10113 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10115 "ficom%z2\t%y2\n\tfnstsw\t%0",
10116 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10117 NULL,
10118 NULL,
10120 "fcomi\t{%y1, %0|%0, %y1}",
10121 "fcomip\t{%y1, %0|%0, %y1}",
10122 "fucomi\t{%y1, %0|%0, %y1}",
10123 "fucomip\t{%y1, %0|%0, %y1}",
10125 NULL,
10126 NULL,
10127 NULL,
10128 NULL
10131 int mask;
10132 const char *ret;
10134 mask = eflags_p << 3;
10135 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10136 mask |= unordered_p << 1;
10137 mask |= stack_top_dies;
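/* Example: an unordered eflags compare of two stack registers where the
   top of the stack dies gives mask = 8 + 0 + 2 + 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */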
10139 gcc_assert (mask < 16);
10140 ret = alt[mask];
10141 gcc_assert (ret);
10143 return ret;
10147 void
10148 ix86_output_addr_vec_elt (FILE *file, int value)
10150 const char *directive = ASM_LONG;
10152 #ifdef ASM_QUAD
10153 if (TARGET_64BIT)
10154 directive = ASM_QUAD;
10155 #else
10156 gcc_assert (!TARGET_64BIT);
10157 #endif
10159 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10162 void
10163 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10165 const char *directive = ASM_LONG;
10167 #ifdef ASM_QUAD
10168 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10169 directive = ASM_QUAD;
10170 #else
10171 gcc_assert (!TARGET_64BIT);
10172 #endif
10173 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10174 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10175 fprintf (file, "%s%s%d-%s%d\n",
10176 directive, LPREFIX, value, LPREFIX, rel);
10177 else if (HAVE_AS_GOTOFF_IN_DATA)
10178 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10179 #if TARGET_MACHO
10180 else if (TARGET_MACHO)
10182 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10183 machopic_output_function_base_name (file);
10184 fprintf(file, "\n");
10186 #endif
10187 else
10188 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10189 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10192 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10193 for the target. */
10195 void
10196 ix86_expand_clear (rtx dest)
10198 rtx tmp;
10200 /* We play register width games, which are only valid after reload. */
10201 gcc_assert (reload_completed);
10203 /* Avoid HImode and its attendant prefix byte. */
10204 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10205 dest = gen_rtx_REG (SImode, REGNO (dest));
10206 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10208 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10209 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10211 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10212 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10215 emit_insn (tmp);
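/* Note on the xor path above: "xorl %eax, %eax" is a 2-byte encoding
   versus 5 bytes for "movl $0, %eax", but it clobbers the flags, which
   is why the set is wrapped in a PARALLEL with a FLAGS_REG clobber.  */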
10218 /* X is an unchanging MEM. If it is a constant pool reference, return
10219 the constant pool rtx, else NULL. */
10222 maybe_get_pool_constant (rtx x)
10224 x = ix86_delegitimize_address (XEXP (x, 0));
10226 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10227 return get_pool_constant (x);
10229 return NULL_RTX;
10232 void
10233 ix86_expand_move (enum machine_mode mode, rtx operands[])
10235 rtx op0, op1;
10236 enum tls_model model;
10238 op0 = operands[0];
10239 op1 = operands[1];
10241 if (GET_CODE (op1) == SYMBOL_REF)
10243 model = SYMBOL_REF_TLS_MODEL (op1);
10244 if (model)
10246 op1 = legitimize_tls_address (op1, model, true);
10247 op1 = force_operand (op1, op0);
10248 if (op1 == op0)
10249 return;
10251 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10252 && SYMBOL_REF_DLLIMPORT_P (op1))
10253 op1 = legitimize_dllimport_symbol (op1, false);
10255 else if (GET_CODE (op1) == CONST
10256 && GET_CODE (XEXP (op1, 0)) == PLUS
10257 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10259 rtx addend = XEXP (XEXP (op1, 0), 1);
10260 rtx symbol = XEXP (XEXP (op1, 0), 0);
10261 rtx tmp = NULL;
10263 model = SYMBOL_REF_TLS_MODEL (symbol);
10264 if (model)
10265 tmp = legitimize_tls_address (symbol, model, true);
10266 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10267 && SYMBOL_REF_DLLIMPORT_P (symbol))
10268 tmp = legitimize_dllimport_symbol (symbol, true);
10270 if (tmp)
10272 tmp = force_operand (tmp, NULL);
10273 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10274 op0, 1, OPTAB_DIRECT);
10275 if (tmp == op0)
10276 return;
10280 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10282 if (TARGET_MACHO && !TARGET_64BIT)
10284 #if TARGET_MACHO
10285 if (MACHOPIC_PURE)
10287 rtx temp = ((reload_in_progress
10288 || ((op0 && REG_P (op0))
10289 && mode == Pmode))
10290 ? op0 : gen_reg_rtx (Pmode));
10291 op1 = machopic_indirect_data_reference (op1, temp);
10292 op1 = machopic_legitimize_pic_address (op1, mode,
10293 temp == op1 ? 0 : temp);
10295 else if (MACHOPIC_INDIRECT)
10296 op1 = machopic_indirect_data_reference (op1, 0);
10297 if (op0 == op1)
10298 return;
10299 #endif
10301 else
10303 if (MEM_P (op0))
10304 op1 = force_reg (Pmode, op1);
10305 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10307 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10308 op1 = legitimize_pic_address (op1, reg);
10309 if (op0 == op1)
10310 return;
10314 else
10316 if (MEM_P (op0)
10317 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10318 || !push_operand (op0, mode))
10319 && MEM_P (op1))
10320 op1 = force_reg (mode, op1);
10322 if (push_operand (op0, mode)
10323 && ! general_no_elim_operand (op1, mode))
10324 op1 = copy_to_mode_reg (mode, op1);
10326 /* Force large constants in 64-bit compilation into a register
10327 so that they get CSEed. */
10328 if (can_create_pseudo_p ()
10329 && (mode == DImode) && TARGET_64BIT
10330 && immediate_operand (op1, mode)
10331 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10332 && !register_operand (op0, mode)
10333 && optimize)
10334 op1 = copy_to_mode_reg (mode, op1);
10336 if (can_create_pseudo_p ()
10337 && FLOAT_MODE_P (mode)
10338 && GET_CODE (op1) == CONST_DOUBLE)
10340 /* If we are loading a floating point constant into a register,
10341 force the value to memory now, since we'll get better code
10342 out of the back end. */
10344 op1 = validize_mem (force_const_mem (mode, op1));
10345 if (!register_operand (op0, mode))
10347 rtx temp = gen_reg_rtx (mode);
10348 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10349 emit_move_insn (op0, temp);
10350 return;
10355 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10358 void
10359 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10361 rtx op0 = operands[0], op1 = operands[1];
10362 unsigned int align = GET_MODE_ALIGNMENT (mode);
10364 /* Force constants other than zero into memory. We do not know how
10365 the instructions used to build constants modify the upper 64 bits
10366 of the register; once we have that information we may be able
10367 to handle some of them more efficiently. */
10368 if (can_create_pseudo_p ()
10369 && register_operand (op0, mode)
10370 && (CONSTANT_P (op1)
10371 || (GET_CODE (op1) == SUBREG
10372 && CONSTANT_P (SUBREG_REG (op1))))
10373 && standard_sse_constant_p (op1) <= 0)
10374 op1 = validize_mem (force_const_mem (mode, op1));
10376 /* TDmode values are passed as TImode on the stack. TImode values
10377 are moved via xmm registers, and moving them to the stack can result in
10378 unaligned memory access. Use ix86_expand_vector_move_misalign()
10379 if the memory operand is not aligned correctly. */
10380 if (can_create_pseudo_p ()
10381 && (mode == TImode) && !TARGET_64BIT
10382 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10383 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10385 rtx tmp[2];
10387 /* ix86_expand_vector_move_misalign() does not like constants ... */
10388 if (CONSTANT_P (op1)
10389 || (GET_CODE (op1) == SUBREG
10390 && CONSTANT_P (SUBREG_REG (op1))))
10391 op1 = validize_mem (force_const_mem (mode, op1));
10393 /* ... nor both arguments in memory. */
10394 if (!register_operand (op0, mode)
10395 && !register_operand (op1, mode))
10396 op1 = force_reg (mode, op1);
10398 tmp[0] = op0; tmp[1] = op1;
10399 ix86_expand_vector_move_misalign (mode, tmp);
10400 return;
10403 /* Make operand1 a register if it isn't already. */
10404 if (can_create_pseudo_p ()
10405 && !register_operand (op0, mode)
10406 && !register_operand (op1, mode))
10408 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10409 return;
10412 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10415 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10416 straight to ix86_expand_vector_move. */
10417 /* Code generation for scalar reg-reg moves of single and double precision data:
10418 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10419 movaps reg, reg
10420 else
10421 movss reg, reg
10422 if (x86_sse_partial_reg_dependency == true)
10423 movapd reg, reg
10424 else
10425 movsd reg, reg
10427 Code generation for scalar loads of double precision data:
10428 if (x86_sse_split_regs == true)
10429 movlpd mem, reg (gas syntax)
10430 else
10431 movsd mem, reg
10433 Code generation for unaligned packed loads of single precision data
10434 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10435 if (x86_sse_unaligned_move_optimal)
10436 movups mem, reg
10438 if (x86_sse_partial_reg_dependency == true)
10440 xorps reg, reg
10441 movlps mem, reg
10442 movhps mem+8, reg
10444 else
10446 movlps mem, reg
10447 movhps mem+8, reg
10450 Code generation for unaligned packed loads of double precision data
10451 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10452 if (x86_sse_unaligned_move_optimal)
10453 movupd mem, reg
10455 if (x86_sse_split_regs == true)
10457 movlpd mem, reg
10458 movhpd mem+8, reg
10460 else
10462 movsd mem, reg
10463 movhpd mem+8, reg
10467 void
10468 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10470 rtx op0, op1, m;
10472 op0 = operands[0];
10473 op1 = operands[1];
10475 if (MEM_P (op1))
10477 /* If we're optimizing for size, movups is the smallest. */
10478 if (optimize_size)
10480 op0 = gen_lowpart (V4SFmode, op0);
10481 op1 = gen_lowpart (V4SFmode, op1);
10482 emit_insn (gen_sse_movups (op0, op1));
10483 return;
10486 /* ??? If we have typed data, then it would appear that using
10487 movdqu is the only way to get unaligned data loaded with
10488 integer type. */
10489 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10491 op0 = gen_lowpart (V16QImode, op0);
10492 op1 = gen_lowpart (V16QImode, op1);
10493 emit_insn (gen_sse2_movdqu (op0, op1));
10494 return;
10497 if (TARGET_SSE2 && mode == V2DFmode)
10499 rtx zero;
10501 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10503 op0 = gen_lowpart (V2DFmode, op0);
10504 op1 = gen_lowpart (V2DFmode, op1);
10505 emit_insn (gen_sse2_movupd (op0, op1));
10506 return;
10509 /* When SSE registers are split into halves, we can avoid
10510 writing to the top half twice. */
10511 if (TARGET_SSE_SPLIT_REGS)
10513 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10514 zero = op0;
10516 else
10518 /* ??? Not sure about the best option for the Intel chips.
10519 The following would seem to satisfy; the register is
10520 entirely cleared, breaking the dependency chain. We
10521 then store to the upper half, with a dependency depth
10522 of one. A rumor has it that Intel recommends two movsd
10523 followed by an unpacklpd, but this is unconfirmed. And
10524 given that the dependency depth of the unpacklpd would
10525 still be one, I'm not sure why this would be better. */
10526 zero = CONST0_RTX (V2DFmode);
10529 m = adjust_address (op1, DFmode, 0);
10530 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10531 m = adjust_address (op1, DFmode, 8);
10532 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10534 else
10536 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10538 op0 = gen_lowpart (V4SFmode, op0);
10539 op1 = gen_lowpart (V4SFmode, op1);
10540 emit_insn (gen_sse_movups (op0, op1));
10541 return;
10544 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10545 emit_move_insn (op0, CONST0_RTX (mode));
10546 else
10547 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10549 if (mode != V4SFmode)
10550 op0 = gen_lowpart (V4SFmode, op0);
10551 m = adjust_address (op1, V2SFmode, 0);
10552 emit_insn (gen_sse_loadlps (op0, op0, m));
10553 m = adjust_address (op1, V2SFmode, 8);
10554 emit_insn (gen_sse_loadhps (op0, op0, m));
10557 else if (MEM_P (op0))
10559 /* If we're optimizing for size, movups is the smallest. */
10560 if (optimize_size)
10562 op0 = gen_lowpart (V4SFmode, op0);
10563 op1 = gen_lowpart (V4SFmode, op1);
10564 emit_insn (gen_sse_movups (op0, op1));
10565 return;
10568 /* ??? Similar to the above, only less clear because of the
10569 "typeless stores" issue. */
10570 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10571 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10573 op0 = gen_lowpart (V16QImode, op0);
10574 op1 = gen_lowpart (V16QImode, op1);
10575 emit_insn (gen_sse2_movdqu (op0, op1));
10576 return;
10579 if (TARGET_SSE2 && mode == V2DFmode)
10581 m = adjust_address (op0, DFmode, 0);
10582 emit_insn (gen_sse2_storelpd (m, op1));
10583 m = adjust_address (op0, DFmode, 8);
10584 emit_insn (gen_sse2_storehpd (m, op1));
10586 else
10588 if (mode != V4SFmode)
10589 op1 = gen_lowpart (V4SFmode, op1);
10590 m = adjust_address (op0, V2SFmode, 0);
10591 emit_insn (gen_sse_storelps (m, op1));
10592 m = adjust_address (op0, V2SFmode, 8);
10593 emit_insn (gen_sse_storehps (m, op1));
10596 else
10597 gcc_unreachable ();
10600 /* Expand a push in MODE. This is some mode for which we do not support
10601 proper push instructions, at least from the registers that we expect
10602 the value to live in. */
10604 void
10605 ix86_expand_push (enum machine_mode mode, rtx x)
10607 rtx tmp;
10609 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10610 GEN_INT (-GET_MODE_SIZE (mode)),
10611 stack_pointer_rtx, 1, OPTAB_DIRECT);
10612 if (tmp != stack_pointer_rtx)
10613 emit_move_insn (stack_pointer_rtx, tmp);
10615 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10616 emit_move_insn (tmp, x);
10619 /* Helper function of ix86_fixup_binary_operands to canonicalize
10620 operand order. Returns true if the operands should be swapped. */
10622 static bool
10623 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10624 rtx operands[])
10626 rtx dst = operands[0];
10627 rtx src1 = operands[1];
10628 rtx src2 = operands[2];
10630 /* If the operation is not commutative, we can't do anything. */
10631 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10632 return false;
10634 /* Highest priority is that src1 should match dst. */
10635 if (rtx_equal_p (dst, src1))
10636 return false;
10637 if (rtx_equal_p (dst, src2))
10638 return true;
10640 /* Next highest priority is that immediate constants come second. */
10641 if (immediate_operand (src2, mode))
10642 return false;
10643 if (immediate_operand (src1, mode))
10644 return true;
10646 /* Lowest priority is that memory references should come second. */
10647 if (MEM_P (src2))
10648 return false;
10649 if (MEM_P (src1))
10650 return true;
10652 return false;
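/* Example: for (plus:SI (const_int 1) (reg:SI a)) with a destination
   matching neither source, src1 is an immediate and src2 is not, so
   the function returns true and the caller swaps the operands, putting
   the constant second as the insn patterns expect.  */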
10656 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10657 destination to use for the operation. If different from the true
10658 destination in operands[0], a copy operation will be required. */
10661 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10662 rtx operands[])
10664 rtx dst = operands[0];
10665 rtx src1 = operands[1];
10666 rtx src2 = operands[2];
10668 /* Canonicalize operand order. */
10669 if (ix86_swap_binary_operands_p (code, mode, operands))
10671 rtx temp = src1;
10672 src1 = src2;
10673 src2 = temp;
10676 /* Both source operands cannot be in memory. */
10677 if (MEM_P (src1) && MEM_P (src2))
10679 /* Optimization: Only read from memory once. */
10680 if (rtx_equal_p (src1, src2))
10682 src2 = force_reg (mode, src2);
10683 src1 = src2;
10685 else
10686 src2 = force_reg (mode, src2);
10689 /* If the destination is memory, and we do not have matching source
10690 operands, do things in registers. */
10691 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10692 dst = gen_reg_rtx (mode);
10694 /* Source 1 cannot be a constant. */
10695 if (CONSTANT_P (src1))
10696 src1 = force_reg (mode, src1);
10698 /* Source 1 cannot be a non-matching memory. */
10699 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10700 src1 = force_reg (mode, src1);
10702 operands[1] = src1;
10703 operands[2] = src2;
10704 return dst;
10707 /* Similarly, but assume that the destination has already been
10708 set up properly. */
10710 void
10711 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10712 enum machine_mode mode, rtx operands[])
10714 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10715 gcc_assert (dst == operands[0]);
10718 /* Attempt to expand a binary operator. Make the expansion closer to the
10719 actual machine than just general_operand, which would allow 3 separate
10720 memory references (one output, two input) in a single insn. */
10722 void
10723 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10724 rtx operands[])
10726 rtx src1, src2, dst, op, clob;
10728 dst = ix86_fixup_binary_operands (code, mode, operands);
10729 src1 = operands[1];
10730 src2 = operands[2];
10732 /* Emit the instruction. */
10734 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10735 if (reload_in_progress)
10737 /* Reload doesn't know about the flags register, and doesn't know that
10738 it doesn't want to clobber it. We can only do this with PLUS. */
10739 gcc_assert (code == PLUS);
10740 emit_insn (op);
10742 else
10744 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10745 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10748 /* Fix up the destination if needed. */
10749 if (dst != operands[0])
10750 emit_move_insn (operands[0], dst);
10753 /* Return TRUE or FALSE depending on whether the binary operator meets the
10754 appropriate constraints. */
10757 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10758 rtx operands[3])
10760 rtx dst = operands[0];
10761 rtx src1 = operands[1];
10762 rtx src2 = operands[2];
10764 /* Both source operands cannot be in memory. */
10765 if (MEM_P (src1) && MEM_P (src2))
10766 return 0;
10768 /* Canonicalize operand order for commutative operators. */
10769 if (ix86_swap_binary_operands_p (code, mode, operands))
10771 rtx temp = src1;
10772 src1 = src2;
10773 src2 = temp;
10776 /* If the destination is memory, we must have a matching source operand. */
10777 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10778 return 0;
10780 /* Source 1 cannot be a constant. */
10781 if (CONSTANT_P (src1))
10782 return 0;
10784 /* Source 1 cannot be a non-matching memory. */
10785 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10786 return 0;
10788 return 1;
10791 /* Attempt to expand a unary operator. Make the expansion closer to the
10792 actual machine than just general_operand, which would allow 2 separate
10793 memory references (one output, one input) in a single insn. */
10795 void
10796 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10797 rtx operands[])
10799 int matching_memory;
10800 rtx src, dst, op, clob;
10802 dst = operands[0];
10803 src = operands[1];
10805 /* If the destination is memory, and we do not have matching source
10806 operands, do things in registers. */
10807 matching_memory = 0;
10808 if (MEM_P (dst))
10810 if (rtx_equal_p (dst, src))
10811 matching_memory = 1;
10812 else
10813 dst = gen_reg_rtx (mode);
10816 /* When source operand is memory, destination must match. */
10817 if (MEM_P (src) && !matching_memory)
10818 src = force_reg (mode, src);
10820 /* Emit the instruction. */
10822 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10823 if (reload_in_progress || code == NOT)
10825 /* Reload doesn't know about the flags register, and doesn't know that
10826 it doesn't want to clobber it. */
10827 gcc_assert (code == NOT);
10828 emit_insn (op);
10830 else
10832 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10833 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10836 /* Fix up the destination if needed. */
10837 if (dst != operands[0])
10838 emit_move_insn (operands[0], dst);
10841 /* Return TRUE or FALSE depending on whether the unary operator meets the
10842 appropriate constraints. */
10845 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10846 enum machine_mode mode ATTRIBUTE_UNUSED,
10847 rtx operands[2] ATTRIBUTE_UNUSED)
10849 /* If one of the operands is memory, source and destination must match. */
10850 if ((MEM_P (operands[0])
10851 || MEM_P (operands[1]))
10852 && ! rtx_equal_p (operands[0], operands[1]))
10853 return FALSE;
10854 return TRUE;
10857 /* Post-reload splitter for converting an SF or DFmode value in an
10858 SSE register into an unsigned SImode. */
10860 void
10861 ix86_split_convert_uns_si_sse (rtx operands[])
10863 enum machine_mode vecmode;
10864 rtx value, large, zero_or_two31, input, two31, x;
10866 large = operands[1];
10867 zero_or_two31 = operands[2];
10868 input = operands[3];
10869 two31 = operands[4];
10870 vecmode = GET_MODE (large);
10871 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10873 /* Load up the value into the low element. We must ensure that the other
10874 elements are valid floats -- zero is the easiest such value. */
10875 if (MEM_P (input))
10877 if (vecmode == V4SFmode)
10878 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10879 else
10880 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10882 else
10884 input = gen_rtx_REG (vecmode, REGNO (input));
10885 emit_move_insn (value, CONST0_RTX (vecmode));
10886 if (vecmode == V4SFmode)
10887 emit_insn (gen_sse_movss (value, value, input));
10888 else
10889 emit_insn (gen_sse2_movsd (value, value, input));
10892 emit_move_insn (large, two31);
10893 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10895 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10896 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10898 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10899 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10901 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10902 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10904 large = gen_rtx_REG (V4SImode, REGNO (large));
10905 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10907 x = gen_rtx_REG (V4SImode, REGNO (value));
10908 if (vecmode == V4SFmode)
10909 emit_insn (gen_sse2_cvttps2dq (x, value));
10910 else
10911 emit_insn (gen_sse2_cvttpd2dq (x, value));
10912 value = x;
10914 emit_insn (gen_xorv4si3 (value, value, large));
10917 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10918 Expects the 64-bit DImode to be supplied in a pair of integral
10919 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10920 -mfpmath=sse, !optimize_size only. */
10922 void
10923 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10925 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10926 rtx int_xmm, fp_xmm;
10927 rtx biases, exponents;
10928 rtx x;
10930 int_xmm = gen_reg_rtx (V4SImode);
10931 if (TARGET_INTER_UNIT_MOVES)
10932 emit_insn (gen_movdi_to_sse (int_xmm, input));
10933 else if (TARGET_SSE_SPLIT_REGS)
10935 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10936 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10938 else
10940 x = gen_reg_rtx (V2DImode);
10941 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10942 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10945 x = gen_rtx_CONST_VECTOR (V4SImode,
10946 gen_rtvec (4, GEN_INT (0x43300000UL),
10947 GEN_INT (0x45300000UL),
10948 const0_rtx, const0_rtx));
10949 exponents = validize_mem (force_const_mem (V4SImode, x));
10951 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10952 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10954 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10955 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10956 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10957 (0x1.0p84 + double(fp_value_hi_xmm)).
10958 Note these exponents differ by 32. */
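/* Worked example (hypothetical input 0x0000000500000003): the low word
   packed with exponent 0x43300000 reads as the double 0x1.0p52 + 3,
   and the high word packed with 0x45300000 reads as
   0x1.0p84 + 5 * 0x1.0p32; subtracting the two biases below leaves
   3 and 5 * 2^32, whose sum is the original unsigned value.  */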
10960 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10962 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10963 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10964 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10965 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10966 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10967 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10968 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10969 biases = validize_mem (force_const_mem (V2DFmode, biases));
10970 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10972 /* Add the upper and lower DFmode values together. */
10973 if (TARGET_SSE3)
10974 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10975 else
10977 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10978 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10979 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10982 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10985 /* Not used, but eases macroization of patterns. */
10986 void
10987 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
10988 rtx input ATTRIBUTE_UNUSED)
10990 gcc_unreachable ();
10993 /* Convert an unsigned SImode value into a DFmode. Only currently used
10994 for SSE, but applicable anywhere. */
10996 void
10997 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10999 REAL_VALUE_TYPE TWO31r;
11000 rtx x, fp;
11002 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11003 NULL, 1, OPTAB_DIRECT);
11005 fp = gen_reg_rtx (DFmode);
11006 emit_insn (gen_floatsidf2 (fp, x));
11008 real_ldexp (&TWO31r, &dconst1, 31);
11009 x = const_double_from_real_value (TWO31r, DFmode);
11011 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
11012 if (x != target)
11013 emit_move_insn (target, x);
11016 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11017 32-bit mode; otherwise we have a direct convert instruction. */
11019 void
11020 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11022 REAL_VALUE_TYPE TWO32r;
11023 rtx fp_lo, fp_hi, x;
11025 fp_lo = gen_reg_rtx (DFmode);
11026 fp_hi = gen_reg_rtx (DFmode);
11028 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11030 real_ldexp (&TWO32r, &dconst1, 32);
11031 x = const_double_from_real_value (TWO32r, DFmode);
11032 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11034 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11036 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
11037 0, OPTAB_DIRECT);
11038 if (x != target)
11039 emit_move_insn (target, x);
11042 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11043 For x86_32, -mfpmath=sse, !optimize_size only. */
11044 void
11045 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11047 REAL_VALUE_TYPE ONE16r;
11048 rtx fp_hi, fp_lo, int_hi, int_lo, x;
11050 real_ldexp (&ONE16r, &dconst1, 16);
11051 x = const_double_from_real_value (ONE16r, SFmode);
11052 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11053 NULL, 0, OPTAB_DIRECT);
11054 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11055 NULL, 0, OPTAB_DIRECT);
11056 fp_hi = gen_reg_rtx (SFmode);
11057 fp_lo = gen_reg_rtx (SFmode);
11058 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11059 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11060 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11061 0, OPTAB_DIRECT);
11062 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11063 0, OPTAB_DIRECT);
11064 if (!rtx_equal_p (target, fp_hi))
11065 emit_move_insn (target, fp_hi);
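/* Example (hypothetical input 0x80000001): the code above computes
   float (0x8000) * 65536.0f + float (0x0001), i.e. 2147483648.0f plus
   1.0f, reconstructing the unsigned value without ever treating the
   input as a negative SImode quantity; the final addition rounds to
   SFmode precision as usual.  */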
11068 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
11069 then replicate the value for all elements of the vector
11070 register. */
11073 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
11075 rtvec v;
11076 switch (mode)
11078 case SImode:
11079 gcc_assert (vect);
11080 v = gen_rtvec (4, value, value, value, value);
11081 return gen_rtx_CONST_VECTOR (V4SImode, v);
11083 case DImode:
11084 gcc_assert (vect);
11085 v = gen_rtvec (2, value, value);
11086 return gen_rtx_CONST_VECTOR (V2DImode, v);
11088 case SFmode:
11089 if (vect)
11090 v = gen_rtvec (4, value, value, value, value);
11091 else
11092 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11093 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11094 return gen_rtx_CONST_VECTOR (V4SFmode, v);
11096 case DFmode:
11097 if (vect)
11098 v = gen_rtvec (2, value, value);
11099 else
11100 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11101 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11103 default:
11104 gcc_unreachable ();
11108 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11109 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11110 for an SSE register. If VECT is true, then replicate the mask for
11111 all elements of the vector register. If INVERT is true, then create
11112 a mask excluding the sign bit. */
11115 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11117 enum machine_mode vec_mode, imode;
11118 HOST_WIDE_INT hi, lo;
11119 int shift = 63;
11120 rtx v;
11121 rtx mask;
11123 /* Find the sign bit, sign extended to 2*HWI. */
11124 switch (mode)
11126 case SImode:
11127 case SFmode:
11128 imode = SImode;
11129 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11130 lo = 0x80000000, hi = lo < 0;
11131 break;
11133 case DImode:
11134 case DFmode:
11135 imode = DImode;
11136 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11137 if (HOST_BITS_PER_WIDE_INT >= 64)
11138 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11139 else
11140 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11141 break;
11143 case TImode:
11144 case TFmode:
11145 imode = TImode;
11146 vec_mode = VOIDmode;
11147 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11148 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11149 break;
11151 default:
11152 gcc_unreachable ();
11155 if (invert)
11156 lo = ~lo, hi = ~hi;
11158 /* Force this value into the low part of a fp vector constant. */
11159 mask = immed_double_const (lo, hi, imode);
11160 mask = gen_lowpart (mode, mask);
11162 if (vec_mode == VOIDmode)
11163 return force_reg (mode, mask);
11165 v = ix86_build_const_vector (mode, vect, mask);
11166 return force_reg (vec_mode, v);
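/* For reference, the mask built above is 0x80000000 per SFmode element
   and bit 63 per DFmode element; with INVERT set it is the complement
   (e.g. 0x7fffffff), which is what the ABS expansion ANDs with.  */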
11169 /* Generate code for floating point ABS or NEG. */
11171 void
11172 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11173 rtx operands[])
11175 rtx mask, set, use, clob, dst, src;
11176 bool use_sse = false;
11177 bool vector_mode = VECTOR_MODE_P (mode);
11178 enum machine_mode elt_mode = mode;
11180 if (vector_mode)
11182 elt_mode = GET_MODE_INNER (mode);
11183 use_sse = true;
11185 else if (mode == TFmode)
11186 use_sse = true;
11187 else if (TARGET_SSE_MATH)
11188 use_sse = SSE_FLOAT_MODE_P (mode);
11190 /* NEG and ABS performed with SSE use bitwise mask operations.
11191 Create the appropriate mask now. */
11192 if (use_sse)
11193 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11194 else
11195 mask = NULL_RTX;
11197 dst = operands[0];
11198 src = operands[1];
11200 if (vector_mode)
11202 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11203 set = gen_rtx_SET (VOIDmode, dst, set);
11204 emit_insn (set);
11206 else
11208 set = gen_rtx_fmt_e (code, mode, src);
11209 set = gen_rtx_SET (VOIDmode, dst, set);
11210 if (mask)
11212 use = gen_rtx_USE (VOIDmode, mask);
11213 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11214 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11215 gen_rtvec (3, set, use, clob)));
11217 else
11218 emit_insn (set);
11222 /* Expand a copysign operation. Special case operand 0 being a constant. */
11224 void
11225 ix86_expand_copysign (rtx operands[])
11227 enum machine_mode mode, vmode;
11228 rtx dest, op0, op1, mask, nmask;
11230 dest = operands[0];
11231 op0 = operands[1];
11232 op1 = operands[2];
11234 mode = GET_MODE (dest);
11235 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11237 if (GET_CODE (op0) == CONST_DOUBLE)
11239 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11241 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11242 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11244 if (mode == SFmode || mode == DFmode)
11246 if (op0 == CONST0_RTX (mode))
11247 op0 = CONST0_RTX (vmode);
11248 else
11250 rtvec v;
11252 if (mode == SFmode)
11253 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11254 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11255 else
11256 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11257 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11261 mask = ix86_build_signbit_mask (mode, 0, 0);
11263 if (mode == SFmode)
11264 copysign_insn = gen_copysignsf3_const;
11265 else if (mode == DFmode)
11266 copysign_insn = gen_copysigndf3_const;
11267 else
11268 copysign_insn = gen_copysigntf3_const;
11270 emit_insn (copysign_insn (dest, op0, op1, mask));
11272 else
11274 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11276 nmask = ix86_build_signbit_mask (mode, 0, 1);
11277 mask = ix86_build_signbit_mask (mode, 0, 0);
11279 if (mode == SFmode)
11280 copysign_insn = gen_copysignsf3_var;
11281 else if (mode == DFmode)
11282 copysign_insn = gen_copysigndf3_var;
11283 else
11284 copysign_insn = gen_copysigntf3_var;
11286 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11290 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11291 be a constant, and so has already been expanded into a vector constant. */
11293 void
11294 ix86_split_copysign_const (rtx operands[])
11296 enum machine_mode mode, vmode;
11297 rtx dest, op0, op1, mask, x;
11299 dest = operands[0];
11300 op0 = operands[1];
11301 op1 = operands[2];
11302 mask = operands[3];
11304 mode = GET_MODE (dest);
11305 vmode = GET_MODE (mask);
11307 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11308 x = gen_rtx_AND (vmode, dest, mask);
11309 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11311 if (op0 != CONST0_RTX (vmode))
11313 x = gen_rtx_IOR (vmode, dest, op0);
11314 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11318 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11319 so we have to do two masks. */
11321 void
11322 ix86_split_copysign_var (rtx operands[])
11324 enum machine_mode mode, vmode;
11325 rtx dest, scratch, op0, op1, mask, nmask, x;
11327 dest = operands[0];
11328 scratch = operands[1];
11329 op0 = operands[2];
11330 op1 = operands[3];
11331 nmask = operands[4];
11332 mask = operands[5];
11334 mode = GET_MODE (dest);
11335 vmode = GET_MODE (mask);
11337 if (rtx_equal_p (op0, op1))
11339 /* Shouldn't happen often (it's useless, obviously), but when it does
11340 we'd generate incorrect code if we continue below. */
11341 emit_move_insn (dest, op0);
11342 return;
11345 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11347 gcc_assert (REGNO (op1) == REGNO (scratch));
11349 x = gen_rtx_AND (vmode, scratch, mask);
11350 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11352 dest = mask;
11353 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11354 x = gen_rtx_NOT (vmode, dest);
11355 x = gen_rtx_AND (vmode, x, op0);
11356 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11358 else
11360 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11362 x = gen_rtx_AND (vmode, scratch, mask);
11364 else /* alternative 2,4 */
11366 gcc_assert (REGNO (mask) == REGNO (scratch));
11367 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11368 x = gen_rtx_AND (vmode, scratch, op1);
11370 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11372 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11374 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11375 x = gen_rtx_AND (vmode, dest, nmask);
11377 else /* alternative 3,4 */
11379 gcc_assert (REGNO (nmask) == REGNO (dest));
11380 dest = nmask;
11381 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11382 x = gen_rtx_AND (vmode, dest, op0);
11384 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11387 x = gen_rtx_IOR (vmode, dest, scratch);
11388 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11391 /* Return TRUE or FALSE depending on whether the first SET in INSN
11392 has source and destination with matching CC modes and whether that
11393 CC mode is at least as constrained as REQ_MODE. */
11396 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11398 rtx set;
11399 enum machine_mode set_mode;
11401 set = PATTERN (insn);
11402 if (GET_CODE (set) == PARALLEL)
11403 set = XVECEXP (set, 0, 0);
11404 gcc_assert (GET_CODE (set) == SET);
11405 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11407 set_mode = GET_MODE (SET_DEST (set));
11408 switch (set_mode)
11410 case CCNOmode:
11411 if (req_mode != CCNOmode
11412 && (req_mode != CCmode
11413 || XEXP (SET_SRC (set), 1) != const0_rtx))
11414 return 0;
11415 break;
11416 case CCmode:
11417 if (req_mode == CCGCmode)
11418 return 0;
11419 /* FALLTHRU */
11420 case CCGCmode:
11421 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11422 return 0;
11423 /* FALLTHRU */
11424 case CCGOCmode:
11425 if (req_mode == CCZmode)
11426 return 0;
11427 /* FALLTHRU */
11428 case CCZmode:
11429 break;
11431 default:
11432 gcc_unreachable ();
11435 return (GET_MODE (SET_SRC (set)) == set_mode);
11438 /* Generate insn patterns to do an integer compare of OPERANDS. */
11440 static rtx
11441 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11443 enum machine_mode cmpmode;
11444 rtx tmp, flags;
11446 cmpmode = SELECT_CC_MODE (code, op0, op1);
11447 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11449 /* This is very simple, but making the interface the same as in the
11450 FP case makes the rest of the code easier. */
11451 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11452 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11454 /* Return the test that should be put into the flags user, i.e.
11455 the bcc, scc, or cmov instruction. */
11456 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
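/* Example: an equality comparison of two SImode registers emits roughly
   (set (reg:CCZ flags) (compare:CCZ (reg:SI a) (reg:SI b))) and hands
   back (eq (reg:CCZ flags) (const_int 0)) for the branch, setcc or
   cmov expander to consume.  */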
11459 /* Figure out whether to use ordered or unordered fp comparisons.
11460 Return the appropriate mode to use. */
11462 enum machine_mode
11463 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11465 /* ??? In order to make all comparisons reversible, we do all comparisons
11466 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11467 all forms of trapping and nontrapping comparisons, we can make inequality
11468 comparisons trapping again, since that results in better code when using
11469 FCOM based compares. */
11470 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11473 enum machine_mode
11474 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11476 enum machine_mode mode = GET_MODE (op0);
11478 if (SCALAR_FLOAT_MODE_P (mode))
11480 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11481 return ix86_fp_compare_mode (code);
11484 switch (code)
11486 /* Only zero flag is needed. */
11487 case EQ: /* ZF=0 */
11488 case NE: /* ZF!=0 */
11489 return CCZmode;
11490 /* Codes needing carry flag. */
11491 case GEU: /* CF=0 */
11492 case LTU: /* CF=1 */
11493 /* Detect overflow checks. They need just the carry flag. */
11494 if (GET_CODE (op0) == PLUS
11495 && rtx_equal_p (op1, XEXP (op0, 0)))
11496 return CCCmode;
11497 else
11498 return CCmode;
11499 case GTU: /* CF=0 & ZF=0 */
11500 case LEU: /* CF=1 | ZF=1 */
11501 /* Detect overflow checks. They need just the carry flag. */
11502 if (GET_CODE (op0) == MINUS
11503 && rtx_equal_p (op1, XEXP (op0, 0)))
11504 return CCCmode;
11505 else
11506 return CCmode;
11507 /* Codes possibly doable only with sign flag when
11508 comparing against zero. */
11509 case GE: /* SF=OF or SF=0 */
11510 case LT: /* SF<>OF or SF=1 */
11511 if (op1 == const0_rtx)
11512 return CCGOCmode;
11513 else
11514 /* For other cases Carry flag is not required. */
11515 return CCGCmode;
11516 /* Codes doable with only the sign flag when comparing
11517 against zero, but for which we have no jump instruction,
11518 so we need to use relational tests against the overflow
11519 flag, which therefore needs to be zero. */
11520 case GT: /* ZF=0 & SF=OF */
11521 case LE: /* ZF=1 | SF<>OF */
11522 if (op1 == const0_rtx)
11523 return CCNOmode;
11524 else
11525 return CCGCmode;
11526 /* The strcmp pattern does (use flags), and combine may ask us for the
11527 proper mode. */
11528 case USE:
11529 return CCmode;
11530 default:
11531 gcc_unreachable ();
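/* As an example, an unsigned overflow check such as

     sum = a + b;
     if (sum < a)
       ...

   reaches ix86_cc_mode as (ltu (plus a b) a); only the carry flag is
   needed, so CCCmode is chosen and the branch becomes a single jc/jnc.  */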
11535 /* Return the fixed registers used for condition codes. */
11537 static bool
11538 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11540 *p1 = FLAGS_REG;
11541 *p2 = FPSR_REG;
11542 return true;
11545 /* If two condition code modes are compatible, return a condition code
11546 mode which is compatible with both. Otherwise, return
11547 VOIDmode. */
11549 static enum machine_mode
11550 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11552 if (m1 == m2)
11553 return m1;
11555 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11556 return VOIDmode;
11558 if ((m1 == CCGCmode && m2 == CCGOCmode)
11559 || (m1 == CCGOCmode && m2 == CCGCmode))
11560 return CCGCmode;
11562 switch (m1)
11564 default:
11565 gcc_unreachable ();
11567 case CCmode:
11568 case CCGCmode:
11569 case CCGOCmode:
11570 case CCNOmode:
11571 case CCAmode:
11572 case CCCmode:
11573 case CCOmode:
11574 case CCSmode:
11575 case CCZmode:
11576 switch (m2)
11578 default:
11579 return VOIDmode;
11581 case CCmode:
11582 case CCGCmode:
11583 case CCGOCmode:
11584 case CCNOmode:
11585 case CCAmode:
11586 case CCCmode:
11587 case CCOmode:
11588 case CCSmode:
11589 case CCZmode:
11590 return CCmode;
11593 case CCFPmode:
11594 case CCFPUmode:
11595 /* These are only compatible with themselves, which we already
11596 checked above. */
11597 return VOIDmode;
11601 /* Split comparison code CODE into comparisons we can do using branch
11602 instructions. BYPASS_CODE is the comparison code for the branch that will
11603 branch around FIRST_CODE and SECOND_CODE. If one of the branches
11604 is not required, its code is set to UNKNOWN.
11605 We never require more than two branches. */
11607 void
11608 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11609 enum rtx_code *first_code,
11610 enum rtx_code *second_code)
11612 *first_code = code;
11613 *bypass_code = UNKNOWN;
11614 *second_code = UNKNOWN;
11616 /* The fcomi comparison sets flags as follows:
11618 cmp ZF PF CF
11619 > 0 0 0
11620 < 0 0 1
11621 = 1 0 0
11622 un 1 1 1 */
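/* With IEEE math, LT, LE and EQ must be false for unordered operands, yet
   their plain tests below (CF=1, CF=1|ZF=1, ZF=1) would also hold in the
   unordered case, so they get an UNORDERED bypass branch around the main
   test.  Conversely NE, UNGE and UNGT must be true for unordered operands
   while their main tests fail there, so they get a second UNORDERED branch
   to the same target.  */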
11624 switch (code)
11626 case GT: /* GTU - CF=0 & ZF=0 */
11627 case GE: /* GEU - CF=0 */
11628 case ORDERED: /* PF=0 */
11629 case UNORDERED: /* PF=1 */
11630 case UNEQ: /* EQ - ZF=1 */
11631 case UNLT: /* LTU - CF=1 */
11632 case UNLE: /* LEU - CF=1 | ZF=1 */
11633 case LTGT: /* EQ - ZF=0 */
11634 break;
11635 case LT: /* LTU - CF=1 - fails on unordered */
11636 *first_code = UNLT;
11637 *bypass_code = UNORDERED;
11638 break;
11639 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11640 *first_code = UNLE;
11641 *bypass_code = UNORDERED;
11642 break;
11643 case EQ: /* EQ - ZF=1 - fails on unordered */
11644 *first_code = UNEQ;
11645 *bypass_code = UNORDERED;
11646 break;
11647 case NE: /* NE - ZF=0 - fails on unordered */
11648 *first_code = LTGT;
11649 *second_code = UNORDERED;
11650 break;
11651 case UNGE: /* GEU - CF=0 - fails on unordered */
11652 *first_code = GE;
11653 *second_code = UNORDERED;
11654 break;
11655 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11656 *first_code = GT;
11657 *second_code = UNORDERED;
11658 break;
11659 default:
11660 gcc_unreachable ();
11662 if (!TARGET_IEEE_FP)
11664 *second_code = UNKNOWN;
11665 *bypass_code = UNKNOWN;
11669 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11670 All of the following functions use the number of instructions as their cost metric.
11671 In the future this should be tweaked to compute bytes for optimize_size and
11672 take into account the performance of various instructions on various CPUs. */
11673 static int
11674 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11676 if (!TARGET_IEEE_FP)
11677 return 4;
11678 /* The cost of code output by ix86_expand_fp_compare. */
11679 switch (code)
11681 case UNLE:
11682 case UNLT:
11683 case LTGT:
11684 case GT:
11685 case GE:
11686 case UNORDERED:
11687 case ORDERED:
11688 case UNEQ:
11689 return 4;
11690 break;
11691 case LT:
11692 case NE:
11693 case EQ:
11694 case UNGE:
11695 return 5;
11696 break;
11697 case LE:
11698 case UNGT:
11699 return 6;
11700 break;
11701 default:
11702 gcc_unreachable ();
11706 /* Return cost of comparison done using fcomi operation.
11707 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11708 static int
11709 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11711 enum rtx_code bypass_code, first_code, second_code;
11712 /* Return an arbitrarily high cost when the instruction is not supported - this
11713 prevents gcc from using it. */
11714 if (!TARGET_CMOVE)
11715 return 1024;
11716 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11717 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11720 /* Return cost of comparison done using sahf operation.
11721 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11722 static int
11723 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11725 enum rtx_code bypass_code, first_code, second_code;
11726 /* Return an arbitrarily high cost when the instruction is not preferred - this
11727 prevents gcc from using it. */
11728 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11729 return 1024;
11730 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11731 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11734 /* Compute cost of the comparison done using any method.
11735 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11736 static int
11737 ix86_fp_comparison_cost (enum rtx_code code)
11739 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11740 int min;
11742 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11743 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11745 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11746 if (min > sahf_cost)
11747 min = sahf_cost;
11748 if (min > fcomi_cost)
11749 min = fcomi_cost;
11750 return min;
11753 /* Return true if we should use an FCOMI instruction for this
11754 fp comparison. */
11757 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11759 enum rtx_code swapped_code = swap_condition (code);
11761 return ((ix86_fp_comparison_cost (code)
11762 == ix86_fp_comparison_fcomi_cost (code))
11763 || (ix86_fp_comparison_cost (swapped_code)
11764 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11767 /* Swap, force into registers, or otherwise massage the two operands
11768 to a fp comparison. The operands are updated in place; the new
11769 comparison code is returned. */
11771 static enum rtx_code
11772 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11774 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11775 rtx op0 = *pop0, op1 = *pop1;
11776 enum machine_mode op_mode = GET_MODE (op0);
11777 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11779 /* All of the unordered compare instructions only work on registers.
11780 The same is true of the fcomi compare instructions. The XFmode
11781 compare instructions require registers except when comparing
11782 against zero or when converting operand 1 from fixed point to
11783 floating point. */
11785 if (!is_sse
11786 && (fpcmp_mode == CCFPUmode
11787 || (op_mode == XFmode
11788 && ! (standard_80387_constant_p (op0) == 1
11789 || standard_80387_constant_p (op1) == 1)
11790 && GET_CODE (op1) != FLOAT)
11791 || ix86_use_fcomi_compare (code)))
11793 op0 = force_reg (op_mode, op0);
11794 op1 = force_reg (op_mode, op1);
11796 else
11798 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11799 things around if they appear profitable, otherwise force op0
11800 into a register. */
11802 if (standard_80387_constant_p (op0) == 0
11803 || (MEM_P (op0)
11804 && ! (standard_80387_constant_p (op1) == 0
11805 || MEM_P (op1))))
11807 rtx tmp;
11808 tmp = op0, op0 = op1, op1 = tmp;
11809 code = swap_condition (code);
11812 if (!REG_P (op0))
11813 op0 = force_reg (op_mode, op0);
11815 if (CONSTANT_P (op1))
11817 int tmp = standard_80387_constant_p (op1);
11818 if (tmp == 0)
11819 op1 = validize_mem (force_const_mem (op_mode, op1));
11820 else if (tmp == 1)
11822 if (TARGET_CMOVE)
11823 op1 = force_reg (op_mode, op1);
11825 else
11826 op1 = force_reg (op_mode, op1);
11830 /* Try to rearrange the comparison to make it cheaper. */
11831 if (ix86_fp_comparison_cost (code)
11832 > ix86_fp_comparison_cost (swap_condition (code))
11833 && (REG_P (op1) || can_create_pseudo_p ()))
11835 rtx tmp;
11836 tmp = op0, op0 = op1, op1 = tmp;
11837 code = swap_condition (code);
11838 if (!REG_P (op0))
11839 op0 = force_reg (op_mode, op0);
11842 *pop0 = op0;
11843 *pop1 = op1;
11844 return code;
11847 /* Convert comparison codes we use to represent FP comparison to integer
11848 code that will result in proper branch. Return UNKNOWN if no such code
11849 is available. */
11851 enum rtx_code
11852 ix86_fp_compare_code_to_integer (enum rtx_code code)
11854 switch (code)
11856 case GT:
11857 return GTU;
11858 case GE:
11859 return GEU;
11860 case ORDERED:
11861 case UNORDERED:
11862 return code;
11863 break;
11864 case UNEQ:
11865 return EQ;
11866 break;
11867 case UNLT:
11868 return LTU;
11869 break;
11870 case UNLE:
11871 return LEU;
11872 break;
11873 case LTGT:
11874 return NE;
11875 break;
11876 default:
11877 return UNKNOWN;
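/* The mapping above is to the unsigned integer conditions because fcomi,
   fucomi and [u]comiss set ZF and CF exactly as an unsigned integer
   compare would, with PF marking the unordered case (see the flag table
   above ix86_fp_comparison_codes); thus '>' becomes ja, '>=' becomes jae,
   and so on.  */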
11881 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11883 static rtx
11884 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11885 rtx *second_test, rtx *bypass_test)
11887 enum machine_mode fpcmp_mode, intcmp_mode;
11888 rtx tmp, tmp2;
11889 int cost = ix86_fp_comparison_cost (code);
11890 enum rtx_code bypass_code, first_code, second_code;
11892 fpcmp_mode = ix86_fp_compare_mode (code);
11893 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11895 if (second_test)
11896 *second_test = NULL_RTX;
11897 if (bypass_test)
11898 *bypass_test = NULL_RTX;
11900 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11902 /* Do fcomi/sahf based test when profitable. */
11903 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11904 && (bypass_code == UNKNOWN || bypass_test)
11905 && (second_code == UNKNOWN || second_test))
11907 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11908 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11909 tmp);
11910 if (TARGET_CMOVE)
11911 emit_insn (tmp);
11912 else
11914 gcc_assert (TARGET_SAHF);
11916 if (!scratch)
11917 scratch = gen_reg_rtx (HImode);
11918 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11920 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11923 /* The FP codes work out to act like unsigned. */
11924 intcmp_mode = fpcmp_mode;
11925 code = first_code;
11926 if (bypass_code != UNKNOWN)
11927 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11928 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11929 const0_rtx);
11930 if (second_code != UNKNOWN)
11931 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11932 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11933 const0_rtx);
11935 else
11937 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11938 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11939 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11940 if (!scratch)
11941 scratch = gen_reg_rtx (HImode);
11942 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11944 /* In the unordered case, we have to check C2 for NaN's, which
11945 doesn't happen to work out to anything nice combination-wise.
11946 So do some bit twiddling on the value we've got in AH to come
11947 up with an appropriate set of condition codes. */
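/* After fnstsw %ax, AH holds the upper byte of the FPU status word:
   C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3 in bit 6 (0x40), so 0x45
   is C3|C2|C0.  An fcom-style compare clears all three for '>', sets C0
   for '<', sets C3 for '=' and sets all three when the operands are
   unordered; the masks used below encode exactly these combinations.  */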
11949 intcmp_mode = CCNOmode;
11950 switch (code)
11952 case GT:
11953 case UNGT:
11954 if (code == GT || !TARGET_IEEE_FP)
11956 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11957 code = EQ;
11959 else
11961 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11962 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11963 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11964 intcmp_mode = CCmode;
11965 code = GEU;
11967 break;
11968 case LT:
11969 case UNLT:
11970 if (code == LT && TARGET_IEEE_FP)
11972 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11973 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11974 intcmp_mode = CCmode;
11975 code = EQ;
11977 else
11979 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11980 code = NE;
11982 break;
11983 case GE:
11984 case UNGE:
11985 if (code == GE || !TARGET_IEEE_FP)
11987 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11988 code = EQ;
11990 else
11992 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11993 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11994 GEN_INT (0x01)));
11995 code = NE;
11997 break;
11998 case LE:
11999 case UNLE:
12000 if (code == LE && TARGET_IEEE_FP)
12002 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12003 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12004 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12005 intcmp_mode = CCmode;
12006 code = LTU;
12008 else
12010 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12011 code = NE;
12013 break;
12014 case EQ:
12015 case UNEQ:
12016 if (code == EQ && TARGET_IEEE_FP)
12018 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12019 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12020 intcmp_mode = CCmode;
12021 code = EQ;
12023 else
12025 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12026 code = NE;
12027 break;
12029 break;
12030 case NE:
12031 case LTGT:
12032 if (code == NE && TARGET_IEEE_FP)
12034 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12035 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12036 GEN_INT (0x40)));
12037 code = NE;
12039 else
12041 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12042 code = EQ;
12044 break;
12046 case UNORDERED:
12047 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12048 code = NE;
12049 break;
12050 case ORDERED:
12051 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12052 code = EQ;
12053 break;
12055 default:
12056 gcc_unreachable ();
12060 /* Return the test that should be put into the flags user, i.e.
12061 the bcc, scc, or cmov instruction. */
12062 return gen_rtx_fmt_ee (code, VOIDmode,
12063 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12064 const0_rtx);
12068 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12070 rtx op0, op1, ret;
12071 op0 = ix86_compare_op0;
12072 op1 = ix86_compare_op1;
12074 if (second_test)
12075 *second_test = NULL_RTX;
12076 if (bypass_test)
12077 *bypass_test = NULL_RTX;
12079 if (ix86_compare_emitted)
12081 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12082 ix86_compare_emitted = NULL_RTX;
12084 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12086 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12087 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12088 second_test, bypass_test);
12090 else
12091 ret = ix86_expand_int_compare (code, op0, op1);
12093 return ret;
12096 /* Return true if the CODE will result in nontrivial jump sequence. */
12097 bool
12098 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12100 enum rtx_code bypass_code, first_code, second_code;
12101 if (!TARGET_CMOVE)
12102 return true;
12103 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12104 return bypass_code != UNKNOWN || second_code != UNKNOWN;
12107 void
12108 ix86_expand_branch (enum rtx_code code, rtx label)
12110 rtx tmp;
12112 /* If we have emitted a compare insn, go straight to simple.
12113 ix86_expand_compare won't emit anything if ix86_compare_emitted
12114 is non-NULL. */
12115 if (ix86_compare_emitted)
12116 goto simple;
12118 switch (GET_MODE (ix86_compare_op0))
12120 case QImode:
12121 case HImode:
12122 case SImode:
12123 simple:
12124 tmp = ix86_expand_compare (code, NULL, NULL);
12125 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12126 gen_rtx_LABEL_REF (VOIDmode, label),
12127 pc_rtx);
12128 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12129 return;
12131 case SFmode:
12132 case DFmode:
12133 case XFmode:
12135 rtvec vec;
12136 int use_fcomi;
12137 enum rtx_code bypass_code, first_code, second_code;
12139 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12140 &ix86_compare_op1);
12142 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12144 /* Check whether we will use the natural sequence with one jump. If
12145 so, we can expand the jump early. Otherwise delay expansion by
12146 creating a compound insn so as not to confuse the optimizers. */
12147 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12149 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12150 gen_rtx_LABEL_REF (VOIDmode, label),
12151 pc_rtx, NULL_RTX, NULL_RTX);
12153 else
12155 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12156 ix86_compare_op0, ix86_compare_op1);
12157 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12158 gen_rtx_LABEL_REF (VOIDmode, label),
12159 pc_rtx);
12160 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12162 use_fcomi = ix86_use_fcomi_compare (code);
12163 vec = rtvec_alloc (3 + !use_fcomi);
12164 RTVEC_ELT (vec, 0) = tmp;
12165 RTVEC_ELT (vec, 1)
12166 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12167 RTVEC_ELT (vec, 2)
12168 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12169 if (! use_fcomi)
12170 RTVEC_ELT (vec, 3)
12171 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12173 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12175 return;
12178 case DImode:
12179 if (TARGET_64BIT)
12180 goto simple;
12181 case TImode:
12182 /* Expand DImode branch into multiple compare+branch. */
12184 rtx lo[2], hi[2], label2;
12185 enum rtx_code code1, code2, code3;
12186 enum machine_mode submode;
12188 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12190 tmp = ix86_compare_op0;
12191 ix86_compare_op0 = ix86_compare_op1;
12192 ix86_compare_op1 = tmp;
12193 code = swap_condition (code);
12195 if (GET_MODE (ix86_compare_op0) == DImode)
12197 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12198 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12199 submode = SImode;
12201 else
12203 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12204 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12205 submode = DImode;
12208 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12209 avoid two branches. This costs one extra insn, so disable when
12210 optimizing for size. */
12212 if ((code == EQ || code == NE)
12213 && (!optimize_size
12214 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12216 rtx xor0, xor1;
12218 xor1 = hi[0];
12219 if (hi[1] != const0_rtx)
12220 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12221 NULL_RTX, 0, OPTAB_WIDEN);
12223 xor0 = lo[0];
12224 if (lo[1] != const0_rtx)
12225 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12226 NULL_RTX, 0, OPTAB_WIDEN);
12228 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12229 NULL_RTX, 0, OPTAB_WIDEN);
12231 ix86_compare_op0 = tmp;
12232 ix86_compare_op1 = const0_rtx;
12233 ix86_expand_branch (code, label);
12234 return;
12237 /* Otherwise, if we are doing a less-than or greater-than-or-equal
12238 comparison, op1 is a constant and the low word is zero, then we can
12239 just examine the high word. Similarly for a low word of -1 and
12240 less-than-or-equal or greater-than. */
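/* For instance, the DImode test x < 0x100000000LL has lo[1] == 0 and
   hi[1] == 1, so it reduces to the single word compare hi(x) < 1.  */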
12242 if (CONST_INT_P (hi[1]))
12243 switch (code)
12245 case LT: case LTU: case GE: case GEU:
12246 if (lo[1] == const0_rtx)
12248 ix86_compare_op0 = hi[0];
12249 ix86_compare_op1 = hi[1];
12250 ix86_expand_branch (code, label);
12251 return;
12253 break;
12254 case LE: case LEU: case GT: case GTU:
12255 if (lo[1] == constm1_rtx)
12257 ix86_compare_op0 = hi[0];
12258 ix86_compare_op1 = hi[1];
12259 ix86_expand_branch (code, label);
12260 return;
12262 break;
12263 default:
12264 break;
12267 /* Otherwise, we need two or three jumps. */
12269 label2 = gen_label_rtx ();
12271 code1 = code;
12272 code2 = swap_condition (code);
12273 code3 = unsigned_condition (code);
12275 switch (code)
12277 case LT: case GT: case LTU: case GTU:
12278 break;
12280 case LE: code1 = LT; code2 = GT; break;
12281 case GE: code1 = GT; code2 = LT; break;
12282 case LEU: code1 = LTU; code2 = GTU; break;
12283 case GEU: code1 = GTU; code2 = LTU; break;
12285 case EQ: code1 = UNKNOWN; code2 = NE; break;
12286 case NE: code2 = UNKNOWN; break;
12288 default:
12289 gcc_unreachable ();
12293 * a < b =>
12294 * if (hi(a) < hi(b)) goto true;
12295 * if (hi(a) > hi(b)) goto false;
12296 * if (lo(a) < lo(b)) goto true;
12297 * false:
12300 ix86_compare_op0 = hi[0];
12301 ix86_compare_op1 = hi[1];
12303 if (code1 != UNKNOWN)
12304 ix86_expand_branch (code1, label);
12305 if (code2 != UNKNOWN)
12306 ix86_expand_branch (code2, label2);
12308 ix86_compare_op0 = lo[0];
12309 ix86_compare_op1 = lo[1];
12310 ix86_expand_branch (code3, label);
12312 if (code2 != UNKNOWN)
12313 emit_label (label2);
12314 return;
12317 default:
12318 gcc_unreachable ();
12322 /* Split branch based on floating point condition. */
12323 void
12324 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12325 rtx target1, rtx target2, rtx tmp, rtx pushed)
12327 rtx second, bypass;
12328 rtx label = NULL_RTX;
12329 rtx condition;
12330 int bypass_probability = -1, second_probability = -1, probability = -1;
12331 rtx i;
12333 if (target2 != pc_rtx)
12335 rtx tmp = target2;
12336 code = reverse_condition_maybe_unordered (code);
12337 target2 = target1;
12338 target1 = tmp;
12341 condition = ix86_expand_fp_compare (code, op1, op2,
12342 tmp, &second, &bypass);
12344 /* Remove pushed operand from stack. */
12345 if (pushed)
12346 ix86_free_from_memory (GET_MODE (pushed));
12348 if (split_branch_probability >= 0)
12350 /* Distribute the probabilities across the jumps.
12351 Assume that BYPASS and SECOND always test
12352 for UNORDERED. */
12353 probability = split_branch_probability;
12355 /* A value of 1 is low enough that there is no need for the probability
12356 to be updated. Later we may run some experiments and see
12357 whether unordered values are more frequent in practice. */
12358 if (bypass)
12359 bypass_probability = 1;
12360 if (second)
12361 second_probability = 1;
12363 if (bypass != NULL_RTX)
12365 label = gen_label_rtx ();
12366 i = emit_jump_insn (gen_rtx_SET
12367 (VOIDmode, pc_rtx,
12368 gen_rtx_IF_THEN_ELSE (VOIDmode,
12369 bypass,
12370 gen_rtx_LABEL_REF (VOIDmode,
12371 label),
12372 pc_rtx)));
12373 if (bypass_probability >= 0)
12374 REG_NOTES (i)
12375 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12376 GEN_INT (bypass_probability),
12377 REG_NOTES (i));
12379 i = emit_jump_insn (gen_rtx_SET
12380 (VOIDmode, pc_rtx,
12381 gen_rtx_IF_THEN_ELSE (VOIDmode,
12382 condition, target1, target2)));
12383 if (probability >= 0)
12384 REG_NOTES (i)
12385 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12386 GEN_INT (probability),
12387 REG_NOTES (i));
12388 if (second != NULL_RTX)
12390 i = emit_jump_insn (gen_rtx_SET
12391 (VOIDmode, pc_rtx,
12392 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12393 target2)));
12394 if (second_probability >= 0)
12395 REG_NOTES (i)
12396 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12397 GEN_INT (second_probability),
12398 REG_NOTES (i));
12400 if (label != NULL_RTX)
12401 emit_label (label);
12405 ix86_expand_setcc (enum rtx_code code, rtx dest)
12407 rtx ret, tmp, tmpreg, equiv;
12408 rtx second_test, bypass_test;
12410 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12411 return 0; /* FAIL */
12413 gcc_assert (GET_MODE (dest) == QImode);
12415 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12416 PUT_MODE (ret, QImode);
12418 tmp = dest;
12419 tmpreg = dest;
12421 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12422 if (bypass_test || second_test)
12424 rtx test = second_test;
12425 int bypass = 0;
12426 rtx tmp2 = gen_reg_rtx (QImode);
12427 if (bypass_test)
12429 gcc_assert (!second_test);
12430 test = bypass_test;
12431 bypass = 1;
12432 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12434 PUT_MODE (test, QImode);
12435 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12437 if (bypass)
12438 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12439 else
12440 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12443 /* Attach a REG_EQUAL note describing the comparison result. */
12444 if (ix86_compare_op0 && ix86_compare_op1)
12446 equiv = simplify_gen_relational (code, QImode,
12447 GET_MODE (ix86_compare_op0),
12448 ix86_compare_op0, ix86_compare_op1);
12449 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12452 return 1; /* DONE */
12455 /* Expand a comparison setting or clearing the carry flag. Return true when
12456 successful and set *POP to the comparison operation. */
12457 static bool
12458 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12460 enum machine_mode mode =
12461 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12463 /* Do not handle DImode compares that go through special path. */
12464 if (mode == (TARGET_64BIT ? TImode : DImode))
12465 return false;
12467 if (SCALAR_FLOAT_MODE_P (mode))
12469 rtx second_test = NULL, bypass_test = NULL;
12470 rtx compare_op, compare_seq;
12472 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12474 /* Shortcut: following common codes never translate
12475 into carry flag compares. */
12476 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12477 || code == ORDERED || code == UNORDERED)
12478 return false;
12480 /* These comparisons require zero flag; swap operands so they won't. */
12481 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12482 && !TARGET_IEEE_FP)
12484 rtx tmp = op0;
12485 op0 = op1;
12486 op1 = tmp;
12487 code = swap_condition (code);
12490 /* Try to expand the comparison and verify that we end up with
12491 a carry-flag-based comparison. This fails only when we decide
12492 to expand the comparison using arithmetic, which is not
12493 a common scenario. */
12494 start_sequence ();
12495 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12496 &second_test, &bypass_test);
12497 compare_seq = get_insns ();
12498 end_sequence ();
12500 if (second_test || bypass_test)
12501 return false;
12503 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12504 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12505 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12506 else
12507 code = GET_CODE (compare_op);
12509 if (code != LTU && code != GEU)
12510 return false;
12512 emit_insn (compare_seq);
12513 *pop = compare_op;
12514 return true;
12517 if (!INTEGRAL_MODE_P (mode))
12518 return false;
12520 switch (code)
12522 case LTU:
12523 case GEU:
12524 break;
12526 /* Convert a==0 into (unsigned)a<1. */
12527 case EQ:
12528 case NE:
12529 if (op1 != const0_rtx)
12530 return false;
12531 op1 = const1_rtx;
12532 code = (code == EQ ? LTU : GEU);
12533 break;
12535 /* Convert a>b into b<a or a>=b+1. */
12536 case GTU:
12537 case LEU:
12538 if (CONST_INT_P (op1))
12540 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12541 /* Bail out on overflow. We could still swap the operands, but that
12542 would force loading the constant into a register. */
12543 if (op1 == const0_rtx
12544 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12545 return false;
12546 code = (code == GTU ? GEU : LTU);
12548 else
12550 rtx tmp = op1;
12551 op1 = op0;
12552 op0 = tmp;
12553 code = (code == GTU ? LTU : GEU);
12555 break;
12557 /* Convert a>=0 into (unsigned)a<0x80000000. */
12558 case LT:
12559 case GE:
12560 if (mode == DImode || op1 != const0_rtx)
12561 return false;
12562 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12563 code = (code == LT ? GEU : LTU);
12564 break;
12565 case LE:
12566 case GT:
12567 if (mode == DImode || op1 != constm1_rtx)
12568 return false;
12569 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12570 code = (code == LE ? GEU : LTU);
12571 break;
12573 default:
12574 return false;
12576 /* Swapping operands may cause constant to appear as first operand. */
12577 if (!nonimmediate_operand (op0, VOIDmode))
12579 if (!can_create_pseudo_p ())
12580 return false;
12581 op0 = force_reg (mode, op0);
12583 ix86_compare_op0 = op0;
12584 ix86_compare_op1 = op1;
12585 *pop = ix86_expand_compare (code, NULL, NULL);
12586 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12587 return true;
12591 ix86_expand_int_movcc (rtx operands[])
12593 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12594 rtx compare_seq, compare_op;
12595 rtx second_test, bypass_test;
12596 enum machine_mode mode = GET_MODE (operands[0]);
12597 bool sign_bit_compare_p = false;
12599 start_sequence ();
12600 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12601 compare_seq = get_insns ();
12602 end_sequence ();
12604 compare_code = GET_CODE (compare_op);
12606 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12607 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12608 sign_bit_compare_p = true;
12610 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12611 HImode insns, we'd be swallowed in word prefix ops. */
12613 if ((mode != HImode || TARGET_FAST_PREFIX)
12614 && (mode != (TARGET_64BIT ? TImode : DImode))
12615 && CONST_INT_P (operands[2])
12616 && CONST_INT_P (operands[3]))
12618 rtx out = operands[0];
12619 HOST_WIDE_INT ct = INTVAL (operands[2]);
12620 HOST_WIDE_INT cf = INTVAL (operands[3]);
12621 HOST_WIDE_INT diff;
12623 diff = ct - cf;
12624 /* Sign bit compares are better done using shifts than by using
12625 sbb. */
12626 if (sign_bit_compare_p
12627 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12628 ix86_compare_op1, &compare_op))
12630 /* Detect overlap between destination and compare sources. */
12631 rtx tmp = out;
12633 if (!sign_bit_compare_p)
12635 bool fpcmp = false;
12637 compare_code = GET_CODE (compare_op);
12639 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12640 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12642 fpcmp = true;
12643 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12646 /* To simplify rest of code, restrict to the GEU case. */
12647 if (compare_code == LTU)
12649 HOST_WIDE_INT tmp = ct;
12650 ct = cf;
12651 cf = tmp;
12652 compare_code = reverse_condition (compare_code);
12653 code = reverse_condition (code);
12655 else
12657 if (fpcmp)
12658 PUT_CODE (compare_op,
12659 reverse_condition_maybe_unordered
12660 (GET_CODE (compare_op)));
12661 else
12662 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12664 diff = ct - cf;
12666 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12667 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12668 tmp = gen_reg_rtx (mode);
12670 if (mode == DImode)
12671 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12672 else
12673 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12675 else
12677 if (code == GT || code == GE)
12678 code = reverse_condition (code);
12679 else
12681 HOST_WIDE_INT tmp = ct;
12682 ct = cf;
12683 cf = tmp;
12684 diff = ct - cf;
12686 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12687 ix86_compare_op1, VOIDmode, 0, -1);
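/* Either way TMP is now 0 or -1 (all ones): the sbb idiom above computes
   reg - reg - CF = -CF, and emit_store_flag was asked for a -1/0 result.
   The arithmetic below maps {-1, 0} onto the two constants without a
   branch.  */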
12690 if (diff == 1)
12693 * cmpl op0,op1
12694 * sbbl dest,dest
12695 * [addl dest, ct]
12697 * Size 5 - 8.
12699 if (ct)
12700 tmp = expand_simple_binop (mode, PLUS,
12701 tmp, GEN_INT (ct),
12702 copy_rtx (tmp), 1, OPTAB_DIRECT);
12704 else if (cf == -1)
12707 * cmpl op0,op1
12708 * sbbl dest,dest
12709 * orl $ct, dest
12711 * Size 8.
12713 tmp = expand_simple_binop (mode, IOR,
12714 tmp, GEN_INT (ct),
12715 copy_rtx (tmp), 1, OPTAB_DIRECT);
12717 else if (diff == -1 && ct)
12720 * cmpl op0,op1
12721 * sbbl dest,dest
12722 * notl dest
12723 * [addl dest, cf]
12725 * Size 8 - 11.
12727 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12728 if (cf)
12729 tmp = expand_simple_binop (mode, PLUS,
12730 copy_rtx (tmp), GEN_INT (cf),
12731 copy_rtx (tmp), 1, OPTAB_DIRECT);
12733 else
12736 * cmpl op0,op1
12737 * sbbl dest,dest
12738 * [notl dest]
12739 * andl cf - ct, dest
12740 * [addl dest, ct]
12742 * Size 8 - 11.
12745 if (cf == 0)
12747 cf = ct;
12748 ct = 0;
12749 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12752 tmp = expand_simple_binop (mode, AND,
12753 copy_rtx (tmp),
12754 gen_int_mode (cf - ct, mode),
12755 copy_rtx (tmp), 1, OPTAB_DIRECT);
12756 if (ct)
12757 tmp = expand_simple_binop (mode, PLUS,
12758 copy_rtx (tmp), GEN_INT (ct),
12759 copy_rtx (tmp), 1, OPTAB_DIRECT);
12762 if (!rtx_equal_p (tmp, out))
12763 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12765 return 1; /* DONE */
12768 if (diff < 0)
12770 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12772 HOST_WIDE_INT tmp;
12773 tmp = ct, ct = cf, cf = tmp;
12774 diff = -diff;
12776 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12778 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12780 /* We may be reversing an unordered compare to a normal compare, which
12781 is not valid in general (we may convert a non-trapping condition
12782 to a trapping one); however, on i386 we currently emit all
12783 comparisons unordered. */
12784 compare_code = reverse_condition_maybe_unordered (compare_code);
12785 code = reverse_condition_maybe_unordered (code);
12787 else
12789 compare_code = reverse_condition (compare_code);
12790 code = reverse_condition (code);
12794 compare_code = UNKNOWN;
12795 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12796 && CONST_INT_P (ix86_compare_op1))
12798 if (ix86_compare_op1 == const0_rtx
12799 && (code == LT || code == GE))
12800 compare_code = code;
12801 else if (ix86_compare_op1 == constm1_rtx)
12803 if (code == LE)
12804 compare_code = LT;
12805 else if (code == GT)
12806 compare_code = GE;
12810 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12811 if (compare_code != UNKNOWN
12812 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12813 && (cf == -1 || ct == -1))
12815 /* If lea code below could be used, only optimize
12816 if it results in a 2 insn sequence. */
12818 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12819 || diff == 3 || diff == 5 || diff == 9)
12820 || (compare_code == LT && ct == -1)
12821 || (compare_code == GE && cf == -1))
12824 * notl op1 (if necessary)
12825 * sarl $31, op1
12826 * orl cf, op1
12828 if (ct != -1)
12830 cf = ct;
12831 ct = -1;
12832 code = reverse_condition (code);
12835 out = emit_store_flag (out, code, ix86_compare_op0,
12836 ix86_compare_op1, VOIDmode, 0, -1);
12838 out = expand_simple_binop (mode, IOR,
12839 out, GEN_INT (cf),
12840 out, 1, OPTAB_DIRECT);
12841 if (out != operands[0])
12842 emit_move_insn (operands[0], out);
12844 return 1; /* DONE */
12849 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12850 || diff == 3 || diff == 5 || diff == 9)
12851 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12852 && (mode != DImode
12853 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12856 * xorl dest,dest
12857 * cmpl op1,op2
12858 * setcc dest
12859 * lea cf(dest*(ct-cf)),dest
12861 * Size 14.
12863 * This also catches the degenerate setcc-only case.
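/* The lea forms dest*(ct-cf) + cf, which is ct when the setcc below
   produced 1 and cf when it produced 0.  The accepted diff values 1, 2,
   4, 8 and 3, 5, 9 are exactly the multipliers a single lea can provide
   (an index scaled by 1, 2, 4 or 8, optionally plus the same register as
   base), with cf folded in as the displacement.  */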
12866 rtx tmp;
12867 int nops;
12869 out = emit_store_flag (out, code, ix86_compare_op0,
12870 ix86_compare_op1, VOIDmode, 0, 1);
12872 nops = 0;
12873 /* On x86_64 the lea instruction operates on Pmode, so we need
12874 to get the arithmetic done in the proper mode to match. */
12875 if (diff == 1)
12876 tmp = copy_rtx (out);
12877 else
12879 rtx out1;
12880 out1 = copy_rtx (out);
12881 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12882 nops++;
12883 if (diff & 1)
12885 tmp = gen_rtx_PLUS (mode, tmp, out1);
12886 nops++;
12889 if (cf != 0)
12891 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12892 nops++;
12894 if (!rtx_equal_p (tmp, out))
12896 if (nops == 1)
12897 out = force_operand (tmp, copy_rtx (out));
12898 else
12899 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12901 if (!rtx_equal_p (out, operands[0]))
12902 emit_move_insn (operands[0], copy_rtx (out));
12904 return 1; /* DONE */
12908 * General case:               Jumpful:
12909 *   xorl dest,dest              cmpl op1, op2
12910 *   cmpl op1, op2               movl ct, dest
12911 *   setcc dest                  jcc 1f
12912 *   decl dest                   movl cf, dest
12913 *   andl (cf-ct),dest         1:
12914 *   addl ct,dest
12916 * Size 20.                    Size 14.
12918 * This is reasonably steep, but branch mispredict costs are
12919 * high on modern cpus, so consider failing only if optimizing
12920 * for space.
12923 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12924 && BRANCH_COST >= 2)
12926 if (cf == 0)
12928 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12930 cf = ct;
12931 ct = 0;
12933 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12935 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12937 /* We may be reversing an unordered compare to a normal compare,
12938 which is not valid in general (we may convert a non-trapping
12939 condition to a trapping one); however, on i386 we currently
12940 emit all comparisons unordered. */
12941 code = reverse_condition_maybe_unordered (code);
12943 else
12945 code = reverse_condition (code);
12946 if (compare_code != UNKNOWN)
12947 compare_code = reverse_condition (compare_code);
12951 if (compare_code != UNKNOWN)
12953 /* notl op1 (if needed)
12954 sarl $31, op1
12955 andl (cf-ct), op1
12956 addl ct, op1
12958 For x < 0 (resp. x <= -1) there will be no notl,
12959 so if possible swap the constants to get rid of the
12960 complement.
12961 True/false will be -1/0 while code below (store flag
12962 followed by decrement) is 0/-1, so the constants need
12963 to be exchanged once more. */
12965 if (compare_code == GE || !cf)
12967 code = reverse_condition (code);
12968 compare_code = LT;
12970 else
12972 HOST_WIDE_INT tmp = cf;
12973 cf = ct;
12974 ct = tmp;
12977 out = emit_store_flag (out, code, ix86_compare_op0,
12978 ix86_compare_op1, VOIDmode, 0, -1);
12980 else
12982 out = emit_store_flag (out, code, ix86_compare_op0,
12983 ix86_compare_op1, VOIDmode, 0, 1);
12985 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12986 copy_rtx (out), 1, OPTAB_DIRECT);
12989 out = expand_simple_binop (mode, AND, copy_rtx (out),
12990 gen_int_mode (cf - ct, mode),
12991 copy_rtx (out), 1, OPTAB_DIRECT);
12992 if (ct)
12993 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12994 copy_rtx (out), 1, OPTAB_DIRECT);
12995 if (!rtx_equal_p (out, operands[0]))
12996 emit_move_insn (operands[0], copy_rtx (out));
12998 return 1; /* DONE */
13002 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13004 /* Try a few things more with specific constants and a variable. */
13006 optab op;
13007 rtx var, orig_out, out, tmp;
13009 if (BRANCH_COST <= 2)
13010 return 0; /* FAIL */
13012 /* If one of the two operands is an interesting constant, load a
13013 constant with the above and mask it in with a logical operation. */
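/* For example, dest = c ? 0 : y recurses to compute tmp = c ? 0 : -1 and
   then masks with dest = tmp & y; a constant of -1 pairs with ior in the
   same way.  */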
13015 if (CONST_INT_P (operands[2]))
13017 var = operands[3];
13018 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13019 operands[3] = constm1_rtx, op = and_optab;
13020 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13021 operands[3] = const0_rtx, op = ior_optab;
13022 else
13023 return 0; /* FAIL */
13025 else if (CONST_INT_P (operands[3]))
13027 var = operands[2];
13028 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13029 operands[2] = constm1_rtx, op = and_optab;
13030 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
13031 operands[2] = const0_rtx, op = ior_optab;
13032 else
13033 return 0; /* FAIL */
13035 else
13036 return 0; /* FAIL */
13038 orig_out = operands[0];
13039 tmp = gen_reg_rtx (mode);
13040 operands[0] = tmp;
13042 /* Recurse to get the constant loaded. */
13043 if (ix86_expand_int_movcc (operands) == 0)
13044 return 0; /* FAIL */
13046 /* Mask in the interesting variable. */
13047 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13048 OPTAB_WIDEN);
13049 if (!rtx_equal_p (out, orig_out))
13050 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13052 return 1; /* DONE */
13056 * For comparison with above,
13058 * movl cf,dest
13059 * movl ct,tmp
13060 * cmpl op1,op2
13061 * cmovcc tmp,dest
13063 * Size 15.
13066 if (! nonimmediate_operand (operands[2], mode))
13067 operands[2] = force_reg (mode, operands[2]);
13068 if (! nonimmediate_operand (operands[3], mode))
13069 operands[3] = force_reg (mode, operands[3]);
13071 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13073 rtx tmp = gen_reg_rtx (mode);
13074 emit_move_insn (tmp, operands[3]);
13075 operands[3] = tmp;
13077 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13079 rtx tmp = gen_reg_rtx (mode);
13080 emit_move_insn (tmp, operands[2]);
13081 operands[2] = tmp;
13084 if (! register_operand (operands[2], VOIDmode)
13085 && (mode == QImode
13086 || ! register_operand (operands[3], VOIDmode)))
13087 operands[2] = force_reg (mode, operands[2]);
13089 if (mode == QImode
13090 && ! register_operand (operands[3], VOIDmode))
13091 operands[3] = force_reg (mode, operands[3]);
13093 emit_insn (compare_seq);
13094 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13095 gen_rtx_IF_THEN_ELSE (mode,
13096 compare_op, operands[2],
13097 operands[3])));
13098 if (bypass_test)
13099 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13100 gen_rtx_IF_THEN_ELSE (mode,
13101 bypass_test,
13102 copy_rtx (operands[3]),
13103 copy_rtx (operands[0]))));
13104 if (second_test)
13105 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13106 gen_rtx_IF_THEN_ELSE (mode,
13107 second_test,
13108 copy_rtx (operands[2]),
13109 copy_rtx (operands[0]))));
13111 return 1; /* DONE */
13114 /* Swap, force into registers, or otherwise massage the two operands
13115 to an sse comparison with a mask result. Thus we differ a bit from
13116 ix86_prepare_fp_compare_args which expects to produce a flags result.
13118 The DEST operand exists to help determine whether to commute commutative
13119 operators. The POP0/POP1 operands are updated in place. The new
13120 comparison code is returned, or UNKNOWN if not implementable. */
13122 static enum rtx_code
13123 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13124 rtx *pop0, rtx *pop1)
13126 rtx tmp;
13128 switch (code)
13130 case LTGT:
13131 case UNEQ:
13132 /* We have no LTGT as an operator. We could implement it with
13133 NE & ORDERED, but this requires an extra temporary. It's
13134 not clear that it's worth it. */
13135 return UNKNOWN;
13137 case LT:
13138 case LE:
13139 case UNGT:
13140 case UNGE:
13141 /* These are supported directly. */
13142 break;
13144 case EQ:
13145 case NE:
13146 case UNORDERED:
13147 case ORDERED:
13148 /* For commutative operators, try to canonicalize the destination
13149 operand to be first in the comparison - this helps reload to
13150 avoid extra moves. */
13151 if (!dest || !rtx_equal_p (dest, *pop1))
13152 break;
13153 /* FALLTHRU */
13155 case GE:
13156 case GT:
13157 case UNLE:
13158 case UNLT:
13159 /* These are not supported directly. Swap the comparison operands
13160 to transform into something that is supported. */
13161 tmp = *pop0;
13162 *pop0 = *pop1;
13163 *pop1 = tmp;
13164 code = swap_condition (code);
13165 break;
13167 default:
13168 gcc_unreachable ();
13171 return code;
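/* The SSE compare instructions (cmpss/cmpps and friends) provide only the
   predicates eq, lt, le, unord, neq, nlt, nle and ord, which is why LT,
   LE, UNGT (nle) and UNGE (nlt) are usable directly above while GE, GT,
   UNLE and UNLT must be handled by swapping the operands.  */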
13174 /* Detect conditional moves that exactly match min/max operational
13175 semantics. Note that this is IEEE safe, as long as we don't
13176 interchange the operands.
13178 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13179 and TRUE if the operation is successful and instructions are emitted. */
13181 static bool
13182 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13183 rtx cmp_op1, rtx if_true, rtx if_false)
13185 enum machine_mode mode;
13186 bool is_min;
13187 rtx tmp;
13189 if (code == LT)
13191 else if (code == UNGE)
13193 tmp = if_true;
13194 if_true = if_false;
13195 if_false = tmp;
13197 else
13198 return false;
13200 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13201 is_min = true;
13202 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13203 is_min = false;
13204 else
13205 return false;
13207 mode = GET_MODE (dest);
13209 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13210 but MODE may be a vector mode and thus not appropriate. */
13211 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13213 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13214 rtvec v;
13216 if_true = force_reg (mode, if_true);
13217 v = gen_rtvec (2, if_true, if_false);
13218 tmp = gen_rtx_UNSPEC (mode, v, u);
13220 else
13222 code = is_min ? SMIN : SMAX;
13223 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13226 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13227 return true;
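/* Operand order matters above: minss/maxss and their packed forms return
   their second source operand when either input is a NaN and when both
   inputs are zero (so the result for -0.0 vs. +0.0 depends on the order),
   hence the UNSPEC form, which keeps the operands in their original order
   whenever NaNs or signed zeros must be honored.  */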
13230 /* Expand an sse vector comparison. Return the register with the result. */
13232 static rtx
13233 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13234 rtx op_true, rtx op_false)
13236 enum machine_mode mode = GET_MODE (dest);
13237 rtx x;
13239 cmp_op0 = force_reg (mode, cmp_op0);
13240 if (!nonimmediate_operand (cmp_op1, mode))
13241 cmp_op1 = force_reg (mode, cmp_op1);
13243 if (optimize
13244 || reg_overlap_mentioned_p (dest, op_true)
13245 || reg_overlap_mentioned_p (dest, op_false))
13246 dest = gen_reg_rtx (mode);
13248 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13249 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13251 return dest;
13254 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13255 operations. This is used for both scalar and vector conditional moves. */
13257 static void
13258 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13260 enum machine_mode mode = GET_MODE (dest);
13261 rtx t2, t3, x;
13263 if (TARGET_SSE5)
13265 rtx pcmov = gen_rtx_SET (mode, dest,
13266 gen_rtx_IF_THEN_ELSE (mode, cmp,
13267 op_true,
13268 op_false));
13269 emit_insn (pcmov);
13271 else if (op_false == CONST0_RTX (mode))
13273 op_true = force_reg (mode, op_true);
13274 x = gen_rtx_AND (mode, cmp, op_true);
13275 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13277 else if (op_true == CONST0_RTX (mode))
13279 op_false = force_reg (mode, op_false);
13280 x = gen_rtx_NOT (mode, cmp);
13281 x = gen_rtx_AND (mode, x, op_false);
13282 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13284 else
13286 op_true = force_reg (mode, op_true);
13287 op_false = force_reg (mode, op_false);
13289 t2 = gen_reg_rtx (mode);
13290 if (optimize)
13291 t3 = gen_reg_rtx (mode);
13292 else
13293 t3 = dest;
13295 x = gen_rtx_AND (mode, op_true, cmp);
13296 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13298 x = gen_rtx_NOT (mode, cmp);
13299 x = gen_rtx_AND (mode, x, op_false);
13300 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13302 x = gen_rtx_IOR (mode, t3, t2);
13303 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
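/* The general case above computes the classic branchless blend

     dest = (cmp & op_true) | (~cmp & op_false)

   which relies on CMP being an all-ones/all-zeros mask per element, as
   produced by the SSE compare instructions.  */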
13307 /* Expand a floating-point conditional move. Return true if successful. */
13310 ix86_expand_fp_movcc (rtx operands[])
13312 enum machine_mode mode = GET_MODE (operands[0]);
13313 enum rtx_code code = GET_CODE (operands[1]);
13314 rtx tmp, compare_op, second_test, bypass_test;
13316 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13318 enum machine_mode cmode;
13320 /* Since we've no cmove for sse registers, don't force bad register
13321 allocation just to gain access to it. Deny movcc when the
13322 comparison mode doesn't match the move mode. */
13323 cmode = GET_MODE (ix86_compare_op0);
13324 if (cmode == VOIDmode)
13325 cmode = GET_MODE (ix86_compare_op1);
13326 if (cmode != mode)
13327 return 0;
13329 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13330 &ix86_compare_op0,
13331 &ix86_compare_op1);
13332 if (code == UNKNOWN)
13333 return 0;
13335 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13336 ix86_compare_op1, operands[2],
13337 operands[3]))
13338 return 1;
13340 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13341 ix86_compare_op1, operands[2], operands[3]);
13342 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13343 return 1;
13346 /* The floating point conditional move instructions don't directly
13347 support conditions resulting from a signed integer comparison. */
13349 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13351 /* The floating point conditional move instructions don't directly
13352 support signed integer comparisons. */
13354 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13356 gcc_assert (!second_test && !bypass_test);
13357 tmp = gen_reg_rtx (QImode);
13358 ix86_expand_setcc (code, tmp);
13359 code = NE;
13360 ix86_compare_op0 = tmp;
13361 ix86_compare_op1 = const0_rtx;
13362 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13364 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13366 tmp = gen_reg_rtx (mode);
13367 emit_move_insn (tmp, operands[3]);
13368 operands[3] = tmp;
13370 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13372 tmp = gen_reg_rtx (mode);
13373 emit_move_insn (tmp, operands[2]);
13374 operands[2] = tmp;
13377 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13378 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13379 operands[2], operands[3])));
13380 if (bypass_test)
13381 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13382 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13383 operands[3], operands[0])));
13384 if (second_test)
13385 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13386 gen_rtx_IF_THEN_ELSE (mode, second_test,
13387 operands[2], operands[0])));
13389 return 1;
13392 /* Expand a floating-point vector conditional move; a vcond operation
13393 rather than a movcc operation. */
13395 bool
13396 ix86_expand_fp_vcond (rtx operands[])
13398 enum rtx_code code = GET_CODE (operands[3]);
13399 rtx cmp;
13401 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13402 &operands[4], &operands[5]);
13403 if (code == UNKNOWN)
13404 return false;
13406 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13407 operands[5], operands[1], operands[2]))
13408 return true;
13410 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13411 operands[1], operands[2]);
13412 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13413 return true;
13416 /* Expand a signed/unsigned integral vector conditional move. */
13418 bool
13419 ix86_expand_int_vcond (rtx operands[])
13421 enum machine_mode mode = GET_MODE (operands[0]);
13422 enum rtx_code code = GET_CODE (operands[3]);
13423 bool negate = false;
13424 rtx x, cop0, cop1;
13426 cop0 = operands[4];
13427 cop1 = operands[5];
13429 /* Canonicalize the comparison to EQ, GT, GTU. */
13430 switch (code)
13432 case EQ:
13433 case GT:
13434 case GTU:
13435 break;
13437 case NE:
13438 case LE:
13439 case LEU:
13440 code = reverse_condition (code);
13441 negate = true;
13442 break;
13444 case GE:
13445 case GEU:
13446 code = reverse_condition (code);
13447 negate = true;
13448 /* FALLTHRU */
13450 case LT:
13451 case LTU:
13452 code = swap_condition (code);
13453 x = cop0, cop0 = cop1, cop1 = x;
13454 break;
13456 default:
13457 gcc_unreachable ();
13460 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13461 if (mode == V2DImode)
13463 switch (code)
13465 case EQ:
13466 /* SSE4.1 supports EQ. */
13467 if (!TARGET_SSE4_1)
13468 return false;
13469 break;
13471 case GT:
13472 case GTU:
13473 /* SSE4.2 supports GT/GTU. */
13474 if (!TARGET_SSE4_2)
13475 return false;
13476 break;
13478 default:
13479 gcc_unreachable ();
13483 /* Unsigned parallel compare is not supported by the hardware. Play some
13484 tricks to turn this into a signed comparison against 0. */
13485 if (code == GTU)
13487 cop0 = force_reg (mode, cop0);
13489 switch (mode)
13491 case V4SImode:
13492 case V2DImode:
13494 rtx t1, t2, mask;
13496 /* Perform a parallel modulo subtraction. */
13497 t1 = gen_reg_rtx (mode);
13498 emit_insn ((mode == V4SImode
13499 ? gen_subv4si3
13500 : gen_subv2di3) (t1, cop0, cop1));
13502 /* Extract the original sign bit of op0. */
13503 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13504 true, false);
13505 t2 = gen_reg_rtx (mode);
13506 emit_insn ((mode == V4SImode
13507 ? gen_andv4si3
13508 : gen_andv2di3) (t2, cop0, mask));
13510 /* XOR it back into the result of the subtraction. This results
13511 in the sign bit set iff we saw unsigned underflow. */
13512 x = gen_reg_rtx (mode);
13513 emit_insn ((mode == V4SImode
13514 ? gen_xorv4si3
13515 : gen_xorv2di3) (x, t1, t2));
13517 code = GT;
13519 break;
13521 case V16QImode:
13522 case V8HImode:
13523 /* Perform a parallel unsigned saturating subtraction. */
13524 x = gen_reg_rtx (mode);
13525 emit_insn (gen_rtx_SET (VOIDmode, x,
13526 gen_rtx_US_MINUS (mode, cop0, cop1)));
13528 code = EQ;
13529 negate = !negate;
13530 break;
13532 default:
13533 gcc_unreachable ();
13536 cop0 = x;
13537 cop1 = CONST0_RTX (mode);
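/* For the byte and word element cases above, a >u b is equivalent to
   (a -sat b) != 0, where -sat is the unsigned saturating subtraction done
   by psubusb/psubusw; testing EQ against zero with NEGATE flipped
   implements exactly that.  */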
13540 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13541 operands[1+negate], operands[2-negate]);
13543 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13544 operands[2-negate]);
13545 return true;
13548 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13549 true if we should do zero extension, else sign extension. HIGH_P is
13550 true if we want the N/2 high elements, else the low elements. */
13552 void
13553 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13555 enum machine_mode imode = GET_MODE (operands[1]);
13556 rtx (*unpack)(rtx, rtx, rtx);
13557 rtx se, dest;
13559 switch (imode)
13561 case V16QImode:
13562 if (high_p)
13563 unpack = gen_vec_interleave_highv16qi;
13564 else
13565 unpack = gen_vec_interleave_lowv16qi;
13566 break;
13567 case V8HImode:
13568 if (high_p)
13569 unpack = gen_vec_interleave_highv8hi;
13570 else
13571 unpack = gen_vec_interleave_lowv8hi;
13572 break;
13573 case V4SImode:
13574 if (high_p)
13575 unpack = gen_vec_interleave_highv4si;
13576 else
13577 unpack = gen_vec_interleave_lowv4si;
13578 break;
13579 default:
13580 gcc_unreachable ();
13583 dest = gen_lowpart (imode, operands[0]);
13585 if (unsigned_p)
13586 se = force_reg (imode, CONST0_RTX (imode));
13587 else
13588 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13589 operands[1], pc_rtx, pc_rtx);
13591 emit_insn (unpack (dest, operands[1], se));
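/* For sign extension, SE is built as the mask (gt 0 operands[1]), i.e.
   all ones in every lane holding a negative element, so the interleave
   pairs each source element with its sign bits; for zero extension the
   interleave partner is simply a zero vector.  */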
13594 /* This function performs the same task as ix86_expand_sse_unpack,
13595 but with SSE4.1 instructions. */
13597 void
13598 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13600 enum machine_mode imode = GET_MODE (operands[1]);
13601 rtx (*unpack)(rtx, rtx);
13602 rtx src, dest;
13604 switch (imode)
13606 case V16QImode:
13607 if (unsigned_p)
13608 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13609 else
13610 unpack = gen_sse4_1_extendv8qiv8hi2;
13611 break;
13612 case V8HImode:
13613 if (unsigned_p)
13614 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13615 else
13616 unpack = gen_sse4_1_extendv4hiv4si2;
13617 break;
13618 case V4SImode:
13619 if (unsigned_p)
13620 unpack = gen_sse4_1_zero_extendv2siv2di2;
13621 else
13622 unpack = gen_sse4_1_extendv2siv2di2;
13623 break;
13624 default:
13625 gcc_unreachable ();
13628 dest = operands[0];
13629 if (high_p)
13631 /* Shift higher 8 bytes to lower 8 bytes. */
13632 src = gen_reg_rtx (imode);
13633 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13634 gen_lowpart (TImode, operands[1]),
13635 GEN_INT (64)));
13637 else
13638 src = operands[1];
13640 emit_insn (unpack (dest, src));
13643 /* This function performs the same task as ix86_expand_sse_unpack,
13644 but with amdfam15 instructions. */
13646 #define PPERM_SRC 0x00 /* copy source */
13647 #define PPERM_INVERT 0x20 /* invert source */
13648 #define PPERM_REVERSE 0x40 /* bit reverse source */
13649 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13650 #define PPERM_ZERO 0x80 /* all 0's */
13651 #define PPERM_ONES 0xa0 /* all 1's */
13652 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13653 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13655 #define PPERM_SRC1 0x00 /* use first source byte */
13656 #define PPERM_SRC2 0x10 /* use second source byte */
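/* As an illustration of how these selectors combine in the expander below:
   for the unsigned, low-half V8HImode -> V4SImode unpack (HIGH_P false,
   UNSIGNED_P true, so h == 0 and sign_extend == PPERM_ZERO), the control
   bytes work out to

     0x10 0x11 0x80 0x80   0x12 0x13 0x80 0x80
     0x14 0x15 0x80 0x80   0x16 0x17 0x80 0x80

   i.e. the two bytes of each 16-bit element of the second source are copied
   and followed by two zero bytes, zero-extending the element to 32 bits.  */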
13658 void
13659 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13661 enum machine_mode imode = GET_MODE (operands[1]);
13662 int pperm_bytes[16];
13663 int i;
13664 int h = (high_p) ? 8 : 0;
13665 int h2;
13666 int sign_extend;
13667 rtvec v = rtvec_alloc (16);
13668 rtvec vs;
13669 rtx x, p;
13670 rtx op0 = operands[0], op1 = operands[1];
13672 switch (imode)
13674 case V16QImode:
13675 vs = rtvec_alloc (8);
13676 h2 = (high_p) ? 8 : 0;
13677 for (i = 0; i < 8; i++)
13679 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13680 pperm_bytes[2*i+1] = ((unsigned_p)
13681 ? PPERM_ZERO
13682 : PPERM_SIGN | PPERM_SRC2 | i | h);
13685 for (i = 0; i < 16; i++)
13686 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13688 for (i = 0; i < 8; i++)
13689 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13691 p = gen_rtx_PARALLEL (VOIDmode, vs);
13692 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13693 if (unsigned_p)
13694 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13695 else
13696 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13697 break;
13699 case V8HImode:
13700 vs = rtvec_alloc (4);
13701 h2 = (high_p) ? 4 : 0;
13702 for (i = 0; i < 4; i++)
13704 sign_extend = ((unsigned_p)
13705 ? PPERM_ZERO
13706 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13707 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13708 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13709 pperm_bytes[4*i+2] = sign_extend;
13710 pperm_bytes[4*i+3] = sign_extend;
13713 for (i = 0; i < 16; i++)
13714 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13716 for (i = 0; i < 4; i++)
13717 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13719 p = gen_rtx_PARALLEL (VOIDmode, vs);
13720 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13721 if (unsigned_p)
13722 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13723 else
13724 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13725 break;
13727 case V4SImode:
13728 vs = rtvec_alloc (2);
13729 h2 = (high_p) ? 2 : 0;
13730 for (i = 0; i < 2; i++)
13732 sign_extend = ((unsigned_p)
13733 ? PPERM_ZERO
13734 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13735 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13736 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13737 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13738 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13739 pperm_bytes[8*i+4] = sign_extend;
13740 pperm_bytes[8*i+5] = sign_extend;
13741 pperm_bytes[8*i+6] = sign_extend;
13742 pperm_bytes[8*i+7] = sign_extend;
13745 for (i = 0; i < 16; i++)
13746 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13748 for (i = 0; i < 2; i++)
13749 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13751 p = gen_rtx_PARALLEL (VOIDmode, vs);
13752 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13753 if (unsigned_p)
13754 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13755 else
13756 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13757 break;
13759 default:
13760 gcc_unreachable ();
13763 return;
13766 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13767 next narrower integer vector type */
13768 void
13769 ix86_expand_sse5_pack (rtx operands[3])
13771 enum machine_mode imode = GET_MODE (operands[0]);
13772 int pperm_bytes[16];
13773 int i;
13774 rtvec v = rtvec_alloc (16);
13775 rtx x;
13776 rtx op0 = operands[0];
13777 rtx op1 = operands[1];
13778 rtx op2 = operands[2];
13780 switch (imode)
13782 case V16QImode:
13783 for (i = 0; i < 8; i++)
13785 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13786 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13789 for (i = 0; i < 16; i++)
13790 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13792 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13793 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13794 break;
13796 case V8HImode:
13797 for (i = 0; i < 4; i++)
13799 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13800 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13801 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13802 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13805 for (i = 0; i < 16; i++)
13806 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13808 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13809 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13810 break;
13812 case V4SImode:
13813 for (i = 0; i < 2; i++)
13815 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13816 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13817 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13818 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13819 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13820 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13821 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13822 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13825 for (i = 0; i < 16; i++)
13826 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13828 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13829 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13830 break;
13832 default:
13833 gcc_unreachable ();
13836 return;
13839 /* Expand conditional increment or decrement using adc/sbb instructions.
13840 The default case using setcc followed by the conditional move can be
13841 done by generic code. */
13842 int
13843 ix86_expand_int_addcc (rtx operands[])
13845 enum rtx_code code = GET_CODE (operands[1]);
13846 rtx compare_op;
13847 rtx val = const0_rtx;
13848 bool fpcmp = false;
13849 enum machine_mode mode = GET_MODE (operands[0]);
13851 if (operands[3] != const1_rtx
13852 && operands[3] != constm1_rtx)
13853 return 0;
13854 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13855 ix86_compare_op1, &compare_op))
13856 return 0;
13857 code = GET_CODE (compare_op);
13859 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13860 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13862 fpcmp = true;
13863 code = ix86_fp_compare_code_to_integer (code);
13866 if (code != LTU)
13868 val = constm1_rtx;
13869 if (fpcmp)
13870 PUT_CODE (compare_op,
13871 reverse_condition_maybe_unordered
13872 (GET_CODE (compare_op)));
13873 else
13874 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13876 PUT_MODE (compare_op, mode);
13878 /* Construct either adc or sbb insn. */
13879 if ((code == LTU) == (operands[3] == constm1_rtx))
13881 switch (GET_MODE (operands[0]))
13883 case QImode:
13884 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13885 break;
13886 case HImode:
13887 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13888 break;
13889 case SImode:
13890 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13891 break;
13892 case DImode:
13893 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13894 break;
13895 default:
13896 gcc_unreachable ();
13899 else
13901 switch (GET_MODE (operands[0]))
13903 case QImode:
13904 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13905 break;
13906 case HImode:
13907 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13908 break;
13909 case SImode:
13910 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13911 break;
13912 case DImode:
13913 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13914 break;
13915 default:
13916 gcc_unreachable ();
13919 return 1; /* DONE */
13923 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13924 works for floating point parameters and non-offsettable memories.
13925 For pushes, it returns just stack offsets; the values will be saved
13926 in the right order. At most three parts are generated. */
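/* For example, on a 32-bit target a DFmode operand is returned as two
   SImode parts and an XFmode operand as three, while on a 64-bit target
   XFmode and TFmode operands are each returned as two parts.  */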
13928 static int
13929 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13931 int size;
13933 if (!TARGET_64BIT)
13934 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13935 else
13936 size = (GET_MODE_SIZE (mode) + 4) / 8;
13938 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13939 gcc_assert (size >= 2 && size <= 3);
13941 /* Optimize constant pool reference to immediates. This is used by fp
13942 moves, that force all constants to memory to allow combining. */
13943 if (MEM_P (operand) && MEM_READONLY_P (operand))
13945 rtx tmp = maybe_get_pool_constant (operand);
13946 if (tmp)
13947 operand = tmp;
13950 if (MEM_P (operand) && !offsettable_memref_p (operand))
13952 /* The only non-offsettable memories we handle are pushes. */
13953 int ok = push_operand (operand, VOIDmode);
13955 gcc_assert (ok);
13957 operand = copy_rtx (operand);
13958 PUT_MODE (operand, Pmode);
13959 parts[0] = parts[1] = parts[2] = operand;
13960 return size;
13963 if (GET_CODE (operand) == CONST_VECTOR)
13965 enum machine_mode imode = int_mode_for_mode (mode);
13966 /* Caution: if we looked through a constant pool memory above,
13967 the operand may actually have a different mode now. That's
13968 ok, since we want to pun this all the way back to an integer. */
13969 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13970 gcc_assert (operand != NULL);
13971 mode = imode;
13974 if (!TARGET_64BIT)
13976 if (mode == DImode)
13977 split_di (&operand, 1, &parts[0], &parts[1]);
13978 else
13980 if (REG_P (operand))
13982 gcc_assert (reload_completed);
13983 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13984 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13985 if (size == 3)
13986 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13988 else if (offsettable_memref_p (operand))
13990 operand = adjust_address (operand, SImode, 0);
13991 parts[0] = operand;
13992 parts[1] = adjust_address (operand, SImode, 4);
13993 if (size == 3)
13994 parts[2] = adjust_address (operand, SImode, 8);
13996 else if (GET_CODE (operand) == CONST_DOUBLE)
13998 REAL_VALUE_TYPE r;
13999 long l[4];
14001 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14002 switch (mode)
14004 case XFmode:
14005 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14006 parts[2] = gen_int_mode (l[2], SImode);
14007 break;
14008 case DFmode:
14009 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14010 break;
14011 default:
14012 gcc_unreachable ();
14014 parts[1] = gen_int_mode (l[1], SImode);
14015 parts[0] = gen_int_mode (l[0], SImode);
14017 else
14018 gcc_unreachable ();
14021 else
14023 if (mode == TImode)
14024 split_ti (&operand, 1, &parts[0], &parts[1]);
14025 if (mode == XFmode || mode == TFmode)
14027 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14028 if (REG_P (operand))
14030 gcc_assert (reload_completed);
14031 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14032 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14034 else if (offsettable_memref_p (operand))
14036 operand = adjust_address (operand, DImode, 0);
14037 parts[0] = operand;
14038 parts[1] = adjust_address (operand, upper_mode, 8);
14040 else if (GET_CODE (operand) == CONST_DOUBLE)
14042 REAL_VALUE_TYPE r;
14043 long l[4];
14045 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14046 real_to_target (l, &r, mode);
14048 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14049 if (HOST_BITS_PER_WIDE_INT >= 64)
14050 parts[0]
14051 = gen_int_mode
14052 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14053 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14054 DImode);
14055 else
14056 parts[0] = immed_double_const (l[0], l[1], DImode);
14058 if (upper_mode == SImode)
14059 parts[1] = gen_int_mode (l[2], SImode);
14060 else if (HOST_BITS_PER_WIDE_INT >= 64)
14061 parts[1]
14062 = gen_int_mode
14063 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14064 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14065 DImode);
14066 else
14067 parts[1] = immed_double_const (l[2], l[3], DImode);
14069 else
14070 gcc_unreachable ();
14074 return size;
14077 /* Emit insns to perform a move or push of DI, DF, and XF values.
14078 Return false when normal moves are needed; true when all required
14079 insns have been emitted. Operands 2-4 contain the input values
14080 in the correct order; operands 5-7 contain the output values. */
14082 void
14083 ix86_split_long_move (rtx operands[])
14085 rtx part[2][3];
14086 int nparts;
14087 int push = 0;
14088 int collisions = 0;
14089 enum machine_mode mode = GET_MODE (operands[0]);
14091 /* The DFmode expanders may ask us to move a double.
14092 For a 64bit target this is a single move. By hiding the fact
14093 here we simplify the i386.md splitters. */
14094 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14096 /* Optimize constant pool reference to immediates. This is used by
14097 fp moves, that force all constants to memory to allow combining. */
14099 if (MEM_P (operands[1])
14100 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14101 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14102 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14103 if (push_operand (operands[0], VOIDmode))
14105 operands[0] = copy_rtx (operands[0]);
14106 PUT_MODE (operands[0], Pmode);
14108 else
14109 operands[0] = gen_lowpart (DImode, operands[0]);
14110 operands[1] = gen_lowpart (DImode, operands[1]);
14111 emit_move_insn (operands[0], operands[1]);
14112 return;
14115 /* The only non-offsettable memory we handle is push. */
14116 if (push_operand (operands[0], VOIDmode))
14117 push = 1;
14118 else
14119 gcc_assert (!MEM_P (operands[0])
14120 || offsettable_memref_p (operands[0]));
14122 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14123 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14125 /* When emitting push, take care for source operands on the stack. */
14126 if (push && MEM_P (operands[1])
14127 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14129 if (nparts == 3)
14130 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
14131 XEXP (part[1][2], 0));
14132 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
14133 XEXP (part[1][1], 0));
14136 /* We need to do the copy in the right order in case an address register
14137 of the source overlaps the destination. */
14138 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14140 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14141 collisions++;
14142 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14143 collisions++;
14144 if (nparts == 3
14145 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14146 collisions++;
14148 /* Collision in the middle part can be handled by reordering. */
14149 if (collisions == 1 && nparts == 3
14150 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14152 rtx tmp;
14153 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14154 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14157 /* If there are more collisions, we can't handle it by reordering.
14158 Do an lea to the last part and use only one colliding move. */
14159 else if (collisions > 1)
14161 rtx base;
14163 collisions = 1;
14165 base = part[0][nparts - 1];
14167 /* Handle the case when the last part isn't valid for lea.
14168 Happens in 64-bit mode storing the 12-byte XFmode. */
14169 if (GET_MODE (base) != Pmode)
14170 base = gen_rtx_REG (Pmode, REGNO (base));
14172 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14173 part[1][0] = replace_equiv_address (part[1][0], base);
14174 part[1][1] = replace_equiv_address (part[1][1],
14175 plus_constant (base, UNITS_PER_WORD));
14176 if (nparts == 3)
14177 part[1][2] = replace_equiv_address (part[1][2],
14178 plus_constant (base, 8));
14182 if (push)
14184 if (!TARGET_64BIT)
14186 if (nparts == 3)
14188 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14189 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14190 emit_move_insn (part[0][2], part[1][2]);
14193 else
14195 /* In 64bit mode we don't have a 32bit push available. In case this is
14196 a register, that is OK - we will just use the larger counterpart. We also
14197 retype memory - this comes from an attempt to avoid the REX prefix on
14198 moving the second half of a TFmode value. */
14199 if (GET_MODE (part[1][1]) == SImode)
14201 switch (GET_CODE (part[1][1]))
14203 case MEM:
14204 part[1][1] = adjust_address (part[1][1], DImode, 0);
14205 break;
14207 case REG:
14208 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14209 break;
14211 default:
14212 gcc_unreachable ();
14215 if (GET_MODE (part[1][0]) == SImode)
14216 part[1][0] = part[1][1];
14219 emit_move_insn (part[0][1], part[1][1]);
14220 emit_move_insn (part[0][0], part[1][0]);
14221 return;
14224 /* Choose correct order to not overwrite the source before it is copied. */
14225 if ((REG_P (part[0][0])
14226 && REG_P (part[1][1])
14227 && (REGNO (part[0][0]) == REGNO (part[1][1])
14228 || (nparts == 3
14229 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14230 || (collisions > 0
14231 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14233 if (nparts == 3)
14235 operands[2] = part[0][2];
14236 operands[3] = part[0][1];
14237 operands[4] = part[0][0];
14238 operands[5] = part[1][2];
14239 operands[6] = part[1][1];
14240 operands[7] = part[1][0];
14242 else
14244 operands[2] = part[0][1];
14245 operands[3] = part[0][0];
14246 operands[5] = part[1][1];
14247 operands[6] = part[1][0];
14250 else
14252 if (nparts == 3)
14254 operands[2] = part[0][0];
14255 operands[3] = part[0][1];
14256 operands[4] = part[0][2];
14257 operands[5] = part[1][0];
14258 operands[6] = part[1][1];
14259 operands[7] = part[1][2];
14261 else
14263 operands[2] = part[0][0];
14264 operands[3] = part[0][1];
14265 operands[5] = part[1][0];
14266 operands[6] = part[1][1];
14270 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14271 if (optimize_size)
14273 if (CONST_INT_P (operands[5])
14274 && operands[5] != const0_rtx
14275 && REG_P (operands[2]))
14277 if (CONST_INT_P (operands[6])
14278 && INTVAL (operands[6]) == INTVAL (operands[5]))
14279 operands[6] = operands[2];
14281 if (nparts == 3
14282 && CONST_INT_P (operands[7])
14283 && INTVAL (operands[7]) == INTVAL (operands[5]))
14284 operands[7] = operands[2];
14287 if (nparts == 3
14288 && CONST_INT_P (operands[6])
14289 && operands[6] != const0_rtx
14290 && REG_P (operands[3])
14291 && CONST_INT_P (operands[7])
14292 && INTVAL (operands[7]) == INTVAL (operands[6]))
14293 operands[7] = operands[3];
14296 emit_move_insn (operands[2], operands[5]);
14297 emit_move_insn (operands[3], operands[6]);
14298 if (nparts == 3)
14299 emit_move_insn (operands[4], operands[7]);
14301 return;
14304 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14305 left shift by a constant, either using a single shift or
14306 a sequence of add instructions. */
14308 static void
14309 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14311 if (count == 1)
14313 emit_insn ((mode == DImode
14314 ? gen_addsi3
14315 : gen_adddi3) (operand, operand, operand));
14317 else if (!optimize_size
14318 && count * ix86_cost->add <= ix86_cost->shift_const)
14320 int i;
14321 for (i=0; i<count; i++)
14323 emit_insn ((mode == DImode
14324 ? gen_addsi3
14325 : gen_adddi3) (operand, operand, operand));
14328 else
14329 emit_insn ((mode == DImode
14330 ? gen_ashlsi3
14331 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14334 void
14335 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14337 rtx low[2], high[2];
14338 int count;
14339 const int single_width = mode == DImode ? 32 : 64;
14341 if (CONST_INT_P (operands[2]))
14343 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14344 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14346 if (count >= single_width)
14348 emit_move_insn (high[0], low[1]);
14349 emit_move_insn (low[0], const0_rtx);
14351 if (count > single_width)
14352 ix86_expand_ashl_const (high[0], count - single_width, mode);
14354 else
14356 if (!rtx_equal_p (operands[0], operands[1]))
14357 emit_move_insn (operands[0], operands[1]);
14358 emit_insn ((mode == DImode
14359 ? gen_x86_shld_1
14360 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14361 ix86_expand_ashl_const (low[0], count, mode);
14363 return;
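/* For example, with a constant count a DImode shift left by 40 becomes
   "high = low; low = 0; high <<= 8", while a shift left by 3 becomes a
   shld of low into high by 3 followed by "low <<= 3" (the latter possibly
   emitted as a few add insns when that is cheaper than a constant shift).  */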
14366 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14368 if (operands[1] == const1_rtx)
14370 /* Assuming we've chosen QImode-capable registers, 1 << N
14371 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14372 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14374 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14376 ix86_expand_clear (low[0]);
14377 ix86_expand_clear (high[0]);
14378 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14380 d = gen_lowpart (QImode, low[0]);
14381 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14382 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14383 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14385 d = gen_lowpart (QImode, high[0]);
14386 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14387 s = gen_rtx_NE (QImode, flags, const0_rtx);
14388 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14391 /* Otherwise, we can get the same results by manually performing
14392 a bit extract operation on bit 5/6, and then performing the two
14393 shifts. The two methods of getting 0/1 into low/high are exactly
14394 the same size. Avoiding the shift in the bit extract case helps
14395 pentium4 a bit; no one else seems to care much either way. */
14396 else
14398 rtx x;
14400 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14401 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14402 else
14403 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14404 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14406 emit_insn ((mode == DImode
14407 ? gen_lshrsi3
14408 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14409 emit_insn ((mode == DImode
14410 ? gen_andsi3
14411 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14412 emit_move_insn (low[0], high[0]);
14413 emit_insn ((mode == DImode
14414 ? gen_xorsi3
14415 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14418 emit_insn ((mode == DImode
14419 ? gen_ashlsi3
14420 : gen_ashldi3) (low[0], low[0], operands[2]));
14421 emit_insn ((mode == DImode
14422 ? gen_ashlsi3
14423 : gen_ashldi3) (high[0], high[0], operands[2]));
14424 return;
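/* As a concrete example, for a DImode "1 << N" with N = 37 the code above
   sets low = 0 and high = 1 (bit 5 of N is set), and the final shifts use
   only N & 31 = 5 in hardware, so high becomes 1 << 5 and the result is
   2^37 as required.  */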
14427 if (operands[1] == constm1_rtx)
14429 /* For -1 << N, we can avoid the shld instruction, because we
14430 know that we're shifting 0...31/63 ones into a -1. */
14431 emit_move_insn (low[0], constm1_rtx);
14432 if (optimize_size)
14433 emit_move_insn (high[0], low[0]);
14434 else
14435 emit_move_insn (high[0], constm1_rtx);
14437 else
14439 if (!rtx_equal_p (operands[0], operands[1]))
14440 emit_move_insn (operands[0], operands[1]);
14442 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14443 emit_insn ((mode == DImode
14444 ? gen_x86_shld_1
14445 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14448 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14450 if (TARGET_CMOVE && scratch)
14452 ix86_expand_clear (scratch);
14453 emit_insn ((mode == DImode
14454 ? gen_x86_shift_adj_1
14455 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14457 else
14458 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
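/* In C terms, a sketch of what the variable-count path above computes,
   assuming 32-bit SImode halves and hardware shifts that mask the count
   to 5 bits:

     c = count & 31;
     high = (high << c) | (c != 0 ? low >> (32 - c) : 0);   (the shld)
     low <<= c;
     if (count & 32)                                         (the shift_adj)
       { high = low; low = 0; }
 */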
14461 void
14462 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14464 rtx low[2], high[2];
14465 int count;
14466 const int single_width = mode == DImode ? 32 : 64;
14468 if (CONST_INT_P (operands[2]))
14470 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14471 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14473 if (count == single_width * 2 - 1)
14475 emit_move_insn (high[0], high[1]);
14476 emit_insn ((mode == DImode
14477 ? gen_ashrsi3
14478 : gen_ashrdi3) (high[0], high[0],
14479 GEN_INT (single_width - 1)));
14480 emit_move_insn (low[0], high[0]);
14483 else if (count >= single_width)
14485 emit_move_insn (low[0], high[1]);
14486 emit_move_insn (high[0], low[0]);
14487 emit_insn ((mode == DImode
14488 ? gen_ashrsi3
14489 : gen_ashrdi3) (high[0], high[0],
14490 GEN_INT (single_width - 1)));
14491 if (count > single_width)
14492 emit_insn ((mode == DImode
14493 ? gen_ashrsi3
14494 : gen_ashrdi3) (low[0], low[0],
14495 GEN_INT (count - single_width)));
14497 else
14499 if (!rtx_equal_p (operands[0], operands[1]))
14500 emit_move_insn (operands[0], operands[1]);
14501 emit_insn ((mode == DImode
14502 ? gen_x86_shrd_1
14503 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14504 emit_insn ((mode == DImode
14505 ? gen_ashrsi3
14506 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14509 else
14511 if (!rtx_equal_p (operands[0], operands[1]))
14512 emit_move_insn (operands[0], operands[1]);
14514 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14516 emit_insn ((mode == DImode
14517 ? gen_x86_shrd_1
14518 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14519 emit_insn ((mode == DImode
14520 ? gen_ashrsi3
14521 : gen_ashrdi3) (high[0], high[0], operands[2]));
14523 if (TARGET_CMOVE && scratch)
14525 emit_move_insn (scratch, high[0]);
14526 emit_insn ((mode == DImode
14527 ? gen_ashrsi3
14528 : gen_ashrdi3) (scratch, scratch,
14529 GEN_INT (single_width - 1)));
14530 emit_insn ((mode == DImode
14531 ? gen_x86_shift_adj_1
14532 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14533 scratch));
14535 else
14536 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14540 void
14541 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14543 rtx low[2], high[2];
14544 int count;
14545 const int single_width = mode == DImode ? 32 : 64;
14547 if (CONST_INT_P (operands[2]))
14549 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14550 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14552 if (count >= single_width)
14554 emit_move_insn (low[0], high[1]);
14555 ix86_expand_clear (high[0]);
14557 if (count > single_width)
14558 emit_insn ((mode == DImode
14559 ? gen_lshrsi3
14560 : gen_lshrdi3) (low[0], low[0],
14561 GEN_INT (count - single_width)));
14563 else
14565 if (!rtx_equal_p (operands[0], operands[1]))
14566 emit_move_insn (operands[0], operands[1]);
14567 emit_insn ((mode == DImode
14568 ? gen_x86_shrd_1
14569 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14570 emit_insn ((mode == DImode
14571 ? gen_lshrsi3
14572 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14575 else
14577 if (!rtx_equal_p (operands[0], operands[1]))
14578 emit_move_insn (operands[0], operands[1]);
14580 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14582 emit_insn ((mode == DImode
14583 ? gen_x86_shrd_1
14584 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14585 emit_insn ((mode == DImode
14586 ? gen_lshrsi3
14587 : gen_lshrdi3) (high[0], high[0], operands[2]));
14589 /* Heh. By reversing the arguments, we can reuse this pattern. */
14590 if (TARGET_CMOVE && scratch)
14592 ix86_expand_clear (scratch);
14593 emit_insn ((mode == DImode
14594 ? gen_x86_shift_adj_1
14595 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14596 scratch));
14598 else
14599 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14603 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
14604 static void
14605 predict_jump (int prob)
14607 rtx insn = get_last_insn ();
14608 gcc_assert (JUMP_P (insn));
14609 REG_NOTES (insn)
14610 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14611 GEN_INT (prob),
14612 REG_NOTES (insn));
14615 /* Helper function for the string operations below. Test VARIABLE whether
14616 it is aligned to VALUE bytes. If so, jump to the label. */
14617 static rtx
14618 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14620 rtx label = gen_label_rtx ();
14621 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14622 if (GET_MODE (variable) == DImode)
14623 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14624 else
14625 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14626 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14627 1, label);
14628 if (epilogue)
14629 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14630 else
14631 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14632 return label;
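/* Callers typically use this as, roughly:

     label = ix86_expand_aligntest (destptr, 2, false);
     ... emit the 2-byte fixup ...
     emit_label (label);
     LABEL_NUSES (label) = 1;

   so the fixup is skipped whenever the tested bit of the variable is
   already clear.  */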
14635 /* Adjust COUNTREG by the VALUE. */
14636 static void
14637 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14639 if (GET_MODE (countreg) == DImode)
14640 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14641 else
14642 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14645 /* Zero extend possibly SImode EXP to Pmode register. */
14646 rtx
14647 ix86_zero_extend_to_Pmode (rtx exp)
14649 rtx r;
14650 if (GET_MODE (exp) == VOIDmode)
14651 return force_reg (Pmode, exp);
14652 if (GET_MODE (exp) == Pmode)
14653 return copy_to_mode_reg (Pmode, exp);
14654 r = gen_reg_rtx (Pmode);
14655 emit_insn (gen_zero_extendsidi2 (r, exp));
14656 return r;
14659 /* Divide COUNTREG by SCALE. */
14660 static rtx
14661 scale_counter (rtx countreg, int scale)
14663 rtx sc;
14664 rtx piece_size_mask;
14666 if (scale == 1)
14667 return countreg;
14668 if (CONST_INT_P (countreg))
14669 return GEN_INT (INTVAL (countreg) / scale);
14670 gcc_assert (REG_P (countreg));
14672 piece_size_mask = GEN_INT (scale - 1);
14673 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14674 GEN_INT (exact_log2 (scale)),
14675 NULL, 1, OPTAB_DIRECT);
14676 return sc;
14679 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14680 DImode for constant loop counts. */
14682 static enum machine_mode
14683 counter_mode (rtx count_exp)
14685 if (GET_MODE (count_exp) != VOIDmode)
14686 return GET_MODE (count_exp);
14687 if (GET_CODE (count_exp) != CONST_INT)
14688 return Pmode;
14689 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14690 return DImode;
14691 return SImode;
14694 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
14695 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
14696 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
14697 equivalent loop to set memory by VALUE (supposed to be in MODE).
14699 The size is rounded down to a whole number of chunks moved at once.
14700 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
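/* Roughly, the emitted code has the shape

     size = count & ~(chunk * unroll - 1);
     iter = 0;
     if (size == 0) goto out;     (emitted only when chunk * unroll == 1)
   top:
     copy or set chunk * unroll bytes at dest + iter (and src + iter);
     iter += chunk * unroll;
     if (iter < size) goto top;
     dest += iter;  src += iter;
   out:
 */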
14703 static void
14704 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14705 rtx destptr, rtx srcptr, rtx value,
14706 rtx count, enum machine_mode mode, int unroll,
14707 int expected_size)
14709 rtx out_label, top_label, iter, tmp;
14710 enum machine_mode iter_mode = counter_mode (count);
14711 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14712 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14713 rtx size;
14714 rtx x_addr;
14715 rtx y_addr;
14716 int i;
14718 top_label = gen_label_rtx ();
14719 out_label = gen_label_rtx ();
14720 iter = gen_reg_rtx (iter_mode);
14722 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14723 NULL, 1, OPTAB_DIRECT);
14724 /* Those two should combine. */
14725 if (piece_size == const1_rtx)
14727 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14728 true, out_label);
14729 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14731 emit_move_insn (iter, const0_rtx);
14733 emit_label (top_label);
14735 tmp = convert_modes (Pmode, iter_mode, iter, true);
14736 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14737 destmem = change_address (destmem, mode, x_addr);
14739 if (srcmem)
14741 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14742 srcmem = change_address (srcmem, mode, y_addr);
14744 /* When unrolling for chips that reorder memory reads and writes,
14745 we can save registers by using a single temporary.
14746 Also, using 4 temporaries is overkill in 32bit mode. */
14747 if (!TARGET_64BIT && 0)
14749 for (i = 0; i < unroll; i++)
14751 if (i)
14753 destmem =
14754 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14755 srcmem =
14756 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14758 emit_move_insn (destmem, srcmem);
14761 else
14763 rtx tmpreg[4];
14764 gcc_assert (unroll <= 4);
14765 for (i = 0; i < unroll; i++)
14767 tmpreg[i] = gen_reg_rtx (mode);
14768 if (i)
14770 srcmem =
14771 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14773 emit_move_insn (tmpreg[i], srcmem);
14775 for (i = 0; i < unroll; i++)
14777 if (i)
14779 destmem =
14780 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14782 emit_move_insn (destmem, tmpreg[i]);
14786 else
14787 for (i = 0; i < unroll; i++)
14789 if (i)
14790 destmem =
14791 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14792 emit_move_insn (destmem, value);
14795 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14796 true, OPTAB_LIB_WIDEN);
14797 if (tmp != iter)
14798 emit_move_insn (iter, tmp);
14800 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14801 true, top_label);
14802 if (expected_size != -1)
14804 expected_size /= GET_MODE_SIZE (mode) * unroll;
14805 if (expected_size == 0)
14806 predict_jump (0);
14807 else if (expected_size > REG_BR_PROB_BASE)
14808 predict_jump (REG_BR_PROB_BASE - 1);
14809 else
14810 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14812 else
14813 predict_jump (REG_BR_PROB_BASE * 80 / 100);
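/* The formula above makes the backward branch probability roughly
   (n - 1) / n for a loop expected to iterate n == expected_size times
   (expected_size has already been scaled to iterations here); e.g. for
   n == 10 the branch is predicted taken about 90% of the time.  */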
14814 iter = ix86_zero_extend_to_Pmode (iter);
14815 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14816 true, OPTAB_LIB_WIDEN);
14817 if (tmp != destptr)
14818 emit_move_insn (destptr, tmp);
14819 if (srcptr)
14821 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14822 true, OPTAB_LIB_WIDEN);
14823 if (tmp != srcptr)
14824 emit_move_insn (srcptr, tmp);
14826 emit_label (out_label);
14829 /* Output a "rep; mov" instruction.
14830 Arguments have the same meaning as for the previous function. */
14831 static void
14832 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14833 rtx destptr, rtx srcptr,
14834 rtx count,
14835 enum machine_mode mode)
14837 rtx destexp;
14838 rtx srcexp;
14839 rtx countreg;
14841 /* If the size is known, it is shorter to use rep movs. */
14842 if (mode == QImode && CONST_INT_P (count)
14843 && !(INTVAL (count) & 3))
14844 mode = SImode;
14846 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14847 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14848 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14849 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14850 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14851 if (mode != QImode)
14853 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14854 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14855 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14856 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14857 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14858 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14860 else
14862 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14863 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14865 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14866 destexp, srcexp));
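/* DESTEXP and SRCEXP describe the final pointer values for the rep_mov
   pattern; e.g. with SImode chunks "rep movsd" advances both pointers by
   4 * countreg bytes, which is what (countreg << 2) + ptr expresses.  */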
14869 /* Output a "rep; stos" instruction.
14870 Arguments have the same meaning as for the previous function. */
14871 static void
14872 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14873 rtx count,
14874 enum machine_mode mode)
14876 rtx destexp;
14877 rtx countreg;
14879 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14880 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14881 value = force_reg (mode, gen_lowpart (mode, value));
14882 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14883 if (mode != QImode)
14885 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14886 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14887 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14889 else
14890 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14891 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14894 static void
14895 emit_strmov (rtx destmem, rtx srcmem,
14896 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14898 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14899 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14900 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14903 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
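/* For a constant count the epilogue is a straight-line sequence driven by
   the low bits of COUNT; e.g. on a 64-bit target, when the low bits of
   COUNT are 10111 (23), it emits DImode moves at offsets 0 and 8, an
   SImode move at 16, an HImode move at 20 and a QImode move at 22.  */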
14904 static void
14905 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14906 rtx destptr, rtx srcptr, rtx count, int max_size)
14908 rtx src, dest;
14909 if (CONST_INT_P (count))
14911 HOST_WIDE_INT countval = INTVAL (count);
14912 int offset = 0;
14914 if ((countval & 0x10) && max_size > 16)
14916 if (TARGET_64BIT)
14918 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14919 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14921 else
14922 gcc_unreachable ();
14923 offset += 16;
14925 if ((countval & 0x08) && max_size > 8)
14927 if (TARGET_64BIT)
14928 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14929 else
14931 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14932 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14934 offset += 8;
14936 if ((countval & 0x04) && max_size > 4)
14938 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14939 offset += 4;
14941 if ((countval & 0x02) && max_size > 2)
14943 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14944 offset += 2;
14946 if ((countval & 0x01) && max_size > 1)
14948 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14949 offset += 1;
14951 return;
14953 if (max_size > 8)
14955 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14956 count, 1, OPTAB_DIRECT);
14957 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14958 count, QImode, 1, 4);
14959 return;
14962 /* When there are stringops, we can cheaply increase dest and src pointers.
14963 Otherwise we save code size by maintaining offset (zero is readily
14964 available from the preceding rep operation) and using x86 addressing modes. */
14966 if (TARGET_SINGLE_STRINGOP)
14968 if (max_size > 4)
14970 rtx label = ix86_expand_aligntest (count, 4, true);
14971 src = change_address (srcmem, SImode, srcptr);
14972 dest = change_address (destmem, SImode, destptr);
14973 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14974 emit_label (label);
14975 LABEL_NUSES (label) = 1;
14977 if (max_size > 2)
14979 rtx label = ix86_expand_aligntest (count, 2, true);
14980 src = change_address (srcmem, HImode, srcptr);
14981 dest = change_address (destmem, HImode, destptr);
14982 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14983 emit_label (label);
14984 LABEL_NUSES (label) = 1;
14986 if (max_size > 1)
14988 rtx label = ix86_expand_aligntest (count, 1, true);
14989 src = change_address (srcmem, QImode, srcptr);
14990 dest = change_address (destmem, QImode, destptr);
14991 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14992 emit_label (label);
14993 LABEL_NUSES (label) = 1;
14996 else
14998 rtx offset = force_reg (Pmode, const0_rtx);
14999 rtx tmp;
15001 if (max_size > 4)
15003 rtx label = ix86_expand_aligntest (count, 4, true);
15004 src = change_address (srcmem, SImode, srcptr);
15005 dest = change_address (destmem, SImode, destptr);
15006 emit_move_insn (dest, src);
15007 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15008 true, OPTAB_LIB_WIDEN);
15009 if (tmp != offset)
15010 emit_move_insn (offset, tmp);
15011 emit_label (label);
15012 LABEL_NUSES (label) = 1;
15014 if (max_size > 2)
15016 rtx label = ix86_expand_aligntest (count, 2, true);
15017 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15018 src = change_address (srcmem, HImode, tmp);
15019 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15020 dest = change_address (destmem, HImode, tmp);
15021 emit_move_insn (dest, src);
15022 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15023 true, OPTAB_LIB_WIDEN);
15024 if (tmp != offset)
15025 emit_move_insn (offset, tmp);
15026 emit_label (label);
15027 LABEL_NUSES (label) = 1;
15029 if (max_size > 1)
15031 rtx label = ix86_expand_aligntest (count, 1, true);
15032 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15033 src = change_address (srcmem, QImode, tmp);
15034 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15035 dest = change_address (destmem, QImode, tmp);
15036 emit_move_insn (dest, src);
15037 emit_label (label);
15038 LABEL_NUSES (label) = 1;
15043 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
15044 static void
15045 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15046 rtx count, int max_size)
15048 count =
15049 expand_simple_binop (counter_mode (count), AND, count,
15050 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15051 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15052 gen_lowpart (QImode, value), count, QImode,
15053 1, max_size / 2);
15056 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
15057 static void
15058 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15060 rtx dest;
15062 if (CONST_INT_P (count))
15064 HOST_WIDE_INT countval = INTVAL (count);
15065 int offset = 0;
15067 if ((countval & 0x10) && max_size > 16)
15069 if (TARGET_64BIT)
15071 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15072 emit_insn (gen_strset (destptr, dest, value));
15073 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15074 emit_insn (gen_strset (destptr, dest, value));
15076 else
15077 gcc_unreachable ();
15078 offset += 16;
15080 if ((countval & 0x08) && max_size > 8)
15082 if (TARGET_64BIT)
15084 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15085 emit_insn (gen_strset (destptr, dest, value));
15087 else
15089 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15090 emit_insn (gen_strset (destptr, dest, value));
15091 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15092 emit_insn (gen_strset (destptr, dest, value));
15094 offset += 8;
15096 if ((countval & 0x04) && max_size > 4)
15098 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15099 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15100 offset += 4;
15102 if ((countval & 0x02) && max_size > 2)
15104 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15105 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15106 offset += 2;
15108 if ((countval & 0x01) && max_size > 1)
15110 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15111 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15112 offset += 1;
15114 return;
15116 if (max_size > 32)
15118 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15119 return;
15121 if (max_size > 16)
15123 rtx label = ix86_expand_aligntest (count, 16, true);
15124 if (TARGET_64BIT)
15126 dest = change_address (destmem, DImode, destptr);
15127 emit_insn (gen_strset (destptr, dest, value));
15128 emit_insn (gen_strset (destptr, dest, value));
15130 else
15132 dest = change_address (destmem, SImode, destptr);
15133 emit_insn (gen_strset (destptr, dest, value));
15134 emit_insn (gen_strset (destptr, dest, value));
15135 emit_insn (gen_strset (destptr, dest, value));
15136 emit_insn (gen_strset (destptr, dest, value));
15138 emit_label (label);
15139 LABEL_NUSES (label) = 1;
15141 if (max_size > 8)
15143 rtx label = ix86_expand_aligntest (count, 8, true);
15144 if (TARGET_64BIT)
15146 dest = change_address (destmem, DImode, destptr);
15147 emit_insn (gen_strset (destptr, dest, value));
15149 else
15151 dest = change_address (destmem, SImode, destptr);
15152 emit_insn (gen_strset (destptr, dest, value));
15153 emit_insn (gen_strset (destptr, dest, value));
15155 emit_label (label);
15156 LABEL_NUSES (label) = 1;
15158 if (max_size > 4)
15160 rtx label = ix86_expand_aligntest (count, 4, true);
15161 dest = change_address (destmem, SImode, destptr);
15162 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15163 emit_label (label);
15164 LABEL_NUSES (label) = 1;
15166 if (max_size > 2)
15168 rtx label = ix86_expand_aligntest (count, 2, true);
15169 dest = change_address (destmem, HImode, destptr);
15170 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15171 emit_label (label);
15172 LABEL_NUSES (label) = 1;
15174 if (max_size > 1)
15176 rtx label = ix86_expand_aligntest (count, 1, true);
15177 dest = change_address (destmem, QImode, destptr);
15178 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15179 emit_label (label);
15180 LABEL_NUSES (label) = 1;
15184 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
15185 DESIRED_ALIGNMENT. */
15186 static void
15187 expand_movmem_prologue (rtx destmem, rtx srcmem,
15188 rtx destptr, rtx srcptr, rtx count,
15189 int align, int desired_alignment)
15191 if (align <= 1 && desired_alignment > 1)
15193 rtx label = ix86_expand_aligntest (destptr, 1, false);
15194 srcmem = change_address (srcmem, QImode, srcptr);
15195 destmem = change_address (destmem, QImode, destptr);
15196 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15197 ix86_adjust_counter (count, 1);
15198 emit_label (label);
15199 LABEL_NUSES (label) = 1;
15201 if (align <= 2 && desired_alignment > 2)
15203 rtx label = ix86_expand_aligntest (destptr, 2, false);
15204 srcmem = change_address (srcmem, HImode, srcptr);
15205 destmem = change_address (destmem, HImode, destptr);
15206 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15207 ix86_adjust_counter (count, 2);
15208 emit_label (label);
15209 LABEL_NUSES (label) = 1;
15211 if (align <= 4 && desired_alignment > 4)
15213 rtx label = ix86_expand_aligntest (destptr, 4, false);
15214 srcmem = change_address (srcmem, SImode, srcptr);
15215 destmem = change_address (destmem, SImode, destptr);
15216 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15217 ix86_adjust_counter (count, 4);
15218 emit_label (label);
15219 LABEL_NUSES (label) = 1;
15221 gcc_assert (desired_alignment <= 8);
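/* E.g. for ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits three guarded
   fixups copying 1, 2 and 4 bytes and adjusting COUNT accordingly, so that
   DESTPTR is 8-byte aligned when the main copying loop starts.  */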
15224 /* Set enough of DEST to align DEST, known to be aligned by ALIGN, to
15225 DESIRED_ALIGNMENT. */
15226 static void
15227 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15228 int align, int desired_alignment)
15230 if (align <= 1 && desired_alignment > 1)
15232 rtx label = ix86_expand_aligntest (destptr, 1, false);
15233 destmem = change_address (destmem, QImode, destptr);
15234 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15235 ix86_adjust_counter (count, 1);
15236 emit_label (label);
15237 LABEL_NUSES (label) = 1;
15239 if (align <= 2 && desired_alignment > 2)
15241 rtx label = ix86_expand_aligntest (destptr, 2, false);
15242 destmem = change_address (destmem, HImode, destptr);
15243 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15244 ix86_adjust_counter (count, 2);
15245 emit_label (label);
15246 LABEL_NUSES (label) = 1;
15248 if (align <= 4 && desired_alignment > 4)
15250 rtx label = ix86_expand_aligntest (destptr, 4, false);
15251 destmem = change_address (destmem, SImode, destptr);
15252 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15253 ix86_adjust_counter (count, 4);
15254 emit_label (label);
15255 LABEL_NUSES (label) = 1;
15257 gcc_assert (desired_alignment <= 8);
15260 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15261 static enum stringop_alg
15262 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15263 int *dynamic_check)
15265 const struct stringop_algs * algs;
15266 /* Algorithms using the rep prefix want at least edi and ecx;
15267 additionally, memset wants eax and memcpy wants esi. Don't
15268 consider such algorithms if the user has appropriated those
15269 registers for their own purposes. */
15270 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15271 || (memset
15272 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15274 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15275 || (alg != rep_prefix_1_byte \
15276 && alg != rep_prefix_4_byte \
15277 && alg != rep_prefix_8_byte))
15279 *dynamic_check = -1;
15280 if (memset)
15281 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15282 else
15283 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15284 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15285 return stringop_alg;
15286 /* rep; movq or rep; movl is the smallest variant. */
15287 else if (optimize_size)
15289 if (!count || (count & 3))
15290 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15291 else
15292 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15294 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
15296 else if (expected_size != -1 && expected_size < 4)
15297 return loop_1_byte;
15298 else if (expected_size != -1)
15300 unsigned int i;
15301 enum stringop_alg alg = libcall;
15302 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15304 /* We get here if the algorithms that were not libcall-based
15305 were rep-prefix based and we are unable to use rep prefixes
15306 based on global register usage. Break out of the loop and
15307 use the heuristic below. */
15308 if (algs->size[i].max == 0)
15309 break;
15310 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15312 enum stringop_alg candidate = algs->size[i].alg;
15314 if (candidate != libcall && ALG_USABLE_P (candidate))
15315 alg = candidate;
15316 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15317 last non-libcall inline algorithm. */
15318 if (TARGET_INLINE_ALL_STRINGOPS)
15320 /* When the current size is best to be copied by a libcall,
15321 but we are still forced to inline, run the heuristic below
15322 that will pick code for medium sized blocks. */
15323 if (alg != libcall)
15324 return alg;
15325 break;
15327 else if (ALG_USABLE_P (candidate))
15328 return candidate;
15331 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15333 /* When asked to inline the call anyway, try to pick a meaningful choice.
15334 We look for the maximal size of block that is faster to copy by hand and
15335 take blocks of at most that size, guessing that the average size will
15336 be roughly half of the block.
15338 If this turns out to be bad, we might simply specify the preferred
15339 choice in ix86_costs. */
15340 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15341 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15343 int max = -1;
15344 enum stringop_alg alg;
15345 int i;
15346 bool any_alg_usable_p = true;
15348 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15350 enum stringop_alg candidate = algs->size[i].alg;
15351 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15353 if (candidate != libcall && candidate
15354 && ALG_USABLE_P (candidate))
15355 max = algs->size[i].max;
15357 /* If there aren't any usable algorithms, then recursing on
15358 smaller sizes isn't going to find anything. Just return the
15359 simple byte-at-a-time copy loop. */
15360 if (!any_alg_usable_p)
15362 /* Pick something reasonable. */
15363 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15364 *dynamic_check = 128;
15365 return loop_1_byte;
15367 if (max == -1)
15368 max = 4096;
15369 alg = decide_alg (count, max / 2, memset, dynamic_check);
15370 gcc_assert (*dynamic_check == -1);
15371 gcc_assert (alg != libcall);
15372 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15373 *dynamic_check = max;
15374 return alg;
15376 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15377 #undef ALG_USABLE_P
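/* Note that when no usable size information is available and inlining is
   forced, the recursive call above re-runs the choice with
   expected_size == max / 2, i.e. it assumes the average block is about half
   of the largest size that is still profitable to copy inline; with
   -minline-stringops-dynamically that same MAX also becomes the runtime
   cutoff below which the inline code is used instead of a libcall.  */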
15380 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15381 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15382 static int
15383 decide_alignment (int align,
15384 enum stringop_alg alg,
15385 int expected_size)
15387 int desired_align = 0;
15388 switch (alg)
15390 case no_stringop:
15391 gcc_unreachable ();
15392 case loop:
15393 case unrolled_loop:
15394 desired_align = GET_MODE_SIZE (Pmode);
15395 break;
15396 case rep_prefix_8_byte:
15397 desired_align = 8;
15398 break;
15399 case rep_prefix_4_byte:
15400 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
15401 copying the whole cacheline at once. */
15402 if (TARGET_PENTIUMPRO)
15403 desired_align = 8;
15404 else
15405 desired_align = 4;
15406 break;
15407 case rep_prefix_1_byte:
15408 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
15409 copying the whole cacheline at once. */
15410 if (TARGET_PENTIUMPRO)
15411 desired_align = 8;
15412 else
15413 desired_align = 1;
15414 break;
15415 case loop_1_byte:
15416 desired_align = 1;
15417 break;
15418 case libcall:
15419 return 0;
15422 if (optimize_size)
15423 desired_align = 1;
15424 if (desired_align < align)
15425 desired_align = align;
15426 if (expected_size != -1 && expected_size < 4)
15427 desired_align = align;
15428 return desired_align;
15431 /* Return the smallest power of 2 greater than VAL. */
15432 static int
15433 smallest_pow2_greater_than (int val)
15435 int ret = 1;
15436 while (ret <= val)
15437 ret <<= 1;
15438 return ret;
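/* Note that this is strictly "greater than":
   smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (7) == 8.  */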
15441 /* Expand string move (memcpy) operation. Use i386 string operations when
15442 profitable. expand_setmem contains similar code. The code depends upon
15443 architecture, block size and alignment, but always has the same
15444 overall structure:
15446 1) Prologue guard: Conditional that jumps up to epilogues for small
15447 blocks that can be handled by epilogue alone. This is faster but
15448 also needed for correctness, since the prologue assumes the block is larger
15449 than the desired alignment.
15451 Optional dynamic check for size and libcall for large
15452 blocks is emitted here too, with -minline-stringops-dynamically.
15454 2) Prologue: copy first few bytes in order to get destination aligned
15455 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15456 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15457 We emit either a jump tree on power of two sized blocks, or a byte loop.
15459 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15460 with the specified algorithm.
15462 4) Epilogue: code copying the tail of the block that is too small to be
15463 handled by the main body (or up to the size guarded by the prologue guard). */
15465 int
15466 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15467 rtx expected_align_exp, rtx expected_size_exp)
15469 rtx destreg;
15470 rtx srcreg;
15471 rtx label = NULL;
15472 rtx tmp;
15473 rtx jump_around_label = NULL;
15474 HOST_WIDE_INT align = 1;
15475 unsigned HOST_WIDE_INT count = 0;
15476 HOST_WIDE_INT expected_size = -1;
15477 int size_needed = 0, epilogue_size_needed;
15478 int desired_align = 0;
15479 enum stringop_alg alg;
15480 int dynamic_check;
15482 if (CONST_INT_P (align_exp))
15483 align = INTVAL (align_exp);
15484 /* i386 can do misaligned access at a reasonably increased cost. */
15485 if (CONST_INT_P (expected_align_exp)
15486 && INTVAL (expected_align_exp) > align)
15487 align = INTVAL (expected_align_exp);
15488 if (CONST_INT_P (count_exp))
15489 count = expected_size = INTVAL (count_exp);
15490 if (CONST_INT_P (expected_size_exp) && count == 0)
15491 expected_size = INTVAL (expected_size_exp);
15493 /* Make sure we don't need to care about overflow later on. */
15494 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15495 return 0;
15497 /* Step 0: Decide on preferred algorithm, desired alignment and
15498 size of chunks to be copied by main loop. */
15500 alg = decide_alg (count, expected_size, false, &dynamic_check);
15501 desired_align = decide_alignment (align, alg, expected_size);
15503 if (!TARGET_ALIGN_STRINGOPS)
15504 align = desired_align;
15506 if (alg == libcall)
15507 return 0;
15508 gcc_assert (alg != no_stringop);
15509 if (!count)
15510 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15511 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15512 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15513 switch (alg)
15515 case libcall:
15516 case no_stringop:
15517 gcc_unreachable ();
15518 case loop:
15519 size_needed = GET_MODE_SIZE (Pmode);
15520 break;
15521 case unrolled_loop:
15522 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15523 break;
15524 case rep_prefix_8_byte:
15525 size_needed = 8;
15526 break;
15527 case rep_prefix_4_byte:
15528 size_needed = 4;
15529 break;
15530 case rep_prefix_1_byte:
15531 case loop_1_byte:
15532 size_needed = 1;
15533 break;
15536 epilogue_size_needed = size_needed;
15538 /* Step 1: Prologue guard. */
15540 /* Alignment code needs count to be in register. */
15541 if (CONST_INT_P (count_exp) && desired_align > align)
15542 count_exp = force_reg (counter_mode (count_exp), count_exp);
15543 gcc_assert (desired_align >= 1 && align >= 1);
15545 /* Ensure that alignment prologue won't copy past end of block. */
15546 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15548 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15549 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
15550 Make sure it is a power of 2. */
15551 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15553 if (CONST_INT_P (count_exp))
15555 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15556 goto epilogue;
15558 else
15560 label = gen_label_rtx ();
15561 emit_cmp_and_jump_insns (count_exp,
15562 GEN_INT (epilogue_size_needed),
15563 LTU, 0, counter_mode (count_exp), 1, label);
15564 if (expected_size == -1 || expected_size < epilogue_size_needed)
15565 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15566 else
15567 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15571 /* Emit code to decide at run time whether a library call or inline code
15572 should be used. */
15573 if (dynamic_check != -1)
15575 if (CONST_INT_P (count_exp))
15577 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15579 emit_block_move_via_libcall (dst, src, count_exp, false);
15580 count_exp = const0_rtx;
15581 goto epilogue;
15584 else
15586 rtx hot_label = gen_label_rtx ();
15587 jump_around_label = gen_label_rtx ();
15588 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15589 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15590 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15591 emit_block_move_via_libcall (dst, src, count_exp, false);
15592 emit_jump (jump_around_label);
15593 emit_label (hot_label);
15597 /* Step 2: Alignment prologue. */
15599 if (desired_align > align)
15601 /* Except for the first move in the epilogue, we no longer know
15602 the constant offset in the aliasing info. It does not seem worth
15603 the pain to maintain it for the first move, so throw away
15604 the info early. */
15605 src = change_address (src, BLKmode, srcreg);
15606 dst = change_address (dst, BLKmode, destreg);
15607 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15608 desired_align);
15610 if (label && size_needed == 1)
15612 emit_label (label);
15613 LABEL_NUSES (label) = 1;
15614 label = NULL;
15617 /* Step 3: Main loop. */
15619 switch (alg)
15621 case libcall:
15622 case no_stringop:
15623 gcc_unreachable ();
15624 case loop_1_byte:
15625 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15626 count_exp, QImode, 1, expected_size);
15627 break;
15628 case loop:
15629 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15630 count_exp, Pmode, 1, expected_size);
15631 break;
15632 case unrolled_loop:
15633 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
15634 registers for 4 temporaries anyway. */
15635 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15636 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15637 expected_size);
15638 break;
15639 case rep_prefix_8_byte:
15640 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15641 DImode);
15642 break;
15643 case rep_prefix_4_byte:
15644 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15645 SImode);
15646 break;
15647 case rep_prefix_1_byte:
15648 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15649 QImode);
15650 break;
15652 /* Properly adjust the offsets of the src and dest memory for aliasing. */
15653 if (CONST_INT_P (count_exp))
15655 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15656 (count / size_needed) * size_needed);
15657 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15658 (count / size_needed) * size_needed);
15660 else
15662 src = change_address (src, BLKmode, srcreg);
15663 dst = change_address (dst, BLKmode, destreg);
15666 /* Step 4: Epilogue to copy the remaining bytes. */
15667 epilogue:
15668 if (label)
15670 /* When the main loop is done, COUNT_EXP might hold the original count,
15671 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15672 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15673 bytes. Compensate if needed. */
15675 if (size_needed < epilogue_size_needed)
15677 tmp =
15678 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15679 GEN_INT (size_needed - 1), count_exp, 1,
15680 OPTAB_DIRECT);
15681 if (tmp != count_exp)
15682 emit_move_insn (count_exp, tmp);
15684 emit_label (label);
15685 LABEL_NUSES (label) = 1;
15688 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15689 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15690 epilogue_size_needed);
15691 if (jump_around_label)
15692 emit_label (jump_around_label);
15693 return 1;
15696 /* Helper function for memset (expand_setmem). For QImode value 0xXY produce
15697 0xXYXYXYXY of the width specified by MODE. This is essentially
15698 a * 0x01010101, but we can do slightly better than
15699 synth_mult by unwinding the sequence by hand on CPUs with
15700 slow multiply. */
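/* Illustrative scalar sketch (not the emitted RTL) of the shift/or expansion
   used below when a multiply would be too slow:

     v |= v << 8;      0x000000XY -> 0x0000XYXY
     v |= v << 16;     0x0000XYXY -> 0xXYXYXYXY
     v |= v << 32;     DImode only: broadcast into all eight bytes  */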
15701 static rtx
15702 promote_duplicated_reg (enum machine_mode mode, rtx val)
15704 enum machine_mode valmode = GET_MODE (val);
15705 rtx tmp;
15706 int nops = mode == DImode ? 3 : 2;
15708 gcc_assert (mode == SImode || mode == DImode);
15709 if (val == const0_rtx)
15710 return copy_to_mode_reg (mode, const0_rtx);
15711 if (CONST_INT_P (val))
15713 HOST_WIDE_INT v = INTVAL (val) & 255;
15715 v |= v << 8;
15716 v |= v << 16;
15717 if (mode == DImode)
15718 v |= (v << 16) << 16;
15719 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15722 if (valmode == VOIDmode)
15723 valmode = QImode;
15724 if (valmode != QImode)
15725 val = gen_lowpart (QImode, val);
15726 if (mode == QImode)
15727 return val;
15728 if (!TARGET_PARTIAL_REG_STALL)
15729 nops--;
15730 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15731 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15732 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15733 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15735 rtx reg = convert_modes (mode, QImode, val, true);
15736 tmp = promote_duplicated_reg (mode, const1_rtx);
15737 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15738 OPTAB_DIRECT);
15740 else
15742 rtx reg = convert_modes (mode, QImode, val, true);
15744 if (!TARGET_PARTIAL_REG_STALL)
15745 if (mode == SImode)
15746 emit_insn (gen_movsi_insv_1 (reg, reg));
15747 else
15748 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15749 else
15751 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15752 NULL, 1, OPTAB_DIRECT);
15753 reg =
15754 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15756 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15757 NULL, 1, OPTAB_DIRECT);
15758 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15759 if (mode == SImode)
15760 return reg;
15761 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15762 NULL, 1, OPTAB_DIRECT);
15763 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15764 return reg;
15768 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
15769 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
15770 getting the alignment from ALIGN to DESIRED_ALIGN. */
15771 static rtx
15772 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15774 rtx promoted_val;
15776 if (TARGET_64BIT
15777 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15778 promoted_val = promote_duplicated_reg (DImode, val);
15779 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15780 promoted_val = promote_duplicated_reg (SImode, val);
15781 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15782 promoted_val = promote_duplicated_reg (HImode, val);
15783 else
15784 promoted_val = val;
15786 return promoted_val;
15789 /* Expand string clear operation (bzero). Use i386 string operations when
15790 profitable. See expand_movmem comment for explanation of individual
15791 steps performed. */
15793 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15794 rtx expected_align_exp, rtx expected_size_exp)
15796 rtx destreg;
15797 rtx label = NULL;
15798 rtx tmp;
15799 rtx jump_around_label = NULL;
15800 HOST_WIDE_INT align = 1;
15801 unsigned HOST_WIDE_INT count = 0;
15802 HOST_WIDE_INT expected_size = -1;
15803 int size_needed = 0, epilogue_size_needed;
15804 int desired_align = 0;
15805 enum stringop_alg alg;
15806 rtx promoted_val = NULL;
15807 bool force_loopy_epilogue = false;
15808 int dynamic_check;
15810 if (CONST_INT_P (align_exp))
15811 align = INTVAL (align_exp);
15812 /* i386 can do misaligned access at a reasonably increased cost. */
15813 if (CONST_INT_P (expected_align_exp)
15814 && INTVAL (expected_align_exp) > align)
15815 align = INTVAL (expected_align_exp);
15816 if (CONST_INT_P (count_exp))
15817 count = expected_size = INTVAL (count_exp);
15818 if (CONST_INT_P (expected_size_exp) && count == 0)
15819 expected_size = INTVAL (expected_size_exp);
15821 /* Make sure we don't need to care about overflow later on. */
15822 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15823 return 0;
15825 /* Step 0: Decide on preferred algorithm, desired alignment and
15826 size of chunks to be copied by main loop. */
15828 alg = decide_alg (count, expected_size, true, &dynamic_check);
15829 desired_align = decide_alignment (align, alg, expected_size);
15831 if (!TARGET_ALIGN_STRINGOPS)
15832 align = desired_align;
15834 if (alg == libcall)
15835 return 0;
15836 gcc_assert (alg != no_stringop);
15837 if (!count)
15838 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15839 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15840 switch (alg)
15842 case libcall:
15843 case no_stringop:
15844 gcc_unreachable ();
15845 case loop:
15846 size_needed = GET_MODE_SIZE (Pmode);
15847 break;
15848 case unrolled_loop:
15849 size_needed = GET_MODE_SIZE (Pmode) * 4;
15850 break;
15851 case rep_prefix_8_byte:
15852 size_needed = 8;
15853 break;
15854 case rep_prefix_4_byte:
15855 size_needed = 4;
15856 break;
15857 case rep_prefix_1_byte:
15858 case loop_1_byte:
15859 size_needed = 1;
15860 break;
15862 epilogue_size_needed = size_needed;
15864 /* Step 1: Prologue guard. */
15866 /* Alignment code needs count to be in register. */
15867 if (CONST_INT_P (count_exp) && desired_align > align)
15869 enum machine_mode mode = SImode;
15870 if (TARGET_64BIT && (count & ~0xffffffff))
15871 mode = DImode;
15872 count_exp = force_reg (mode, count_exp);
15874 /* Do the cheap promotion to allow better CSE across the
15875 main loop and epilogue (i.e., one load of the big constant in
15876 front of all the code). */
15877 if (CONST_INT_P (val_exp))
15878 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15879 desired_align, align);
15880 /* Ensure that alignment prologue won't copy past end of block. */
15881 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15883 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15884 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15885 Make sure it is power of 2. */
15886 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15888 /* To improve performance of small blocks, we jump around the VAL
15889 promoting code. This means that if the promoted VAL is not a constant,
15890 we might not use it in the epilogue and have to use the byte
15891 loop variant. */
15892 if (epilogue_size_needed > 2 && !promoted_val)
15893 force_loopy_epilogue = true;
15894 label = gen_label_rtx ();
15895 emit_cmp_and_jump_insns (count_exp,
15896 GEN_INT (epilogue_size_needed),
15897 LTU, 0, counter_mode (count_exp), 1, label);
15898 if (GET_CODE (count_exp) == CONST_INT)
15900 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15901 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15902 else
15903 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15905 if (dynamic_check != -1)
15907 rtx hot_label = gen_label_rtx ();
15908 jump_around_label = gen_label_rtx ();
15909 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15910 LEU, 0, counter_mode (count_exp), 1, hot_label);
15911 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15912 set_storage_via_libcall (dst, count_exp, val_exp, false);
15913 emit_jump (jump_around_label);
15914 emit_label (hot_label);
15917 /* Step 2: Alignment prologue. */
15919 /* Do the expensive promotion once we have branched off the small blocks. */
15920 if (!promoted_val)
15921 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15922 desired_align, align);
15923 gcc_assert (desired_align >= 1 && align >= 1);
15925 if (desired_align > align)
15927 /* Except for the first move in the epilogue, we no longer know
15928 the constant offset in the aliasing info. It does not seem worth
15929 the pain to maintain it for the first move, so throw away
15930 the info early. */
15931 dst = change_address (dst, BLKmode, destreg);
15932 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15933 desired_align);
15935 if (label && size_needed == 1)
15937 emit_label (label);
15938 LABEL_NUSES (label) = 1;
15939 label = NULL;
15942 /* Step 3: Main loop. */
15944 switch (alg)
15946 case libcall:
15947 case no_stringop:
15948 gcc_unreachable ();
15949 case loop_1_byte:
15950 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15951 count_exp, QImode, 1, expected_size);
15952 break;
15953 case loop:
15954 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15955 count_exp, Pmode, 1, expected_size);
15956 break;
15957 case unrolled_loop:
15958 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15959 count_exp, Pmode, 4, expected_size);
15960 break;
15961 case rep_prefix_8_byte:
15962 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15963 DImode);
15964 break;
15965 case rep_prefix_4_byte:
15966 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15967 SImode);
15968 break;
15969 case rep_prefix_1_byte:
15970 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15971 QImode);
15972 break;
15974 /* Properly adjust the offset of the dest memory for aliasing. */
15975 if (CONST_INT_P (count_exp))
15976 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15977 (count / size_needed) * size_needed);
15978 else
15979 dst = change_address (dst, BLKmode, destreg);
15981 /* Step 4: Epilogue to copy the remaining bytes. */
15983 if (label)
15985 /* When the main loop is done, COUNT_EXP might hold the original count,
15986 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15987 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15988 bytes. Compensate if needed. */
15990 if (size_needed < desired_align - align)
15992 tmp =
15993 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15994 GEN_INT (size_needed - 1), count_exp, 1,
15995 OPTAB_DIRECT);
15996 size_needed = desired_align - align + 1;
15997 if (tmp != count_exp)
15998 emit_move_insn (count_exp, tmp);
16000 emit_label (label);
16001 LABEL_NUSES (label) = 1;
16003 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16005 if (force_loopy_epilogue)
16006 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16007 size_needed);
16008 else
16009 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16010 size_needed);
16012 if (jump_around_label)
16013 emit_label (jump_around_label);
16014 return 1;
16017 /* Expand the appropriate insns for doing strlen if not just doing
16018 repnz; scasb
16020 out = result, initialized with the start address
16021 align_rtx = alignment of the address.
16022 scratch = scratch register, initialized with the start address when
16023 not aligned, otherwise undefined
16025 This is just the body. It needs the initializations mentioned above and
16026 some address computing at the end. These things are done in i386.md. */
16028 static void
16029 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16031 int align;
16032 rtx tmp;
16033 rtx align_2_label = NULL_RTX;
16034 rtx align_3_label = NULL_RTX;
16035 rtx align_4_label = gen_label_rtx ();
16036 rtx end_0_label = gen_label_rtx ();
16037 rtx mem;
16038 rtx tmpreg = gen_reg_rtx (SImode);
16039 rtx scratch = gen_reg_rtx (SImode);
16040 rtx cmp;
16042 align = 0;
16043 if (CONST_INT_P (align_rtx))
16044 align = INTVAL (align_rtx);
16046 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16048 /* Is there a known alignment and is it less than 4? */
16049 if (align < 4)
16051 rtx scratch1 = gen_reg_rtx (Pmode);
16052 emit_move_insn (scratch1, out);
16053 /* Is there a known alignment and is it not 2? */
16054 if (align != 2)
16056 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16057 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16059 /* Leave just the 3 lower bits. */
16060 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16061 NULL_RTX, 0, OPTAB_WIDEN);
16063 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16064 Pmode, 1, align_4_label);
16065 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16066 Pmode, 1, align_2_label);
16067 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16068 Pmode, 1, align_3_label);
16070 else
16072 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16073 check whether it is aligned to a 4-byte boundary. */
16075 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16076 NULL_RTX, 0, OPTAB_WIDEN);
16078 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16079 Pmode, 1, align_4_label);
16082 mem = change_address (src, QImode, out);
16084 /* Now compare the bytes. */
16086 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
16087 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16088 QImode, 1, end_0_label);
16090 /* Increment the address. */
16091 if (TARGET_64BIT)
16092 emit_insn (gen_adddi3 (out, out, const1_rtx));
16093 else
16094 emit_insn (gen_addsi3 (out, out, const1_rtx));
16096 /* Not needed with an alignment of 2 */
16097 if (align != 2)
16099 emit_label (align_2_label);
16101 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16102 end_0_label);
16104 if (TARGET_64BIT)
16105 emit_insn (gen_adddi3 (out, out, const1_rtx));
16106 else
16107 emit_insn (gen_addsi3 (out, out, const1_rtx));
16109 emit_label (align_3_label);
16112 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16113 end_0_label);
16115 if (TARGET_64BIT)
16116 emit_insn (gen_adddi3 (out, out, const1_rtx));
16117 else
16118 emit_insn (gen_addsi3 (out, out, const1_rtx));
16121 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
16122 align this loop; it only makes programs bigger and does not
16123 speed them up. */
16124 emit_label (align_4_label);
16126 mem = change_address (src, SImode, out);
16127 emit_move_insn (scratch, mem);
16128 if (TARGET_64BIT)
16129 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
16130 else
16131 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
16133 /* This formula yields a nonzero result iff one of the bytes is zero.
16134 This saves three branches inside the loop and many cycles. */
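/* As a plain C sketch (illustrative only, not the emitted insns), the test
   constructed below is the classic zero-byte check:

     has_zero = (w - 0x01010101) & ~w & 0x80808080;

   Subtracting 1 from each byte sets that byte's top bit only when the byte
   borrows (i.e. was zero), once bytes whose top bit was already set are
   masked out by ~w.  */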
16136 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16137 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16138 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16139 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16140 gen_int_mode (0x80808080, SImode)));
16141 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16142 align_4_label);
16144 if (TARGET_CMOVE)
16146 rtx reg = gen_reg_rtx (SImode);
16147 rtx reg2 = gen_reg_rtx (Pmode);
16148 emit_move_insn (reg, tmpreg);
16149 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16151 /* If zero is not in the first two bytes, move two bytes forward. */
16152 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16153 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16154 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16155 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16156 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16157 reg,
16158 tmpreg)));
16159 /* Emit lea manually to avoid clobbering of flags. */
16160 emit_insn (gen_rtx_SET (SImode, reg2,
16161 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16163 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16164 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16165 emit_insn (gen_rtx_SET (VOIDmode, out,
16166 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16167 reg2,
16168 out)));
16171 else
16173 rtx end_2_label = gen_label_rtx ();
16174 /* Is zero in the first two bytes? */
16176 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16177 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16178 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16179 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16180 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16181 pc_rtx);
16182 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16183 JUMP_LABEL (tmp) = end_2_label;
16185 /* Not in the first two. Move two bytes forward. */
16186 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16187 if (TARGET_64BIT)
16188 emit_insn (gen_adddi3 (out, out, const2_rtx));
16189 else
16190 emit_insn (gen_addsi3 (out, out, const2_rtx));
16192 emit_label (end_2_label);
16196 /* Avoid branch in fixing the byte. */
16197 tmpreg = gen_lowpart (QImode, tmpreg);
16198 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16199 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16200 if (TARGET_64BIT)
16201 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16202 else
16203 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16205 emit_label (end_0_label);
16208 /* Expand strlen. */
16211 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16213 rtx addr, scratch1, scratch2, scratch3, scratch4;
16215 /* The generic case of the strlen expander is long. Avoid expanding it
16216 unless TARGET_INLINE_ALL_STRINGOPS. */
16218 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16219 && !TARGET_INLINE_ALL_STRINGOPS
16220 && !optimize_size
16221 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16222 return 0;
16224 addr = force_reg (Pmode, XEXP (src, 0));
16225 scratch1 = gen_reg_rtx (Pmode);
16227 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16228 && !optimize_size)
16230 /* Well, it seems that some optimizers do not combine a call like
16231 foo(strlen(bar), strlen(bar));
16232 when the move and the subtraction are done here. The length is calculated
16233 just once when these instructions are emitted inside
16234 output_strlen_unroll(). But since &bar[strlen(bar)] is
16235 often used and this uses one fewer register for the lifetime of
16236 output_strlen_unroll(), this is better. */
16238 emit_move_insn (out, addr);
16240 ix86_expand_strlensi_unroll_1 (out, src, align);
16242 /* strlensi_unroll_1 returns the address of the zero at the end of
16243 the string, like memchr(), so compute the length by subtracting
16244 the start address. */
16245 if (TARGET_64BIT)
16246 emit_insn (gen_subdi3 (out, out, addr));
16247 else
16248 emit_insn (gen_subsi3 (out, out, addr));
16250 else
16252 rtx unspec;
16254 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16255 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16256 return false;
16258 scratch2 = gen_reg_rtx (Pmode);
16259 scratch3 = gen_reg_rtx (Pmode);
16260 scratch4 = force_reg (Pmode, constm1_rtx);
16262 emit_move_insn (scratch3, addr);
16263 eoschar = force_reg (QImode, eoschar);
16265 src = replace_equiv_address_nv (src, scratch3);
16267 /* If .md starts supporting :P, this can be done in .md. */
16268 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16269 scratch4), UNSPEC_SCAS);
16270 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16271 if (TARGET_64BIT)
16273 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16274 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16276 else
16278 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16279 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16282 return 1;
16285 /* For a given symbol (function), construct code to compute the address of its
16286 PLT entry in the large x86-64 PIC model. */
16288 construct_plt_address (rtx symbol)
16290 rtx tmp = gen_reg_rtx (Pmode);
16291 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16293 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16294 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16296 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16297 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16298 return tmp;
16301 void
16302 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16303 rtx callarg2 ATTRIBUTE_UNUSED,
16304 rtx pop, int sibcall)
16306 rtx use = NULL, call;
16308 if (pop == const0_rtx)
16309 pop = NULL;
16310 gcc_assert (!TARGET_64BIT || !pop);
16312 if (TARGET_MACHO && !TARGET_64BIT)
16314 #if TARGET_MACHO
16315 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16316 fnaddr = machopic_indirect_call_target (fnaddr);
16317 #endif
16319 else
16321 /* Static functions and indirect calls don't need the pic register. */
16322 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16323 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16324 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16325 use_reg (&use, pic_offset_table_rtx);
16328 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16330 rtx al = gen_rtx_REG (QImode, AX_REG);
16331 emit_move_insn (al, callarg2);
16332 use_reg (&use, al);
16335 if (ix86_cmodel == CM_LARGE_PIC
16336 && GET_CODE (fnaddr) == MEM
16337 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16338 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16339 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16340 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16342 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16343 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16345 if (sibcall && TARGET_64BIT
16346 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16348 rtx addr;
16349 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16350 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16351 emit_move_insn (fnaddr, addr);
16352 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16355 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16356 if (retval)
16357 call = gen_rtx_SET (VOIDmode, retval, call);
16358 if (pop)
16360 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16361 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16362 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16365 call = emit_call_insn (call);
16366 if (use)
16367 CALL_INSN_FUNCTION_USAGE (call) = use;
16371 /* Clear stack slot assignments remembered from previous functions.
16372 This is called from INIT_EXPANDERS once before RTL is emitted for each
16373 function. */
16375 static struct machine_function *
16376 ix86_init_machine_status (void)
16378 struct machine_function *f;
16380 f = GGC_CNEW (struct machine_function);
16381 f->use_fast_prologue_epilogue_nregs = -1;
16382 f->tls_descriptor_call_expanded_p = 0;
16384 return f;
16387 /* Return a MEM corresponding to a stack slot with mode MODE.
16388 Allocate a new slot if necessary.
16390 The RTL for a function can have several slots available: N is
16391 which slot to use. */
16394 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16396 struct stack_local_entry *s;
16398 gcc_assert (n < MAX_386_STACK_LOCALS);
16400 /* Virtual slot is valid only before vregs are instantiated. */
16401 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16403 for (s = ix86_stack_locals; s; s = s->next)
16404 if (s->mode == mode && s->n == n)
16405 return copy_rtx (s->rtl);
16407 s = (struct stack_local_entry *)
16408 ggc_alloc (sizeof (struct stack_local_entry));
16409 s->n = n;
16410 s->mode = mode;
16411 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16413 s->next = ix86_stack_locals;
16414 ix86_stack_locals = s;
16415 return s->rtl;
16418 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16420 static GTY(()) rtx ix86_tls_symbol;
16422 ix86_tls_get_addr (void)
16425 if (!ix86_tls_symbol)
16427 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16428 (TARGET_ANY_GNU_TLS
16429 && !TARGET_64BIT)
16430 ? "___tls_get_addr"
16431 : "__tls_get_addr");
16434 return ix86_tls_symbol;
16437 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16439 static GTY(()) rtx ix86_tls_module_base_symbol;
16441 ix86_tls_module_base (void)
16444 if (!ix86_tls_module_base_symbol)
16446 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16447 "_TLS_MODULE_BASE_");
16448 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16449 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16452 return ix86_tls_module_base_symbol;
16455 /* Calculate the length of the memory address in the instruction
16456 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16459 memory_address_length (rtx addr)
16461 struct ix86_address parts;
16462 rtx base, index, disp;
16463 int len;
16464 int ok;
16466 if (GET_CODE (addr) == PRE_DEC
16467 || GET_CODE (addr) == POST_INC
16468 || GET_CODE (addr) == PRE_MODIFY
16469 || GET_CODE (addr) == POST_MODIFY)
16470 return 0;
16472 ok = ix86_decompose_address (addr, &parts);
16473 gcc_assert (ok);
16475 if (parts.base && GET_CODE (parts.base) == SUBREG)
16476 parts.base = SUBREG_REG (parts.base);
16477 if (parts.index && GET_CODE (parts.index) == SUBREG)
16478 parts.index = SUBREG_REG (parts.index);
16480 base = parts.base;
16481 index = parts.index;
16482 disp = parts.disp;
16483 len = 0;
16485 /* Rule of thumb:
16486 - esp as the base always wants an index,
16487 - ebp as the base always wants a displacement. */
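/* For example (illustrative only): (%eax) adds no extra bytes, (%esp) needs
   a one-byte SIB, 8(%ebp) needs a one-byte displacement, an absolute address
   or bare symbol needs a four-byte displacement, and (%eax,%ebx,4) needs a
   SIB byte -- all on top of the modrm byte, which is not counted here.  */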
16489 /* Register Indirect. */
16490 if (base && !index && !disp)
16492 /* esp (for its index) and ebp (for its displacement) need
16493 the two-byte modrm form. */
16494 if (addr == stack_pointer_rtx
16495 || addr == arg_pointer_rtx
16496 || addr == frame_pointer_rtx
16497 || addr == hard_frame_pointer_rtx)
16498 len = 1;
16501 /* Direct Addressing. */
16502 else if (disp && !base && !index)
16503 len = 4;
16505 else
16507 /* Find the length of the displacement constant. */
16508 if (disp)
16510 if (base && satisfies_constraint_K (disp))
16511 len = 1;
16512 else
16513 len = 4;
16515 /* ebp always wants a displacement. */
16516 else if (base == hard_frame_pointer_rtx)
16517 len = 1;
16519 /* An index requires the two-byte modrm form.... */
16520 if (index
16521 /* ...like esp, which always wants an index. */
16522 || base == stack_pointer_rtx
16523 || base == arg_pointer_rtx
16524 || base == frame_pointer_rtx)
16525 len += 1;
16528 return len;
16531 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
16532 is set, expect that the insn has an 8-bit immediate alternative. */
16534 ix86_attr_length_immediate_default (rtx insn, int shortform)
16536 int len = 0;
16537 int i;
16538 extract_insn_cached (insn);
16539 for (i = recog_data.n_operands - 1; i >= 0; --i)
16540 if (CONSTANT_P (recog_data.operand[i]))
16542 gcc_assert (!len);
16543 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16544 len = 1;
16545 else
16547 switch (get_attr_mode (insn))
16549 case MODE_QI:
16550 len+=1;
16551 break;
16552 case MODE_HI:
16553 len+=2;
16554 break;
16555 case MODE_SI:
16556 len+=4;
16557 break;
16558 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16559 case MODE_DI:
16560 len+=4;
16561 break;
16562 default:
16563 fatal_insn ("unknown insn mode", insn);
16567 return len;
16569 /* Compute default value for "length_address" attribute. */
16571 ix86_attr_length_address_default (rtx insn)
16573 int i;
16575 if (get_attr_type (insn) == TYPE_LEA)
16577 rtx set = PATTERN (insn);
16579 if (GET_CODE (set) == PARALLEL)
16580 set = XVECEXP (set, 0, 0);
16582 gcc_assert (GET_CODE (set) == SET);
16584 return memory_address_length (SET_SRC (set));
16587 extract_insn_cached (insn);
16588 for (i = recog_data.n_operands - 1; i >= 0; --i)
16589 if (MEM_P (recog_data.operand[i]))
16591 return memory_address_length (XEXP (recog_data.operand[i], 0));
16592 break;
16594 return 0;
16597 /* Return the maximum number of instructions a cpu can issue. */
16599 static int
16600 ix86_issue_rate (void)
16602 switch (ix86_tune)
16604 case PROCESSOR_PENTIUM:
16605 case PROCESSOR_K6:
16606 return 2;
16608 case PROCESSOR_PENTIUMPRO:
16609 case PROCESSOR_PENTIUM4:
16610 case PROCESSOR_ATHLON:
16611 case PROCESSOR_K8:
16612 case PROCESSOR_AMDFAM10:
16613 case PROCESSOR_NOCONA:
16614 case PROCESSOR_GENERIC32:
16615 case PROCESSOR_GENERIC64:
16616 return 3;
16618 case PROCESSOR_CORE2:
16619 return 4;
16621 default:
16622 return 1;
16626 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16627 by DEP_INSN and nothing else set by DEP_INSN. */
16629 static int
16630 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16632 rtx set, set2;
16634 /* Simplify the test for uninteresting insns. */
16635 if (insn_type != TYPE_SETCC
16636 && insn_type != TYPE_ICMOV
16637 && insn_type != TYPE_FCMOV
16638 && insn_type != TYPE_IBR)
16639 return 0;
16641 if ((set = single_set (dep_insn)) != 0)
16643 set = SET_DEST (set);
16644 set2 = NULL_RTX;
16646 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16647 && XVECLEN (PATTERN (dep_insn), 0) == 2
16648 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16649 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16651 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16652 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16654 else
16655 return 0;
16657 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16658 return 0;
16660 /* This test is true if the dependent insn reads the flags but
16661 not any other potentially set register. */
16662 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16663 return 0;
16665 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16666 return 0;
16668 return 1;
16671 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16672 address with operands set by DEP_INSN. */
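/* For example (illustrative only): on the original Pentium the pair

       addl $4, %ebx
       movl (%ebx), %eax

   suffers an address generation interlock, because the load's address
   register is written by the immediately preceding insn.  */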
16674 static int
16675 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16677 rtx addr;
16679 if (insn_type == TYPE_LEA
16680 && TARGET_PENTIUM)
16682 addr = PATTERN (insn);
16684 if (GET_CODE (addr) == PARALLEL)
16685 addr = XVECEXP (addr, 0, 0);
16687 gcc_assert (GET_CODE (addr) == SET);
16689 addr = SET_SRC (addr);
16691 else
16693 int i;
16694 extract_insn_cached (insn);
16695 for (i = recog_data.n_operands - 1; i >= 0; --i)
16696 if (MEM_P (recog_data.operand[i]))
16698 addr = XEXP (recog_data.operand[i], 0);
16699 goto found;
16701 return 0;
16702 found:;
16705 return modified_in_p (addr, dep_insn);
16708 static int
16709 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16711 enum attr_type insn_type, dep_insn_type;
16712 enum attr_memory memory;
16713 rtx set, set2;
16714 int dep_insn_code_number;
16716 /* Anti and output dependencies have zero cost on all CPUs. */
16717 if (REG_NOTE_KIND (link) != 0)
16718 return 0;
16720 dep_insn_code_number = recog_memoized (dep_insn);
16722 /* If we can't recognize the insns, we can't really do anything. */
16723 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16724 return cost;
16726 insn_type = get_attr_type (insn);
16727 dep_insn_type = get_attr_type (dep_insn);
16729 switch (ix86_tune)
16731 case PROCESSOR_PENTIUM:
16732 /* Address Generation Interlock adds a cycle of latency. */
16733 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16734 cost += 1;
16736 /* ??? Compares pair with jump/setcc. */
16737 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16738 cost = 0;
16740 /* Floating point stores require value to be ready one cycle earlier. */
16741 if (insn_type == TYPE_FMOV
16742 && get_attr_memory (insn) == MEMORY_STORE
16743 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16744 cost += 1;
16745 break;
16747 case PROCESSOR_PENTIUMPRO:
16748 memory = get_attr_memory (insn);
16750 /* INT->FP conversion is expensive. */
16751 if (get_attr_fp_int_src (dep_insn))
16752 cost += 5;
16754 /* There is one cycle extra latency between an FP op and a store. */
16755 if (insn_type == TYPE_FMOV
16756 && (set = single_set (dep_insn)) != NULL_RTX
16757 && (set2 = single_set (insn)) != NULL_RTX
16758 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16759 && MEM_P (SET_DEST (set2)))
16760 cost += 1;
16762 /* Show the ability of the reorder buffer to hide the latency of a load by
16763 executing it in parallel with the previous instruction, in case the
16764 previous instruction is not needed to compute the address. */
16765 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16766 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16768 /* Claim moves to take one cycle, as the core can issue one load
16769 at a time and the next load can start a cycle later. */
16770 if (dep_insn_type == TYPE_IMOV
16771 || dep_insn_type == TYPE_FMOV)
16772 cost = 1;
16773 else if (cost > 1)
16774 cost--;
16776 break;
16778 case PROCESSOR_K6:
16779 memory = get_attr_memory (insn);
16781 /* The esp dependency is resolved before the instruction is really
16782 finished. */
16783 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16784 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16785 return 1;
16787 /* INT->FP conversion is expensive. */
16788 if (get_attr_fp_int_src (dep_insn))
16789 cost += 5;
16791 /* Show the ability of the reorder buffer to hide the latency of a load by
16792 executing it in parallel with the previous instruction, in case the
16793 previous instruction is not needed to compute the address. */
16794 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16795 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16797 /* Claim moves to take one cycle, as the core can issue one load
16798 at a time and the next load can start a cycle later. */
16799 if (dep_insn_type == TYPE_IMOV
16800 || dep_insn_type == TYPE_FMOV)
16801 cost = 1;
16802 else if (cost > 2)
16803 cost -= 2;
16804 else
16805 cost = 1;
16807 break;
16809 case PROCESSOR_ATHLON:
16810 case PROCESSOR_K8:
16811 case PROCESSOR_AMDFAM10:
16812 case PROCESSOR_GENERIC32:
16813 case PROCESSOR_GENERIC64:
16814 memory = get_attr_memory (insn);
16816 /* Show the ability of the reorder buffer to hide the latency of a load by
16817 executing it in parallel with the previous instruction, in case the
16818 previous instruction is not needed to compute the address. */
16819 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16820 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16822 enum attr_unit unit = get_attr_unit (insn);
16823 int loadcost = 3;
16825 /* Because of the difference between the length of the integer and
16826 floating unit pipeline preparation stages, the memory operands
16827 for floating point are cheaper.
16829 ??? For Athlon the difference is most probably 2. */
16830 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16831 loadcost = 3;
16832 else
16833 loadcost = TARGET_ATHLON ? 2 : 0;
16835 if (cost >= loadcost)
16836 cost -= loadcost;
16837 else
16838 cost = 0;
16841 default:
16842 break;
16845 return cost;
16848 /* How many alternative schedules to try. This should be as wide as the
16849 scheduling freedom in the DFA, but no wider. Making this value too
16850 large results in extra work for the scheduler. */
16852 static int
16853 ia32_multipass_dfa_lookahead (void)
16855 switch (ix86_tune)
16857 case PROCESSOR_PENTIUM:
16858 return 2;
16860 case PROCESSOR_PENTIUMPRO:
16861 case PROCESSOR_K6:
16862 return 1;
16864 default:
16865 return 0;
16870 /* Compute the alignment given to a constant that is being placed in memory.
16871 EXP is the constant and ALIGN is the alignment that the object would
16872 ordinarily have.
16873 The value of this function is used instead of that alignment to align
16874 the object. */
16877 ix86_constant_alignment (tree exp, int align)
16879 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16880 || TREE_CODE (exp) == INTEGER_CST)
16882 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16883 return 64;
16884 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16885 return 128;
16887 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16888 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16889 return BITS_PER_WORD;
16891 return align;
16894 /* Compute the alignment for a static variable.
16895 TYPE is the data type, and ALIGN is the alignment that
16896 the object would ordinarily have. The value of this function is used
16897 instead of that alignment to align the object. */
16900 ix86_data_alignment (tree type, int align)
16902 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16904 if (AGGREGATE_TYPE_P (type)
16905 && TYPE_SIZE (type)
16906 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16907 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16908 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16909 && align < max_align)
16910 align = max_align;
16912 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16913 to a 16-byte boundary. */
16914 if (TARGET_64BIT)
16916 if (AGGREGATE_TYPE_P (type)
16917 && TYPE_SIZE (type)
16918 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16919 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16920 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16921 return 128;
16924 if (TREE_CODE (type) == ARRAY_TYPE)
16926 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16927 return 64;
16928 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16929 return 128;
16931 else if (TREE_CODE (type) == COMPLEX_TYPE)
16934 if (TYPE_MODE (type) == DCmode && align < 64)
16935 return 64;
16936 if (TYPE_MODE (type) == XCmode && align < 128)
16937 return 128;
16939 else if ((TREE_CODE (type) == RECORD_TYPE
16940 || TREE_CODE (type) == UNION_TYPE
16941 || TREE_CODE (type) == QUAL_UNION_TYPE)
16942 && TYPE_FIELDS (type))
16944 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16945 return 64;
16946 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16947 return 128;
16949 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16950 || TREE_CODE (type) == INTEGER_TYPE)
16952 if (TYPE_MODE (type) == DFmode && align < 64)
16953 return 64;
16954 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16955 return 128;
16958 return align;
16961 /* Compute the alignment for a local variable.
16962 TYPE is the data type, and ALIGN is the alignment that
16963 the object would ordinarily have. The value of this macro is used
16964 instead of that alignment to align the object. */
16967 ix86_local_alignment (tree type, int align)
16969 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16970 to a 16-byte boundary. */
16971 if (TARGET_64BIT)
16973 if (AGGREGATE_TYPE_P (type)
16974 && TYPE_SIZE (type)
16975 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16976 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16977 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16978 return 128;
16980 if (TREE_CODE (type) == ARRAY_TYPE)
16982 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16983 return 64;
16984 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16985 return 128;
16987 else if (TREE_CODE (type) == COMPLEX_TYPE)
16989 if (TYPE_MODE (type) == DCmode && align < 64)
16990 return 64;
16991 if (TYPE_MODE (type) == XCmode && align < 128)
16992 return 128;
16994 else if ((TREE_CODE (type) == RECORD_TYPE
16995 || TREE_CODE (type) == UNION_TYPE
16996 || TREE_CODE (type) == QUAL_UNION_TYPE)
16997 && TYPE_FIELDS (type))
16999 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17000 return 64;
17001 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17002 return 128;
17004 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17005 || TREE_CODE (type) == INTEGER_TYPE)
17008 if (TYPE_MODE (type) == DFmode && align < 64)
17009 return 64;
17010 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17011 return 128;
17013 return align;
17016 /* Emit RTL insns to initialize the variable parts of a trampoline.
17017 FNADDR is an RTX for the address of the function's pure code.
17018 CXT is an RTX for the static chain value for the function. */
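/* For the !TARGET_64BIT case below, the 10-byte trampoline emitted is,
   schematically (an illustrative sketch, not an authoritative encoding):

     offset 0:  b9 <cxt>    movl $CXT, %ecx
     offset 5:  e9 <disp>   jmp  FNADDR    (disp relative to TRAMP + 10)  */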
17019 void
17020 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17022 if (!TARGET_64BIT)
17024 /* Compute offset from the end of the jmp to the target function. */
17025 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17026 plus_constant (tramp, 10),
17027 NULL_RTX, 1, OPTAB_DIRECT);
17028 emit_move_insn (gen_rtx_MEM (QImode, tramp),
17029 gen_int_mode (0xb9, QImode));
17030 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17031 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17032 gen_int_mode (0xe9, QImode));
17033 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17035 else
17037 int offset = 0;
17038 /* Try to load the address using the shorter movl instead of movabs.
17039 We may want to support movq for kernel mode, but the kernel does not use
17040 trampolines at the moment. */
17041 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17043 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17044 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17045 gen_int_mode (0xbb41, HImode));
17046 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17047 gen_lowpart (SImode, fnaddr));
17048 offset += 6;
17050 else
17052 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17053 gen_int_mode (0xbb49, HImode));
17054 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17055 fnaddr);
17056 offset += 10;
17058 /* Load static chain using movabs to r10. */
17059 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17060 gen_int_mode (0xba49, HImode));
17061 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17062 cxt);
17063 offset += 10;
17064 /* Jump to r11. */
17065 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17066 gen_int_mode (0xff49, HImode));
17067 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17068 gen_int_mode (0xe3, QImode));
17069 offset += 3;
17070 gcc_assert (offset <= TRAMPOLINE_SIZE);
17073 #ifdef ENABLE_EXECUTE_STACK
17074 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17075 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17076 #endif
17079 /* Codes for all the SSE/MMX builtins. */
17080 enum ix86_builtins
17082 IX86_BUILTIN_ADDPS,
17083 IX86_BUILTIN_ADDSS,
17084 IX86_BUILTIN_DIVPS,
17085 IX86_BUILTIN_DIVSS,
17086 IX86_BUILTIN_MULPS,
17087 IX86_BUILTIN_MULSS,
17088 IX86_BUILTIN_SUBPS,
17089 IX86_BUILTIN_SUBSS,
17091 IX86_BUILTIN_CMPEQPS,
17092 IX86_BUILTIN_CMPLTPS,
17093 IX86_BUILTIN_CMPLEPS,
17094 IX86_BUILTIN_CMPGTPS,
17095 IX86_BUILTIN_CMPGEPS,
17096 IX86_BUILTIN_CMPNEQPS,
17097 IX86_BUILTIN_CMPNLTPS,
17098 IX86_BUILTIN_CMPNLEPS,
17099 IX86_BUILTIN_CMPNGTPS,
17100 IX86_BUILTIN_CMPNGEPS,
17101 IX86_BUILTIN_CMPORDPS,
17102 IX86_BUILTIN_CMPUNORDPS,
17103 IX86_BUILTIN_CMPEQSS,
17104 IX86_BUILTIN_CMPLTSS,
17105 IX86_BUILTIN_CMPLESS,
17106 IX86_BUILTIN_CMPNEQSS,
17107 IX86_BUILTIN_CMPNLTSS,
17108 IX86_BUILTIN_CMPNLESS,
17109 IX86_BUILTIN_CMPNGTSS,
17110 IX86_BUILTIN_CMPNGESS,
17111 IX86_BUILTIN_CMPORDSS,
17112 IX86_BUILTIN_CMPUNORDSS,
17114 IX86_BUILTIN_COMIEQSS,
17115 IX86_BUILTIN_COMILTSS,
17116 IX86_BUILTIN_COMILESS,
17117 IX86_BUILTIN_COMIGTSS,
17118 IX86_BUILTIN_COMIGESS,
17119 IX86_BUILTIN_COMINEQSS,
17120 IX86_BUILTIN_UCOMIEQSS,
17121 IX86_BUILTIN_UCOMILTSS,
17122 IX86_BUILTIN_UCOMILESS,
17123 IX86_BUILTIN_UCOMIGTSS,
17124 IX86_BUILTIN_UCOMIGESS,
17125 IX86_BUILTIN_UCOMINEQSS,
17127 IX86_BUILTIN_CVTPI2PS,
17128 IX86_BUILTIN_CVTPS2PI,
17129 IX86_BUILTIN_CVTSI2SS,
17130 IX86_BUILTIN_CVTSI642SS,
17131 IX86_BUILTIN_CVTSS2SI,
17132 IX86_BUILTIN_CVTSS2SI64,
17133 IX86_BUILTIN_CVTTPS2PI,
17134 IX86_BUILTIN_CVTTSS2SI,
17135 IX86_BUILTIN_CVTTSS2SI64,
17137 IX86_BUILTIN_MAXPS,
17138 IX86_BUILTIN_MAXSS,
17139 IX86_BUILTIN_MINPS,
17140 IX86_BUILTIN_MINSS,
17142 IX86_BUILTIN_LOADUPS,
17143 IX86_BUILTIN_STOREUPS,
17144 IX86_BUILTIN_MOVSS,
17146 IX86_BUILTIN_MOVHLPS,
17147 IX86_BUILTIN_MOVLHPS,
17148 IX86_BUILTIN_LOADHPS,
17149 IX86_BUILTIN_LOADLPS,
17150 IX86_BUILTIN_STOREHPS,
17151 IX86_BUILTIN_STORELPS,
17153 IX86_BUILTIN_MASKMOVQ,
17154 IX86_BUILTIN_MOVMSKPS,
17155 IX86_BUILTIN_PMOVMSKB,
17157 IX86_BUILTIN_MOVNTPS,
17158 IX86_BUILTIN_MOVNTQ,
17160 IX86_BUILTIN_LOADDQU,
17161 IX86_BUILTIN_STOREDQU,
17163 IX86_BUILTIN_PACKSSWB,
17164 IX86_BUILTIN_PACKSSDW,
17165 IX86_BUILTIN_PACKUSWB,
17167 IX86_BUILTIN_PADDB,
17168 IX86_BUILTIN_PADDW,
17169 IX86_BUILTIN_PADDD,
17170 IX86_BUILTIN_PADDQ,
17171 IX86_BUILTIN_PADDSB,
17172 IX86_BUILTIN_PADDSW,
17173 IX86_BUILTIN_PADDUSB,
17174 IX86_BUILTIN_PADDUSW,
17175 IX86_BUILTIN_PSUBB,
17176 IX86_BUILTIN_PSUBW,
17177 IX86_BUILTIN_PSUBD,
17178 IX86_BUILTIN_PSUBQ,
17179 IX86_BUILTIN_PSUBSB,
17180 IX86_BUILTIN_PSUBSW,
17181 IX86_BUILTIN_PSUBUSB,
17182 IX86_BUILTIN_PSUBUSW,
17184 IX86_BUILTIN_PAND,
17185 IX86_BUILTIN_PANDN,
17186 IX86_BUILTIN_POR,
17187 IX86_BUILTIN_PXOR,
17189 IX86_BUILTIN_PAVGB,
17190 IX86_BUILTIN_PAVGW,
17192 IX86_BUILTIN_PCMPEQB,
17193 IX86_BUILTIN_PCMPEQW,
17194 IX86_BUILTIN_PCMPEQD,
17195 IX86_BUILTIN_PCMPGTB,
17196 IX86_BUILTIN_PCMPGTW,
17197 IX86_BUILTIN_PCMPGTD,
17199 IX86_BUILTIN_PMADDWD,
17201 IX86_BUILTIN_PMAXSW,
17202 IX86_BUILTIN_PMAXUB,
17203 IX86_BUILTIN_PMINSW,
17204 IX86_BUILTIN_PMINUB,
17206 IX86_BUILTIN_PMULHUW,
17207 IX86_BUILTIN_PMULHW,
17208 IX86_BUILTIN_PMULLW,
17210 IX86_BUILTIN_PSADBW,
17211 IX86_BUILTIN_PSHUFW,
17213 IX86_BUILTIN_PSLLW,
17214 IX86_BUILTIN_PSLLD,
17215 IX86_BUILTIN_PSLLQ,
17216 IX86_BUILTIN_PSRAW,
17217 IX86_BUILTIN_PSRAD,
17218 IX86_BUILTIN_PSRLW,
17219 IX86_BUILTIN_PSRLD,
17220 IX86_BUILTIN_PSRLQ,
17221 IX86_BUILTIN_PSLLWI,
17222 IX86_BUILTIN_PSLLDI,
17223 IX86_BUILTIN_PSLLQI,
17224 IX86_BUILTIN_PSRAWI,
17225 IX86_BUILTIN_PSRADI,
17226 IX86_BUILTIN_PSRLWI,
17227 IX86_BUILTIN_PSRLDI,
17228 IX86_BUILTIN_PSRLQI,
17230 IX86_BUILTIN_PUNPCKHBW,
17231 IX86_BUILTIN_PUNPCKHWD,
17232 IX86_BUILTIN_PUNPCKHDQ,
17233 IX86_BUILTIN_PUNPCKLBW,
17234 IX86_BUILTIN_PUNPCKLWD,
17235 IX86_BUILTIN_PUNPCKLDQ,
17237 IX86_BUILTIN_SHUFPS,
17239 IX86_BUILTIN_RCPPS,
17240 IX86_BUILTIN_RCPSS,
17241 IX86_BUILTIN_RSQRTPS,
17242 IX86_BUILTIN_RSQRTPS_NR,
17243 IX86_BUILTIN_RSQRTSS,
17244 IX86_BUILTIN_RSQRTF,
17245 IX86_BUILTIN_SQRTPS,
17246 IX86_BUILTIN_SQRTPS_NR,
17247 IX86_BUILTIN_SQRTSS,
17249 IX86_BUILTIN_UNPCKHPS,
17250 IX86_BUILTIN_UNPCKLPS,
17252 IX86_BUILTIN_ANDPS,
17253 IX86_BUILTIN_ANDNPS,
17254 IX86_BUILTIN_ORPS,
17255 IX86_BUILTIN_XORPS,
17257 IX86_BUILTIN_EMMS,
17258 IX86_BUILTIN_LDMXCSR,
17259 IX86_BUILTIN_STMXCSR,
17260 IX86_BUILTIN_SFENCE,
17262 /* 3DNow! Original */
17263 IX86_BUILTIN_FEMMS,
17264 IX86_BUILTIN_PAVGUSB,
17265 IX86_BUILTIN_PF2ID,
17266 IX86_BUILTIN_PFACC,
17267 IX86_BUILTIN_PFADD,
17268 IX86_BUILTIN_PFCMPEQ,
17269 IX86_BUILTIN_PFCMPGE,
17270 IX86_BUILTIN_PFCMPGT,
17271 IX86_BUILTIN_PFMAX,
17272 IX86_BUILTIN_PFMIN,
17273 IX86_BUILTIN_PFMUL,
17274 IX86_BUILTIN_PFRCP,
17275 IX86_BUILTIN_PFRCPIT1,
17276 IX86_BUILTIN_PFRCPIT2,
17277 IX86_BUILTIN_PFRSQIT1,
17278 IX86_BUILTIN_PFRSQRT,
17279 IX86_BUILTIN_PFSUB,
17280 IX86_BUILTIN_PFSUBR,
17281 IX86_BUILTIN_PI2FD,
17282 IX86_BUILTIN_PMULHRW,
17284 /* 3DNow! Athlon Extensions */
17285 IX86_BUILTIN_PF2IW,
17286 IX86_BUILTIN_PFNACC,
17287 IX86_BUILTIN_PFPNACC,
17288 IX86_BUILTIN_PI2FW,
17289 IX86_BUILTIN_PSWAPDSI,
17290 IX86_BUILTIN_PSWAPDSF,
17292 /* SSE2 */
17293 IX86_BUILTIN_ADDPD,
17294 IX86_BUILTIN_ADDSD,
17295 IX86_BUILTIN_DIVPD,
17296 IX86_BUILTIN_DIVSD,
17297 IX86_BUILTIN_MULPD,
17298 IX86_BUILTIN_MULSD,
17299 IX86_BUILTIN_SUBPD,
17300 IX86_BUILTIN_SUBSD,
17302 IX86_BUILTIN_CMPEQPD,
17303 IX86_BUILTIN_CMPLTPD,
17304 IX86_BUILTIN_CMPLEPD,
17305 IX86_BUILTIN_CMPGTPD,
17306 IX86_BUILTIN_CMPGEPD,
17307 IX86_BUILTIN_CMPNEQPD,
17308 IX86_BUILTIN_CMPNLTPD,
17309 IX86_BUILTIN_CMPNLEPD,
17310 IX86_BUILTIN_CMPNGTPD,
17311 IX86_BUILTIN_CMPNGEPD,
17312 IX86_BUILTIN_CMPORDPD,
17313 IX86_BUILTIN_CMPUNORDPD,
17314 IX86_BUILTIN_CMPEQSD,
17315 IX86_BUILTIN_CMPLTSD,
17316 IX86_BUILTIN_CMPLESD,
17317 IX86_BUILTIN_CMPNEQSD,
17318 IX86_BUILTIN_CMPNLTSD,
17319 IX86_BUILTIN_CMPNLESD,
17320 IX86_BUILTIN_CMPORDSD,
17321 IX86_BUILTIN_CMPUNORDSD,
17323 IX86_BUILTIN_COMIEQSD,
17324 IX86_BUILTIN_COMILTSD,
17325 IX86_BUILTIN_COMILESD,
17326 IX86_BUILTIN_COMIGTSD,
17327 IX86_BUILTIN_COMIGESD,
17328 IX86_BUILTIN_COMINEQSD,
17329 IX86_BUILTIN_UCOMIEQSD,
17330 IX86_BUILTIN_UCOMILTSD,
17331 IX86_BUILTIN_UCOMILESD,
17332 IX86_BUILTIN_UCOMIGTSD,
17333 IX86_BUILTIN_UCOMIGESD,
17334 IX86_BUILTIN_UCOMINEQSD,
17336 IX86_BUILTIN_MAXPD,
17337 IX86_BUILTIN_MAXSD,
17338 IX86_BUILTIN_MINPD,
17339 IX86_BUILTIN_MINSD,
17341 IX86_BUILTIN_ANDPD,
17342 IX86_BUILTIN_ANDNPD,
17343 IX86_BUILTIN_ORPD,
17344 IX86_BUILTIN_XORPD,
17346 IX86_BUILTIN_SQRTPD,
17347 IX86_BUILTIN_SQRTSD,
17349 IX86_BUILTIN_UNPCKHPD,
17350 IX86_BUILTIN_UNPCKLPD,
17352 IX86_BUILTIN_SHUFPD,
17354 IX86_BUILTIN_LOADUPD,
17355 IX86_BUILTIN_STOREUPD,
17356 IX86_BUILTIN_MOVSD,
17358 IX86_BUILTIN_LOADHPD,
17359 IX86_BUILTIN_LOADLPD,
17361 IX86_BUILTIN_CVTDQ2PD,
17362 IX86_BUILTIN_CVTDQ2PS,
17364 IX86_BUILTIN_CVTPD2DQ,
17365 IX86_BUILTIN_CVTPD2PI,
17366 IX86_BUILTIN_CVTPD2PS,
17367 IX86_BUILTIN_CVTTPD2DQ,
17368 IX86_BUILTIN_CVTTPD2PI,
17370 IX86_BUILTIN_CVTPI2PD,
17371 IX86_BUILTIN_CVTSI2SD,
17372 IX86_BUILTIN_CVTSI642SD,
17374 IX86_BUILTIN_CVTSD2SI,
17375 IX86_BUILTIN_CVTSD2SI64,
17376 IX86_BUILTIN_CVTSD2SS,
17377 IX86_BUILTIN_CVTSS2SD,
17378 IX86_BUILTIN_CVTTSD2SI,
17379 IX86_BUILTIN_CVTTSD2SI64,
17381 IX86_BUILTIN_CVTPS2DQ,
17382 IX86_BUILTIN_CVTPS2PD,
17383 IX86_BUILTIN_CVTTPS2DQ,
17385 IX86_BUILTIN_MOVNTI,
17386 IX86_BUILTIN_MOVNTPD,
17387 IX86_BUILTIN_MOVNTDQ,
17389 /* SSE2 MMX */
17390 IX86_BUILTIN_MASKMOVDQU,
17391 IX86_BUILTIN_MOVMSKPD,
17392 IX86_BUILTIN_PMOVMSKB128,
17394 IX86_BUILTIN_PACKSSWB128,
17395 IX86_BUILTIN_PACKSSDW128,
17396 IX86_BUILTIN_PACKUSWB128,
17398 IX86_BUILTIN_PADDB128,
17399 IX86_BUILTIN_PADDW128,
17400 IX86_BUILTIN_PADDD128,
17401 IX86_BUILTIN_PADDQ128,
17402 IX86_BUILTIN_PADDSB128,
17403 IX86_BUILTIN_PADDSW128,
17404 IX86_BUILTIN_PADDUSB128,
17405 IX86_BUILTIN_PADDUSW128,
17406 IX86_BUILTIN_PSUBB128,
17407 IX86_BUILTIN_PSUBW128,
17408 IX86_BUILTIN_PSUBD128,
17409 IX86_BUILTIN_PSUBQ128,
17410 IX86_BUILTIN_PSUBSB128,
17411 IX86_BUILTIN_PSUBSW128,
17412 IX86_BUILTIN_PSUBUSB128,
17413 IX86_BUILTIN_PSUBUSW128,
17415 IX86_BUILTIN_PAND128,
17416 IX86_BUILTIN_PANDN128,
17417 IX86_BUILTIN_POR128,
17418 IX86_BUILTIN_PXOR128,
17420 IX86_BUILTIN_PAVGB128,
17421 IX86_BUILTIN_PAVGW128,
17423 IX86_BUILTIN_PCMPEQB128,
17424 IX86_BUILTIN_PCMPEQW128,
17425 IX86_BUILTIN_PCMPEQD128,
17426 IX86_BUILTIN_PCMPGTB128,
17427 IX86_BUILTIN_PCMPGTW128,
17428 IX86_BUILTIN_PCMPGTD128,
17430 IX86_BUILTIN_PMADDWD128,
17432 IX86_BUILTIN_PMAXSW128,
17433 IX86_BUILTIN_PMAXUB128,
17434 IX86_BUILTIN_PMINSW128,
17435 IX86_BUILTIN_PMINUB128,
17437 IX86_BUILTIN_PMULUDQ,
17438 IX86_BUILTIN_PMULUDQ128,
17439 IX86_BUILTIN_PMULHUW128,
17440 IX86_BUILTIN_PMULHW128,
17441 IX86_BUILTIN_PMULLW128,
17443 IX86_BUILTIN_PSADBW128,
17444 IX86_BUILTIN_PSHUFHW,
17445 IX86_BUILTIN_PSHUFLW,
17446 IX86_BUILTIN_PSHUFD,
17448 IX86_BUILTIN_PSLLDQI128,
17449 IX86_BUILTIN_PSLLWI128,
17450 IX86_BUILTIN_PSLLDI128,
17451 IX86_BUILTIN_PSLLQI128,
17452 IX86_BUILTIN_PSRAWI128,
17453 IX86_BUILTIN_PSRADI128,
17454 IX86_BUILTIN_PSRLDQI128,
17455 IX86_BUILTIN_PSRLWI128,
17456 IX86_BUILTIN_PSRLDI128,
17457 IX86_BUILTIN_PSRLQI128,
17459 IX86_BUILTIN_PSLLDQ128,
17460 IX86_BUILTIN_PSLLW128,
17461 IX86_BUILTIN_PSLLD128,
17462 IX86_BUILTIN_PSLLQ128,
17463 IX86_BUILTIN_PSRAW128,
17464 IX86_BUILTIN_PSRAD128,
17465 IX86_BUILTIN_PSRLW128,
17466 IX86_BUILTIN_PSRLD128,
17467 IX86_BUILTIN_PSRLQ128,
17469 IX86_BUILTIN_PUNPCKHBW128,
17470 IX86_BUILTIN_PUNPCKHWD128,
17471 IX86_BUILTIN_PUNPCKHDQ128,
17472 IX86_BUILTIN_PUNPCKHQDQ128,
17473 IX86_BUILTIN_PUNPCKLBW128,
17474 IX86_BUILTIN_PUNPCKLWD128,
17475 IX86_BUILTIN_PUNPCKLDQ128,
17476 IX86_BUILTIN_PUNPCKLQDQ128,
17478 IX86_BUILTIN_CLFLUSH,
17479 IX86_BUILTIN_MFENCE,
17480 IX86_BUILTIN_LFENCE,
17482 /* Prescott New Instructions. */
17483 IX86_BUILTIN_ADDSUBPS,
17484 IX86_BUILTIN_HADDPS,
17485 IX86_BUILTIN_HSUBPS,
17486 IX86_BUILTIN_MOVSHDUP,
17487 IX86_BUILTIN_MOVSLDUP,
17488 IX86_BUILTIN_ADDSUBPD,
17489 IX86_BUILTIN_HADDPD,
17490 IX86_BUILTIN_HSUBPD,
17491 IX86_BUILTIN_LDDQU,
17493 IX86_BUILTIN_MONITOR,
17494 IX86_BUILTIN_MWAIT,
17496 /* SSSE3. */
17497 IX86_BUILTIN_PHADDW,
17498 IX86_BUILTIN_PHADDD,
17499 IX86_BUILTIN_PHADDSW,
17500 IX86_BUILTIN_PHSUBW,
17501 IX86_BUILTIN_PHSUBD,
17502 IX86_BUILTIN_PHSUBSW,
17503 IX86_BUILTIN_PMADDUBSW,
17504 IX86_BUILTIN_PMULHRSW,
17505 IX86_BUILTIN_PSHUFB,
17506 IX86_BUILTIN_PSIGNB,
17507 IX86_BUILTIN_PSIGNW,
17508 IX86_BUILTIN_PSIGND,
17509 IX86_BUILTIN_PALIGNR,
17510 IX86_BUILTIN_PABSB,
17511 IX86_BUILTIN_PABSW,
17512 IX86_BUILTIN_PABSD,
17514 IX86_BUILTIN_PHADDW128,
17515 IX86_BUILTIN_PHADDD128,
17516 IX86_BUILTIN_PHADDSW128,
17517 IX86_BUILTIN_PHSUBW128,
17518 IX86_BUILTIN_PHSUBD128,
17519 IX86_BUILTIN_PHSUBSW128,
17520 IX86_BUILTIN_PMADDUBSW128,
17521 IX86_BUILTIN_PMULHRSW128,
17522 IX86_BUILTIN_PSHUFB128,
17523 IX86_BUILTIN_PSIGNB128,
17524 IX86_BUILTIN_PSIGNW128,
17525 IX86_BUILTIN_PSIGND128,
17526 IX86_BUILTIN_PALIGNR128,
17527 IX86_BUILTIN_PABSB128,
17528 IX86_BUILTIN_PABSW128,
17529 IX86_BUILTIN_PABSD128,
17531 /* AMDFAM10 - SSE4A New Instructions. */
17532 IX86_BUILTIN_MOVNTSD,
17533 IX86_BUILTIN_MOVNTSS,
17534 IX86_BUILTIN_EXTRQI,
17535 IX86_BUILTIN_EXTRQ,
17536 IX86_BUILTIN_INSERTQI,
17537 IX86_BUILTIN_INSERTQ,
17539 /* SSE4.1. */
17540 IX86_BUILTIN_BLENDPD,
17541 IX86_BUILTIN_BLENDPS,
17542 IX86_BUILTIN_BLENDVPD,
17543 IX86_BUILTIN_BLENDVPS,
17544 IX86_BUILTIN_PBLENDVB128,
17545 IX86_BUILTIN_PBLENDW128,
17547 IX86_BUILTIN_DPPD,
17548 IX86_BUILTIN_DPPS,
17550 IX86_BUILTIN_INSERTPS128,
17552 IX86_BUILTIN_MOVNTDQA,
17553 IX86_BUILTIN_MPSADBW128,
17554 IX86_BUILTIN_PACKUSDW128,
17555 IX86_BUILTIN_PCMPEQQ,
17556 IX86_BUILTIN_PHMINPOSUW128,
17558 IX86_BUILTIN_PMAXSB128,
17559 IX86_BUILTIN_PMAXSD128,
17560 IX86_BUILTIN_PMAXUD128,
17561 IX86_BUILTIN_PMAXUW128,
17563 IX86_BUILTIN_PMINSB128,
17564 IX86_BUILTIN_PMINSD128,
17565 IX86_BUILTIN_PMINUD128,
17566 IX86_BUILTIN_PMINUW128,
17568 IX86_BUILTIN_PMOVSXBW128,
17569 IX86_BUILTIN_PMOVSXBD128,
17570 IX86_BUILTIN_PMOVSXBQ128,
17571 IX86_BUILTIN_PMOVSXWD128,
17572 IX86_BUILTIN_PMOVSXWQ128,
17573 IX86_BUILTIN_PMOVSXDQ128,
17575 IX86_BUILTIN_PMOVZXBW128,
17576 IX86_BUILTIN_PMOVZXBD128,
17577 IX86_BUILTIN_PMOVZXBQ128,
17578 IX86_BUILTIN_PMOVZXWD128,
17579 IX86_BUILTIN_PMOVZXWQ128,
17580 IX86_BUILTIN_PMOVZXDQ128,
17582 IX86_BUILTIN_PMULDQ128,
17583 IX86_BUILTIN_PMULLD128,
17585 IX86_BUILTIN_ROUNDPD,
17586 IX86_BUILTIN_ROUNDPS,
17587 IX86_BUILTIN_ROUNDSD,
17588 IX86_BUILTIN_ROUNDSS,
17590 IX86_BUILTIN_PTESTZ,
17591 IX86_BUILTIN_PTESTC,
17592 IX86_BUILTIN_PTESTNZC,
17594 IX86_BUILTIN_VEC_INIT_V2SI,
17595 IX86_BUILTIN_VEC_INIT_V4HI,
17596 IX86_BUILTIN_VEC_INIT_V8QI,
17597 IX86_BUILTIN_VEC_EXT_V2DF,
17598 IX86_BUILTIN_VEC_EXT_V2DI,
17599 IX86_BUILTIN_VEC_EXT_V4SF,
17600 IX86_BUILTIN_VEC_EXT_V4SI,
17601 IX86_BUILTIN_VEC_EXT_V8HI,
17602 IX86_BUILTIN_VEC_EXT_V2SI,
17603 IX86_BUILTIN_VEC_EXT_V4HI,
17604 IX86_BUILTIN_VEC_EXT_V16QI,
17605 IX86_BUILTIN_VEC_SET_V2DI,
17606 IX86_BUILTIN_VEC_SET_V4SF,
17607 IX86_BUILTIN_VEC_SET_V4SI,
17608 IX86_BUILTIN_VEC_SET_V8HI,
17609 IX86_BUILTIN_VEC_SET_V4HI,
17610 IX86_BUILTIN_VEC_SET_V16QI,
17612 IX86_BUILTIN_VEC_PACK_SFIX,
17614 /* SSE4.2. */
17615 IX86_BUILTIN_CRC32QI,
17616 IX86_BUILTIN_CRC32HI,
17617 IX86_BUILTIN_CRC32SI,
17618 IX86_BUILTIN_CRC32DI,
17620 IX86_BUILTIN_PCMPESTRI128,
17621 IX86_BUILTIN_PCMPESTRM128,
17622 IX86_BUILTIN_PCMPESTRA128,
17623 IX86_BUILTIN_PCMPESTRC128,
17624 IX86_BUILTIN_PCMPESTRO128,
17625 IX86_BUILTIN_PCMPESTRS128,
17626 IX86_BUILTIN_PCMPESTRZ128,
17627 IX86_BUILTIN_PCMPISTRI128,
17628 IX86_BUILTIN_PCMPISTRM128,
17629 IX86_BUILTIN_PCMPISTRA128,
17630 IX86_BUILTIN_PCMPISTRC128,
17631 IX86_BUILTIN_PCMPISTRO128,
17632 IX86_BUILTIN_PCMPISTRS128,
17633 IX86_BUILTIN_PCMPISTRZ128,
17635 IX86_BUILTIN_PCMPGTQ,
17637 /* AES instructions */
17638 IX86_BUILTIN_AESENC128,
17639 IX86_BUILTIN_AESENCLAST128,
17640 IX86_BUILTIN_AESDEC128,
17641 IX86_BUILTIN_AESDECLAST128,
17642 IX86_BUILTIN_AESIMC128,
17643 IX86_BUILTIN_AESKEYGENASSIST128,
17645 /* PCLMUL instruction */
17646 IX86_BUILTIN_PCLMULQDQ128,
17648 /* TFmode support builtins. */
17649 IX86_BUILTIN_INFQ,
17650 IX86_BUILTIN_FABSQ,
17651 IX86_BUILTIN_COPYSIGNQ,
17653 /* SSE5 instructions */
17654 IX86_BUILTIN_FMADDSS,
17655 IX86_BUILTIN_FMADDSD,
17656 IX86_BUILTIN_FMADDPS,
17657 IX86_BUILTIN_FMADDPD,
17658 IX86_BUILTIN_FMSUBSS,
17659 IX86_BUILTIN_FMSUBSD,
17660 IX86_BUILTIN_FMSUBPS,
17661 IX86_BUILTIN_FMSUBPD,
17662 IX86_BUILTIN_FNMADDSS,
17663 IX86_BUILTIN_FNMADDSD,
17664 IX86_BUILTIN_FNMADDPS,
17665 IX86_BUILTIN_FNMADDPD,
17666 IX86_BUILTIN_FNMSUBSS,
17667 IX86_BUILTIN_FNMSUBSD,
17668 IX86_BUILTIN_FNMSUBPS,
17669 IX86_BUILTIN_FNMSUBPD,
17670 IX86_BUILTIN_PCMOV_V2DI,
17671 IX86_BUILTIN_PCMOV_V4SI,
17672 IX86_BUILTIN_PCMOV_V8HI,
17673 IX86_BUILTIN_PCMOV_V16QI,
17674 IX86_BUILTIN_PCMOV_V4SF,
17675 IX86_BUILTIN_PCMOV_V2DF,
17676 IX86_BUILTIN_PPERM,
17677 IX86_BUILTIN_PERMPS,
17678 IX86_BUILTIN_PERMPD,
17679 IX86_BUILTIN_PMACSSWW,
17680 IX86_BUILTIN_PMACSWW,
17681 IX86_BUILTIN_PMACSSWD,
17682 IX86_BUILTIN_PMACSWD,
17683 IX86_BUILTIN_PMACSSDD,
17684 IX86_BUILTIN_PMACSDD,
17685 IX86_BUILTIN_PMACSSDQL,
17686 IX86_BUILTIN_PMACSSDQH,
17687 IX86_BUILTIN_PMACSDQL,
17688 IX86_BUILTIN_PMACSDQH,
17689 IX86_BUILTIN_PMADCSSWD,
17690 IX86_BUILTIN_PMADCSWD,
17691 IX86_BUILTIN_PHADDBW,
17692 IX86_BUILTIN_PHADDBD,
17693 IX86_BUILTIN_PHADDBQ,
17694 IX86_BUILTIN_PHADDWD,
17695 IX86_BUILTIN_PHADDWQ,
17696 IX86_BUILTIN_PHADDDQ,
17697 IX86_BUILTIN_PHADDUBW,
17698 IX86_BUILTIN_PHADDUBD,
17699 IX86_BUILTIN_PHADDUBQ,
17700 IX86_BUILTIN_PHADDUWD,
17701 IX86_BUILTIN_PHADDUWQ,
17702 IX86_BUILTIN_PHADDUDQ,
17703 IX86_BUILTIN_PHSUBBW,
17704 IX86_BUILTIN_PHSUBWD,
17705 IX86_BUILTIN_PHSUBDQ,
17706 IX86_BUILTIN_PROTB,
17707 IX86_BUILTIN_PROTW,
17708 IX86_BUILTIN_PROTD,
17709 IX86_BUILTIN_PROTQ,
17710 IX86_BUILTIN_PROTB_IMM,
17711 IX86_BUILTIN_PROTW_IMM,
17712 IX86_BUILTIN_PROTD_IMM,
17713 IX86_BUILTIN_PROTQ_IMM,
17714 IX86_BUILTIN_PSHLB,
17715 IX86_BUILTIN_PSHLW,
17716 IX86_BUILTIN_PSHLD,
17717 IX86_BUILTIN_PSHLQ,
17718 IX86_BUILTIN_PSHAB,
17719 IX86_BUILTIN_PSHAW,
17720 IX86_BUILTIN_PSHAD,
17721 IX86_BUILTIN_PSHAQ,
17722 IX86_BUILTIN_FRCZSS,
17723 IX86_BUILTIN_FRCZSD,
17724 IX86_BUILTIN_FRCZPS,
17725 IX86_BUILTIN_FRCZPD,
17726 IX86_BUILTIN_CVTPH2PS,
17727 IX86_BUILTIN_CVTPS2PH,
17729 IX86_BUILTIN_COMEQSS,
17730 IX86_BUILTIN_COMNESS,
17731 IX86_BUILTIN_COMLTSS,
17732 IX86_BUILTIN_COMLESS,
17733 IX86_BUILTIN_COMGTSS,
17734 IX86_BUILTIN_COMGESS,
17735 IX86_BUILTIN_COMUEQSS,
17736 IX86_BUILTIN_COMUNESS,
17737 IX86_BUILTIN_COMULTSS,
17738 IX86_BUILTIN_COMULESS,
17739 IX86_BUILTIN_COMUGTSS,
17740 IX86_BUILTIN_COMUGESS,
17741 IX86_BUILTIN_COMORDSS,
17742 IX86_BUILTIN_COMUNORDSS,
17743 IX86_BUILTIN_COMFALSESS,
17744 IX86_BUILTIN_COMTRUESS,
17746 IX86_BUILTIN_COMEQSD,
17747 IX86_BUILTIN_COMNESD,
17748 IX86_BUILTIN_COMLTSD,
17749 IX86_BUILTIN_COMLESD,
17750 IX86_BUILTIN_COMGTSD,
17751 IX86_BUILTIN_COMGESD,
17752 IX86_BUILTIN_COMUEQSD,
17753 IX86_BUILTIN_COMUNESD,
17754 IX86_BUILTIN_COMULTSD,
17755 IX86_BUILTIN_COMULESD,
17756 IX86_BUILTIN_COMUGTSD,
17757 IX86_BUILTIN_COMUGESD,
17758 IX86_BUILTIN_COMORDSD,
17759 IX86_BUILTIN_COMUNORDSD,
17760 IX86_BUILTIN_COMFALSESD,
17761 IX86_BUILTIN_COMTRUESD,
17763 IX86_BUILTIN_COMEQPS,
17764 IX86_BUILTIN_COMNEPS,
17765 IX86_BUILTIN_COMLTPS,
17766 IX86_BUILTIN_COMLEPS,
17767 IX86_BUILTIN_COMGTPS,
17768 IX86_BUILTIN_COMGEPS,
17769 IX86_BUILTIN_COMUEQPS,
17770 IX86_BUILTIN_COMUNEPS,
17771 IX86_BUILTIN_COMULTPS,
17772 IX86_BUILTIN_COMULEPS,
17773 IX86_BUILTIN_COMUGTPS,
17774 IX86_BUILTIN_COMUGEPS,
17775 IX86_BUILTIN_COMORDPS,
17776 IX86_BUILTIN_COMUNORDPS,
17777 IX86_BUILTIN_COMFALSEPS,
17778 IX86_BUILTIN_COMTRUEPS,
17780 IX86_BUILTIN_COMEQPD,
17781 IX86_BUILTIN_COMNEPD,
17782 IX86_BUILTIN_COMLTPD,
17783 IX86_BUILTIN_COMLEPD,
17784 IX86_BUILTIN_COMGTPD,
17785 IX86_BUILTIN_COMGEPD,
17786 IX86_BUILTIN_COMUEQPD,
17787 IX86_BUILTIN_COMUNEPD,
17788 IX86_BUILTIN_COMULTPD,
17789 IX86_BUILTIN_COMULEPD,
17790 IX86_BUILTIN_COMUGTPD,
17791 IX86_BUILTIN_COMUGEPD,
17792 IX86_BUILTIN_COMORDPD,
17793 IX86_BUILTIN_COMUNORDPD,
17794 IX86_BUILTIN_COMFALSEPD,
17795 IX86_BUILTIN_COMTRUEPD,
17797 IX86_BUILTIN_PCOMEQUB,
17798 IX86_BUILTIN_PCOMNEUB,
17799 IX86_BUILTIN_PCOMLTUB,
17800 IX86_BUILTIN_PCOMLEUB,
17801 IX86_BUILTIN_PCOMGTUB,
17802 IX86_BUILTIN_PCOMGEUB,
17803 IX86_BUILTIN_PCOMFALSEUB,
17804 IX86_BUILTIN_PCOMTRUEUB,
17805 IX86_BUILTIN_PCOMEQUW,
17806 IX86_BUILTIN_PCOMNEUW,
17807 IX86_BUILTIN_PCOMLTUW,
17808 IX86_BUILTIN_PCOMLEUW,
17809 IX86_BUILTIN_PCOMGTUW,
17810 IX86_BUILTIN_PCOMGEUW,
17811 IX86_BUILTIN_PCOMFALSEUW,
17812 IX86_BUILTIN_PCOMTRUEUW,
17813 IX86_BUILTIN_PCOMEQUD,
17814 IX86_BUILTIN_PCOMNEUD,
17815 IX86_BUILTIN_PCOMLTUD,
17816 IX86_BUILTIN_PCOMLEUD,
17817 IX86_BUILTIN_PCOMGTUD,
17818 IX86_BUILTIN_PCOMGEUD,
17819 IX86_BUILTIN_PCOMFALSEUD,
17820 IX86_BUILTIN_PCOMTRUEUD,
17821 IX86_BUILTIN_PCOMEQUQ,
17822 IX86_BUILTIN_PCOMNEUQ,
17823 IX86_BUILTIN_PCOMLTUQ,
17824 IX86_BUILTIN_PCOMLEUQ,
17825 IX86_BUILTIN_PCOMGTUQ,
17826 IX86_BUILTIN_PCOMGEUQ,
17827 IX86_BUILTIN_PCOMFALSEUQ,
17828 IX86_BUILTIN_PCOMTRUEUQ,
17830 IX86_BUILTIN_PCOMEQB,
17831 IX86_BUILTIN_PCOMNEB,
17832 IX86_BUILTIN_PCOMLTB,
17833 IX86_BUILTIN_PCOMLEB,
17834 IX86_BUILTIN_PCOMGTB,
17835 IX86_BUILTIN_PCOMGEB,
17836 IX86_BUILTIN_PCOMFALSEB,
17837 IX86_BUILTIN_PCOMTRUEB,
17838 IX86_BUILTIN_PCOMEQW,
17839 IX86_BUILTIN_PCOMNEW,
17840 IX86_BUILTIN_PCOMLTW,
17841 IX86_BUILTIN_PCOMLEW,
17842 IX86_BUILTIN_PCOMGTW,
17843 IX86_BUILTIN_PCOMGEW,
17844 IX86_BUILTIN_PCOMFALSEW,
17845 IX86_BUILTIN_PCOMTRUEW,
17846 IX86_BUILTIN_PCOMEQD,
17847 IX86_BUILTIN_PCOMNED,
17848 IX86_BUILTIN_PCOMLTD,
17849 IX86_BUILTIN_PCOMLED,
17850 IX86_BUILTIN_PCOMGTD,
17851 IX86_BUILTIN_PCOMGED,
17852 IX86_BUILTIN_PCOMFALSED,
17853 IX86_BUILTIN_PCOMTRUED,
17854 IX86_BUILTIN_PCOMEQQ,
17855 IX86_BUILTIN_PCOMNEQ,
17856 IX86_BUILTIN_PCOMLTQ,
17857 IX86_BUILTIN_PCOMLEQ,
17858 IX86_BUILTIN_PCOMGTQ,
17859 IX86_BUILTIN_PCOMGEQ,
17860 IX86_BUILTIN_PCOMFALSEQ,
17861 IX86_BUILTIN_PCOMTRUEQ,
17863 IX86_BUILTIN_MAX
17864 };
17866 /* Table for the ix86 builtin decls. */
17867 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17869 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17870 * if the target_flags include one of MASK. Stores the function decl
17871 * in the ix86_builtins array.
17872 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17874 static inline tree
17875 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17876 {
17877 tree decl = NULL_TREE;
17879 if (mask & ix86_isa_flags
17880 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17881 {
17882 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17883 NULL, NULL_TREE);
17884 ix86_builtins[(int) code] = decl;
17885 }
17887 return decl;
17888 }
17890 /* Like def_builtin, but also marks the function decl "const". */
17892 static inline tree
17893 def_builtin_const (int mask, const char *name, tree type,
17894 enum ix86_builtins code)
17895 {
17896 tree decl = def_builtin (mask, name, type, code);
17897 if (decl)
17898 TREE_READONLY (decl) = 1;
17899 return decl;
17900 }
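/* Illustrative sketch (not part of the original file): the two helpers
   above are how the rest of this file registers its builtins.  A typical
   call passes an ISA mask, the user-visible name, a function type node
   built elsewhere (the name v2df_ftype_v2df_v2df below is an assumption
   made only for this example), and the enum code:

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
                        v2df_ftype_v2df_v2df, IX86_BUILTIN_ADDPD);

   The resulting decl is stored in ix86_builtins[] so later expansion code
   can look it up by its IX86_BUILTIN_* code; def_builtin_const additionally
   sets TREE_READONLY, marking the builtin as having no side effects.  */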
17902 /* Bits for builtin_description.flag. */
17904 /* Set when we don't support the comparison natively, and should
17905 swap_comparison in order to support it. */
17906 #define BUILTIN_DESC_SWAP_OPERANDS 1
17908 struct builtin_description
17909 {
17910 const unsigned int mask;
17911 const enum insn_code icode;
17912 const char *const name;
17913 const enum ix86_builtins code;
17914 const enum rtx_code comparison;
17915 const int flag;
17916 };
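/* Example of how BUILTIN_DESC_SWAP_OPERANDS is used (a reading aid, not new
   table content): the SSE compare instructions only encode the "less than"
   style predicates, so a greater-than builtin is described with the swapped
   predicate plus this flag, e.g. in bdesc_2arg below:

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps",
       IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },

   i.e. cmpgtps (a, b) is emitted as the LT comparison with its operands
   exchanged.  */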
17918 static const struct builtin_description bdesc_comi[] =
17919 {
17920 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17921 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17923 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17924 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17925 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17926 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17927 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17928 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17934 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17935 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17936 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17937 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17938 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17944 };
17946 static const struct builtin_description bdesc_ptest[] =
17947 {
17948 /* SSE4.1 */
17949 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17950 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17951 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17952 };
17954 static const struct builtin_description bdesc_pcmpestr[] =
17955 {
17956 /* SSE4.2 */
17957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17958 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17960 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17964 };
17966 static const struct builtin_description bdesc_pcmpistr[] =
17967 {
17968 /* SSE4.2 */
17969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17976 };
17978 static const struct builtin_description bdesc_crc32[] =
17979 {
17980 /* SSE4.2 */
17981 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17982 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17983 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17984 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17985 };
17987 /* SSE */
17988 enum sse_builtin_type
17989 {
17990 SSE_CTYPE_UNKNOWN,
17991 V4SF_FTYPE_V4SF_INT,
17992 V2DI_FTYPE_V2DI_INT,
17993 V2DF_FTYPE_V2DF_INT,
17994 V16QI_FTYPE_V16QI_V16QI_V16QI,
17995 V4SF_FTYPE_V4SF_V4SF_V4SF,
17996 V2DF_FTYPE_V2DF_V2DF_V2DF,
17997 V16QI_FTYPE_V16QI_V16QI_INT,
17998 V8HI_FTYPE_V8HI_V8HI_INT,
17999 V4SI_FTYPE_V4SI_V4SI_INT,
18000 V4SF_FTYPE_V4SF_V4SF_INT,
18001 V2DI_FTYPE_V2DI_V2DI_INT,
18002 V2DF_FTYPE_V2DF_V2DF_INT
18003 };
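/* A note on the encoding (an assumption spelled out from the names, for
   readability): each sse_builtin_type value describes a builtin signature
   as RETURN_FTYPE_ARGS.  For instance V4SF_FTYPE_V4SF_V4SF_INT is the type
   used by __builtin_ia32_shufps in the table below, i.e. roughly

     v4sf __builtin_ia32_shufps (v4sf, v4sf, int);

   where the trailing INT argument is an immediate operand validated when
   the builtin is expanded.  */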
18005 /* SSE builtins with variable number of arguments. */
18006 static const struct builtin_description bdesc_sse_args[] =
18007 {
18008 /* SSE */
18009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18011 /* SSE2 */
18012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18014 /* SSE4.1 */
18015 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18016 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18017 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18018 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18019 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18020 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18021 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18022 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18023 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18024 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18026 /* SSE4.1 and SSE5 */
18027 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18028 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18029 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18030 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18032 /* AES */
18033 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
18035 /* PCLMUL */
18036 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
18037 };
18039 static const struct builtin_description bdesc_2arg[] =
18040 {
18041 /* SSE */
18042 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
18043 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
18044 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
18045 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
18046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
18047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
18048 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
18049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
18051 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
18052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
18053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
18054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
18055 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
18056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
18057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
18058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
18059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
18060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
18063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
18064 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
18065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
18066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
18067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
18068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
18069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
18070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18071 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
18074 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
18075 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
18076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
18077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
18080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
18081 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
18082 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
18084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
18085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
18086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
18087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
18088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
18090 /* MMX */
18091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
18092 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
18093 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
18094 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
18095 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
18096 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
18097 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
18098 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
18100 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
18101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
18102 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
18103 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
18104 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
18105 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
18106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
18107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
18109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
18110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
18111 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
18113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
18114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
18115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
18116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
18118 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
18119 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
18121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
18122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
18123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
18124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
18125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
18126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
18128 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
18129 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
18130 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
18131 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
18133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
18134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
18135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
18136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
18137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
18138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
18140 /* Special. */
18141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
18142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
18143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
18145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
18146 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
18147 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
18149 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
18150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
18152 /* SSE2 */
18153 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
18154 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
18155 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
18156 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
18157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
18158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
18159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
18160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
18162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
18163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
18164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
18165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
18166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
18167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
18168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
18169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
18170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
18171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
18174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
18175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
18176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
18177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
18178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
18179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
18180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
18181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
18183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
18184 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
18185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
18186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
18188 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
18189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
18190 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
18191 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
18193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
18194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
18195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
18197 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
18199 /* SSE2 MMX */
18200 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
18201 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
18202 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
18203 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
18204 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
18205 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
18206 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
18207 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
18209 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
18210 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
18211 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
18212 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
18213 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
18214 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
18215 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
18216 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
18218 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
18219 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
18221 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
18222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
18223 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
18224 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
18226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
18227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
18229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
18230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
18231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
18232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
18233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
18234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
18236 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
18237 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
18238 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
18239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
18241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
18242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
18243 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
18244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
18245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
18246 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
18247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
18248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
18250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
18251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
18252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
18254 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
18255 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
18257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
18258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
18260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
18262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
18263 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
18264 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
18265 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
18267 /* SSE3 MMX */
18268 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
18269 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
18270 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
18271 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
18272 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
18273 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
18275 /* SSSE3 */
18276 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
18277 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
18278 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
18279 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
18280 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
18281 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
18282 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
18283 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
18284 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
18285 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
18286 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
18287 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18288 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18289 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18290 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18291 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18292 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18293 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18294 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18295 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18296 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18297 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18298 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18299 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18301 /* SSE4.1 */
18302 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18303 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18304 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18305 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18306 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18307 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18308 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18309 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18310 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18311 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18312 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18313 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18315 /* SSE4.2 */
18316 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18318 /* AES */
18319 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, 0 },
18320 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, 0 },
18321 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, 0 },
18322 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, 0 },
18323 };
18325 static const struct builtin_description bdesc_1arg[] =
18326 {
18327 /* SSE */
18328 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18332 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 },
18333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18334 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 },
18335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18339 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18342 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18344 /* SSE2 */
18345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18363 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18364 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18370 /* SSE3 */
18371 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18372 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18374 /* SSSE3 */
18375 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18376 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18377 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18378 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18379 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18380 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18382 /* SSE4.1 */
18383 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18384 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18385 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18386 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18387 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18388 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18389 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18390 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18391 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18392 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18393 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18394 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18395 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18397 /* AES */
18398 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, 0 },
18399 };
18401 /* SSE5 */
18402 enum multi_arg_type {
18403 MULTI_ARG_UNKNOWN,
18404 MULTI_ARG_3_SF,
18405 MULTI_ARG_3_DF,
18406 MULTI_ARG_3_DI,
18407 MULTI_ARG_3_SI,
18408 MULTI_ARG_3_SI_DI,
18409 MULTI_ARG_3_HI,
18410 MULTI_ARG_3_HI_SI,
18411 MULTI_ARG_3_QI,
18412 MULTI_ARG_3_PERMPS,
18413 MULTI_ARG_3_PERMPD,
18414 MULTI_ARG_2_SF,
18415 MULTI_ARG_2_DF,
18416 MULTI_ARG_2_DI,
18417 MULTI_ARG_2_SI,
18418 MULTI_ARG_2_HI,
18419 MULTI_ARG_2_QI,
18420 MULTI_ARG_2_DI_IMM,
18421 MULTI_ARG_2_SI_IMM,
18422 MULTI_ARG_2_HI_IMM,
18423 MULTI_ARG_2_QI_IMM,
18424 MULTI_ARG_2_SF_CMP,
18425 MULTI_ARG_2_DF_CMP,
18426 MULTI_ARG_2_DI_CMP,
18427 MULTI_ARG_2_SI_CMP,
18428 MULTI_ARG_2_HI_CMP,
18429 MULTI_ARG_2_QI_CMP,
18430 MULTI_ARG_2_DI_TF,
18431 MULTI_ARG_2_SI_TF,
18432 MULTI_ARG_2_HI_TF,
18433 MULTI_ARG_2_QI_TF,
18434 MULTI_ARG_2_SF_TF,
18435 MULTI_ARG_2_DF_TF,
18436 MULTI_ARG_1_SF,
18437 MULTI_ARG_1_DF,
18438 MULTI_ARG_1_DI,
18439 MULTI_ARG_1_SI,
18440 MULTI_ARG_1_HI,
18441 MULTI_ARG_1_QI,
18442 MULTI_ARG_1_SI_DI,
18443 MULTI_ARG_1_HI_DI,
18444 MULTI_ARG_1_HI_SI,
18445 MULTI_ARG_1_QI_DI,
18446 MULTI_ARG_1_QI_SI,
18447 MULTI_ARG_1_QI_HI,
18448 MULTI_ARG_1_PH2PS,
18449 MULTI_ARG_1_PS2PH
18450 };
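/* Reading aid for the MULTI_ARG_* codes above (inferred from how the table
   below uses them, not an authoritative description): the number gives the
   operand count and the suffix names the element mode, so MULTI_ARG_3_SF
   describes three V4SF operands (the FMADDPS-style entries), MULTI_ARG_3_DF
   three V2DF operands, while the *_IMM, *_CMP and *_TF variants mark forms
   taking an immediate, a comparison code, or a com-true/com-false test
   respectively.  */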
18452 static const struct builtin_description bdesc_multi_arg[] =
18453 {
18454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18477 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18496 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18497 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18498 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18499 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18502 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18517 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18523 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18524 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18525 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18526 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18527 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18528 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18530 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18531 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18532 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18533 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18534 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18535 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18536 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18537 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18538 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18539 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18540 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18541 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18542 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18543 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18544 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18545 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18547 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18548 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18549 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18550 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18551 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18552 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18553 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18554 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18555 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18556 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18557 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18558 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18559 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18560 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18561 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18562 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18564 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18565 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18566 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18567 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18568 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18569 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18570 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18571 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18572 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18573 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18574 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18575 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18576 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18577 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18578 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18579 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18581 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18582 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18583 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18584 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18585 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18586 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18587 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18588 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18589 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18590 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18591 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18592 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18593 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18594 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18595 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18596 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18598 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18599 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18600 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18601 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18602 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18603 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18604 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18606 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18607 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18608 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18609 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18610 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18611 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18612 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18614 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18615 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18616 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18617 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18618 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18619 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18620 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18622 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18623 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18624 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18625 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18626 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18627 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18628 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18630 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18631 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18632 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18633 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18634 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18635 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18636 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18638 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18639 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18640 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18641 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18642 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18643 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18644 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18646 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18647 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18648 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18649 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18650 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18651 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18652 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18654 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18655 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18656 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18657 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18658 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18659 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18660 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18662 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18663 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18664 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18665 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18666 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18667 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18668 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18669 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18671 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18672 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18673 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18674 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18675 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18676 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18677 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18678 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18680 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18681 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18682 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18683 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18684 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18685 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18686 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18687 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18688 };
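/* Editorial illustration (not part of the original source): a minimal sketch
   of how one of the names in bdesc_multi_arg is expected to appear in user
   code, assuming the MULTI_ARG_3_SF tag gives __builtin_ia32_fmaddps a
   (V4SF, V4SF, V4SF) -> V4SF prototype:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     fmadd_ps (__v4sf a, __v4sf b, __v4sf c)
     {
       return __builtin_ia32_fmaddps (a, b, c);
     }

   which should compute a * b + c element-wise on SSE5 hardware.  The actual
   prototype is only fixed when this table is walked during builtin
   registration.  */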
18690 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18691 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18692 builtins. */
18693 static void
18694 ix86_init_mmx_sse_builtins (void)
18695 {
18696 const struct builtin_description * d;
18697 size_t i;
18699 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18700 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18701 tree V1DI_type_node
18702 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
18703 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18704 tree V2DI_type_node
18705 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18706 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18707 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18708 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18709 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18710 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18711 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18713 tree pchar_type_node = build_pointer_type (char_type_node);
18714 tree pcchar_type_node = build_pointer_type (
18715 build_type_variant (char_type_node, 1, 0));
18716 tree pfloat_type_node = build_pointer_type (float_type_node);
18717 tree pcfloat_type_node = build_pointer_type (
18718 build_type_variant (float_type_node, 1, 0));
18719 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18720 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18721 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18723 /* Comparisons. */
18724 tree int_ftype_v4sf_v4sf
18725 = build_function_type_list (integer_type_node,
18726 V4SF_type_node, V4SF_type_node, NULL_TREE);
18727 tree v4si_ftype_v4sf_v4sf
18728 = build_function_type_list (V4SI_type_node,
18729 V4SF_type_node, V4SF_type_node, NULL_TREE);
18730 /* MMX/SSE/integer conversions. */
18731 tree int_ftype_v4sf
18732 = build_function_type_list (integer_type_node,
18733 V4SF_type_node, NULL_TREE);
18734 tree int64_ftype_v4sf
18735 = build_function_type_list (long_long_integer_type_node,
18736 V4SF_type_node, NULL_TREE);
18737 tree int_ftype_v8qi
18738 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18739 tree v4sf_ftype_v4sf_int
18740 = build_function_type_list (V4SF_type_node,
18741 V4SF_type_node, integer_type_node, NULL_TREE);
18742 tree v4sf_ftype_v4sf_int64
18743 = build_function_type_list (V4SF_type_node,
18744 V4SF_type_node, long_long_integer_type_node,
18745 NULL_TREE);
18746 tree v4sf_ftype_v4sf_v2si
18747 = build_function_type_list (V4SF_type_node,
18748 V4SF_type_node, V2SI_type_node, NULL_TREE);
18750 /* Miscellaneous. */
18751 tree v8qi_ftype_v4hi_v4hi
18752 = build_function_type_list (V8QI_type_node,
18753 V4HI_type_node, V4HI_type_node, NULL_TREE);
18754 tree v4hi_ftype_v2si_v2si
18755 = build_function_type_list (V4HI_type_node,
18756 V2SI_type_node, V2SI_type_node, NULL_TREE);
18757 tree v4sf_ftype_v4sf_v4sf_int
18758 = build_function_type_list (V4SF_type_node,
18759 V4SF_type_node, V4SF_type_node,
18760 integer_type_node, NULL_TREE);
18761 tree v2si_ftype_v4hi_v4hi
18762 = build_function_type_list (V2SI_type_node,
18763 V4HI_type_node, V4HI_type_node, NULL_TREE);
18764 tree v4hi_ftype_v4hi_int
18765 = build_function_type_list (V4HI_type_node,
18766 V4HI_type_node, integer_type_node, NULL_TREE);
18767 tree v2si_ftype_v2si_int
18768 = build_function_type_list (V2SI_type_node,
18769 V2SI_type_node, integer_type_node, NULL_TREE);
18770 tree v1di_ftype_v1di_int
18771 = build_function_type_list (V1DI_type_node,
18772 V1DI_type_node, integer_type_node, NULL_TREE);
18774 tree void_ftype_void
18775 = build_function_type (void_type_node, void_list_node);
18776 tree void_ftype_unsigned
18777 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18778 tree void_ftype_unsigned_unsigned
18779 = build_function_type_list (void_type_node, unsigned_type_node,
18780 unsigned_type_node, NULL_TREE);
18781 tree void_ftype_pcvoid_unsigned_unsigned
18782 = build_function_type_list (void_type_node, const_ptr_type_node,
18783 unsigned_type_node, unsigned_type_node,
18784 NULL_TREE);
18785 tree unsigned_ftype_void
18786 = build_function_type (unsigned_type_node, void_list_node);
18787 tree v2si_ftype_v4sf
18788 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18789 /* Loads/stores. */
18790 tree void_ftype_v8qi_v8qi_pchar
18791 = build_function_type_list (void_type_node,
18792 V8QI_type_node, V8QI_type_node,
18793 pchar_type_node, NULL_TREE);
18794 tree v4sf_ftype_pcfloat
18795 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18796 /* @@@ the type is bogus */
18797 tree v4sf_ftype_v4sf_pv2si
18798 = build_function_type_list (V4SF_type_node,
18799 V4SF_type_node, pv2si_type_node, NULL_TREE);
18800 tree void_ftype_pv2si_v4sf
18801 = build_function_type_list (void_type_node,
18802 pv2si_type_node, V4SF_type_node, NULL_TREE);
18803 tree void_ftype_pfloat_v4sf
18804 = build_function_type_list (void_type_node,
18805 pfloat_type_node, V4SF_type_node, NULL_TREE);
18806 tree void_ftype_pdi_di
18807 = build_function_type_list (void_type_node,
18808 pdi_type_node, long_long_unsigned_type_node,
18809 NULL_TREE);
18810 tree void_ftype_pv2di_v2di
18811 = build_function_type_list (void_type_node,
18812 pv2di_type_node, V2DI_type_node, NULL_TREE);
18813 /* Normal vector unops. */
18814 tree v4sf_ftype_v4sf
18815 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18816 tree v16qi_ftype_v16qi
18817 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18818 tree v8hi_ftype_v8hi
18819 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18820 tree v4si_ftype_v4si
18821 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18822 tree v8qi_ftype_v8qi
18823 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18824 tree v4hi_ftype_v4hi
18825 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18827 /* Normal vector binops. */
18828 tree v4sf_ftype_v4sf_v4sf
18829 = build_function_type_list (V4SF_type_node,
18830 V4SF_type_node, V4SF_type_node, NULL_TREE);
18831 tree v8qi_ftype_v8qi_v8qi
18832 = build_function_type_list (V8QI_type_node,
18833 V8QI_type_node, V8QI_type_node, NULL_TREE);
18834 tree v4hi_ftype_v4hi_v4hi
18835 = build_function_type_list (V4HI_type_node,
18836 V4HI_type_node, V4HI_type_node, NULL_TREE);
18837 tree v2si_ftype_v2si_v2si
18838 = build_function_type_list (V2SI_type_node,
18839 V2SI_type_node, V2SI_type_node, NULL_TREE);
18840 tree v1di_ftype_v1di_v1di
18841 = build_function_type_list (V1DI_type_node,
18842 V1DI_type_node, V1DI_type_node, NULL_TREE);
18844 tree di_ftype_di_di_int
18845 = build_function_type_list (long_long_unsigned_type_node,
18846 long_long_unsigned_type_node,
18847 long_long_unsigned_type_node,
18848 integer_type_node, NULL_TREE);
18850 tree v2si_ftype_v2sf
18851 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18852 tree v2sf_ftype_v2si
18853 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18854 tree v2si_ftype_v2si
18855 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18856 tree v2sf_ftype_v2sf
18857 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18858 tree v2sf_ftype_v2sf_v2sf
18859 = build_function_type_list (V2SF_type_node,
18860 V2SF_type_node, V2SF_type_node, NULL_TREE);
18861 tree v2si_ftype_v2sf_v2sf
18862 = build_function_type_list (V2SI_type_node,
18863 V2SF_type_node, V2SF_type_node, NULL_TREE);
18864 tree pint_type_node = build_pointer_type (integer_type_node);
18865 tree pdouble_type_node = build_pointer_type (double_type_node);
18866 tree pcdouble_type_node = build_pointer_type (
18867 build_type_variant (double_type_node, 1, 0));
18868 tree int_ftype_v2df_v2df
18869 = build_function_type_list (integer_type_node,
18870 V2DF_type_node, V2DF_type_node, NULL_TREE);
18872 tree void_ftype_pcvoid
18873 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18874 tree v4sf_ftype_v4si
18875 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18876 tree v4si_ftype_v4sf
18877 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18878 tree v2df_ftype_v4si
18879 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18880 tree v4si_ftype_v2df
18881 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18882 tree v4si_ftype_v2df_v2df
18883 = build_function_type_list (V4SI_type_node,
18884 V2DF_type_node, V2DF_type_node, NULL_TREE);
18885 tree v2si_ftype_v2df
18886 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18887 tree v4sf_ftype_v2df
18888 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18889 tree v2df_ftype_v2si
18890 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18891 tree v2df_ftype_v4sf
18892 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18893 tree int_ftype_v2df
18894 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18895 tree int64_ftype_v2df
18896 = build_function_type_list (long_long_integer_type_node,
18897 V2DF_type_node, NULL_TREE);
18898 tree v2df_ftype_v2df_int
18899 = build_function_type_list (V2DF_type_node,
18900 V2DF_type_node, integer_type_node, NULL_TREE);
18901 tree v2df_ftype_v2df_int64
18902 = build_function_type_list (V2DF_type_node,
18903 V2DF_type_node, long_long_integer_type_node,
18904 NULL_TREE);
18905 tree v4sf_ftype_v4sf_v2df
18906 = build_function_type_list (V4SF_type_node,
18907 V4SF_type_node, V2DF_type_node, NULL_TREE);
18908 tree v2df_ftype_v2df_v4sf
18909 = build_function_type_list (V2DF_type_node,
18910 V2DF_type_node, V4SF_type_node, NULL_TREE);
18911 tree v2df_ftype_v2df_v2df_int
18912 = build_function_type_list (V2DF_type_node,
18913 V2DF_type_node, V2DF_type_node,
18914 integer_type_node,
18915 NULL_TREE);
18916 tree v2df_ftype_v2df_pcdouble
18917 = build_function_type_list (V2DF_type_node,
18918 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18919 tree void_ftype_pdouble_v2df
18920 = build_function_type_list (void_type_node,
18921 pdouble_type_node, V2DF_type_node, NULL_TREE);
18922 tree void_ftype_pint_int
18923 = build_function_type_list (void_type_node,
18924 pint_type_node, integer_type_node, NULL_TREE);
18925 tree void_ftype_v16qi_v16qi_pchar
18926 = build_function_type_list (void_type_node,
18927 V16QI_type_node, V16QI_type_node,
18928 pchar_type_node, NULL_TREE);
18929 tree v2df_ftype_pcdouble
18930 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18931 tree v2df_ftype_v2df_v2df
18932 = build_function_type_list (V2DF_type_node,
18933 V2DF_type_node, V2DF_type_node, NULL_TREE);
18934 tree v16qi_ftype_v16qi_v16qi
18935 = build_function_type_list (V16QI_type_node,
18936 V16QI_type_node, V16QI_type_node, NULL_TREE);
18937 tree v8hi_ftype_v8hi_v8hi
18938 = build_function_type_list (V8HI_type_node,
18939 V8HI_type_node, V8HI_type_node, NULL_TREE);
18940 tree v4si_ftype_v4si_v4si
18941 = build_function_type_list (V4SI_type_node,
18942 V4SI_type_node, V4SI_type_node, NULL_TREE);
18943 tree v2di_ftype_v2di_v2di
18944 = build_function_type_list (V2DI_type_node,
18945 V2DI_type_node, V2DI_type_node, NULL_TREE);
18946 tree v2di_ftype_v2df_v2df
18947 = build_function_type_list (V2DI_type_node,
18948 V2DF_type_node, V2DF_type_node, NULL_TREE);
18949 tree v2df_ftype_v2df
18950 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18951 tree v2di_ftype_v2di_int
18952 = build_function_type_list (V2DI_type_node,
18953 V2DI_type_node, integer_type_node, NULL_TREE);
18954 tree v2di_ftype_v2di_v2di_int
18955 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18956 V2DI_type_node, integer_type_node, NULL_TREE);
18957 tree v4si_ftype_v4si_int
18958 = build_function_type_list (V4SI_type_node,
18959 V4SI_type_node, integer_type_node, NULL_TREE);
18960 tree v8hi_ftype_v8hi_int
18961 = build_function_type_list (V8HI_type_node,
18962 V8HI_type_node, integer_type_node, NULL_TREE);
18963 tree v4si_ftype_v8hi_v8hi
18964 = build_function_type_list (V4SI_type_node,
18965 V8HI_type_node, V8HI_type_node, NULL_TREE);
18966 tree v1di_ftype_v8qi_v8qi
18967 = build_function_type_list (V1DI_type_node,
18968 V8QI_type_node, V8QI_type_node, NULL_TREE);
18969 tree v1di_ftype_v2si_v2si
18970 = build_function_type_list (V1DI_type_node,
18971 V2SI_type_node, V2SI_type_node, NULL_TREE);
18972 tree v2di_ftype_v16qi_v16qi
18973 = build_function_type_list (V2DI_type_node,
18974 V16QI_type_node, V16QI_type_node, NULL_TREE);
18975 tree v2di_ftype_v4si_v4si
18976 = build_function_type_list (V2DI_type_node,
18977 V4SI_type_node, V4SI_type_node, NULL_TREE);
18978 tree int_ftype_v16qi
18979 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18980 tree v16qi_ftype_pcchar
18981 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18982 tree void_ftype_pchar_v16qi
18983 = build_function_type_list (void_type_node,
18984 pchar_type_node, V16QI_type_node, NULL_TREE);
18986 tree v2di_ftype_v2di_unsigned_unsigned
18987 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18988 unsigned_type_node, unsigned_type_node,
18989 NULL_TREE);
18990 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18991 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18992 unsigned_type_node, unsigned_type_node,
18993 NULL_TREE);
18994 tree v2di_ftype_v2di_v16qi
18995 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18996 NULL_TREE);
18997 tree v2df_ftype_v2df_v2df_v2df
18998 = build_function_type_list (V2DF_type_node,
18999 V2DF_type_node, V2DF_type_node,
19000 V2DF_type_node, NULL_TREE);
19001 tree v4sf_ftype_v4sf_v4sf_v4sf
19002 = build_function_type_list (V4SF_type_node,
19003 V4SF_type_node, V4SF_type_node,
19004 V4SF_type_node, NULL_TREE);
19005 tree v8hi_ftype_v16qi
19006 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19007 NULL_TREE);
19008 tree v4si_ftype_v16qi
19009 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19010 NULL_TREE);
19011 tree v2di_ftype_v16qi
19012 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19013 NULL_TREE);
19014 tree v4si_ftype_v8hi
19015 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19016 NULL_TREE);
19017 tree v2di_ftype_v8hi
19018 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19019 NULL_TREE);
19020 tree v2di_ftype_v4si
19021 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19022 NULL_TREE);
19023 tree v2di_ftype_pv2di
19024 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19025 NULL_TREE);
19026 tree v16qi_ftype_v16qi_v16qi_int
19027 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19028 V16QI_type_node, integer_type_node,
19029 NULL_TREE);
19030 tree v16qi_ftype_v16qi_v16qi_v16qi
19031 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19032 V16QI_type_node, V16QI_type_node,
19033 NULL_TREE);
19034 tree v8hi_ftype_v8hi_v8hi_int
19035 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19036 V8HI_type_node, integer_type_node,
19037 NULL_TREE);
19038 tree v4si_ftype_v4si_v4si_int
19039 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19040 V4SI_type_node, integer_type_node,
19041 NULL_TREE);
19042 tree int_ftype_v2di_v2di
19043 = build_function_type_list (integer_type_node,
19044 V2DI_type_node, V2DI_type_node,
19045 NULL_TREE);
19046 tree int_ftype_v16qi_int_v16qi_int_int
19047 = build_function_type_list (integer_type_node,
19048 V16QI_type_node,
19049 integer_type_node,
19050 V16QI_type_node,
19051 integer_type_node,
19052 integer_type_node,
19053 NULL_TREE);
19054 tree v16qi_ftype_v16qi_int_v16qi_int_int
19055 = build_function_type_list (V16QI_type_node,
19056 V16QI_type_node,
19057 integer_type_node,
19058 V16QI_type_node,
19059 integer_type_node,
19060 integer_type_node,
19061 NULL_TREE);
19062 tree int_ftype_v16qi_v16qi_int
19063 = build_function_type_list (integer_type_node,
19064 V16QI_type_node,
19065 V16QI_type_node,
19066 integer_type_node,
19067 NULL_TREE);
19069 /* SSE5 instructions */
19070 tree v2di_ftype_v2di_v2di_v2di
19071 = build_function_type_list (V2DI_type_node,
19072 V2DI_type_node,
19073 V2DI_type_node,
19074 V2DI_type_node,
19075 NULL_TREE);
19077 tree v4si_ftype_v4si_v4si_v4si
19078 = build_function_type_list (V4SI_type_node,
19079 V4SI_type_node,
19080 V4SI_type_node,
19081 V4SI_type_node,
19082 NULL_TREE);
19084 tree v4si_ftype_v4si_v4si_v2di
19085 = build_function_type_list (V4SI_type_node,
19086 V4SI_type_node,
19087 V4SI_type_node,
19088 V2DI_type_node,
19089 NULL_TREE);
19091 tree v8hi_ftype_v8hi_v8hi_v8hi
19092 = build_function_type_list (V8HI_type_node,
19093 V8HI_type_node,
19094 V8HI_type_node,
19095 V8HI_type_node,
19096 NULL_TREE);
19098 tree v8hi_ftype_v8hi_v8hi_v4si
19099 = build_function_type_list (V8HI_type_node,
19100 V8HI_type_node,
19101 V8HI_type_node,
19102 V4SI_type_node,
19103 NULL_TREE);
19105 tree v2df_ftype_v2df_v2df_v16qi
19106 = build_function_type_list (V2DF_type_node,
19107 V2DF_type_node,
19108 V2DF_type_node,
19109 V16QI_type_node,
19110 NULL_TREE);
19112 tree v4sf_ftype_v4sf_v4sf_v16qi
19113 = build_function_type_list (V4SF_type_node,
19114 V4SF_type_node,
19115 V4SF_type_node,
19116 V16QI_type_node,
19117 NULL_TREE);
19119 tree v2di_ftype_v2di_si
19120 = build_function_type_list (V2DI_type_node,
19121 V2DI_type_node,
19122 integer_type_node,
19123 NULL_TREE);
19125 tree v4si_ftype_v4si_si
19126 = build_function_type_list (V4SI_type_node,
19127 V4SI_type_node,
19128 integer_type_node,
19129 NULL_TREE);
19131 tree v8hi_ftype_v8hi_si
19132 = build_function_type_list (V8HI_type_node,
19133 V8HI_type_node,
19134 integer_type_node,
19135 NULL_TREE);
19137 tree v16qi_ftype_v16qi_si
19138 = build_function_type_list (V16QI_type_node,
19139 V16QI_type_node,
19140 integer_type_node,
19141 NULL_TREE);
19142 tree v4sf_ftype_v4hi
19143 = build_function_type_list (V4SF_type_node,
19144 V4HI_type_node,
19145 NULL_TREE);
19147 tree v4hi_ftype_v4sf
19148 = build_function_type_list (V4HI_type_node,
19149 V4SF_type_node,
19150 NULL_TREE);
19152 tree v2di_ftype_v2di
19153 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19155 tree ftype;
19157 /* The __float80 type. */
19158 if (TYPE_MODE (long_double_type_node) == XFmode)
19159 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19160 "__float80");
19161 else
19162 {
19163 /* The __float80 type. */
19164 tree float80_type_node = make_node (REAL_TYPE);
19166 TYPE_PRECISION (float80_type_node) = 80;
19167 layout_type (float80_type_node);
19168 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19169 "__float80");
19170 }
19172 if (TARGET_64BIT)
19173 {
19174 tree float128_type_node = make_node (REAL_TYPE);
19176 TYPE_PRECISION (float128_type_node) = 128;
19177 layout_type (float128_type_node);
19178 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19179 "__float128");
19181 /* TFmode support builtins. */
19182 ftype = build_function_type (float128_type_node,
19183 void_list_node);
19184 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19186 ftype = build_function_type_list (float128_type_node,
19187 float128_type_node,
19188 NULL_TREE);
19189 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19191 ftype = build_function_type_list (float128_type_node,
19192 float128_type_node,
19193 float128_type_node,
19194 NULL_TREE);
19195 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19196 }
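/* Editorial illustration (not part of the original source): once the
   __float128 type and the three builtins above are registered, 64-bit user
   code should be able to write something like

     __float128 inf = __builtin_infq ();
     __float128 mag = __builtin_fabsq (inf);
     __float128 neg = __builtin_copysignq (mag, (__float128) -1.0);

   The casts from double are an assumption in this sketch; only the builtin
   names and the __float128 type itself come from the code above.  */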
19198 /* Add all SSE builtins with variable number of operands. */
19199 for (i = 0, d = bdesc_sse_args;
19200 i < ARRAY_SIZE (bdesc_sse_args);
19201 i++, d++)
19202 {
19203 tree type;
19205 if (d->name == 0)
19206 continue;
19208 switch ((enum sse_builtin_type) d->flag)
19209 {
19210 case V4SF_FTYPE_V4SF_INT:
19211 type = v4sf_ftype_v4sf_int;
19212 break;
19213 case V2DI_FTYPE_V2DI_INT:
19214 type = v2di_ftype_v2di_int;
19215 break;
19216 case V2DF_FTYPE_V2DF_INT:
19217 type = v2df_ftype_v2df_int;
19218 break;
19219 case V16QI_FTYPE_V16QI_V16QI_V16QI:
19220 type = v16qi_ftype_v16qi_v16qi_v16qi;
19221 break;
19222 case V4SF_FTYPE_V4SF_V4SF_V4SF:
19223 type = v4sf_ftype_v4sf_v4sf_v4sf;
19224 break;
19225 case V2DF_FTYPE_V2DF_V2DF_V2DF:
19226 type = v2df_ftype_v2df_v2df_v2df;
19227 break;
19228 case V16QI_FTYPE_V16QI_V16QI_INT:
19229 type = v16qi_ftype_v16qi_v16qi_int;
19230 break;
19231 case V8HI_FTYPE_V8HI_V8HI_INT:
19232 type = v8hi_ftype_v8hi_v8hi_int;
19233 break;
19234 case V4SI_FTYPE_V4SI_V4SI_INT:
19235 type = v4si_ftype_v4si_v4si_int;
19236 break;
19237 case V4SF_FTYPE_V4SF_V4SF_INT:
19238 type = v4sf_ftype_v4sf_v4sf_int;
19239 break;
19240 case V2DI_FTYPE_V2DI_V2DI_INT:
19241 type = v2di_ftype_v2di_v2di_int;
19242 break;
19243 case V2DF_FTYPE_V2DF_V2DF_INT:
19244 type = v2df_ftype_v2df_v2df_int;
19245 break;
19246 default:
19247 gcc_unreachable ();
19248 }
19250 def_builtin_const (d->mask, d->name, type, d->code);
19251 }
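/* Editorial note (not part of the original source): in the loop above each
   bdesc_sse_args entry carries an sse_builtin_type tag in d->flag; the
   switch maps that tag onto one of the function-type trees built earlier,
   and def_builtin_const then registers d->name with that prototype under
   the ISA mask d->mask.  An unrecognized tag is a programming error, hence
   gcc_unreachable.  */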
19253 /* Add all builtins that are more or less simple operations on two
19254 operands. */
19255 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19256 {
19257 /* Use one of the operands; the target can have a different mode for
19258 mask-generating compares. */
19259 enum machine_mode mode;
19260 tree type;
19262 if (d->name == 0)
19263 continue;
19264 mode = insn_data[d->icode].operand[1].mode;
19266 switch (mode)
19267 {
19268 case V16QImode:
19269 type = v16qi_ftype_v16qi_v16qi;
19270 break;
19271 case V8HImode:
19272 type = v8hi_ftype_v8hi_v8hi;
19273 break;
19274 case V4SImode:
19275 type = v4si_ftype_v4si_v4si;
19276 break;
19277 case V2DImode:
19278 type = v2di_ftype_v2di_v2di;
19279 break;
19280 case V2DFmode:
19281 type = v2df_ftype_v2df_v2df;
19282 break;
19283 case V4SFmode:
19284 type = v4sf_ftype_v4sf_v4sf;
19285 break;
19286 case V8QImode:
19287 type = v8qi_ftype_v8qi_v8qi;
19288 break;
19289 case V4HImode:
19290 type = v4hi_ftype_v4hi_v4hi;
19291 break;
19292 case V2SImode:
19293 type = v2si_ftype_v2si_v2si;
19294 break;
19295 case V1DImode:
19296 type = v1di_ftype_v1di_v1di;
19297 break;
19299 default:
19300 gcc_unreachable ();
19301 }
19303 /* Override for comparisons. */
19304 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19305 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19306 type = v4si_ftype_v4sf_v4sf;
19308 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19309 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19310 type = v2di_ftype_v2df_v2df;
19312 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19313 type = v4si_ftype_v2df_v2df;
19315 def_builtin_const (d->mask, d->name, type, d->code);
19316 }
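/* Editorial note (not part of the original source): the overrides above give
   the mask-generating compare patterns an integer-vector return type (V4SI
   masks for V4SF compares, V2DI masks for V2DF compares, and a V4SI result
   for vec_pack_sfix) instead of the operand mode the generic switch would
   pick.  Presumably this matches the all-ones/all-zeros bit patterns those
   instructions produce; the builtin names involved live in bdesc_2arg,
   which is defined elsewhere in this file.  */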
19318 /* Add all builtins that are more or less simple operations on 1 operand. */
19319 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19320 {
19321 enum machine_mode mode;
19322 tree type;
19324 if (d->name == 0)
19325 continue;
19326 mode = insn_data[d->icode].operand[1].mode;
19328 switch (mode)
19329 {
19330 case V16QImode:
19331 type = v16qi_ftype_v16qi;
19332 break;
19333 case V8HImode:
19334 type = v8hi_ftype_v8hi;
19335 break;
19336 case V4SImode:
19337 type = v4si_ftype_v4si;
19338 break;
19339 case V2DFmode:
19340 type = v2df_ftype_v2df;
19341 break;
19342 case V4SFmode:
19343 type = v4sf_ftype_v4sf;
19344 break;
19345 case V8QImode:
19346 type = v8qi_ftype_v8qi;
19347 break;
19348 case V4HImode:
19349 type = v4hi_ftype_v4hi;
19350 break;
19351 case V2SImode:
19352 type = v2si_ftype_v2si;
19353 break;
19355 default:
19356 abort ();
19357 }
19359 def_builtin_const (d->mask, d->name, type, d->code);
19360 }
19362 /* pcmpestr[im] insns. */
19363 for (i = 0, d = bdesc_pcmpestr;
19364 i < ARRAY_SIZE (bdesc_pcmpestr);
19365 i++, d++)
19366 {
19367 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19368 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19369 else
19370 ftype = int_ftype_v16qi_int_v16qi_int_int;
19371 def_builtin_const (d->mask, d->name, ftype, d->code);
19372 }
19374 /* pcmpistr[im] insns. */
19375 for (i = 0, d = bdesc_pcmpistr;
19376 i < ARRAY_SIZE (bdesc_pcmpistr);
19377 i++, d++)
19378 {
19379 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19380 ftype = v16qi_ftype_v16qi_v16qi_int;
19381 else
19382 ftype = int_ftype_v16qi_v16qi_int;
19383 def_builtin_const (d->mask, d->name, ftype, d->code);
19384 }
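/* Editorial note (not part of the original source): the two loops above give
   the explicit-length (pcmpestr*) builtins five arguments and the
   implicit-length (pcmpistr*) builtins three; the *M128 variants return a
   V16QI mask while the remaining variants return an int, exactly as
   selected by the d->code checks.  */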
19386 /* Add the remaining MMX insns with somewhat more complicated types. */
19387 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19389 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
19390 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
19391 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
19392 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW);
19393 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD);
19394 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);
19396 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
19397 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
19398 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
19399 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW);
19400 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD);
19401 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);
19403 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
19404 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
19405 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW);
19406 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD);
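/* Editorial note (not part of the original source): for each MMX shift the
   *i spelling (e.g. __builtin_ia32_psllwi) takes a plain integer count,
   while the unsuffixed spelling (e.g. __builtin_ia32_psllw) takes the count
   in a vector operand, as the v4hi_ftype_v4hi_int versus
   v4hi_ftype_v4hi_v4hi prototypes above indicate.  */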
19408 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19409 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19411 /* comi/ucomi insns. */
19412 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19413 if (d->mask == OPTION_MASK_ISA_SSE2)
19414 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19415 else
19416 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19418 /* ptest insns. */
19419 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19420 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19422 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19423 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19424 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
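/* The next two builtins read and write the SSE control/status register
   (MXCSR); the xmmintrin.h wrappers _mm_getcsr and _mm_setcsr are built
   on top of them.  */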
19426 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19427 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19428 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19429 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19430 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19431 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19432 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19433 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19434 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19435 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19436 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
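/* The *si64 conversions above produce or consume DImode integers and are
   only meaningful on 64-bit targets, hence the additional
   OPTION_MASK_ISA_64BIT bit in their masks.  */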
19438 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19440 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19441 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19443 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19444 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19445 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19446 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19448 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19449 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19450 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19451 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19453 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19455 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19457 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19458 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19459 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19460 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR);
19461 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
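/* Scalar SFmode reciprocal square root.  Unlike the vector builtins above
   it is keyed off a plain float -> float signature; it is the builtin
   handed back (see ix86_builtin_reciprocal) when a reciprocal square root
   sequence may replace sqrtf under -mrecip style optimizations.  */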
19462 ftype = build_function_type_list (float_type_node,
19463 float_type_node,
19464 NULL_TREE);
19465 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19466 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19467 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR);
19468 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19470 /* Original 3DNow! */
19471 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19472 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19473 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19474 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19475 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19476 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19477 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19478 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19479 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19480 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19481 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19482 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19483 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19484 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19485 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19486 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19487 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19488 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19489 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19490 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19492 /* 3DNow! extension as used in the Athlon CPU. */
19493 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19494 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19495 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19496 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19497 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19498 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19500 /* SSE2 */
19501 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19503 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19504 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19506 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19507 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19509 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19510 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19511 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19512 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19513 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19515 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19516 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19517 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19518 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19520 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19521 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19523 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19524 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19526 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19527 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19528 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19529 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19530 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19532 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19534 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19535 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19536 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19537 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19539 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19540 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19541 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19543 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19544 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19545 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19546 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19548 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19549 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19550 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19552 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19553 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19555 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19556 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19558 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19559 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19560 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19561 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19562 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19563 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19564 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19566 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19567 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19568 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19569 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19570 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19571 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19572 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19574 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19575 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19576 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19577 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19579 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19581 /* Prescott New Instructions. */
19582 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19583 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19584 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
19586 /* SSSE3. */
19587 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19588 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
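/* __builtin_ia32_palignr is the __m64 (MMX register) form and
   __builtin_ia32_palignr128 the __m128i form; the tmmintrin.h wrappers
   pass the user's byte shift count scaled to bits as the last argument.  */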
19590 /* SSE4.1. */
19591 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19592 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19593 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19594 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19595 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19596 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19597 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19598 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19599 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19600 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19601 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19602 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19603 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19604 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19606 /* SSE4.2. */
19607 ftype = build_function_type_list (unsigned_type_node,
19608 unsigned_type_node,
19609 unsigned_char_type_node,
19610 NULL_TREE);
19611 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19612 ftype = build_function_type_list (unsigned_type_node,
19613 unsigned_type_node,
19614 short_unsigned_type_node,
19615 NULL_TREE);
19616 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19617 ftype = build_function_type_list (unsigned_type_node,
19618 unsigned_type_node,
19619 unsigned_type_node,
19620 NULL_TREE);
19621 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19622 ftype = build_function_type_list (long_long_unsigned_type_node,
19623 long_long_unsigned_type_node,
19624 long_long_unsigned_type_node,
19625 NULL_TREE);
19626 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
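/* Illustrative use of the CRC32 builtins above (what the SSE4.2 wrappers
   in smmintrin.h expand to):

     unsigned int crc = 0;
     crc = __builtin_ia32_crc32qi (crc, byte);
     crc = __builtin_ia32_crc32si (crc, word);  */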
19628 /* AES */
19629 if (TARGET_AES)
19630 {
19631 /* Define AES built-in functions only if AES is enabled. */
19632 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
19633 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
19634 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
19635 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
19636 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
19637 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
19638 }
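/* Note that the AES builtins above are only registered when TARGET_AES is
   set at definition time, yet carry the SSE2 ISA mask; PCLMUL below follows
   the same scheme.  */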
19640 /* PCLMUL */
19641 if (TARGET_PCLMUL)
19642 {
19643 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
19644 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
19645 }
19647 /* AMDFAM10 SSE4A new built-ins.  */
19648 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19649 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19650 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19651 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19652 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19653 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19655 /* Access to the vec_init patterns. */
19656 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19657 integer_type_node, NULL_TREE);
19658 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19660 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19661 short_integer_type_node,
19662 short_integer_type_node,
19663 short_integer_type_node, NULL_TREE);
19664 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19666 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19667 char_type_node, char_type_node,
19668 char_type_node, char_type_node,
19669 char_type_node, char_type_node,
19670 char_type_node, NULL_TREE);
19671 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19673 /* Access to the vec_extract patterns. */
19674 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19675 integer_type_node, NULL_TREE);
19676 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19678 ftype = build_function_type_list (long_long_integer_type_node,
19679 V2DI_type_node, integer_type_node,
19680 NULL_TREE);
19681 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19683 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19684 integer_type_node, NULL_TREE);
19685 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19687 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19688 integer_type_node, NULL_TREE);
19689 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19691 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19692 integer_type_node, NULL_TREE);
19693 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19695 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19696 integer_type_node, NULL_TREE);
19697 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19699 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19700 integer_type_node, NULL_TREE);
19701 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19703 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19704 integer_type_node, NULL_TREE);
19705 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
19707 /* Access to the vec_set patterns. */
19708 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19709 intDI_type_node,
19710 integer_type_node, NULL_TREE);
19711 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19713 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19714 float_type_node,
19715 integer_type_node, NULL_TREE);
19716 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19718 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19719 intSI_type_node,
19720 integer_type_node, NULL_TREE);
19721 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19723 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19724 intHI_type_node,
19725 integer_type_node, NULL_TREE);
19726 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19728 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19729 intHI_type_node,
19730 integer_type_node, NULL_TREE);
19731 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19733 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19734 intQI_type_node,
19735 integer_type_node, NULL_TREE);
19736 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
19738 /* Add the SSE5 multi-argument instructions.  */
19739 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19740 {
19741 tree mtype = NULL_TREE;
19743 if (d->name == 0)
19744 continue;
19746 switch ((enum multi_arg_type)d->flag)
19747 {
19748 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19749 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19750 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19751 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19752 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19753 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19754 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19755 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19756 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19757 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19758 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19759 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19760 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19761 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19762 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19763 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19764 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19765 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19766 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19767 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19768 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19769 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19770 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19771 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19772 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19773 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19774 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19775 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19776 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19777 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19778 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19779 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19780 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19781 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19782 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19783 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19784 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19785 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19786 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19787 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19788 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19789 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19790 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19791 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19792 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19793 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19794 case MULTI_ARG_UNKNOWN:
19795 default:
19796 gcc_unreachable ();
19797 }
19799 if (mtype)
19800 def_builtin_const (d->mask, d->name, mtype, d->code);
19801 }
19802 }
19804 static void
19805 ix86_init_builtins (void)
19806 {
19807 if (TARGET_MMX)
19808 ix86_init_mmx_sse_builtins ();
19809 }
19811 /* Errors in the source file can cause expand_expr to return const0_rtx
19812 where we expect a vector. To avoid crashing, use one of the vector
19813 clear instructions. */
19814 static rtx
19815 safe_vector_operand (rtx x, enum machine_mode mode)
19816 {
19817 if (x == const0_rtx)
19818 x = CONST0_RTX (mode);
19819 return x;
19820 }
19822 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19823 a variable number of operands. */
19825 static rtx
19826 ix86_expand_sse_operands_builtin (enum insn_code icode, tree exp,
19827 enum sse_builtin_type type,
19828 rtx target)
19829 {
19830 rtx pat;
19831 unsigned int i, nargs;
19832 int num_memory = 0;
19833 struct
19834 {
19835 rtx op;
19836 enum machine_mode mode;
19837 } args[3];
19838 bool last_arg_constant = false;
19839 const struct insn_data *insn_p = &insn_data[icode];
19840 enum machine_mode tmode = insn_p->operand[0].mode;
19842 switch (type)
19843 {
19844 case V4SF_FTYPE_V4SF_INT:
19845 case V2DI_FTYPE_V2DI_INT:
19846 case V2DF_FTYPE_V2DF_INT:
19847 nargs = 2;
19848 last_arg_constant = true;
19849 break;
19850 case V16QI_FTYPE_V16QI_V16QI_V16QI:
19851 case V4SF_FTYPE_V4SF_V4SF_V4SF:
19852 case V2DF_FTYPE_V2DF_V2DF_V2DF:
19853 nargs = 3;
19854 break;
19855 case V16QI_FTYPE_V16QI_V16QI_INT:
19856 case V8HI_FTYPE_V8HI_V8HI_INT:
19857 case V4SI_FTYPE_V4SI_V4SI_INT:
19858 case V4SF_FTYPE_V4SF_V4SF_INT:
19859 case V2DI_FTYPE_V2DI_V2DI_INT:
19860 case V2DF_FTYPE_V2DF_V2DF_INT:
19861 nargs = 3;
19862 last_arg_constant = true;
19863 break;
19864 default:
19865 gcc_unreachable ();
19866 }
19868 gcc_assert (nargs <= ARRAY_SIZE (args));
19870 if (optimize
19871 || target == 0
19872 || GET_MODE (target) != tmode
19873 || ! (*insn_p->operand[0].predicate) (target, tmode))
19874 target = gen_reg_rtx (tmode);
19876 for (i = 0; i < nargs; i++)
19877 {
19878 tree arg = CALL_EXPR_ARG (exp, i);
19879 rtx op = expand_normal (arg);
19880 enum machine_mode mode = insn_p->operand[i + 1].mode;
19881 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
19883 if (last_arg_constant && (i + 1) == nargs)
19884 {
19885 if (!match)
19886 switch (icode)
19887 {
19888 case CODE_FOR_sse4_1_roundpd:
19889 case CODE_FOR_sse4_1_roundps:
19890 case CODE_FOR_sse4_1_roundsd:
19891 case CODE_FOR_sse4_1_roundss:
19892 case CODE_FOR_sse4_1_blendps:
19893 error ("the last argument must be a 4-bit immediate");
19894 return const0_rtx;
19896 case CODE_FOR_sse4_1_blendpd:
19897 error ("the last argument must be a 2-bit immediate");
19898 return const0_rtx;
19900 default:
19901 error ("the last argument must be an 8-bit immediate");
19902 return const0_rtx;
19903 }
19904 }
19905 else
19906 {
19907 if (VECTOR_MODE_P (mode))
19908 op = safe_vector_operand (op, mode);
19910 /* If we aren't optimizing, only allow one memory operand to
19911 be generated. */
19912 if (memory_operand (op, mode))
19913 num_memory++;
19915 gcc_assert (GET_MODE (op) == mode
19916 || GET_MODE (op) == VOIDmode);
19918 if (optimize || !match || num_memory > 1)
19919 op = copy_to_mode_reg (mode, op);
19920 }
19922 args[i].op = op;
19923 args[i].mode = mode;
19924 }
19926 switch (nargs)
19927 {
19928 case 1:
19929 pat = GEN_FCN (icode) (target, args[0].op);
19930 break;
19931 case 2:
19932 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19933 break;
19934 case 3:
19935 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19936 args[2].op);
19937 break;
19938 default:
19939 gcc_unreachable ();
19940 }
19942 if (! pat)
19943 return 0;
19945 emit_insn (pat);
19946 return target;
19947 }
19949 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19951 static rtx
19952 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19953 {
19954 rtx pat;
19955 tree arg0 = CALL_EXPR_ARG (exp, 0);
19956 tree arg1 = CALL_EXPR_ARG (exp, 1);
19957 rtx op0 = expand_normal (arg0);
19958 rtx op1 = expand_normal (arg1);
19959 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19960 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19961 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19963 if (optimize
19964 || !target
19965 || GET_MODE (target) != tmode
19966 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19967 target = gen_reg_rtx (tmode);
19969 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19970 op0 = copy_to_mode_reg (mode0, op0);
19971 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19972 {
19973 op1 = copy_to_reg (op1);
19974 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19975 }
19977 pat = GEN_FCN (icode) (target, op0, op1);
19978 if (! pat)
19979 return 0;
19980 emit_insn (pat);
19981 return target;
19982 }
19984 /* Subroutine of ix86_expand_builtin to take care of binop insns
19985 with an immediate. */
19987 static rtx
19988 ix86_expand_binop_imm_builtin (enum insn_code icode, tree exp,
19989 rtx target)
19990 {
19991 rtx pat;
19992 tree arg0 = CALL_EXPR_ARG (exp, 0);
19993 tree arg1 = CALL_EXPR_ARG (exp, 1);
19994 rtx op0 = expand_normal (arg0);
19995 rtx op1 = expand_normal (arg1);
19996 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19997 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19998 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20000 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20001 {
20002 op0 = copy_to_reg (op0);
20003 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
20004 }
20006 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20007 {
20008 error ("the last operand must be an immediate");
20009 return const0_rtx;
20010 }
20012 target = gen_reg_rtx (V2DImode);
20013 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target,
20014 V2DImode, 0),
20015 op0, op1);
20016 if (! pat)
20017 return 0;
20018 emit_insn (pat);
20019 return target;
20020 }
20022 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
20024 static rtx
20025 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
20026 {
20027 rtx pat, xops[3];
20028 tree arg0 = CALL_EXPR_ARG (exp, 0);
20029 tree arg1 = CALL_EXPR_ARG (exp, 1);
20030 rtx op0 = expand_normal (arg0);
20031 rtx op1 = expand_normal (arg1);
20032 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20033 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20034 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20036 if (VECTOR_MODE_P (mode0))
20037 op0 = safe_vector_operand (op0, mode0);
20038 if (VECTOR_MODE_P (mode1))
20039 op1 = safe_vector_operand (op1, mode1);
20041 if (optimize || !target
20042 || GET_MODE (target) != tmode
20043 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20044 target = gen_reg_rtx (tmode);
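/* If the second argument arrives as a plain SImode value but the insn wants
   a TImode operand, load it into a V4SI register with movd and use the
   TImode lowpart of that register, as done below.  */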
20046 if (GET_MODE (op1) == SImode && mode1 == TImode)
20047 {
20048 rtx x = gen_reg_rtx (V4SImode);
20049 emit_insn (gen_sse2_loadd (x, op1));
20050 op1 = gen_lowpart (TImode, x);
20051 }
20053 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20054 op0 = copy_to_mode_reg (mode0, op0);
20055 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20056 op1 = copy_to_mode_reg (mode1, op1);
20058 /* ??? Using ix86_fixup_binary_operands is problematic when
20059 we've got mismatched modes. Fake it. */
20061 xops[0] = target;
20062 xops[1] = op0;
20063 xops[2] = op1;
20065 if (tmode == mode0 && tmode == mode1)
20066 {
20067 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
20068 op0 = xops[1];
20069 op1 = xops[2];
20070 }
20071 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
20072 {
20073 op0 = force_reg (mode0, op0);
20074 op1 = force_reg (mode1, op1);
20075 target = gen_reg_rtx (tmode);
20076 }
20078 pat = GEN_FCN (icode) (target, op0, op1);
20079 if (! pat)
20080 return 0;
20081 emit_insn (pat);
20082 return target;
20083 }
20085 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20087 static rtx
20088 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20089 enum multi_arg_type m_type,
20090 enum insn_code sub_code)
20091 {
20092 rtx pat;
20093 int i;
20094 int nargs;
20095 bool comparison_p = false;
20096 bool tf_p = false;
20097 bool last_arg_constant = false;
20098 int num_memory = 0;
20099 struct {
20100 rtx op;
20101 enum machine_mode mode;
20102 } args[4];
20104 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20106 switch (m_type)
20107 {
20108 case MULTI_ARG_3_SF:
20109 case MULTI_ARG_3_DF:
20110 case MULTI_ARG_3_DI:
20111 case MULTI_ARG_3_SI:
20112 case MULTI_ARG_3_SI_DI:
20113 case MULTI_ARG_3_HI:
20114 case MULTI_ARG_3_HI_SI:
20115 case MULTI_ARG_3_QI:
20116 case MULTI_ARG_3_PERMPS:
20117 case MULTI_ARG_3_PERMPD:
20118 nargs = 3;
20119 break;
20121 case MULTI_ARG_2_SF:
20122 case MULTI_ARG_2_DF:
20123 case MULTI_ARG_2_DI:
20124 case MULTI_ARG_2_SI:
20125 case MULTI_ARG_2_HI:
20126 case MULTI_ARG_2_QI:
20127 nargs = 2;
20128 break;
20130 case MULTI_ARG_2_DI_IMM:
20131 case MULTI_ARG_2_SI_IMM:
20132 case MULTI_ARG_2_HI_IMM:
20133 case MULTI_ARG_2_QI_IMM:
20134 nargs = 2;
20135 last_arg_constant = true;
20136 break;
20138 case MULTI_ARG_1_SF:
20139 case MULTI_ARG_1_DF:
20140 case MULTI_ARG_1_DI:
20141 case MULTI_ARG_1_SI:
20142 case MULTI_ARG_1_HI:
20143 case MULTI_ARG_1_QI:
20144 case MULTI_ARG_1_SI_DI:
20145 case MULTI_ARG_1_HI_DI:
20146 case MULTI_ARG_1_HI_SI:
20147 case MULTI_ARG_1_QI_DI:
20148 case MULTI_ARG_1_QI_SI:
20149 case MULTI_ARG_1_QI_HI:
20150 case MULTI_ARG_1_PH2PS:
20151 case MULTI_ARG_1_PS2PH:
20152 nargs = 1;
20153 break;
20155 case MULTI_ARG_2_SF_CMP:
20156 case MULTI_ARG_2_DF_CMP:
20157 case MULTI_ARG_2_DI_CMP:
20158 case MULTI_ARG_2_SI_CMP:
20159 case MULTI_ARG_2_HI_CMP:
20160 case MULTI_ARG_2_QI_CMP:
20161 nargs = 2;
20162 comparison_p = true;
20163 break;
20165 case MULTI_ARG_2_SF_TF:
20166 case MULTI_ARG_2_DF_TF:
20167 case MULTI_ARG_2_DI_TF:
20168 case MULTI_ARG_2_SI_TF:
20169 case MULTI_ARG_2_HI_TF:
20170 case MULTI_ARG_2_QI_TF:
20171 nargs = 2;
20172 tf_p = true;
20173 break;
20175 case MULTI_ARG_UNKNOWN:
20176 default:
20177 gcc_unreachable ();
20178 }
20180 if (optimize || !target
20181 || GET_MODE (target) != tmode
20182 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20183 target = gen_reg_rtx (tmode);
20185 gcc_assert (nargs <= 4);
20187 for (i = 0; i < nargs; i++)
20188 {
20189 tree arg = CALL_EXPR_ARG (exp, i);
20190 rtx op = expand_normal (arg);
20191 int adjust = (comparison_p) ? 1 : 0;
20192 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20194 if (last_arg_constant && i == nargs-1)
20195 {
20196 if (GET_CODE (op) != CONST_INT)
20197 {
20198 error ("last argument must be an immediate");
20199 return gen_reg_rtx (tmode);
20200 }
20201 }
20202 else
20203 {
20204 if (VECTOR_MODE_P (mode))
20205 op = safe_vector_operand (op, mode);
20207 /* If we aren't optimizing, only allow one memory operand to be
20208 generated. */
20209 if (memory_operand (op, mode))
20210 num_memory++;
20212 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20214 if (optimize
20215 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20216 || num_memory > 1)
20217 op = force_reg (mode, op);
20218 }
20220 args[i].op = op;
20221 args[i].mode = mode;
20222 }
20224 switch (nargs)
20225 {
20226 case 1:
20227 pat = GEN_FCN (icode) (target, args[0].op);
20228 break;
20230 case 2:
20231 if (tf_p)
20232 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20233 GEN_INT ((int)sub_code));
20234 else if (! comparison_p)
20235 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20236 else
20237 {
20238 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20239 args[0].op,
20240 args[1].op);
20242 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20243 }
20244 break;
20246 case 3:
20247 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20248 break;
20250 default:
20251 gcc_unreachable ();
20252 }
20254 if (! pat)
20255 return 0;
20257 emit_insn (pat);
20258 return target;
20259 }
20261 /* Subroutine of ix86_expand_builtin to take care of stores. */
20263 static rtx
20264 ix86_expand_store_builtin (enum insn_code icode, tree exp)
20265 {
20266 rtx pat;
20267 tree arg0 = CALL_EXPR_ARG (exp, 0);
20268 tree arg1 = CALL_EXPR_ARG (exp, 1);
20269 rtx op0 = expand_normal (arg0);
20270 rtx op1 = expand_normal (arg1);
20271 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
20272 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
20274 if (VECTOR_MODE_P (mode1))
20275 op1 = safe_vector_operand (op1, mode1);
20277 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20278 op1 = copy_to_mode_reg (mode1, op1);
20280 pat = GEN_FCN (icode) (op0, op1);
20281 if (pat)
20282 emit_insn (pat);
20283 return 0;
20284 }
20286 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
20288 static rtx
20289 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
20290 rtx target, int do_load)
20291 {
20292 rtx pat;
20293 tree arg0 = CALL_EXPR_ARG (exp, 0);
20294 rtx op0 = expand_normal (arg0);
20295 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20296 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20298 if (optimize || !target
20299 || GET_MODE (target) != tmode
20300 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20301 target = gen_reg_rtx (tmode);
20302 if (do_load)
20303 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20304 else
20305 {
20306 if (VECTOR_MODE_P (mode0))
20307 op0 = safe_vector_operand (op0, mode0);
20309 if ((optimize && !register_operand (op0, mode0))
20310 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20311 op0 = copy_to_mode_reg (mode0, op0);
20312 }
20314 pat = GEN_FCN (icode) (target, op0);
20315 if (! pat)
20316 return 0;
20317 emit_insn (pat);
20318 return target;
20319 }
20321 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20322 sqrtss, rsqrtss, rcpss. */
20324 static rtx
20325 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
20326 {
20327 rtx pat;
20328 tree arg0 = CALL_EXPR_ARG (exp, 0);
20329 rtx op1, op0 = expand_normal (arg0);
20330 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20331 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20333 if (optimize || !target
20334 || GET_MODE (target) != tmode
20335 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20336 target = gen_reg_rtx (tmode);
20338 if (VECTOR_MODE_P (mode0))
20339 op0 = safe_vector_operand (op0, mode0);
20341 if ((optimize && !register_operand (op0, mode0))
20342 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20343 op0 = copy_to_mode_reg (mode0, op0);
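/* The vm* scalar patterns (sqrtss, rsqrtss, rcpss) take two input operands:
   the value to operate on and the source of the untouched upper elements.
   The single builtin argument is used for both, hence the duplication
   below.  */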
20345 op1 = op0;
20346 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20347 op1 = copy_to_mode_reg (mode0, op1);
20349 pat = GEN_FCN (icode) (target, op0, op1);
20350 if (! pat)
20351 return 0;
20352 emit_insn (pat);
20353 return target;
20354 }
20356 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20358 static rtx
20359 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
20360 rtx target)
20361 {
20362 rtx pat;
20363 tree arg0 = CALL_EXPR_ARG (exp, 0);
20364 tree arg1 = CALL_EXPR_ARG (exp, 1);
20365 rtx op0 = expand_normal (arg0);
20366 rtx op1 = expand_normal (arg1);
20367 rtx op2;
20368 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20369 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20370 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20371 enum rtx_code comparison = d->comparison;
20373 if (VECTOR_MODE_P (mode0))
20374 op0 = safe_vector_operand (op0, mode0);
20375 if (VECTOR_MODE_P (mode1))
20376 op1 = safe_vector_operand (op1, mode1);
20378 /* Swap operands if we have a comparison that isn't available in
20379 hardware. */
20380 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20381 {
20382 rtx tmp = gen_reg_rtx (mode1);
20383 emit_move_insn (tmp, op1);
20384 op1 = op0;
20385 op0 = tmp;
20386 }
20388 if (optimize || !target
20389 || GET_MODE (target) != tmode
20390 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20391 target = gen_reg_rtx (tmode);
20393 if ((optimize && !register_operand (op0, mode0))
20394 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20395 op0 = copy_to_mode_reg (mode0, op0);
20396 if ((optimize && !register_operand (op1, mode1))
20397 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20398 op1 = copy_to_mode_reg (mode1, op1);
20400 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20401 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20402 if (! pat)
20403 return 0;
20404 emit_insn (pat);
20405 return target;
20406 }
20408 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20410 static rtx
20411 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20412 rtx target)
20413 {
20414 rtx pat;
20415 tree arg0 = CALL_EXPR_ARG (exp, 0);
20416 tree arg1 = CALL_EXPR_ARG (exp, 1);
20417 rtx op0 = expand_normal (arg0);
20418 rtx op1 = expand_normal (arg1);
20419 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20420 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20421 enum rtx_code comparison = d->comparison;
20423 if (VECTOR_MODE_P (mode0))
20424 op0 = safe_vector_operand (op0, mode0);
20425 if (VECTOR_MODE_P (mode1))
20426 op1 = safe_vector_operand (op1, mode1);
20428 /* Swap operands if we have a comparison that isn't available in
20429 hardware. */
20430 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20431 {
20432 rtx tmp = op1;
20433 op1 = op0;
20434 op0 = tmp;
20435 }
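/* Materialize the flag result as an int: zero an SImode pseudo, then set
   only its QImode low part (via strict_low_part) from the comparison
   against the flags register emitted below.  */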
20437 target = gen_reg_rtx (SImode);
20438 emit_move_insn (target, const0_rtx);
20439 target = gen_rtx_SUBREG (QImode, target, 0);
20441 if ((optimize && !register_operand (op0, mode0))
20442 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20443 op0 = copy_to_mode_reg (mode0, op0);
20444 if ((optimize && !register_operand (op1, mode1))
20445 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20446 op1 = copy_to_mode_reg (mode1, op1);
20448 pat = GEN_FCN (d->icode) (op0, op1);
20449 if (! pat)
20450 return 0;
20451 emit_insn (pat);
20452 emit_insn (gen_rtx_SET (VOIDmode,
20453 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20454 gen_rtx_fmt_ee (comparison, QImode,
20455 SET_DEST (pat),
20456 const0_rtx)));
20458 return SUBREG_REG (target);
20459 }
20461 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20463 static rtx
20464 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20465 rtx target)
20466 {
20467 rtx pat;
20468 tree arg0 = CALL_EXPR_ARG (exp, 0);
20469 tree arg1 = CALL_EXPR_ARG (exp, 1);
20470 rtx op0 = expand_normal (arg0);
20471 rtx op1 = expand_normal (arg1);
20472 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20473 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20474 enum rtx_code comparison = d->comparison;
20476 if (VECTOR_MODE_P (mode0))
20477 op0 = safe_vector_operand (op0, mode0);
20478 if (VECTOR_MODE_P (mode1))
20479 op1 = safe_vector_operand (op1, mode1);
20481 target = gen_reg_rtx (SImode);
20482 emit_move_insn (target, const0_rtx);
20483 target = gen_rtx_SUBREG (QImode, target, 0);
20485 if ((optimize && !register_operand (op0, mode0))
20486 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20487 op0 = copy_to_mode_reg (mode0, op0);
20488 if ((optimize && !register_operand (op1, mode1))
20489 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20490 op1 = copy_to_mode_reg (mode1, op1);
20492 pat = GEN_FCN (d->icode) (op0, op1);
20493 if (! pat)
20494 return 0;
20495 emit_insn (pat);
20496 emit_insn (gen_rtx_SET (VOIDmode,
20497 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20498 gen_rtx_fmt_ee (comparison, QImode,
20499 SET_DEST (pat),
20500 const0_rtx)));
20502 return SUBREG_REG (target);
20503 }
20505 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20507 static rtx
20508 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20509 tree exp, rtx target)
20510 {
20511 rtx pat;
20512 tree arg0 = CALL_EXPR_ARG (exp, 0);
20513 tree arg1 = CALL_EXPR_ARG (exp, 1);
20514 tree arg2 = CALL_EXPR_ARG (exp, 2);
20515 tree arg3 = CALL_EXPR_ARG (exp, 3);
20516 tree arg4 = CALL_EXPR_ARG (exp, 4);
20517 rtx scratch0, scratch1;
20518 rtx op0 = expand_normal (arg0);
20519 rtx op1 = expand_normal (arg1);
20520 rtx op2 = expand_normal (arg2);
20521 rtx op3 = expand_normal (arg3);
20522 rtx op4 = expand_normal (arg4);
20523 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20525 tmode0 = insn_data[d->icode].operand[0].mode;
20526 tmode1 = insn_data[d->icode].operand[1].mode;
20527 modev2 = insn_data[d->icode].operand[2].mode;
20528 modei3 = insn_data[d->icode].operand[3].mode;
20529 modev4 = insn_data[d->icode].operand[4].mode;
20530 modei5 = insn_data[d->icode].operand[5].mode;
20531 modeimm = insn_data[d->icode].operand[6].mode;
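/* Operands 0 and 1 of the pattern are the two possible results (the index
   in TMODE0 and the mask in TMODE1); operands 2-5 are the two string/length
   pairs and operand 6 is the mode of the immediate control byte.  */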
20533 if (VECTOR_MODE_P (modev2))
20534 op0 = safe_vector_operand (op0, modev2);
20535 if (VECTOR_MODE_P (modev4))
20536 op2 = safe_vector_operand (op2, modev4);
20538 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20539 op0 = copy_to_mode_reg (modev2, op0);
20540 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20541 op1 = copy_to_mode_reg (modei3, op1);
20542 if ((optimize && !register_operand (op2, modev4))
20543 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20544 op2 = copy_to_mode_reg (modev4, op2);
20545 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20546 op3 = copy_to_mode_reg (modei5, op3);
20548 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20549 {
20550 error ("the fifth argument must be an 8-bit immediate");
20551 return const0_rtx;
20552 }
20554 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20555 {
20556 if (optimize || !target
20557 || GET_MODE (target) != tmode0
20558 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20559 target = gen_reg_rtx (tmode0);
20561 scratch1 = gen_reg_rtx (tmode1);
20563 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20564 }
20565 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20566 {
20567 if (optimize || !target
20568 || GET_MODE (target) != tmode1
20569 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20570 target = gen_reg_rtx (tmode1);
20572 scratch0 = gen_reg_rtx (tmode0);
20574 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20575 }
20576 else
20577 {
20578 gcc_assert (d->flag);
20580 scratch0 = gen_reg_rtx (tmode0);
20581 scratch1 = gen_reg_rtx (tmode1);
20583 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20584 }
20586 if (! pat)
20587 return 0;
20589 emit_insn (pat);
20591 if (d->flag)
20592 {
20593 target = gen_reg_rtx (SImode);
20594 emit_move_insn (target, const0_rtx);
20595 target = gen_rtx_SUBREG (QImode, target, 0);
20597 emit_insn
20598 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20599 gen_rtx_fmt_ee (EQ, QImode,
20600 gen_rtx_REG ((enum machine_mode) d->flag,
20601 FLAGS_REG),
20602 const0_rtx)));
20603 return SUBREG_REG (target);
20604 }
20605 else
20606 return target;
20607 }
20610 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20612 static rtx
20613 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20614 tree exp, rtx target)
20615 {
20616 rtx pat;
20617 tree arg0 = CALL_EXPR_ARG (exp, 0);
20618 tree arg1 = CALL_EXPR_ARG (exp, 1);
20619 tree arg2 = CALL_EXPR_ARG (exp, 2);
20620 rtx scratch0, scratch1;
20621 rtx op0 = expand_normal (arg0);
20622 rtx op1 = expand_normal (arg1);
20623 rtx op2 = expand_normal (arg2);
20624 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20626 tmode0 = insn_data[d->icode].operand[0].mode;
20627 tmode1 = insn_data[d->icode].operand[1].mode;
20628 modev2 = insn_data[d->icode].operand[2].mode;
20629 modev3 = insn_data[d->icode].operand[3].mode;
20630 modeimm = insn_data[d->icode].operand[4].mode;
20632 if (VECTOR_MODE_P (modev2))
20633 op0 = safe_vector_operand (op0, modev2);
20634 if (VECTOR_MODE_P (modev3))
20635 op1 = safe_vector_operand (op1, modev3);
20637 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20638 op0 = copy_to_mode_reg (modev2, op0);
20639 if ((optimize && !register_operand (op1, modev3))
20640 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20641 op1 = copy_to_mode_reg (modev3, op1);
20643 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20644 {
20645 error ("the third argument must be an 8-bit immediate");
20646 return const0_rtx;
20647 }
20649 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20650 {
20651 if (optimize || !target
20652 || GET_MODE (target) != tmode0
20653 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20654 target = gen_reg_rtx (tmode0);
20656 scratch1 = gen_reg_rtx (tmode1);
20658 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20659 }
20660 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20661 {
20662 if (optimize || !target
20663 || GET_MODE (target) != tmode1
20664 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20665 target = gen_reg_rtx (tmode1);
20667 scratch0 = gen_reg_rtx (tmode0);
20669 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20670 }
20671 else
20672 {
20673 gcc_assert (d->flag);
20675 scratch0 = gen_reg_rtx (tmode0);
20676 scratch1 = gen_reg_rtx (tmode1);
20678 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20679 }
20681 if (! pat)
20682 return 0;
20684 emit_insn (pat);
20686 if (d->flag)
20687 {
20688 target = gen_reg_rtx (SImode);
20689 emit_move_insn (target, const0_rtx);
20690 target = gen_rtx_SUBREG (QImode, target, 0);
20692 emit_insn
20693 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20694 gen_rtx_fmt_ee (EQ, QImode,
20695 gen_rtx_REG ((enum machine_mode) d->flag,
20696 FLAGS_REG),
20697 const0_rtx)));
20698 return SUBREG_REG (target);
20699 }
20700 else
20701 return target;
20702 }
20704 /* Return the integer constant in ARG. Constrain it to be in the range
20705 of the subparts of VEC_TYPE; issue an error if not. */
20707 static int
20708 get_element_number (tree vec_type, tree arg)
20709 {
20710 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
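/* For example, a V4SF vector has four subparts, so MAX is 3 and the
   selector must be 0, 1, 2 or 3.  */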
20712 if (!host_integerp (arg, 1)
20713 || (elt = tree_low_cst (arg, 1), elt > max))
20714 {
20715 error ("selector must be an integer constant in the range 0..%wi", max);
20716 return 0;
20717 }
20719 return elt;
20720 }
20722 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20723 ix86_expand_vector_init. We DO have language-level syntax for this, in
20724 the form of (type){ init-list }. Except that since we can't place emms
20725 instructions from inside the compiler, we can't allow the use of MMX
20726 registers unless the user explicitly asks for it. So we do *not* define
20727 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20728 we have builtins invoked by mmintrin.h that give us license to emit
20729 these sorts of instructions. */
20731 static rtx
20732 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20733 {
20734 enum machine_mode tmode = TYPE_MODE (type);
20735 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20736 int i, n_elt = GET_MODE_NUNITS (tmode);
20737 rtvec v = rtvec_alloc (n_elt);
20739 gcc_assert (VECTOR_MODE_P (tmode));
20740 gcc_assert (call_expr_nargs (exp) == n_elt);
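/* For instance, __builtin_ia32_vec_init_v2si (a, b) arrives here with two
   scalar arguments; each one is narrowed to the element mode below and the
   vector is then constructed by ix86_expand_vector_init.  */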
20742 for (i = 0; i < n_elt; ++i)
20743 {
20744 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20745 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20746 }
20748 if (!target || !register_operand (target, tmode))
20749 target = gen_reg_rtx (tmode);
20751 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20752 return target;
20753 }
20755 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20756 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20757 had a language-level syntax for referencing vector elements. */
20759 static rtx
20760 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20761 {
20762 enum machine_mode tmode, mode0;
20763 tree arg0, arg1;
20764 int elt;
20765 rtx op0;
20767 arg0 = CALL_EXPR_ARG (exp, 0);
20768 arg1 = CALL_EXPR_ARG (exp, 1);
20770 op0 = expand_normal (arg0);
20771 elt = get_element_number (TREE_TYPE (arg0), arg1);
20773 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20774 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20775 gcc_assert (VECTOR_MODE_P (mode0));
20777 op0 = force_reg (mode0, op0);
20779 if (optimize || !target || !register_operand (target, tmode))
20780 target = gen_reg_rtx (tmode);
20782 ix86_expand_vector_extract (true, target, op0, elt);
20784 return target;
20785 }
20787 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20788 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20789 a language-level syntax for referencing vector elements. */
20791 static rtx
20792 ix86_expand_vec_set_builtin (tree exp)
20793 {
20794 enum machine_mode tmode, mode1;
20795 tree arg0, arg1, arg2;
20796 int elt;
20797 rtx op0, op1, target;
20799 arg0 = CALL_EXPR_ARG (exp, 0);
20800 arg1 = CALL_EXPR_ARG (exp, 1);
20801 arg2 = CALL_EXPR_ARG (exp, 2);
20803 tmode = TYPE_MODE (TREE_TYPE (arg0));
20804 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20805 gcc_assert (VECTOR_MODE_P (tmode));
20807 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20808 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
20809 elt = get_element_number (TREE_TYPE (arg0), arg2);
20811 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20812 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20814 op0 = force_reg (tmode, op0);
20815 op1 = force_reg (mode1, op1);
20817 /* OP0 is the source of these builtin functions and shouldn't be
20818 modified. Create a copy, use it and return it as target. */
20819 target = gen_reg_rtx (tmode);
20820 emit_move_insn (target, op0);
20821 ix86_expand_vector_set (true, target, op1, elt);
20823 return target;
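/* Editorial note (illustrative): e.g. emmintrin.h implements
   _mm_insert_epi16 roughly as __builtin_ia32_vec_set_v8hi (a, d, n);
   the copy made above is what keeps the original vector argument
   unmodified.  */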
20826 /* Expand an expression EXP that calls a built-in function,
20827 with result going to TARGET if that's convenient
20828 (and in mode MODE if that's convenient).
20829 SUBTARGET may be used as the target for computing one of EXP's operands.
20830 IGNORE is nonzero if the value is to be ignored. */
20832 static rtx
20833 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20834 enum machine_mode mode ATTRIBUTE_UNUSED,
20835 int ignore ATTRIBUTE_UNUSED)
20837 const struct builtin_description *d;
20838 size_t i;
20839 enum insn_code icode;
20840 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20841 tree arg0, arg1, arg2, arg3;
20842 rtx op0, op1, op2, op3, pat;
20843 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20844 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20846 switch (fcode)
20848 case IX86_BUILTIN_EMMS:
20849 emit_insn (gen_mmx_emms ());
20850 return 0;
20852 case IX86_BUILTIN_SFENCE:
20853 emit_insn (gen_sse_sfence ());
20854 return 0;
20856 case IX86_BUILTIN_MASKMOVQ:
20857 case IX86_BUILTIN_MASKMOVDQU:
20858 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20859 ? CODE_FOR_mmx_maskmovq
20860 : CODE_FOR_sse2_maskmovdqu);
20861 /* Note the arg order is different from the operand order. */
20862 arg1 = CALL_EXPR_ARG (exp, 0);
20863 arg2 = CALL_EXPR_ARG (exp, 1);
20864 arg0 = CALL_EXPR_ARG (exp, 2);
20865 op0 = expand_normal (arg0);
20866 op1 = expand_normal (arg1);
20867 op2 = expand_normal (arg2);
20868 mode0 = insn_data[icode].operand[0].mode;
20869 mode1 = insn_data[icode].operand[1].mode;
20870 mode2 = insn_data[icode].operand[2].mode;
20872 op0 = force_reg (Pmode, op0);
20873 op0 = gen_rtx_MEM (mode1, op0);
20875 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20876 op0 = copy_to_mode_reg (mode0, op0);
20877 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20878 op1 = copy_to_mode_reg (mode1, op1);
20879 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20880 op2 = copy_to_mode_reg (mode2, op2);
20881 pat = GEN_FCN (icode) (op0, op1, op2);
20882 if (! pat)
20883 return 0;
20884 emit_insn (pat);
20885 return 0;
20887 case IX86_BUILTIN_RSQRTF:
20888 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20890 case IX86_BUILTIN_SQRTSS:
20891 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20892 case IX86_BUILTIN_RSQRTSS:
20893 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20894 case IX86_BUILTIN_RCPSS:
20895 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20897 case IX86_BUILTIN_LOADUPS:
20898 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20900 case IX86_BUILTIN_STOREUPS:
20901 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20903 case IX86_BUILTIN_LOADHPS:
20904 case IX86_BUILTIN_LOADLPS:
20905 case IX86_BUILTIN_LOADHPD:
20906 case IX86_BUILTIN_LOADLPD:
20907 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20908 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20909 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20910 : CODE_FOR_sse2_loadlpd);
20911 arg0 = CALL_EXPR_ARG (exp, 0);
20912 arg1 = CALL_EXPR_ARG (exp, 1);
20913 op0 = expand_normal (arg0);
20914 op1 = expand_normal (arg1);
20915 tmode = insn_data[icode].operand[0].mode;
20916 mode0 = insn_data[icode].operand[1].mode;
20917 mode1 = insn_data[icode].operand[2].mode;
20919 op0 = force_reg (mode0, op0);
20920 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20921 if (optimize || target == 0
20922 || GET_MODE (target) != tmode
20923 || !register_operand (target, tmode))
20924 target = gen_reg_rtx (tmode);
20925 pat = GEN_FCN (icode) (target, op0, op1);
20926 if (! pat)
20927 return 0;
20928 emit_insn (pat);
20929 return target;
20931 case IX86_BUILTIN_STOREHPS:
20932 case IX86_BUILTIN_STORELPS:
20933 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20934 : CODE_FOR_sse_storelps);
20935 arg0 = CALL_EXPR_ARG (exp, 0);
20936 arg1 = CALL_EXPR_ARG (exp, 1);
20937 op0 = expand_normal (arg0);
20938 op1 = expand_normal (arg1);
20939 mode0 = insn_data[icode].operand[0].mode;
20940 mode1 = insn_data[icode].operand[1].mode;
20942 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20943 op1 = force_reg (mode1, op1);
20945 pat = GEN_FCN (icode) (op0, op1);
20946 if (! pat)
20947 return 0;
20948 emit_insn (pat);
20949 return const0_rtx;
20951 case IX86_BUILTIN_MOVNTPS:
20952 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20953 case IX86_BUILTIN_MOVNTQ:
20954 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20956 case IX86_BUILTIN_LDMXCSR:
20957 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20958 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20959 emit_move_insn (target, op0);
20960 emit_insn (gen_sse_ldmxcsr (target));
20961 return 0;
20963 case IX86_BUILTIN_STMXCSR:
20964 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20965 emit_insn (gen_sse_stmxcsr (target));
20966 return copy_to_mode_reg (SImode, target);
20968 case IX86_BUILTIN_PSHUFW:
20969 case IX86_BUILTIN_PSHUFD:
20970 case IX86_BUILTIN_PSHUFHW:
20971 case IX86_BUILTIN_PSHUFLW:
20972 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20973 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20974 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20975 : CODE_FOR_mmx_pshufw);
20976 arg0 = CALL_EXPR_ARG (exp, 0);
20977 arg1 = CALL_EXPR_ARG (exp, 1);
20978 op0 = expand_normal (arg0);
20979 op1 = expand_normal (arg1);
20980 tmode = insn_data[icode].operand[0].mode;
20981 mode1 = insn_data[icode].operand[1].mode;
20982 mode2 = insn_data[icode].operand[2].mode;
20984 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20985 op0 = copy_to_mode_reg (mode1, op0);
20986 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20988 /* @@@ better error message */
20989 error ("mask must be an immediate");
20990 return const0_rtx;
20992 if (target == 0
20993 || GET_MODE (target) != tmode
20994 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20995 target = gen_reg_rtx (tmode);
20996 pat = GEN_FCN (icode) (target, op0, op1);
20997 if (! pat)
20998 return 0;
20999 emit_insn (pat);
21000 return target;
21002 case IX86_BUILTIN_PSLLW:
21003 case IX86_BUILTIN_PSLLWI:
21004 icode = CODE_FOR_mmx_ashlv4hi3;
21005 goto do_pshift;
21006 case IX86_BUILTIN_PSLLD:
21007 case IX86_BUILTIN_PSLLDI:
21008 icode = CODE_FOR_mmx_ashlv2si3;
21009 goto do_pshift;
21010 case IX86_BUILTIN_PSLLQ:
21011 case IX86_BUILTIN_PSLLQI:
21012 icode = CODE_FOR_mmx_ashlv1di3;
21013 goto do_pshift;
21014 case IX86_BUILTIN_PSRAW:
21015 case IX86_BUILTIN_PSRAWI:
21016 icode = CODE_FOR_mmx_ashrv4hi3;
21017 goto do_pshift;
21018 case IX86_BUILTIN_PSRAD:
21019 case IX86_BUILTIN_PSRADI:
21020 icode = CODE_FOR_mmx_ashrv2si3;
21021 goto do_pshift;
21022 case IX86_BUILTIN_PSRLW:
21023 case IX86_BUILTIN_PSRLWI:
21024 icode = CODE_FOR_mmx_lshrv4hi3;
21025 goto do_pshift;
21026 case IX86_BUILTIN_PSRLD:
21027 case IX86_BUILTIN_PSRLDI:
21028 icode = CODE_FOR_mmx_lshrv2si3;
21029 goto do_pshift;
21030 case IX86_BUILTIN_PSRLQ:
21031 case IX86_BUILTIN_PSRLQI:
21032 icode = CODE_FOR_mmx_lshrv1di3;
21033 goto do_pshift;
21035 case IX86_BUILTIN_PSLLW128:
21036 case IX86_BUILTIN_PSLLWI128:
21037 icode = CODE_FOR_ashlv8hi3;
21038 goto do_pshift;
21039 case IX86_BUILTIN_PSLLD128:
21040 case IX86_BUILTIN_PSLLDI128:
21041 icode = CODE_FOR_ashlv4si3;
21042 goto do_pshift;
21043 case IX86_BUILTIN_PSLLQ128:
21044 case IX86_BUILTIN_PSLLQI128:
21045 icode = CODE_FOR_ashlv2di3;
21046 goto do_pshift;
21047 case IX86_BUILTIN_PSRAW128:
21048 case IX86_BUILTIN_PSRAWI128:
21049 icode = CODE_FOR_ashrv8hi3;
21050 goto do_pshift;
21051 case IX86_BUILTIN_PSRAD128:
21052 case IX86_BUILTIN_PSRADI128:
21053 icode = CODE_FOR_ashrv4si3;
21054 goto do_pshift;
21055 case IX86_BUILTIN_PSRLW128:
21056 case IX86_BUILTIN_PSRLWI128:
21057 icode = CODE_FOR_lshrv8hi3;
21058 goto do_pshift;
21059 case IX86_BUILTIN_PSRLD128:
21060 case IX86_BUILTIN_PSRLDI128:
21061 icode = CODE_FOR_lshrv4si3;
21062 goto do_pshift;
21063 case IX86_BUILTIN_PSRLQ128:
21064 case IX86_BUILTIN_PSRLQI128:
21065 icode = CODE_FOR_lshrv2di3;
21067 do_pshift:
21068 arg0 = CALL_EXPR_ARG (exp, 0);
21069 arg1 = CALL_EXPR_ARG (exp, 1);
21070 op0 = expand_normal (arg0);
21071 op1 = expand_normal (arg1);
21073 tmode = insn_data[icode].operand[0].mode;
21074 mode1 = insn_data[icode].operand[1].mode;
21076 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21077 op0 = copy_to_reg (op0);
21079 if (!CONST_INT_P (op1))
21080 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
21082 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
21083 op1 = copy_to_reg (op1);
21085 target = gen_reg_rtx (tmode);
21086 pat = GEN_FCN (icode) (target, op0, op1);
21087 if (!pat)
21088 return 0;
21089 emit_insn (pat);
21090 return target;
21092 case IX86_BUILTIN_PSLLDQI128:
21093 return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_ashlti3,
21094 exp, target);
21095 break;
21097 case IX86_BUILTIN_PSRLDQI128:
21098 return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_lshrti3,
21099 exp, target);
21100 break;
21102 case IX86_BUILTIN_FEMMS:
21103 emit_insn (gen_mmx_femms ());
21104 return NULL_RTX;
21106 case IX86_BUILTIN_PAVGUSB:
21107 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
21109 case IX86_BUILTIN_PF2ID:
21110 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
21112 case IX86_BUILTIN_PFACC:
21113 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
21115 case IX86_BUILTIN_PFADD:
21116 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
21118 case IX86_BUILTIN_PFCMPEQ:
21119 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
21121 case IX86_BUILTIN_PFCMPGE:
21122 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
21124 case IX86_BUILTIN_PFCMPGT:
21125 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
21127 case IX86_BUILTIN_PFMAX:
21128 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
21130 case IX86_BUILTIN_PFMIN:
21131 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
21133 case IX86_BUILTIN_PFMUL:
21134 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
21136 case IX86_BUILTIN_PFRCP:
21137 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
21139 case IX86_BUILTIN_PFRCPIT1:
21140 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
21142 case IX86_BUILTIN_PFRCPIT2:
21143 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
21145 case IX86_BUILTIN_PFRSQIT1:
21146 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
21148 case IX86_BUILTIN_PFRSQRT:
21149 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
21151 case IX86_BUILTIN_PFSUB:
21152 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
21154 case IX86_BUILTIN_PFSUBR:
21155 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
21157 case IX86_BUILTIN_PI2FD:
21158 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
21160 case IX86_BUILTIN_PMULHRW:
21161 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
21163 case IX86_BUILTIN_PF2IW:
21164 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
21166 case IX86_BUILTIN_PFNACC:
21167 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
21169 case IX86_BUILTIN_PFPNACC:
21170 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
21172 case IX86_BUILTIN_PI2FW:
21173 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
21175 case IX86_BUILTIN_PSWAPDSI:
21176 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
21178 case IX86_BUILTIN_PSWAPDSF:
21179 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
21181 case IX86_BUILTIN_SQRTSD:
21182 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
21183 case IX86_BUILTIN_LOADUPD:
21184 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
21185 case IX86_BUILTIN_STOREUPD:
21186 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
21188 case IX86_BUILTIN_MFENCE:
21189 emit_insn (gen_sse2_mfence ());
21190 return 0;
21191 case IX86_BUILTIN_LFENCE:
21192 emit_insn (gen_sse2_lfence ());
21193 return 0;
21195 case IX86_BUILTIN_CLFLUSH:
21196 arg0 = CALL_EXPR_ARG (exp, 0);
21197 op0 = expand_normal (arg0);
21198 icode = CODE_FOR_sse2_clflush;
21199 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21200 op0 = copy_to_mode_reg (Pmode, op0);
21202 emit_insn (gen_sse2_clflush (op0));
21203 return 0;
21205 case IX86_BUILTIN_MOVNTPD:
21206 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
21207 case IX86_BUILTIN_MOVNTDQ:
21208 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
21209 case IX86_BUILTIN_MOVNTI:
21210 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
21212 case IX86_BUILTIN_LOADDQU:
21213 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
21214 case IX86_BUILTIN_STOREDQU:
21215 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
21217 case IX86_BUILTIN_MONITOR:
21218 arg0 = CALL_EXPR_ARG (exp, 0);
21219 arg1 = CALL_EXPR_ARG (exp, 1);
21220 arg2 = CALL_EXPR_ARG (exp, 2);
21221 op0 = expand_normal (arg0);
21222 op1 = expand_normal (arg1);
21223 op2 = expand_normal (arg2);
21224 if (!REG_P (op0))
21225 op0 = copy_to_mode_reg (Pmode, op0);
21226 if (!REG_P (op1))
21227 op1 = copy_to_mode_reg (SImode, op1);
21228 if (!REG_P (op2))
21229 op2 = copy_to_mode_reg (SImode, op2);
21230 if (!TARGET_64BIT)
21231 emit_insn (gen_sse3_monitor (op0, op1, op2));
21232 else
21233 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21234 return 0;
21236 case IX86_BUILTIN_MWAIT:
21237 arg0 = CALL_EXPR_ARG (exp, 0);
21238 arg1 = CALL_EXPR_ARG (exp, 1);
21239 op0 = expand_normal (arg0);
21240 op1 = expand_normal (arg1);
21241 if (!REG_P (op0))
21242 op0 = copy_to_mode_reg (SImode, op0);
21243 if (!REG_P (op1))
21244 op1 = copy_to_mode_reg (SImode, op1);
21245 emit_insn (gen_sse3_mwait (op0, op1));
21246 return 0;
21248 case IX86_BUILTIN_LDDQU:
21249 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
21250 target, 1);
21252 case IX86_BUILTIN_PALIGNR:
21253 case IX86_BUILTIN_PALIGNR128:
21254 if (fcode == IX86_BUILTIN_PALIGNR)
21256 icode = CODE_FOR_ssse3_palignrdi;
21257 mode = DImode;
21259 else
21261 icode = CODE_FOR_ssse3_palignrti;
21262 mode = V2DImode;
21264 arg0 = CALL_EXPR_ARG (exp, 0);
21265 arg1 = CALL_EXPR_ARG (exp, 1);
21266 arg2 = CALL_EXPR_ARG (exp, 2);
21267 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21268 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21269 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21270 tmode = insn_data[icode].operand[0].mode;
21271 mode1 = insn_data[icode].operand[1].mode;
21272 mode2 = insn_data[icode].operand[2].mode;
21273 mode3 = insn_data[icode].operand[3].mode;
21275 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21277 op0 = copy_to_reg (op0);
21278 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21280 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21282 op1 = copy_to_reg (op1);
21283 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21285 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21287 error ("shift must be an immediate");
21288 return const0_rtx;
21290 target = gen_reg_rtx (mode);
21291 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21292 op0, op1, op2);
21293 if (! pat)
21294 return 0;
21295 emit_insn (pat);
21296 return target;
21298 case IX86_BUILTIN_MOVNTDQA:
21299 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21300 target, 1);
21302 case IX86_BUILTIN_MOVNTSD:
21303 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21305 case IX86_BUILTIN_MOVNTSS:
21306 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21308 case IX86_BUILTIN_INSERTQ:
21309 case IX86_BUILTIN_EXTRQ:
21310 icode = (fcode == IX86_BUILTIN_EXTRQ
21311 ? CODE_FOR_sse4a_extrq
21312 : CODE_FOR_sse4a_insertq);
21313 arg0 = CALL_EXPR_ARG (exp, 0);
21314 arg1 = CALL_EXPR_ARG (exp, 1);
21315 op0 = expand_normal (arg0);
21316 op1 = expand_normal (arg1);
21317 tmode = insn_data[icode].operand[0].mode;
21318 mode1 = insn_data[icode].operand[1].mode;
21319 mode2 = insn_data[icode].operand[2].mode;
21320 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21321 op0 = copy_to_mode_reg (mode1, op0);
21322 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21323 op1 = copy_to_mode_reg (mode2, op1);
21324 if (optimize || target == 0
21325 || GET_MODE (target) != tmode
21326 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21327 target = gen_reg_rtx (tmode);
21328 pat = GEN_FCN (icode) (target, op0, op1);
21329 if (! pat)
21330 return NULL_RTX;
21331 emit_insn (pat);
21332 return target;
21334 case IX86_BUILTIN_EXTRQI:
21335 icode = CODE_FOR_sse4a_extrqi;
21336 arg0 = CALL_EXPR_ARG (exp, 0);
21337 arg1 = CALL_EXPR_ARG (exp, 1);
21338 arg2 = CALL_EXPR_ARG (exp, 2);
21339 op0 = expand_normal (arg0);
21340 op1 = expand_normal (arg1);
21341 op2 = expand_normal (arg2);
21342 tmode = insn_data[icode].operand[0].mode;
21343 mode1 = insn_data[icode].operand[1].mode;
21344 mode2 = insn_data[icode].operand[2].mode;
21345 mode3 = insn_data[icode].operand[3].mode;
21346 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21347 op0 = copy_to_mode_reg (mode1, op0);
21348 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21350 error ("index mask must be an immediate");
21351 return gen_reg_rtx (tmode);
21353 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21355 error ("length mask must be an immediate");
21356 return gen_reg_rtx (tmode);
21358 if (optimize || target == 0
21359 || GET_MODE (target) != tmode
21360 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21361 target = gen_reg_rtx (tmode);
21362 pat = GEN_FCN (icode) (target, op0, op1, op2);
21363 if (! pat)
21364 return NULL_RTX;
21365 emit_insn (pat);
21366 return target;
21368 case IX86_BUILTIN_INSERTQI:
21369 icode = CODE_FOR_sse4a_insertqi;
21370 arg0 = CALL_EXPR_ARG (exp, 0);
21371 arg1 = CALL_EXPR_ARG (exp, 1);
21372 arg2 = CALL_EXPR_ARG (exp, 2);
21373 arg3 = CALL_EXPR_ARG (exp, 3);
21374 op0 = expand_normal (arg0);
21375 op1 = expand_normal (arg1);
21376 op2 = expand_normal (arg2);
21377 op3 = expand_normal (arg3);
21378 tmode = insn_data[icode].operand[0].mode;
21379 mode1 = insn_data[icode].operand[1].mode;
21380 mode2 = insn_data[icode].operand[2].mode;
21381 mode3 = insn_data[icode].operand[3].mode;
21382 mode4 = insn_data[icode].operand[4].mode;
21384 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21385 op0 = copy_to_mode_reg (mode1, op0);
21387 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21388 op1 = copy_to_mode_reg (mode2, op1);
21390 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21392 error ("index mask must be an immediate");
21393 return gen_reg_rtx (tmode);
21395 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21397 error ("length mask must be an immediate");
21398 return gen_reg_rtx (tmode);
21400 if (optimize || target == 0
21401 || GET_MODE (target) != tmode
21402 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21403 target = gen_reg_rtx (tmode);
21404 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21405 if (! pat)
21406 return NULL_RTX;
21407 emit_insn (pat);
21408 return target;
21410 case IX86_BUILTIN_VEC_INIT_V2SI:
21411 case IX86_BUILTIN_VEC_INIT_V4HI:
21412 case IX86_BUILTIN_VEC_INIT_V8QI:
21413 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21415 case IX86_BUILTIN_VEC_EXT_V2DF:
21416 case IX86_BUILTIN_VEC_EXT_V2DI:
21417 case IX86_BUILTIN_VEC_EXT_V4SF:
21418 case IX86_BUILTIN_VEC_EXT_V4SI:
21419 case IX86_BUILTIN_VEC_EXT_V8HI:
21420 case IX86_BUILTIN_VEC_EXT_V2SI:
21421 case IX86_BUILTIN_VEC_EXT_V4HI:
21422 case IX86_BUILTIN_VEC_EXT_V16QI:
21423 return ix86_expand_vec_ext_builtin (exp, target);
21425 case IX86_BUILTIN_VEC_SET_V2DI:
21426 case IX86_BUILTIN_VEC_SET_V4SF:
21427 case IX86_BUILTIN_VEC_SET_V4SI:
21428 case IX86_BUILTIN_VEC_SET_V8HI:
21429 case IX86_BUILTIN_VEC_SET_V4HI:
21430 case IX86_BUILTIN_VEC_SET_V16QI:
21431 return ix86_expand_vec_set_builtin (exp);
21433 case IX86_BUILTIN_INFQ:
21435 REAL_VALUE_TYPE inf;
21436 rtx tmp;
21438 real_inf (&inf);
21439 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21441 tmp = validize_mem (force_const_mem (mode, tmp));
21443 if (target == 0)
21444 target = gen_reg_rtx (mode);
21446 emit_move_insn (target, tmp);
21447 return target;
21450 case IX86_BUILTIN_FABSQ:
21451 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21453 case IX86_BUILTIN_COPYSIGNQ:
21454 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21456 default:
21457 break;
21460 for (i = 0, d = bdesc_sse_args;
21461 i < ARRAY_SIZE (bdesc_sse_args);
21462 i++, d++)
21463 if (d->code == fcode)
21465 enum sse_builtin_type type = (enum sse_builtin_type) d->flag;
21466 return ix86_expand_sse_operands_builtin (d->icode, exp,
21467 type, target);
21470 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21471 if (d->code == fcode)
21473 /* Compares are treated specially. */
21474 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21475 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21476 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21477 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21478 return ix86_expand_sse_compare (d, exp, target);
21480 return ix86_expand_binop_builtin (d->icode, exp, target);
21483 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21484 if (d->code == fcode)
21485 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21487 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21488 if (d->code == fcode)
21489 return ix86_expand_sse_comi (d, exp, target);
21491 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21492 if (d->code == fcode)
21493 return ix86_expand_sse_ptest (d, exp, target);
21495 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21496 if (d->code == fcode)
21497 return ix86_expand_crc32 (d->icode, exp, target);
21499 for (i = 0, d = bdesc_pcmpestr;
21500 i < ARRAY_SIZE (bdesc_pcmpestr);
21501 i++, d++)
21502 if (d->code == fcode)
21503 return ix86_expand_sse_pcmpestr (d, exp, target);
21505 for (i = 0, d = bdesc_pcmpistr;
21506 i < ARRAY_SIZE (bdesc_pcmpistr);
21507 i++, d++)
21508 if (d->code == fcode)
21509 return ix86_expand_sse_pcmpistr (d, exp, target);
21511 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21512 if (d->code == fcode)
21513 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21514 (enum multi_arg_type)d->flag,
21515 d->comparison);
21517 gcc_unreachable ();
21520 /* Returns a function decl for a vectorized version of the builtin function
21521 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21522 if it is not available. */
21524 static tree
21525 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21526 tree type_in)
21528 enum machine_mode in_mode, out_mode;
21529 int in_n, out_n;
21531 if (TREE_CODE (type_out) != VECTOR_TYPE
21532 || TREE_CODE (type_in) != VECTOR_TYPE)
21533 return NULL_TREE;
21535 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21536 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21537 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21538 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21540 switch (fn)
21542 case BUILT_IN_SQRT:
21543 if (out_mode == DFmode && out_n == 2
21544 && in_mode == DFmode && in_n == 2)
21545 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21546 break;
21548 case BUILT_IN_SQRTF:
21549 if (out_mode == SFmode && out_n == 4
21550 && in_mode == SFmode && in_n == 4)
21551 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21552 break;
21554 case BUILT_IN_LRINT:
21555 if (out_mode == SImode && out_n == 4
21556 && in_mode == DFmode && in_n == 2)
21557 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21558 break;
21560 case BUILT_IN_LRINTF:
21561 if (out_mode == SImode && out_n == 4
21562 && in_mode == SFmode && in_n == 4)
21563 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21564 break;
21566 default:
21570 /* Dispatch to a handler for a vectorization library. */
21571 if (ix86_veclib_handler)
21572 return (*ix86_veclib_handler)(fn, type_out, type_in);
21574 return NULL_TREE;
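/* Illustrative example (editorial): when vectorizing a loop such as
   "for (i = 0; i < n; i++) b[i] = sqrt (a[i])" over doubles, the
   vectorizer asks for BUILT_IN_SQRT with V2DFmode in and out and
   receives the decl of IX86_BUILTIN_SQRTPD from the switch above.  */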
21577 /* Handler for an SVML-style interface to
21578 a library with vectorized intrinsics. */
21580 static tree
21581 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21583 char name[20];
21584 tree fntype, new_fndecl, args;
21585 unsigned arity;
21586 const char *bname;
21587 enum machine_mode el_mode, in_mode;
21588 int n, in_n;
21590 /* The SVML is suitable for unsafe math only. */
21591 if (!flag_unsafe_math_optimizations)
21592 return NULL_TREE;
21594 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21595 n = TYPE_VECTOR_SUBPARTS (type_out);
21596 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21597 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21598 if (el_mode != in_mode
21599 || n != in_n)
21600 return NULL_TREE;
21602 switch (fn)
21604 case BUILT_IN_EXP:
21605 case BUILT_IN_LOG:
21606 case BUILT_IN_LOG10:
21607 case BUILT_IN_POW:
21608 case BUILT_IN_TANH:
21609 case BUILT_IN_TAN:
21610 case BUILT_IN_ATAN:
21611 case BUILT_IN_ATAN2:
21612 case BUILT_IN_ATANH:
21613 case BUILT_IN_CBRT:
21614 case BUILT_IN_SINH:
21615 case BUILT_IN_SIN:
21616 case BUILT_IN_ASINH:
21617 case BUILT_IN_ASIN:
21618 case BUILT_IN_COSH:
21619 case BUILT_IN_COS:
21620 case BUILT_IN_ACOSH:
21621 case BUILT_IN_ACOS:
21622 if (el_mode != DFmode || n != 2)
21623 return NULL_TREE;
21624 break;
21626 case BUILT_IN_EXPF:
21627 case BUILT_IN_LOGF:
21628 case BUILT_IN_LOG10F:
21629 case BUILT_IN_POWF:
21630 case BUILT_IN_TANHF:
21631 case BUILT_IN_TANF:
21632 case BUILT_IN_ATANF:
21633 case BUILT_IN_ATAN2F:
21634 case BUILT_IN_ATANHF:
21635 case BUILT_IN_CBRTF:
21636 case BUILT_IN_SINHF:
21637 case BUILT_IN_SINF:
21638 case BUILT_IN_ASINHF:
21639 case BUILT_IN_ASINF:
21640 case BUILT_IN_COSHF:
21641 case BUILT_IN_COSF:
21642 case BUILT_IN_ACOSHF:
21643 case BUILT_IN_ACOSF:
21644 if (el_mode != SFmode || n != 4)
21645 return NULL_TREE;
21646 break;
21648 default:
21649 return NULL_TREE;
21652 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21654 if (fn == BUILT_IN_LOGF)
21655 strcpy (name, "vmlsLn4");
21656 else if (fn == BUILT_IN_LOG)
21657 strcpy (name, "vmldLn2");
21658 else if (n == 4)
21660 sprintf (name, "vmls%s", bname+10);
21661 name[strlen (name)-1] = '4';
21663 else
21664 sprintf (name, "vmld%s2", bname+10);
21666 /* Convert the first letter of the function name to uppercase. */
21667 name[4] &= ~0x20;
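/* Worked example of the mangling above (editorial): for BUILT_IN_SINF,
   BNAME is "__builtin_sinf", so BNAME+10 is "sinf"; the sprintf yields
   "vmlssinf", the final character becomes '4' ("vmlssin4"), and
   uppercasing name[4] gives "vmlsSin4".  The double variant
   BUILT_IN_SIN comes out as "vmldSin2".  */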
21669 arity = 0;
21670 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21671 args = TREE_CHAIN (args))
21672 arity++;
21674 if (arity == 1)
21675 fntype = build_function_type_list (type_out, type_in, NULL);
21676 else
21677 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21679 /* Build a function declaration for the vectorized function. */
21680 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21681 TREE_PUBLIC (new_fndecl) = 1;
21682 DECL_EXTERNAL (new_fndecl) = 1;
21683 DECL_IS_NOVOPS (new_fndecl) = 1;
21684 TREE_READONLY (new_fndecl) = 1;
21686 return new_fndecl;
21689 /* Handler for an ACML-style interface to
21690 a library with vectorized intrinsics. */
21692 static tree
21693 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21695 char name[20] = "__vr.._";
21696 tree fntype, new_fndecl, args;
21697 unsigned arity;
21698 const char *bname;
21699 enum machine_mode el_mode, in_mode;
21700 int n, in_n;
21702 /* The ACML is 64-bit only and suitable for unsafe math only, as
21703 it does not correctly support parts of IEEE with the required
21704 precision, such as denormals. */
21705 if (!TARGET_64BIT
21706 || !flag_unsafe_math_optimizations)
21707 return NULL_TREE;
21709 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21710 n = TYPE_VECTOR_SUBPARTS (type_out);
21711 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21712 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21713 if (el_mode != in_mode
21714 || n != in_n)
21715 return NULL_TREE;
21717 switch (fn)
21719 case BUILT_IN_SIN:
21720 case BUILT_IN_COS:
21721 case BUILT_IN_EXP:
21722 case BUILT_IN_LOG:
21723 case BUILT_IN_LOG2:
21724 case BUILT_IN_LOG10:
21725 name[4] = 'd';
21726 name[5] = '2';
21727 if (el_mode != DFmode
21728 || n != 2)
21729 return NULL_TREE;
21730 break;
21732 case BUILT_IN_SINF:
21733 case BUILT_IN_COSF:
21734 case BUILT_IN_EXPF:
21735 case BUILT_IN_POWF:
21736 case BUILT_IN_LOGF:
21737 case BUILT_IN_LOG2F:
21738 case BUILT_IN_LOG10F:
21739 name[4] = 's';
21740 name[5] = '4';
21741 if (el_mode != SFmode
21742 || n != 4)
21743 return NULL_TREE;
21744 break;
21746 default:
21747 return NULL_TREE;
21750 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21751 sprintf (name + 7, "%s", bname+10);
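/* Worked example of the naming above (editorial): the template is
   "__vr.._"; for BUILT_IN_SIN, name[4] and name[5] become 'd' and '2'
   and BNAME+10 is "sin", giving "__vrd2_sin"; for BUILT_IN_SINF the
   result is "__vrs4_sinf".  */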
21753 arity = 0;
21754 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21755 args = TREE_CHAIN (args))
21756 arity++;
21758 if (arity == 1)
21759 fntype = build_function_type_list (type_out, type_in, NULL);
21760 else
21761 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21763 /* Build a function declaration for the vectorized function. */
21764 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21765 TREE_PUBLIC (new_fndecl) = 1;
21766 DECL_EXTERNAL (new_fndecl) = 1;
21767 DECL_IS_NOVOPS (new_fndecl) = 1;
21768 TREE_READONLY (new_fndecl) = 1;
21770 return new_fndecl;
21774 /* Returns a decl of a function that implements conversion of the
21775 input vector of type TYPE, or NULL_TREE if it is not available. */
21777 static tree
21778 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21780 if (TREE_CODE (type) != VECTOR_TYPE)
21781 return NULL_TREE;
21783 switch (code)
21785 case FLOAT_EXPR:
21786 switch (TYPE_MODE (type))
21788 case V4SImode:
21789 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21790 default:
21791 return NULL_TREE;
21794 case FIX_TRUNC_EXPR:
21795 switch (TYPE_MODE (type))
21797 case V4SFmode:
21798 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21799 default:
21800 return NULL_TREE;
21802 default:
21803 return NULL_TREE;
21808 /* Returns the decl of a target-specific builtin that implements the
21809 reciprocal of the function, or NULL_TREE if it is not available. */
21811 static tree
21812 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21813 bool sqrt ATTRIBUTE_UNUSED)
21815 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21816 && flag_finite_math_only && !flag_trapping_math
21817 && flag_unsafe_math_optimizations))
21818 return NULL_TREE;
21820 if (md_fn)
21821 /* Machine dependent builtins. */
21822 switch (fn)
21824 /* Vectorized version of sqrt to rsqrt conversion. */
21825 case IX86_BUILTIN_SQRTPS_NR:
21826 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21828 default:
21829 return NULL_TREE;
21831 else
21832 /* Normal builtins. */
21833 switch (fn)
21835 /* Sqrt to rsqrt conversion. */
21836 case BUILT_IN_SQRTF:
21837 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21839 default:
21840 return NULL_TREE;
21844 /* Store OPERAND to the memory after reload is completed. This means
21845 that we can't easily use assign_stack_local. */
21846 rtx
21847 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21849 rtx result;
21851 gcc_assert (reload_completed);
21852 if (TARGET_RED_ZONE)
21854 result = gen_rtx_MEM (mode,
21855 gen_rtx_PLUS (Pmode,
21856 stack_pointer_rtx,
21857 GEN_INT (-RED_ZONE_SIZE)));
21858 emit_move_insn (result, operand);
21860 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21862 switch (mode)
21864 case HImode:
21865 case SImode:
21866 operand = gen_lowpart (DImode, operand);
21867 /* FALLTHRU */
21868 case DImode:
21869 emit_insn (
21870 gen_rtx_SET (VOIDmode,
21871 gen_rtx_MEM (DImode,
21872 gen_rtx_PRE_DEC (DImode,
21873 stack_pointer_rtx)),
21874 operand));
21875 break;
21876 default:
21877 gcc_unreachable ();
21879 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21881 else
21883 switch (mode)
21885 case DImode:
21887 rtx operands[2];
21888 split_di (&operand, 1, operands, operands + 1);
21889 emit_insn (
21890 gen_rtx_SET (VOIDmode,
21891 gen_rtx_MEM (SImode,
21892 gen_rtx_PRE_DEC (Pmode,
21893 stack_pointer_rtx)),
21894 operands[1]));
21895 emit_insn (
21896 gen_rtx_SET (VOIDmode,
21897 gen_rtx_MEM (SImode,
21898 gen_rtx_PRE_DEC (Pmode,
21899 stack_pointer_rtx)),
21900 operands[0]));
21902 break;
21903 case HImode:
21904 /* Store HImodes as SImodes. */
21905 operand = gen_lowpart (SImode, operand);
21906 /* FALLTHRU */
21907 case SImode:
21908 emit_insn (
21909 gen_rtx_SET (VOIDmode,
21910 gen_rtx_MEM (GET_MODE (operand),
21911 gen_rtx_PRE_DEC (SImode,
21912 stack_pointer_rtx)),
21913 operand));
21914 break;
21915 default:
21916 gcc_unreachable ();
21918 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21920 return result;
21923 /* Free the operand from memory. */
21924 void
21925 ix86_free_from_memory (enum machine_mode mode)
21927 if (!TARGET_RED_ZONE)
21929 int size;
21931 if (mode == DImode || TARGET_64BIT)
21932 size = 8;
21933 else
21934 size = 4;
21935 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21936 to a pop or add instruction if registers are available. */
21937 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21938 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21939 GEN_INT (size))));
21943 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21944 QImode must go into class Q_REGS.
21945 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21946 movdf to do mem-to-mem moves through integer regs. */
21947 enum reg_class
21948 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21950 enum machine_mode mode = GET_MODE (x);
21952 /* We're only allowed to return a subclass of REGCLASS. Many of the
21953 following checks fail for NO_REGS, so eliminate that early. */
21954 if (regclass == NO_REGS)
21955 return NO_REGS;
21957 /* All classes can load zeros. */
21958 if (x == CONST0_RTX (mode))
21959 return regclass;
21961 /* Force constants into memory if we are loading a (nonzero) constant into
21962 an MMX or SSE register. This is because there are no MMX/SSE instructions
21963 to load from a constant. */
21964 if (CONSTANT_P (x)
21965 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21966 return NO_REGS;
21968 /* Prefer SSE regs only, if we can use them for math. */
21969 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21970 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21972 /* Floating-point constants need more complex checks. */
21973 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21975 /* General regs can load everything. */
21976 if (reg_class_subset_p (regclass, GENERAL_REGS))
21977 return regclass;
21979 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21980 zero above. We only want to wind up preferring 80387 registers if
21981 we plan on doing computation with them. */
21982 if (TARGET_80387
21983 && standard_80387_constant_p (x))
21985 /* Limit class to non-sse. */
21986 if (regclass == FLOAT_SSE_REGS)
21987 return FLOAT_REGS;
21988 if (regclass == FP_TOP_SSE_REGS)
21989 return FP_TOP_REG;
21990 if (regclass == FP_SECOND_SSE_REGS)
21991 return FP_SECOND_REG;
21992 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21993 return regclass;
21996 return NO_REGS;
21999 /* Generally when we see PLUS here, it's the function invariant
22000 (plus soft-fp const_int), which can only be computed into general
22001 regs. */
22002 if (GET_CODE (x) == PLUS)
22003 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
22005 /* QImode constants are easy to load, but non-constant QImode data
22006 must go into Q_REGS. */
22007 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
22009 if (reg_class_subset_p (regclass, Q_REGS))
22010 return regclass;
22011 if (reg_class_subset_p (Q_REGS, regclass))
22012 return Q_REGS;
22013 return NO_REGS;
22016 return regclass;
22019 /* Discourage putting floating-point values in SSE registers unless
22020 SSE math is being used, and likewise for the 387 registers. */
22021 enum reg_class
22022 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22024 enum machine_mode mode = GET_MODE (x);
22026 /* Restrict the output reload class to the register bank that we are doing
22027 math on. If we would prefer not to return a subset of REGCLASS, reject this
22028 alternative: if reload cannot do this, it will still use its choice. */
22029 mode = GET_MODE (x);
22030 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22031 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
22033 if (X87_FLOAT_MODE_P (mode))
22035 if (regclass == FP_TOP_SSE_REGS)
22036 return FP_TOP_REG;
22037 else if (regclass == FP_SECOND_SSE_REGS)
22038 return FP_SECOND_REG;
22039 else
22040 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
22043 return regclass;
22046 /* If we are copying between general and FP registers, we need a memory
22047 location. The same is true for SSE and MMX registers.
22049 To optimize register_move_cost performance, we allow an inline variant.
22051 The macro can't work reliably when one of the CLASSES is a class containing
22052 registers from multiple units (SSE, MMX, integer). We avoid this by never
22053 combining those units in a single alternative in the machine description.
22054 Ensure that this constraint holds to avoid unexpected surprises.
22056 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22057 enforce these sanity checks. */
22059 static inline int
22060 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22061 enum machine_mode mode, int strict)
22063 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22064 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22065 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22066 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22067 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22068 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22070 gcc_assert (!strict);
22071 return true;
22074 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22075 return true;
22077 /* ??? This is a lie. We do have moves between mmx/general and between
22078 mmx/sse2. But by saying we need secondary memory we discourage the
22079 register allocator from using the mmx registers unless needed. */
22080 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22081 return true;
22083 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22085 /* SSE1 doesn't have any direct moves from other classes. */
22086 if (!TARGET_SSE2)
22087 return true;
22089 /* If the target says that inter-unit moves are more expensive
22090 than moving through memory, then don't generate them. */
22091 if (!TARGET_INTER_UNIT_MOVES)
22092 return true;
22094 /* Between SSE and general, we have moves no larger than word size. */
22095 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22096 return true;
22099 return false;
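/* Illustrative example (editorial): on ia32, copying a DFmode value
   between SSE_REGS and GENERAL_REGS returns true above: even with SSE2
   and inter-unit moves enabled, GET_MODE_SIZE (DFmode) == 8 exceeds
   UNITS_PER_WORD == 4, so the move has to go through memory.  */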
22102 int
22103 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22104 enum machine_mode mode, int strict)
22106 return inline_secondary_memory_needed (class1, class2, mode, strict);
22109 /* Return true if the registers in CLASS cannot represent the change from
22110 modes FROM to TO. */
22112 bool
22113 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22114 enum reg_class regclass)
22116 if (from == to)
22117 return false;
22119 /* x87 registers can't do subreg at all, as all values are reformatted
22120 to extended precision. */
22121 if (MAYBE_FLOAT_CLASS_P (regclass))
22122 return true;
22124 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22126 /* Vector registers do not support QI or HImode loads. If we don't
22127 disallow a change to these modes, reload will assume it's ok to
22128 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22129 the vec_dupv4hi pattern. */
22130 if (GET_MODE_SIZE (from) < 4)
22131 return true;
22133 /* Vector registers do not support subreg with nonzero offsets, which
22134 are otherwise valid for integer registers. Since we can't see
22135 whether we have a nonzero offset from here, prohibit all
22136 nonparadoxical subregs changing size. */
22137 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22138 return true;
22141 return false;
22144 /* Return the cost of moving data of mode M between a
22145 register and memory. A value of 2 is the default; this cost is
22146 relative to those in `REGISTER_MOVE_COST'.
22148 This function is used extensively by register_move_cost, which is used to
22149 build tables at startup, so make it inline in that case.
22150 When IN is 2, return the maximum of the in and out move costs.
22152 If moving between registers and memory is more expensive than
22153 between two registers, you should define this macro to express the
22154 relative cost.
22156 Also model the increased cost of moving QImode registers in non-Q_REGS
22157 classes. */
22159 static inline int
22160 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22161 int in)
22163 int cost;
22164 if (FLOAT_CLASS_P (regclass))
22166 int index;
22167 switch (mode)
22169 case SFmode:
22170 index = 0;
22171 break;
22172 case DFmode:
22173 index = 1;
22174 break;
22175 case XFmode:
22176 index = 2;
22177 break;
22178 default:
22179 return 100;
22181 if (in == 2)
22182 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22183 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22185 if (SSE_CLASS_P (regclass))
22187 int index;
22188 switch (GET_MODE_SIZE (mode))
22190 case 4:
22191 index = 0;
22192 break;
22193 case 8:
22194 index = 1;
22195 break;
22196 case 16:
22197 index = 2;
22198 break;
22199 default:
22200 return 100;
22202 if (in == 2)
22203 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22204 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22206 if (MMX_CLASS_P (regclass))
22208 int index;
22209 switch (GET_MODE_SIZE (mode))
22211 case 4:
22212 index = 0;
22213 break;
22214 case 8:
22215 index = 1;
22216 break;
22217 default:
22218 return 100;
22220 if (in == 2)
22221 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22222 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22224 switch (GET_MODE_SIZE (mode))
22226 case 1:
22227 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22229 if (!in)
22230 return ix86_cost->int_store[0];
22231 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22232 cost = ix86_cost->movzbl_load;
22233 else
22234 cost = ix86_cost->int_load[0];
22235 if (in == 2)
22236 return MAX (cost, ix86_cost->int_store[0]);
22237 return cost;
22239 else
22241 if (in == 2)
22242 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22243 if (in)
22244 return ix86_cost->movzbl_load;
22245 else
22246 return ix86_cost->int_store[0] + 4;
22248 break;
22249 case 2:
22250 if (in == 2)
22251 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22252 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22253 default:
22254 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
22255 if (mode == TFmode)
22256 mode = XFmode;
22257 if (in == 2)
22258 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22259 else if (in)
22260 cost = ix86_cost->int_load[2];
22261 else
22262 cost = ix86_cost->int_store[2];
22263 return (cost * (((int) GET_MODE_SIZE (mode)
22264 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
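/* Illustrative example (editorial): for a QImode value in a non-Q_REGS
   integer class on ia32 with IN == 2, the size-1 branch above returns
   MAX (movzbl_load, int_store[0] + 4), i.e. the dearer of the
   zero-extending load and the penalized byte store.  */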
22268 int
22269 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22271 return inline_memory_move_cost (mode, regclass, in);
22275 /* Return the cost of moving data from a register in class CLASS1 to
22276 one in class CLASS2.
22278 It is not required that the cost always equal 2 when FROM is the same as TO;
22279 on some machines it is expensive to move between registers if they are not
22280 general registers. */
22282 int
22283 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22284 enum reg_class class2)
22286 /* In case we require secondary memory, compute cost of the store followed
22287 by load. In order to avoid bad register allocation choices, we need
22288 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22290 if (inline_secondary_memory_needed (class1, class2, mode, 0))
22292 int cost = 1;
22294 cost += inline_memory_move_cost (mode, class1, 2);
22295 cost += inline_memory_move_cost (mode, class2, 2);
22297 /* In the case of copying from a general purpose register we may emit
22298 multiple stores followed by a single load, causing a memory size
22299 mismatch stall. Count this as an arbitrarily high cost of 20. */
22300 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22301 cost += 20;
22303 /* In the case of FP/MMX moves, the registers actually overlap, and we
22304 have to switch modes in order to treat them differently. */
22305 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22306 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22307 cost += 20;
22309 return cost;
22312 /* Moves between SSE/MMX and integer unit are expensive. */
22313 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22314 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22316 /* ??? By keeping the returned value relatively high, we limit the number
22317 of moves between integer and MMX/SSE registers for all targets.
22318 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
22319 where integer modes in MMX/SSE registers are not tieable
22320 because of missing QImode and HImode moves to, from or between
22321 MMX/SSE registers. */
22322 return MAX (8, ix86_cost->mmxsse_to_integer);
22324 if (MAYBE_FLOAT_CLASS_P (class1))
22325 return ix86_cost->fp_move;
22326 if (MAYBE_SSE_CLASS_P (class1))
22327 return ix86_cost->sse_move;
22328 if (MAYBE_MMX_CLASS_P (class1))
22329 return ix86_cost->mmx_move;
22330 return 2;
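/* Illustrative example (editorial): copying DFmode between FLOAT_REGS
   and SSE_REGS needs secondary memory, so the cost above is 1 plus
   inline_memory_move_cost for each class; copying SImode between
   GENERAL_REGS and SSE_REGS (assuming SSE2 and inter-unit moves are
   enabled) instead hits the inter-unit branch and costs
   MAX (8, mmxsse_to_integer).  */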
22333 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22335 bool
22336 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22338 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
22339 if (CC_REGNO_P (regno))
22340 return GET_MODE_CLASS (mode) == MODE_CC;
22341 if (GET_MODE_CLASS (mode) == MODE_CC
22342 || GET_MODE_CLASS (mode) == MODE_RANDOM
22343 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22344 return 0;
22345 if (FP_REGNO_P (regno))
22346 return VALID_FP_MODE_P (mode);
22347 if (SSE_REGNO_P (regno))
22349 /* We implement the move patterns for all vector modes into and
22350 out of SSE registers, even when no operation instructions
22351 are available. */
22352 return (VALID_SSE_REG_MODE (mode)
22353 || VALID_SSE2_REG_MODE (mode)
22354 || VALID_MMX_REG_MODE (mode)
22355 || VALID_MMX_REG_MODE_3DNOW (mode));
22357 if (MMX_REGNO_P (regno))
22359 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22360 so if the register is available at all, then we can move data of
22361 the given mode into or out of it. */
22362 return (VALID_MMX_REG_MODE (mode)
22363 || VALID_MMX_REG_MODE_3DNOW (mode));
22366 if (mode == QImode)
22368 /* Take care with QImode values - they can be in non-QI regs,
22369 but then they do cause partial register stalls. */
22370 if (regno < 4 || TARGET_64BIT)
22371 return 1;
22372 if (!TARGET_PARTIAL_REG_STALL)
22373 return 1;
22374 return reload_in_progress || reload_completed;
22376 /* We handle both integer and floats in the general purpose registers. */
22377 else if (VALID_INT_MODE_P (mode))
22378 return 1;
22379 else if (VALID_FP_MODE_P (mode))
22380 return 1;
22381 else if (VALID_DFP_MODE_P (mode))
22382 return 1;
22383 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22384 on to use that value in smaller contexts, this can easily force a
22385 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22386 supporting DImode, allow it. */
22387 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22388 return 1;
22390 return 0;
22393 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22394 tieable integer mode. */
22396 static bool
22397 ix86_tieable_integer_mode_p (enum machine_mode mode)
22399 switch (mode)
22401 case HImode:
22402 case SImode:
22403 return true;
22405 case QImode:
22406 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22408 case DImode:
22409 return TARGET_64BIT;
22411 default:
22412 return false;
22416 /* Return true if MODE1 is accessible in a register that can hold MODE2
22417 without copying. That is, all register classes that can hold MODE2
22418 can also hold MODE1. */
22420 bool
22421 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22423 if (mode1 == mode2)
22424 return true;
22426 if (ix86_tieable_integer_mode_p (mode1)
22427 && ix86_tieable_integer_mode_p (mode2))
22428 return true;
22430 /* MODE2 being XFmode implies fp stack or general regs, which means we
22431 can tie any smaller floating point modes to it. Note that we do not
22432 tie this with TFmode. */
22433 if (mode2 == XFmode)
22434 return mode1 == SFmode || mode1 == DFmode;
22436 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22437 that we can tie it with SFmode. */
22438 if (mode2 == DFmode)
22439 return mode1 == SFmode;
22441 /* If MODE2 is only appropriate for an SSE register, then tie with
22442 any other mode acceptable to SSE registers. */
22443 if (GET_MODE_SIZE (mode2) == 16
22444 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22445 return (GET_MODE_SIZE (mode1) == 16
22446 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22448 /* If MODE2 is appropriate for an MMX register, then tie
22449 with any other mode acceptable to MMX registers. */
22450 if (GET_MODE_SIZE (mode2) == 8
22451 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22452 return (GET_MODE_SIZE (mode1) == 8
22453 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22455 return false;
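/* Illustrative examples (editorial): SFmode ties with DFmode and with
   XFmode via the checks above; V4SFmode and V2DImode tie because both
   are 16 bytes wide and valid for SSE registers; TFmode deliberately
   does not tie with XFmode.  */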
22458 /* Compute a (partial) cost for rtx X. Return true if the complete
22459 cost has been computed, and false if subexpressions should be
22460 scanned. In either case, *TOTAL contains the cost result. */
22462 static bool
22463 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22465 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22466 enum machine_mode mode = GET_MODE (x);
22468 switch (code)
22470 case CONST_INT:
22471 case CONST:
22472 case LABEL_REF:
22473 case SYMBOL_REF:
22474 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22475 *total = 3;
22476 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22477 *total = 2;
22478 else if (flag_pic && SYMBOLIC_CONST (x)
22479 && (!TARGET_64BIT
22480 || (GET_CODE (x) != LABEL_REF
22481 && (GET_CODE (x) != SYMBOL_REF
22482 || !SYMBOL_REF_LOCAL_P (x)))))
22483 *total = 1;
22484 else
22485 *total = 0;
22486 return true;
22488 case CONST_DOUBLE:
22489 if (mode == VOIDmode)
22490 *total = 0;
22491 else
22492 switch (standard_80387_constant_p (x))
22494 case 1: /* 0.0 */
22495 *total = 1;
22496 break;
22497 default: /* Other constants */
22498 *total = 2;
22499 break;
22500 case 0:
22501 case -1:
22502 /* Start with (MEM (SYMBOL_REF)), since that's where
22503 it'll probably end up. Add a penalty for size. */
22504 *total = (COSTS_N_INSNS (1)
22505 + (flag_pic != 0 && !TARGET_64BIT)
22506 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22507 break;
22509 return true;
22511 case ZERO_EXTEND:
22512 /* The zero extension is often completely free on x86_64, so make
22513 it as cheap as possible. */
22514 if (TARGET_64BIT && mode == DImode
22515 && GET_MODE (XEXP (x, 0)) == SImode)
22516 *total = 1;
22517 else if (TARGET_ZERO_EXTEND_WITH_AND)
22518 *total = ix86_cost->add;
22519 else
22520 *total = ix86_cost->movzx;
22521 return false;
22523 case SIGN_EXTEND:
22524 *total = ix86_cost->movsx;
22525 return false;
22527 case ASHIFT:
22528 if (CONST_INT_P (XEXP (x, 1))
22529 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22531 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22532 if (value == 1)
22534 *total = ix86_cost->add;
22535 return false;
22537 if ((value == 2 || value == 3)
22538 && ix86_cost->lea <= ix86_cost->shift_const)
22540 *total = ix86_cost->lea;
22541 return false;
22544 /* FALLTHRU */
22546 case ROTATE:
22547 case ASHIFTRT:
22548 case LSHIFTRT:
22549 case ROTATERT:
22550 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22552 if (CONST_INT_P (XEXP (x, 1)))
22554 if (INTVAL (XEXP (x, 1)) > 32)
22555 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22556 else
22557 *total = ix86_cost->shift_const * 2;
22559 else
22561 if (GET_CODE (XEXP (x, 1)) == AND)
22562 *total = ix86_cost->shift_var * 2;
22563 else
22564 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22567 else
22569 if (CONST_INT_P (XEXP (x, 1)))
22570 *total = ix86_cost->shift_const;
22571 else
22572 *total = ix86_cost->shift_var;
22574 return false;
22576 case MULT:
22577 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22579 /* ??? SSE scalar cost should be used here. */
22580 *total = ix86_cost->fmul;
22581 return false;
22583 else if (X87_FLOAT_MODE_P (mode))
22585 *total = ix86_cost->fmul;
22586 return false;
22588 else if (FLOAT_MODE_P (mode))
22590 /* ??? SSE vector cost should be used here. */
22591 *total = ix86_cost->fmul;
22592 return false;
22594 else
22596 rtx op0 = XEXP (x, 0);
22597 rtx op1 = XEXP (x, 1);
22598 int nbits;
22599 if (CONST_INT_P (XEXP (x, 1)))
22601 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22602 for (nbits = 0; value != 0; value &= value - 1)
22603 nbits++;
22605 else
22606 /* This is arbitrary. */
22607 nbits = 7;
22609 /* Compute costs correctly for widening multiplication. */
22610 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22611 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22612 == GET_MODE_SIZE (mode))
22614 int is_mulwiden = 0;
22615 enum machine_mode inner_mode = GET_MODE (op0);
22617 if (GET_CODE (op0) == GET_CODE (op1))
22618 is_mulwiden = 1, op1 = XEXP (op1, 0);
22619 else if (CONST_INT_P (op1))
22621 if (GET_CODE (op0) == SIGN_EXTEND)
22622 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22623 == INTVAL (op1);
22624 else
22625 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22628 if (is_mulwiden)
22629 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22632 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22633 + nbits * ix86_cost->mult_bit
22634 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22636 return true;
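/* Worked example (editorial): for a 32-bit multiply by the constant 10
   (binary 1010), NBITS is 2, so the cost computed above is
   mult_init[MODE_INDEX (SImode)] + 2 * mult_bit plus the costs of the
   two operands.  */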
22639 case DIV:
22640 case UDIV:
22641 case MOD:
22642 case UMOD:
22643 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22644 /* ??? SSE cost should be used here. */
22645 *total = ix86_cost->fdiv;
22646 else if (X87_FLOAT_MODE_P (mode))
22647 *total = ix86_cost->fdiv;
22648 else if (FLOAT_MODE_P (mode))
22649 /* ??? SSE vector cost should be used here. */
22650 *total = ix86_cost->fdiv;
22651 else
22652 *total = ix86_cost->divide[MODE_INDEX (mode)];
22653 return false;
22655 case PLUS:
22656 if (GET_MODE_CLASS (mode) == MODE_INT
22657 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22659 if (GET_CODE (XEXP (x, 0)) == PLUS
22660 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22661 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22662 && CONSTANT_P (XEXP (x, 1)))
22664 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22665 if (val == 2 || val == 4 || val == 8)
22667 *total = ix86_cost->lea;
22668 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22669 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22670 outer_code);
22671 *total += rtx_cost (XEXP (x, 1), outer_code);
22672 return true;
22675 else if (GET_CODE (XEXP (x, 0)) == MULT
22676 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22678 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22679 if (val == 2 || val == 4 || val == 8)
22681 *total = ix86_cost->lea;
22682 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22683 *total += rtx_cost (XEXP (x, 1), outer_code);
22684 return true;
22687 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22689 *total = ix86_cost->lea;
22690 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22691 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22692 *total += rtx_cost (XEXP (x, 1), outer_code);
22693 return true;
22696 /* FALLTHRU */
22698 case MINUS:
22699 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22701 /* ??? SSE cost should be used here. */
22702 *total = ix86_cost->fadd;
22703 return false;
22705 else if (X87_FLOAT_MODE_P (mode))
22707 *total = ix86_cost->fadd;
22708 return false;
22710 else if (FLOAT_MODE_P (mode))
22712 /* ??? SSE vector cost should be used here. */
22713 *total = ix86_cost->fadd;
22714 return false;
22716 /* FALLTHRU */
22718 case AND:
22719 case IOR:
22720 case XOR:
22721 if (!TARGET_64BIT && mode == DImode)
22723 *total = (ix86_cost->add * 2
22724 + (rtx_cost (XEXP (x, 0), outer_code)
22725 << (GET_MODE (XEXP (x, 0)) != DImode))
22726 + (rtx_cost (XEXP (x, 1), outer_code)
22727 << (GET_MODE (XEXP (x, 1)) != DImode)));
22728 return true;
22730 /* FALLTHRU */
22732 case NEG:
22733 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22735 /* ??? SSE cost should be used here. */
22736 *total = ix86_cost->fchs;
22737 return false;
22739 else if (X87_FLOAT_MODE_P (mode))
22741 *total = ix86_cost->fchs;
22742 return false;
22744 else if (FLOAT_MODE_P (mode))
22746 /* ??? SSE vector cost should be used here. */
22747 *total = ix86_cost->fchs;
22748 return false;
22750 /* FALLTHRU */
22752 case NOT:
22753 if (!TARGET_64BIT && mode == DImode)
22754 *total = ix86_cost->add * 2;
22755 else
22756 *total = ix86_cost->add;
22757 return false;
22759 case COMPARE:
22760 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22761 && XEXP (XEXP (x, 0), 1) == const1_rtx
22762 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22763 && XEXP (x, 1) == const0_rtx)
22765 /* This kind of construct is implemented using test[bwl].
22766 Treat it as if we had an AND. */
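/* For instance, (compare (zero_extract X (const_int 1) (const_int 3))
   (const_int 0)) tests a single bit of X and is emitted as something like
   "testb $8, ...", so costing it as an AND of X with a one-bit constant
   is a reasonable approximation.  */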
22767 *total = (ix86_cost->add
22768 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22769 + rtx_cost (const1_rtx, outer_code));
22770 return true;
22772 return false;
22774 case FLOAT_EXTEND:
22775 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22776 *total = 0;
22777 return false;
22779 case ABS:
22780 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22781 /* ??? SSE cost should be used here. */
22782 *total = ix86_cost->fabs;
22783 else if (X87_FLOAT_MODE_P (mode))
22784 *total = ix86_cost->fabs;
22785 else if (FLOAT_MODE_P (mode))
22786 /* ??? SSE vector cost should be used here. */
22787 *total = ix86_cost->fabs;
22788 return false;
22790 case SQRT:
22791 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22792 /* ??? SSE cost should be used here. */
22793 *total = ix86_cost->fsqrt;
22794 else if (X87_FLOAT_MODE_P (mode))
22795 *total = ix86_cost->fsqrt;
22796 else if (FLOAT_MODE_P (mode))
22797 /* ??? SSE vector cost should be used here. */
22798 *total = ix86_cost->fsqrt;
22799 return false;
22801 case UNSPEC:
22802 if (XINT (x, 1) == UNSPEC_TP)
22803 *total = 0;
22804 return false;
22806 default:
22807 return false;
22811 #if TARGET_MACHO
22813 static int current_machopic_label_num;
22815 /* Given a symbol name and its associated stub, write out the
22816 definition of the stub. */
22818 void
22819 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22821 unsigned int length;
22822 char *binder_name, *symbol_name, lazy_ptr_name[32];
22823 int label = ++current_machopic_label_num;
22825 /* For 64-bit we shouldn't get here. */
22826 gcc_assert (!TARGET_64BIT);
22828 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22829 symb = (*targetm.strip_name_encoding) (symb);
22831 length = strlen (stub);
22832 binder_name = alloca (length + 32);
22833 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22835 length = strlen (symb);
22836 symbol_name = alloca (length + 32);
22837 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22839 sprintf (lazy_ptr_name, "L%d$lz", label);
22841 if (MACHOPIC_PURE)
22842 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22843 else
22844 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22846 fprintf (file, "%s:\n", stub);
22847 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22849 if (MACHOPIC_PURE)
22851 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22852 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22853 fprintf (file, "\tjmp\t*%%edx\n");
22855 else
22856 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22858 fprintf (file, "%s:\n", binder_name);
22860 if (MACHOPIC_PURE)
22862 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22863 fprintf (file, "\tpushl\t%%eax\n");
22865 else
22866 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22868 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22870 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22871 fprintf (file, "%s:\n", lazy_ptr_name);
22872 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22873 fprintf (file, "\t.long %s\n", binder_name);
22876 void
22877 darwin_x86_file_end (void)
22879 darwin_file_end ();
22880 ix86_file_end ();
22882 #endif /* TARGET_MACHO */
22884 /* Order the registers for register allocator. */
22886 void
22887 x86_order_regs_for_local_alloc (void)
22889 int pos = 0;
22890 int i;
22892 /* First allocate the local general purpose registers. */
22893 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22894 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22895 reg_alloc_order [pos++] = i;
22897 /* Global general purpose registers. */
22898 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22899 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22900 reg_alloc_order [pos++] = i;
22902 /* x87 registers come first in case we are doing FP math
22903 using them. */
22904 if (!TARGET_SSE_MATH)
22905 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22906 reg_alloc_order [pos++] = i;
22908 /* SSE registers. */
22909 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22910 reg_alloc_order [pos++] = i;
22911 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22912 reg_alloc_order [pos++] = i;
22914 /* x87 registers. */
22915 if (TARGET_SSE_MATH)
22916 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22917 reg_alloc_order [pos++] = i;
22919 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22920 reg_alloc_order [pos++] = i;
22922 /* Initialize the rest of the array as we do not allocate some registers
22923 at all. */
22924 while (pos < FIRST_PSEUDO_REGISTER)
22925 reg_alloc_order [pos++] = 0;
22928 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22929 struct attribute_spec.handler. */
22930 static tree
22931 ix86_handle_struct_attribute (tree *node, tree name,
22932 tree args ATTRIBUTE_UNUSED,
22933 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22935 tree *type = NULL;
22936 if (DECL_P (*node))
22938 if (TREE_CODE (*node) == TYPE_DECL)
22939 type = &TREE_TYPE (*node);
22941 else
22942 type = node;
22944 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22945 || TREE_CODE (*type) == UNION_TYPE)))
22947 warning (OPT_Wattributes, "%qs attribute ignored",
22948 IDENTIFIER_POINTER (name));
22949 *no_add_attrs = true;
22952 else if ((is_attribute_p ("ms_struct", name)
22953 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22954 || ((is_attribute_p ("gcc_struct", name)
22955 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22957 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22958 IDENTIFIER_POINTER (name));
22959 *no_add_attrs = true;
22962 return NULL_TREE;
22965 static bool
22966 ix86_ms_bitfield_layout_p (const_tree record_type)
22968 return (TARGET_MS_BITFIELD_LAYOUT &&
22969 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22970 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22973 /* Returns an expression indicating where the this parameter is
22974 located on entry to the FUNCTION. */
22976 static rtx
22977 x86_this_parameter (tree function)
22979 tree type = TREE_TYPE (function);
22980 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22981 int nregs;
22983 if (TARGET_64BIT)
22985 const int *parm_regs;
22987 if (TARGET_64BIT_MS_ABI)
22988 parm_regs = x86_64_ms_abi_int_parameter_registers;
22989 else
22990 parm_regs = x86_64_int_parameter_registers;
22991 return gen_rtx_REG (DImode, parm_regs[aggr]);
22994 nregs = ix86_function_regparm (type, function);
22996 if (nregs > 0 && !stdarg_p (type))
22998 int regno;
23000 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
23001 regno = aggr ? DX_REG : CX_REG;
23002 else
23004 regno = AX_REG;
23005 if (aggr)
23007 regno = DX_REG;
23008 if (nregs == 1)
23009 return gen_rtx_MEM (SImode,
23010 plus_constant (stack_pointer_rtx, 4));
23013 return gen_rtx_REG (SImode, regno);
23016 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
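/* A rough summary of the above: 64-bit targets pass THIS in the first
   integer argument register of the active ABI, or in the second one when the
   return value goes through a hidden reference.  32-bit code passes it in
   %ecx for fastcall, in %eax for other register-parameter functions (shifting
   to %edx, or even onto the stack, for hidden-reference returns), and
   otherwise reads it from the stack just above the return address.  */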
23019 /* Determine whether x86_output_mi_thunk can succeed. */
23021 static bool
23022 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23023 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23024 HOST_WIDE_INT vcall_offset, const_tree function)
23026 /* 64-bit can handle anything. */
23027 if (TARGET_64BIT)
23028 return true;
23030 /* For 32-bit, everything's fine if we have one free register. */
23031 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23032 return true;
23034 /* Need a free register for vcall_offset. */
23035 if (vcall_offset)
23036 return false;
23038 /* Need a free register for GOT references. */
23039 if (flag_pic && !(*targetm.binds_local_p) (function))
23040 return false;
23042 /* Otherwise ok. */
23043 return true;
23046 /* Output the assembler code for a thunk function. THUNK_DECL is the
23047 declaration for the thunk function itself, FUNCTION is the decl for
23048 the target function. DELTA is an immediate constant offset to be
23049 added to THIS. If VCALL_OFFSET is nonzero, the word at
23050 *(*this + vcall_offset) should be added to THIS. */
23052 static void
23053 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23054 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23055 HOST_WIDE_INT vcall_offset, tree function)
23057 rtx xops[3];
23058 rtx this_param = x86_this_parameter (function);
23059 rtx this_reg, tmp;
23061 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23062 pull it in now and let DELTA benefit. */
23063 if (REG_P (this_param))
23064 this_reg = this_param;
23065 else if (vcall_offset)
23067 /* Put the this parameter into %eax. */
23068 xops[0] = this_param;
23069 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23070 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23072 else
23073 this_reg = NULL_RTX;
23075 /* Adjust the this parameter by a fixed constant. */
23076 if (delta)
23078 xops[0] = GEN_INT (delta);
23079 xops[1] = this_reg ? this_reg : this_param;
23080 if (TARGET_64BIT)
23082 if (!x86_64_general_operand (xops[0], DImode))
23084 tmp = gen_rtx_REG (DImode, R10_REG);
23085 xops[1] = tmp;
23086 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23087 xops[0] = tmp;
23088 xops[1] = this_param;
23090 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23092 else
23093 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23096 /* Adjust the this parameter by a value stored in the vtable. */
23097 if (vcall_offset)
23099 if (TARGET_64BIT)
23100 tmp = gen_rtx_REG (DImode, R10_REG);
23101 else
23103 int tmp_regno = CX_REG;
23104 if (lookup_attribute ("fastcall",
23105 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23106 tmp_regno = AX_REG;
23107 tmp = gen_rtx_REG (SImode, tmp_regno);
23110 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23111 xops[1] = tmp;
23112 if (TARGET_64BIT)
23113 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23114 else
23115 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23117 /* Adjust the this parameter. */
23118 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23119 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23121 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23122 xops[0] = GEN_INT (vcall_offset);
23123 xops[1] = tmp2;
23124 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23125 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23127 xops[1] = this_reg;
23128 if (TARGET_64BIT)
23129 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23130 else
23131 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23134 /* If necessary, drop THIS back to its stack slot. */
23135 if (this_reg && this_reg != this_param)
23137 xops[0] = this_reg;
23138 xops[1] = this_param;
23139 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23142 xops[0] = XEXP (DECL_RTL (function), 0);
23143 if (TARGET_64BIT)
23145 if (!flag_pic || (*targetm.binds_local_p) (function))
23146 output_asm_insn ("jmp\t%P0", xops);
23147 /* All thunks should be in the same object as their target,
23148 and thus binds_local_p should be true. */
23149 else if (TARGET_64BIT_MS_ABI)
23150 gcc_unreachable ();
23151 else
23153 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23154 tmp = gen_rtx_CONST (Pmode, tmp);
23155 tmp = gen_rtx_MEM (QImode, tmp);
23156 xops[0] = tmp;
23157 output_asm_insn ("jmp\t%A0", xops);
23160 else
23162 if (!flag_pic || (*targetm.binds_local_p) (function))
23163 output_asm_insn ("jmp\t%P0", xops);
23164 else
23165 #if TARGET_MACHO
23166 if (TARGET_MACHO)
23168 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23169 tmp = (gen_rtx_SYMBOL_REF
23170 (Pmode,
23171 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23172 tmp = gen_rtx_MEM (QImode, tmp);
23173 xops[0] = tmp;
23174 output_asm_insn ("jmp\t%0", xops);
23176 else
23177 #endif /* TARGET_MACHO */
23179 tmp = gen_rtx_REG (SImode, CX_REG);
23180 output_set_got (tmp, NULL_RTX);
23182 xops[1] = tmp;
23183 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23184 output_asm_insn ("jmp\t{*}%1", xops);
23189 static void
23190 x86_file_start (void)
23192 default_file_start ();
23193 #if TARGET_MACHO
23194 darwin_file_start ();
23195 #endif
23196 if (X86_FILE_START_VERSION_DIRECTIVE)
23197 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23198 if (X86_FILE_START_FLTUSED)
23199 fputs ("\t.global\t__fltused\n", asm_out_file);
23200 if (ix86_asm_dialect == ASM_INTEL)
23201 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23204 int
23205 x86_field_alignment (tree field, int computed)
23207 enum machine_mode mode;
23208 tree type = TREE_TYPE (field);
23210 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23211 return computed;
23212 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
23213 ? get_inner_array_type (type) : type);
23214 if (mode == DFmode || mode == DCmode
23215 || GET_MODE_CLASS (mode) == MODE_INT
23216 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23217 return MIN (32, computed);
23218 return computed;
23221 /* Output assembler code to FILE to increment profiler label # LABELNO
23222 for profiling a function entry. */
23223 void
23224 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23226 if (TARGET_64BIT)
23228 #ifndef NO_PROFILE_COUNTERS
23229 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23230 #endif
23232 if (!TARGET_64BIT_MS_ABI && flag_pic)
23233 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23234 else
23235 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23237 else if (flag_pic)
23239 #ifndef NO_PROFILE_COUNTERS
23240 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23241 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23242 #endif
23243 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23245 else
23247 #ifndef NO_PROFILE_COUNTERS
23248 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23249 PROFILE_COUNT_REGISTER);
23250 #endif
23251 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23255 /* We don't have exact information about the insn sizes, but we may assume
23256 quite safely that we are informed about all 1-byte insns and about memory
23257 address sizes. This is enough to eliminate unnecessary padding in
23258 99% of cases. */
23260 static int
23261 min_insn_size (rtx insn)
23263 int l = 0;
23265 if (!INSN_P (insn) || !active_insn_p (insn))
23266 return 0;
23268 /* Discard alignments we've emitted and jump instructions. */
23269 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23270 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23271 return 0;
23272 if (JUMP_P (insn)
23273 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23274 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23275 return 0;
23277 /* Important case - calls are always 5 bytes.
23278 It is common to have many calls in a row. */
23279 if (CALL_P (insn)
23280 && symbolic_reference_mentioned_p (PATTERN (insn))
23281 && !SIBLING_CALL_P (insn))
23282 return 5;
23283 if (get_attr_length (insn) <= 1)
23284 return 1;
23286 /* For normal instructions we may rely on the sizes of addresses
23287 and the presence of a symbol to require 4 bytes of encoding.
23288 This is not the case for jumps, where references are PC relative. */
23289 if (!JUMP_P (insn))
23291 l = get_attr_length_address (insn);
23292 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23293 l = 4;
23295 if (l)
23296 return 1+l;
23297 else
23298 return 2;
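/* These are deliberately conservative estimates: a direct call is assumed to
   be exactly 5 bytes (opcode plus 32-bit displacement), known 1-byte insns
   count as 1, and other non-jump insns count as one byte plus their address
   length, with that length bumped to 4 when a symbolic operand forces a full
   displacement, or as 2 bytes when no address length is recorded.
   Underestimating errs on the side of inserting padding below, which costs a
   little space but does not miss a problematic window.  */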
23301 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
23302 window. */
23304 static void
23305 ix86_avoid_jump_misspredicts (void)
23307 rtx insn, start = get_insns ();
23308 int nbytes = 0, njumps = 0;
23309 int isjump = 0;
23311 /* Look for all minimal intervals of instructions containing 4 jumps.
23312 The intervals are bounded by START and INSN. NBYTES is the total
23313 size of the instructions in the interval, including INSN and not including
23314 START. When NBYTES is smaller than 16 bytes, it is possible
23315 that the end of START and INSN end up in the same 16-byte window.
23317 The smallest offset in the window at which INSN can start is the case where
23318 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
23319 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
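/* As a worked example: if INSN is the fourth jump, the interval measures
   NBYTES == 10 bytes and min_insn_size (INSN) == 2, the loop below computes
   padsize = 15 - 10 + 2 = 7 and emits that as a maximum-skip alignment
   request just before INSN, which is intended to push the four jumps apart
   so that they no longer share a single 16-byte fetch window.  */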
23321 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23324 nbytes += min_insn_size (insn);
23325 if (dump_file)
23326 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23327 INSN_UID (insn), min_insn_size (insn));
23328 if ((JUMP_P (insn)
23329 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23330 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
23331 || CALL_P (insn))
23332 njumps++;
23333 else
23334 continue;
23336 while (njumps > 3)
23338 start = NEXT_INSN (start);
23339 if ((JUMP_P (start)
23340 && GET_CODE (PATTERN (start)) != ADDR_VEC
23341 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23342 || CALL_P (start))
23343 njumps--, isjump = 1;
23344 else
23345 isjump = 0;
23346 nbytes -= min_insn_size (start);
23348 gcc_assert (njumps >= 0);
23349 if (dump_file)
23350 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23351 INSN_UID (start), INSN_UID (insn), nbytes);
23353 if (njumps == 3 && isjump && nbytes < 16)
23355 int padsize = 15 - nbytes + min_insn_size (insn);
23357 if (dump_file)
23358 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23359 INSN_UID (insn), padsize);
23360 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23365 /* AMD Athlon works faster
23366 when RET is not the destination of a conditional jump or directly preceded
23367 by another jump instruction. We avoid the penalty by inserting a NOP just
23368 before the RET instruction in such cases. */
23369 static void
23370 ix86_pad_returns (void)
23372 edge e;
23373 edge_iterator ei;
23375 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23377 basic_block bb = e->src;
23378 rtx ret = BB_END (bb);
23379 rtx prev;
23380 bool replace = false;
23382 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23383 || !maybe_hot_bb_p (bb))
23384 continue;
23385 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23386 if (active_insn_p (prev) || LABEL_P (prev))
23387 break;
23388 if (prev && LABEL_P (prev))
23390 edge e;
23391 edge_iterator ei;
23393 FOR_EACH_EDGE (e, ei, bb->preds)
23394 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23395 && !(e->flags & EDGE_FALLTHRU))
23396 replace = true;
23398 if (!replace)
23400 prev = prev_active_insn (ret);
23401 if (prev
23402 && ((JUMP_P (prev) && any_condjump_p (prev))
23403 || CALL_P (prev)))
23404 replace = true;
23405 /* Empty functions get a branch mispredict even when the jump destination
23406 is not visible to us. */
23407 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23408 replace = true;
23410 if (replace)
23412 emit_insn_before (gen_return_internal_long (), ret);
23413 delete_insn (ret);
23418 /* Implement machine specific optimizations. We implement padding of returns
23419 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
23420 static void
23421 ix86_reorg (void)
23423 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23424 ix86_pad_returns ();
23425 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23426 ix86_avoid_jump_misspredicts ();
23429 /* Return nonzero when a QImode register that must be represented via a REX
23430 prefix is used. */
23431 bool
23432 x86_extended_QIreg_mentioned_p (rtx insn)
23434 int i;
23435 extract_insn_cached (insn);
23436 for (i = 0; i < recog_data.n_operands; i++)
23437 if (REG_P (recog_data.operand[i])
23438 && REGNO (recog_data.operand[i]) >= 4)
23439 return true;
23440 return false;
23443 /* Return nonzero when P points to a register encoded via a REX prefix.
23444 Called via for_each_rtx. */
23445 static int
23446 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23448 unsigned int regno;
23449 if (!REG_P (*p))
23450 return 0;
23451 regno = REGNO (*p);
23452 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23455 /* Return true when INSN mentions a register that must be encoded using a REX
23456 prefix. */
23457 bool
23458 x86_extended_reg_mentioned_p (rtx insn)
23460 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23463 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23464 optabs would emit if we didn't have TFmode patterns. */
23466 void
23467 x86_emit_floatuns (rtx operands[2])
23469 rtx neglab, donelab, i0, i1, f0, in, out;
23470 enum machine_mode mode, inmode;
23472 inmode = GET_MODE (operands[1]);
23473 gcc_assert (inmode == SImode || inmode == DImode);
23475 out = operands[0];
23476 in = force_reg (inmode, operands[1]);
23477 mode = GET_MODE (out);
23478 neglab = gen_label_rtx ();
23479 donelab = gen_label_rtx ();
23480 f0 = gen_reg_rtx (mode);
23482 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23484 expand_float (out, in, 0);
23486 emit_jump_insn (gen_jump (donelab));
23487 emit_barrier ();
23489 emit_label (neglab);
23491 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23492 1, OPTAB_DIRECT);
23493 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23494 1, OPTAB_DIRECT);
23495 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23497 expand_float (f0, i0, 0);
23499 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23501 emit_label (donelab);
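/* The path through NEGLAB above is the usual unsigned-to-float trick: a value
   with the top bit set cannot be converted directly by the signed conversion,
   so we compute (IN >> 1) | (IN & 1), convert that, and double the result;
   ORing the low bit back in acts as a sticky bit that keeps the final
   rounding correct for odd inputs.  */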
23504 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23505 with all elements equal to VAR. Return true if successful. */
23507 static bool
23508 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23509 rtx target, rtx val)
23511 enum machine_mode smode, wsmode, wvmode;
23512 rtx x;
23514 switch (mode)
23516 case V2SImode:
23517 case V2SFmode:
23518 if (!mmx_ok)
23519 return false;
23520 /* FALLTHRU */
23522 case V2DFmode:
23523 case V2DImode:
23524 case V4SFmode:
23525 case V4SImode:
23526 val = force_reg (GET_MODE_INNER (mode), val);
23527 x = gen_rtx_VEC_DUPLICATE (mode, val);
23528 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23529 return true;
23531 case V4HImode:
23532 if (!mmx_ok)
23533 return false;
23534 if (TARGET_SSE || TARGET_3DNOW_A)
23536 val = gen_lowpart (SImode, val);
23537 x = gen_rtx_TRUNCATE (HImode, val);
23538 x = gen_rtx_VEC_DUPLICATE (mode, x);
23539 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23540 return true;
23542 else
23544 smode = HImode;
23545 wsmode = SImode;
23546 wvmode = V2SImode;
23547 goto widen;
23550 case V8QImode:
23551 if (!mmx_ok)
23552 return false;
23553 smode = QImode;
23554 wsmode = HImode;
23555 wvmode = V4HImode;
23556 goto widen;
23557 case V8HImode:
23558 if (TARGET_SSE2)
23560 rtx tmp1, tmp2;
23561 /* Extend HImode to SImode using a paradoxical SUBREG. */
23562 tmp1 = gen_reg_rtx (SImode);
23563 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23564 /* Insert the SImode value as low element of V4SImode vector. */
23565 tmp2 = gen_reg_rtx (V4SImode);
23566 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23567 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23568 CONST0_RTX (V4SImode),
23569 const1_rtx);
23570 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23571 /* Cast the V4SImode vector back to a V8HImode vector. */
23572 tmp1 = gen_reg_rtx (V8HImode);
23573 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23574 /* Duplicate the low short through the whole low SImode word. */
23575 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23576 /* Cast the V8HImode vector back to a V4SImode vector. */
23577 tmp2 = gen_reg_rtx (V4SImode);
23578 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23579 /* Replicate the low element of the V4SImode vector. */
23580 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23581 /* Cast the V4SImode vector back to V8HImode, and store in target. */
23582 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23583 return true;
23585 smode = HImode;
23586 wsmode = SImode;
23587 wvmode = V4SImode;
23588 goto widen;
23589 case V16QImode:
23590 if (TARGET_SSE2)
23592 rtx tmp1, tmp2;
23593 /* Extend QImode to SImode using a paradoxical SUBREG. */
23594 tmp1 = gen_reg_rtx (SImode);
23595 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23596 /* Insert the SImode value as low element of V4SImode vector. */
23597 tmp2 = gen_reg_rtx (V4SImode);
23598 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23599 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23600 CONST0_RTX (V4SImode),
23601 const1_rtx);
23602 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23603 /* Cast the V4SImode vector back to a V16QImode vector. */
23604 tmp1 = gen_reg_rtx (V16QImode);
23605 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23606 /* Duplicate the low byte through the whole low SImode word. */
23607 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23608 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23609 /* Cast the V16QImode vector back to a V4SImode vector. */
23610 tmp2 = gen_reg_rtx (V4SImode);
23611 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23612 /* Replicate the low element of the V4SImode vector. */
23613 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23614 /* Cast the V4SImode vector back to V16QImode, and store in target. */
23615 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23616 return true;
23618 smode = QImode;
23619 wsmode = HImode;
23620 wvmode = V8HImode;
23621 goto widen;
23622 widen:
23623 /* Replicate the value once into the next wider mode and recurse. */
23624 val = convert_modes (wsmode, smode, val, true);
23625 x = expand_simple_binop (wsmode, ASHIFT, val,
23626 GEN_INT (GET_MODE_BITSIZE (smode)),
23627 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23628 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23630 x = gen_reg_rtx (wvmode);
23631 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23632 gcc_unreachable ();
23633 emit_move_insn (target, gen_lowpart (mode, x));
23634 return true;
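/* For example, broadcasting a QImode value into V8QImode (the MMX case) goes
   through this path: the byte is widened to HImode as (val | (val << 8)), so
   both halves of the HImode value are equal, and the function then recurses
   to broadcast that HImode value across V4HImode.  */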
23636 default:
23637 return false;
23641 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23642 whose ONE_VAR element is VAR, and other elements are zero. Return true
23643 if successful. */
23645 static bool
23646 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23647 rtx target, rtx var, int one_var)
23649 enum machine_mode vsimode;
23650 rtx new_target;
23651 rtx x, tmp;
23653 switch (mode)
23655 case V2SFmode:
23656 case V2SImode:
23657 if (!mmx_ok)
23658 return false;
23659 /* FALLTHRU */
23661 case V2DFmode:
23662 case V2DImode:
23663 if (one_var != 0)
23664 return false;
23665 var = force_reg (GET_MODE_INNER (mode), var);
23666 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23667 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23668 return true;
23670 case V4SFmode:
23671 case V4SImode:
23672 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23673 new_target = gen_reg_rtx (mode);
23674 else
23675 new_target = target;
23676 var = force_reg (GET_MODE_INNER (mode), var);
23677 x = gen_rtx_VEC_DUPLICATE (mode, var);
23678 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23679 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23680 if (one_var != 0)
23682 /* We need to shuffle the value to the correct position, so
23683 create a new pseudo to store the intermediate result. */
23685 /* With SSE2, we can use the integer shuffle insns. */
23686 if (mode != V4SFmode && TARGET_SSE2)
23688 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23689 GEN_INT (1),
23690 GEN_INT (one_var == 1 ? 0 : 1),
23691 GEN_INT (one_var == 2 ? 0 : 1),
23692 GEN_INT (one_var == 3 ? 0 : 1)));
23693 if (target != new_target)
23694 emit_move_insn (target, new_target);
23695 return true;
23698 /* Otherwise convert the intermediate result to V4SFmode and
23699 use the SSE1 shuffle instructions. */
23700 if (mode != V4SFmode)
23702 tmp = gen_reg_rtx (V4SFmode);
23703 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23705 else
23706 tmp = new_target;
23708 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23709 GEN_INT (1),
23710 GEN_INT (one_var == 1 ? 0 : 1),
23711 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23712 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23714 if (mode != V4SFmode)
23715 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23716 else if (tmp != target)
23717 emit_move_insn (target, tmp);
23719 else if (target != new_target)
23720 emit_move_insn (target, new_target);
23721 return true;
23723 case V8HImode:
23724 case V16QImode:
23725 vsimode = V4SImode;
23726 goto widen;
23727 case V4HImode:
23728 case V8QImode:
23729 if (!mmx_ok)
23730 return false;
23731 vsimode = V2SImode;
23732 goto widen;
23733 widen:
23734 if (one_var != 0)
23735 return false;
23737 /* Zero extend the variable element to SImode and recurse. */
23738 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23740 x = gen_reg_rtx (vsimode);
23741 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23742 var, one_var))
23743 gcc_unreachable ();
23745 emit_move_insn (target, gen_lowpart (mode, x));
23746 return true;
23748 default:
23749 return false;
23753 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23754 consisting of the values in VALS. It is known that all elements
23755 except ONE_VAR are constants. Return true if successful. */
23757 static bool
23758 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23759 rtx target, rtx vals, int one_var)
23761 rtx var = XVECEXP (vals, 0, one_var);
23762 enum machine_mode wmode;
23763 rtx const_vec, x;
23765 const_vec = copy_rtx (vals);
23766 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23767 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23769 switch (mode)
23771 case V2DFmode:
23772 case V2DImode:
23773 case V2SFmode:
23774 case V2SImode:
23775 /* For the two element vectors, it's just as easy to use
23776 the general case. */
23777 return false;
23779 case V4SFmode:
23780 case V4SImode:
23781 case V8HImode:
23782 case V4HImode:
23783 break;
23785 case V16QImode:
23786 wmode = V8HImode;
23787 goto widen;
23788 case V8QImode:
23789 wmode = V4HImode;
23790 goto widen;
23791 widen:
23792 /* There's no way to set one QImode entry easily. Combine
23793 the variable value with its adjacent constant value, and
23794 promote to an HImode set. */
23795 x = XVECEXP (vals, 0, one_var ^ 1);
23796 if (one_var & 1)
23798 var = convert_modes (HImode, QImode, var, true);
23799 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23800 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23801 x = GEN_INT (INTVAL (x) & 0xff);
23803 else
23805 var = convert_modes (HImode, QImode, var, true);
23806 x = gen_int_mode (INTVAL (x) << 8, HImode);
23808 if (x != const0_rtx)
23809 var = expand_simple_binop (HImode, IOR, var, x, var,
23810 1, OPTAB_LIB_WIDEN);
23812 x = gen_reg_rtx (wmode);
23813 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23814 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23816 emit_move_insn (target, gen_lowpart (mode, x));
23817 return true;
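/* For example, if the only variable element of a V16QImode vector is element
   5, it is paired with constant element 4: the variable byte is zero-extended,
   shifted left by 8 and IORed with the low constant byte, and the combined
   HImode value is then stored into element 5 >> 1 == 2 of the V8HImode view
   of the constant vector before the final lowpart move.  */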
23819 default:
23820 return false;
23823 emit_move_insn (target, const_vec);
23824 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23825 return true;
23828 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23829 all values variable, and none identical. */
23831 static void
23832 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23833 rtx target, rtx vals)
23835 enum machine_mode half_mode = GET_MODE_INNER (mode);
23836 rtx op0 = NULL, op1 = NULL;
23837 bool use_vec_concat = false;
23839 switch (mode)
23841 case V2SFmode:
23842 case V2SImode:
23843 if (!mmx_ok && !TARGET_SSE)
23844 break;
23845 /* FALLTHRU */
23847 case V2DFmode:
23848 case V2DImode:
23849 /* For the two element vectors, we always implement VEC_CONCAT. */
23850 op0 = XVECEXP (vals, 0, 0);
23851 op1 = XVECEXP (vals, 0, 1);
23852 use_vec_concat = true;
23853 break;
23855 case V4SFmode:
23856 half_mode = V2SFmode;
23857 goto half;
23858 case V4SImode:
23859 half_mode = V2SImode;
23860 goto half;
23861 half:
23863 rtvec v;
23865 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23866 Recurse to load the two halves. */
23868 op0 = gen_reg_rtx (half_mode);
23869 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23870 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23872 op1 = gen_reg_rtx (half_mode);
23873 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23874 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23876 use_vec_concat = true;
23878 break;
23880 case V8HImode:
23881 case V16QImode:
23882 case V4HImode:
23883 case V8QImode:
23884 break;
23886 default:
23887 gcc_unreachable ();
23890 if (use_vec_concat)
23892 if (!register_operand (op0, half_mode))
23893 op0 = force_reg (half_mode, op0);
23894 if (!register_operand (op1, half_mode))
23895 op1 = force_reg (half_mode, op1);
23897 emit_insn (gen_rtx_SET (VOIDmode, target,
23898 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23900 else
23902 int i, j, n_elts, n_words, n_elt_per_word;
23903 enum machine_mode inner_mode;
23904 rtx words[4], shift;
23906 inner_mode = GET_MODE_INNER (mode);
23907 n_elts = GET_MODE_NUNITS (mode);
23908 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23909 n_elt_per_word = n_elts / n_words;
23910 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23912 for (i = 0; i < n_words; ++i)
23914 rtx word = NULL_RTX;
23916 for (j = 0; j < n_elt_per_word; ++j)
23918 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23919 elt = convert_modes (word_mode, inner_mode, elt, true);
23921 if (j == 0)
23922 word = elt;
23923 else
23925 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23926 word, 1, OPTAB_LIB_WIDEN);
23927 word = expand_simple_binop (word_mode, IOR, word, elt,
23928 word, 1, OPTAB_LIB_WIDEN);
23932 words[i] = word;
23935 if (n_words == 1)
23936 emit_move_insn (target, gen_lowpart (mode, words[0]));
23937 else if (n_words == 2)
23939 rtx tmp = gen_reg_rtx (mode);
23940 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23941 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23942 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23943 emit_move_insn (target, tmp);
23945 else if (n_words == 4)
23947 rtx tmp = gen_reg_rtx (V4SImode);
23948 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23949 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23950 emit_move_insn (target, gen_lowpart (mode, tmp));
23952 else
23953 gcc_unreachable ();
23957 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23958 instructions unless MMX_OK is true. */
23960 void
23961 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23963 enum machine_mode mode = GET_MODE (target);
23964 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23965 int n_elts = GET_MODE_NUNITS (mode);
23966 int n_var = 0, one_var = -1;
23967 bool all_same = true, all_const_zero = true;
23968 int i;
23969 rtx x;
23971 for (i = 0; i < n_elts; ++i)
23973 x = XVECEXP (vals, 0, i);
23974 if (!(CONST_INT_P (x)
23975 || GET_CODE (x) == CONST_DOUBLE
23976 || GET_CODE (x) == CONST_FIXED))
23977 n_var++, one_var = i;
23978 else if (x != CONST0_RTX (inner_mode))
23979 all_const_zero = false;
23980 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23981 all_same = false;
23984 /* Constants are best loaded from the constant pool. */
23985 if (n_var == 0)
23987 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23988 return;
23991 /* If all values are identical, broadcast the value. */
23992 if (all_same
23993 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23994 XVECEXP (vals, 0, 0)))
23995 return;
23997 /* Values where only one field is non-constant are best loaded from
23998 the pool and overwritten with a move later. */
23999 if (n_var == 1)
24001 if (all_const_zero
24002 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24003 XVECEXP (vals, 0, one_var),
24004 one_var))
24005 return;
24007 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
24008 return;
24011 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
24014 void
24015 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24017 enum machine_mode mode = GET_MODE (target);
24018 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24019 bool use_vec_merge = false;
24020 rtx tmp;
24022 switch (mode)
24024 case V2SFmode:
24025 case V2SImode:
24026 if (mmx_ok)
24028 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24029 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24030 if (elt == 0)
24031 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24032 else
24033 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24034 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24035 return;
24037 break;
24039 case V2DImode:
24040 use_vec_merge = TARGET_SSE4_1;
24041 if (use_vec_merge)
24042 break;
24044 case V2DFmode:
24046 rtx op0, op1;
24048 /* For the two element vectors, we implement a VEC_CONCAT with
24049 the extraction of the other element. */
24051 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24052 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24054 if (elt == 0)
24055 op0 = val, op1 = tmp;
24056 else
24057 op0 = tmp, op1 = val;
24059 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24060 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24062 return;
24064 case V4SFmode:
24065 use_vec_merge = TARGET_SSE4_1;
24066 if (use_vec_merge)
24067 break;
24069 switch (elt)
24071 case 0:
24072 use_vec_merge = true;
24073 break;
24075 case 1:
24076 /* tmp = target = A B C D */
24077 tmp = copy_to_reg (target);
24078 /* target = A A B B */
24079 emit_insn (gen_sse_unpcklps (target, target, target));
24080 /* target = X A B B */
24081 ix86_expand_vector_set (false, target, val, 0);
24082 /* target = A X C D */
24083 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24084 GEN_INT (1), GEN_INT (0),
24085 GEN_INT (2+4), GEN_INT (3+4)));
24086 return;
24088 case 2:
24089 /* tmp = target = A B C D */
24090 tmp = copy_to_reg (target);
24091 /* tmp = X B C D */
24092 ix86_expand_vector_set (false, tmp, val, 0);
24093 /* target = A B X D */
24094 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24095 GEN_INT (0), GEN_INT (1),
24096 GEN_INT (0+4), GEN_INT (3+4)));
24097 return;
24099 case 3:
24100 /* tmp = target = A B C D */
24101 tmp = copy_to_reg (target);
24102 /* tmp = X B C D */
24103 ix86_expand_vector_set (false, tmp, val, 0);
24104 /* target = A B X D */
24105 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24106 GEN_INT (0), GEN_INT (1),
24107 GEN_INT (2+4), GEN_INT (0+4)));
24108 return;
24110 default:
24111 gcc_unreachable ();
24113 break;
24115 case V4SImode:
24116 use_vec_merge = TARGET_SSE4_1;
24117 if (use_vec_merge)
24118 break;
24120 /* Element 0 handled by vec_merge below. */
24121 if (elt == 0)
24123 use_vec_merge = true;
24124 break;
24127 if (TARGET_SSE2)
24129 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24130 store into element 0, then shuffle them back. */
24132 rtx order[4];
24134 order[0] = GEN_INT (elt);
24135 order[1] = const1_rtx;
24136 order[2] = const2_rtx;
24137 order[3] = GEN_INT (3);
24138 order[elt] = const0_rtx;
24140 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24141 order[1], order[2], order[3]));
24143 ix86_expand_vector_set (false, target, val, 0);
24145 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24146 order[1], order[2], order[3]));
24148 else
24150 /* For SSE1, we have to reuse the V4SF code. */
24151 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24152 gen_lowpart (SFmode, val), elt);
24154 return;
24156 case V8HImode:
24157 use_vec_merge = TARGET_SSE2;
24158 break;
24159 case V4HImode:
24160 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24161 break;
24163 case V16QImode:
24164 use_vec_merge = TARGET_SSE4_1;
24165 break;
24167 case V8QImode:
24168 default:
24169 break;
24172 if (use_vec_merge)
24174 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24175 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24176 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24178 else
24180 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24182 emit_move_insn (mem, target);
24184 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24185 emit_move_insn (tmp, val);
24187 emit_move_insn (target, mem);
24191 void
24192 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24194 enum machine_mode mode = GET_MODE (vec);
24195 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24196 bool use_vec_extr = false;
24197 rtx tmp;
24199 switch (mode)
24201 case V2SImode:
24202 case V2SFmode:
24203 if (!mmx_ok)
24204 break;
24205 /* FALLTHRU */
24207 case V2DFmode:
24208 case V2DImode:
24209 use_vec_extr = true;
24210 break;
24212 case V4SFmode:
24213 use_vec_extr = TARGET_SSE4_1;
24214 if (use_vec_extr)
24215 break;
24217 switch (elt)
24219 case 0:
24220 tmp = vec;
24221 break;
24223 case 1:
24224 case 3:
24225 tmp = gen_reg_rtx (mode);
24226 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24227 GEN_INT (elt), GEN_INT (elt),
24228 GEN_INT (elt+4), GEN_INT (elt+4)));
24229 break;
24231 case 2:
24232 tmp = gen_reg_rtx (mode);
24233 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24234 break;
24236 default:
24237 gcc_unreachable ();
24239 vec = tmp;
24240 use_vec_extr = true;
24241 elt = 0;
24242 break;
24244 case V4SImode:
24245 use_vec_extr = TARGET_SSE4_1;
24246 if (use_vec_extr)
24247 break;
24249 if (TARGET_SSE2)
24251 switch (elt)
24253 case 0:
24254 tmp = vec;
24255 break;
24257 case 1:
24258 case 3:
24259 tmp = gen_reg_rtx (mode);
24260 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24261 GEN_INT (elt), GEN_INT (elt),
24262 GEN_INT (elt), GEN_INT (elt)));
24263 break;
24265 case 2:
24266 tmp = gen_reg_rtx (mode);
24267 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24268 break;
24270 default:
24271 gcc_unreachable ();
24273 vec = tmp;
24274 use_vec_extr = true;
24275 elt = 0;
24277 else
24279 /* For SSE1, we have to reuse the V4SF code. */
24280 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24281 gen_lowpart (V4SFmode, vec), elt);
24282 return;
24284 break;
24286 case V8HImode:
24287 use_vec_extr = TARGET_SSE2;
24288 break;
24289 case V4HImode:
24290 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24291 break;
24293 case V16QImode:
24294 use_vec_extr = TARGET_SSE4_1;
24295 break;
24297 case V8QImode:
24298 /* ??? Could extract the appropriate HImode element and shift. */
24299 default:
24300 break;
24303 if (use_vec_extr)
24305 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24306 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24308 /* Let the rtl optimizers know about the zero extension performed. */
24309 if (inner_mode == QImode || inner_mode == HImode)
24311 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24312 target = gen_lowpart (SImode, target);
24315 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24317 else
24319 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24321 emit_move_insn (mem, vec);
24323 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24324 emit_move_insn (target, tmp);
24328 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
24329 pattern to reduce; DEST is the destination; IN is the input vector. */
24331 void
24332 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24334 rtx tmp1, tmp2, tmp3;
24336 tmp1 = gen_reg_rtx (V4SFmode);
24337 tmp2 = gen_reg_rtx (V4SFmode);
24338 tmp3 = gen_reg_rtx (V4SFmode);
24340 emit_insn (gen_sse_movhlps (tmp1, in, in));
24341 emit_insn (fn (tmp2, tmp1, in));
24343 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24344 GEN_INT (1), GEN_INT (1),
24345 GEN_INT (1+4), GEN_INT (1+4)));
24346 emit_insn (fn (dest, tmp2, tmp3));
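/* With IN = {a0, a1, a2, a3}: movhlps forms {a2, a3, a2, a3}, the first FN
   combines that with IN element-wise, the shufps then replicates element 1 of
   the partial result, and the final FN leaves the reduction of all four
   elements in element 0 of DEST (the remaining elements hold partial values).  */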
24349 /* Target hook for scalar_mode_supported_p. */
24350 static bool
24351 ix86_scalar_mode_supported_p (enum machine_mode mode)
24353 if (DECIMAL_FLOAT_MODE_P (mode))
24354 return true;
24355 else if (mode == TFmode)
24356 return TARGET_64BIT;
24357 else
24358 return default_scalar_mode_supported_p (mode);
24361 /* Implements target hook vector_mode_supported_p. */
24362 static bool
24363 ix86_vector_mode_supported_p (enum machine_mode mode)
24365 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24366 return true;
24367 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24368 return true;
24369 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24370 return true;
24371 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24372 return true;
24373 return false;
24376 /* Target hook for c_mode_for_suffix. */
24377 static enum machine_mode
24378 ix86_c_mode_for_suffix (char suffix)
24380 if (TARGET_64BIT && suffix == 'q')
24381 return TFmode;
24382 if (TARGET_MMX && suffix == 'w')
24383 return XFmode;
24385 return VOIDmode;
24388 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24390 We do this in the new i386 backend to maintain source compatibility
24391 with the old cc0-based compiler. */
24393 static tree
24394 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24395 tree inputs ATTRIBUTE_UNUSED,
24396 tree clobbers)
24398 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24399 clobbers);
24400 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24401 clobbers);
24402 return clobbers;
24405 /* Implements the target hook targetm.asm.encode_section_info. This
24406 is not used by NetWare. */
24408 static void ATTRIBUTE_UNUSED
24409 ix86_encode_section_info (tree decl, rtx rtl, int first)
24411 default_encode_section_info (decl, rtl, first);
24413 if (TREE_CODE (decl) == VAR_DECL
24414 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24415 && ix86_in_large_data_p (decl))
24416 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24419 /* Worker function for REVERSE_CONDITION. */
24421 enum rtx_code
24422 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24424 return (mode != CCFPmode && mode != CCFPUmode
24425 ? reverse_condition (code)
24426 : reverse_condition_maybe_unordered (code));
24429 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24430 to OPERANDS[0]. */
24432 const char *
24433 output_387_reg_move (rtx insn, rtx *operands)
24435 if (REG_P (operands[0]))
24437 if (REG_P (operands[1])
24438 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24440 if (REGNO (operands[0]) == FIRST_STACK_REG)
24441 return output_387_ffreep (operands, 0);
24442 return "fstp\t%y0";
24444 if (STACK_TOP_P (operands[0]))
24445 return "fld%z1\t%y1";
24446 return "fst\t%y0";
24448 else if (MEM_P (operands[0]))
24450 gcc_assert (REG_P (operands[1]));
24451 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24452 return "fstp%z0\t%y0";
24453 else
24455 /* There is no non-popping store to memory for XFmode.
24456 So if we need one, follow the store with a load. */
24457 if (GET_MODE (operands[0]) == XFmode)
24458 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24459 else
24460 return "fst%z0\t%y0";
24463 else
24464 gcc_unreachable();
24467 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24468 FP status register is set. */
24470 void
24471 ix86_emit_fp_unordered_jump (rtx label)
24473 rtx reg = gen_reg_rtx (HImode);
24474 rtx temp;
24476 emit_insn (gen_x86_fnstsw_1 (reg));
24478 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24480 emit_insn (gen_x86_sahf_1 (reg));
24482 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24483 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24485 else
24487 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24489 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24490 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24493 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24494 gen_rtx_LABEL_REF (VOIDmode, label),
24495 pc_rtx);
24496 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24498 emit_jump_insn (temp);
24499 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24502 /* Output code to perform a log1p XFmode calculation. */
24504 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24506 rtx label1 = gen_label_rtx ();
24507 rtx label2 = gen_label_rtx ();
24509 rtx tmp = gen_reg_rtx (XFmode);
24510 rtx tmp2 = gen_reg_rtx (XFmode);
24512 emit_insn (gen_absxf2 (tmp, op1));
24513 emit_insn (gen_cmpxf (tmp,
24514 CONST_DOUBLE_FROM_REAL_VALUE (
24515 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24516 XFmode)));
24517 emit_jump_insn (gen_bge (label1));
24519 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24520 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24521 emit_jump (label2);
24523 emit_label (label1);
24524 emit_move_insn (tmp, CONST1_RTX (XFmode));
24525 emit_insn (gen_addxf3 (tmp, op1, tmp));
24526 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24527 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24529 emit_label (label2);
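/* The comparison constant above is 1 - sqrt(2)/2 (about 0.2928932): fyl2xp1
   is only specified for arguments smaller than that in magnitude, so small
   |op1| takes the more accurate fyl2xp1 path while larger values fall back to
   computing log2 (1 + op1) with plain fyl2x, both scaled by fldln2 so the
   result is a natural logarithm.  */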
24532 /* Output code to perform a Newton-Raphson approximation of a single precision
24533 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24535 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24537 rtx x0, x1, e0, e1, two;
24539 x0 = gen_reg_rtx (mode);
24540 e0 = gen_reg_rtx (mode);
24541 e1 = gen_reg_rtx (mode);
24542 x1 = gen_reg_rtx (mode);
24544 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24546 if (VECTOR_MODE_P (mode))
24547 two = ix86_build_const_vector (SFmode, true, two);
24549 two = force_reg (mode, two);
24551 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24553 /* x0 = rcp(b) estimate */
24554 emit_insn (gen_rtx_SET (VOIDmode, x0,
24555 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24556 UNSPEC_RCP)));
24557 /* e0 = x0 * b */
24558 emit_insn (gen_rtx_SET (VOIDmode, e0,
24559 gen_rtx_MULT (mode, x0, b)));
24560 /* e1 = 2. - e0 */
24561 emit_insn (gen_rtx_SET (VOIDmode, e1,
24562 gen_rtx_MINUS (mode, two, e0)));
24563 /* x1 = x0 * e1 */
24564 emit_insn (gen_rtx_SET (VOIDmode, x1,
24565 gen_rtx_MULT (mode, x0, e1)));
24566 /* res = a * x1 */
24567 emit_insn (gen_rtx_SET (VOIDmode, res,
24568 gen_rtx_MULT (mode, a, x1)));
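/* This is one Newton-Raphson refinement of the hardware reciprocal estimate:
   x1 = x0 * (2 - b * x0), which roughly doubles the number of correct bits
   (the rcp estimate is good to about 12 bits), so the final product a * x1 is
   close to, but not guaranteed to be, the correctly rounded quotient.  */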
24571 /* Output code to perform a Newton-Raphson approximation of a
24572 single precision floating point [reciprocal] square root. */
24574 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24575 bool recip)
24577 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24578 REAL_VALUE_TYPE r;
24580 x0 = gen_reg_rtx (mode);
24581 e0 = gen_reg_rtx (mode);
24582 e1 = gen_reg_rtx (mode);
24583 e2 = gen_reg_rtx (mode);
24584 e3 = gen_reg_rtx (mode);
24586 real_from_integer (&r, VOIDmode, -3, -1, 0);
24587 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24589 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24590 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24592 if (VECTOR_MODE_P (mode))
24594 mthree = ix86_build_const_vector (SFmode, true, mthree);
24595 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24598 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24599 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
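/* These follow from one Newton-Raphson step for f(x) = 1/(x*x) - a, whose
   iteration is x1 = x0 * (3 - a * x0 * x0) / 2; pulling out the sign gives
   the -0.5 * x0 * (a * x0 * x0 - 3) form used below, and multiplying once
   more by a turns the refined reciprocal square root into the square root.  */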
24601 /* x0 = rsqrt(a) estimate */
24602 emit_insn (gen_rtx_SET (VOIDmode, x0,
24603 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24604 UNSPEC_RSQRT)));
24606 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
24607 if (!recip)
24609 rtx zero, mask;
24611 zero = gen_reg_rtx (mode);
24612 mask = gen_reg_rtx (mode);
24614 zero = force_reg (mode, CONST0_RTX(mode));
24615 emit_insn (gen_rtx_SET (VOIDmode, mask,
24616 gen_rtx_NE (mode, zero, a)));
24618 emit_insn (gen_rtx_SET (VOIDmode, x0,
24619 gen_rtx_AND (mode, x0, mask)));
24622 /* e0 = x0 * a */
24623 emit_insn (gen_rtx_SET (VOIDmode, e0,
24624 gen_rtx_MULT (mode, x0, a)));
24625 /* e1 = e0 * x0 */
24626 emit_insn (gen_rtx_SET (VOIDmode, e1,
24627 gen_rtx_MULT (mode, e0, x0)));
24629 /* e2 = e1 - 3. */
24630 mthree = force_reg (mode, mthree);
24631 emit_insn (gen_rtx_SET (VOIDmode, e2,
24632 gen_rtx_PLUS (mode, e1, mthree)));
24634 mhalf = force_reg (mode, mhalf);
24635 if (recip)
24636 /* e3 = -.5 * x0 */
24637 emit_insn (gen_rtx_SET (VOIDmode, e3,
24638 gen_rtx_MULT (mode, x0, mhalf)));
24639 else
24640 /* e3 = -.5 * e0 */
24641 emit_insn (gen_rtx_SET (VOIDmode, e3,
24642 gen_rtx_MULT (mode, e0, mhalf)));
24643 /* ret = e2 * e3 */
24644 emit_insn (gen_rtx_SET (VOIDmode, res,
24645 gen_rtx_MULT (mode, e2, e3)));
24648 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24650 static void ATTRIBUTE_UNUSED
24651 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24652 tree decl)
24654 /* With Binutils 2.15, the "@unwind" marker must be specified on
24655 every occurrence of the ".eh_frame" section, not just the first
24656 one. */
24657 if (TARGET_64BIT
24658 && strcmp (name, ".eh_frame") == 0)
24660 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24661 flags & SECTION_WRITE ? "aw" : "a");
24662 return;
24664 default_elf_asm_named_section (name, flags, decl);
24667 /* Return the mangling of TYPE if it is an extended fundamental type. */
24669 static const char *
24670 ix86_mangle_type (const_tree type)
24672 type = TYPE_MAIN_VARIANT (type);
24674 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24675 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24676 return NULL;
24678 switch (TYPE_MODE (type))
24680 case TFmode:
24681 /* __float128 is "g". */
24682 return "g";
24683 case XFmode:
24684 /* "long double" or __float80 is "e". */
24685 return "e";
24686 default:
24687 return NULL;
24691 /* For 32-bit code we can save the PIC register setup by using the
24692 __stack_chk_fail_local hidden function instead of calling
24693 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
24694 register, so it is better to call __stack_chk_fail directly. */
24696 static tree
24697 ix86_stack_protect_fail (void)
24699 return TARGET_64BIT
24700 ? default_external_stack_protect_fail ()
24701 : default_hidden_stack_protect_fail ();
24704 /* Select a format to encode pointers in exception handling data. CODE
24705 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24706 true if the symbol may be affected by dynamic relocations.
24708 ??? All x86 object file formats are capable of representing this.
24709 After all, the relocation needed is the same as for the call insn.
24710 Whether or not a particular assembler allows us to enter such, I
24711 guess we'll have to see. */
24712 int
24713 asm_preferred_eh_data_format (int code, int global)
24715 if (flag_pic)
24717 int type = DW_EH_PE_sdata8;
24718 if (!TARGET_64BIT
24719 || ix86_cmodel == CM_SMALL_PIC
24720 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24721 type = DW_EH_PE_sdata4;
24722 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24724 if (ix86_cmodel == CM_SMALL
24725 || (ix86_cmodel == CM_MEDIUM && code))
24726 return DW_EH_PE_udata4;
24727 return DW_EH_PE_absptr;
24730 /* Expand copysign: copy the sign of SIGN onto the positive value ABS_VALUE,
24731 storing the result in RESULT. If MASK is non-null, it shall be a mask to mask out
24732 the sign-bit. */
24733 static void
24734 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24736 enum machine_mode mode = GET_MODE (sign);
24737 rtx sgn = gen_reg_rtx (mode);
24738 if (mask == NULL_RTX)
24740 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24741 if (!VECTOR_MODE_P (mode))
24743 /* We need to generate a scalar mode mask in this case. */
24744 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24745 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24746 mask = gen_reg_rtx (mode);
24747 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24750 else
24751 mask = gen_rtx_NOT (mode, mask);
24752 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24753 gen_rtx_AND (mode, mask, sign)));
24754 emit_insn (gen_rtx_SET (VOIDmode, result,
24755 gen_rtx_IOR (mode, abs_value, sgn)));
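/* In scalar terms the two instructions emitted above are the usual
   copysign bit trick.  A rough sketch for IEEE single precision, assuming
   bit-level reinterpretation (e.g. via memcpy) between float and uint32_t,
   with float_bits as a hypothetical helper for that reinterpretation:

     uint32_t sign_mask = 0x80000000u;
     uint32_t sgn = float_bits (sign) & sign_mask;
     uint32_t res = float_bits (abs_value) | sgn;

   ABS_VALUE is assumed to already have its sign bit clear, so the OR
   simply grafts SIGN's sign bit onto it.  */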
24758 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24759 mask for masking out the sign-bit is stored in *SMASK, if that is
24760 non-null. */
24761 static rtx
24762 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24764 enum machine_mode mode = GET_MODE (op0);
24765 rtx xa, mask;
24767 xa = gen_reg_rtx (mode);
24768 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24769 if (!VECTOR_MODE_P (mode))
24771 /* We need to generate a scalar mode mask in this case. */
24772 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24773 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24774 mask = gen_reg_rtx (mode);
24775 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24777 emit_insn (gen_rtx_SET (VOIDmode, xa,
24778 gen_rtx_AND (mode, op0, mask)));
24780 if (smask)
24781 *smask = mask;
24783 return xa;
24786 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24787 swapping the operands if SWAP_OPERANDS is true. The expanded
24788 code is a forward jump to a newly created label in case the
24789 comparison is true. The generated label rtx is returned. */
24790 static rtx
24791 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24792 bool swap_operands)
24794 rtx label, tmp;
24796 if (swap_operands)
24798 tmp = op0;
24799 op0 = op1;
24800 op1 = tmp;
24803 label = gen_label_rtx ();
24804 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24805 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24806 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24807 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24808 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24809 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24810 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24811 JUMP_LABEL (tmp) = label;
24813 return label;
24816 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24817 using comparison code CODE. Operands are swapped for the comparison if
24818 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24819 static rtx
24820 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24821 bool swap_operands)
24823 enum machine_mode mode = GET_MODE (op0);
24824 rtx mask = gen_reg_rtx (mode);
24826 if (swap_operands)
24828 rtx tmp = op0;
24829 op0 = op1;
24830 op1 = tmp;
24833 if (mode == DFmode)
24834 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24835 gen_rtx_fmt_ee (code, mode, op0, op1)));
24836 else
24837 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24838 gen_rtx_fmt_ee (code, mode, op0, op1)));
24840 return mask;
24843 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24844 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24845 static rtx
24846 ix86_gen_TWO52 (enum machine_mode mode)
24848 REAL_VALUE_TYPE TWO52r;
24849 rtx TWO52;
24851 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24852 TWO52 = const_double_from_real_value (TWO52r, mode);
24853 TWO52 = force_reg (mode, TWO52);
24855 return TWO52;
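/* The 2**52 (resp. 2**23) constant drives the add/subtract rounding trick
   used by the expanders below: for any |x| < 2**52, x + 2**52 leaves no
   room for fraction bits in a double, so the addition itself rounds x to
   an integer and subtracting 2**52 recovers that integer.  A rough worked
   example under round-to-nearest-even:

     x = 1.7:  1.7 + 2**52 = 4503599627370497.7 -> 4503599627370498.0
               4503599627370498.0 - 2**52 = 2.0
     x = 2.5:  ties to even, giving 2.0 (hence the explicit compensation
               steps in ix86_expand_rounddf_32 and friends).  */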
24858 /* Expand SSE sequence for computing lround from OP1 storing
24859 into OP0. */
24860 void
24861 ix86_expand_lround (rtx op0, rtx op1)
24863 /* C code for the stuff we're doing below:
24864 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24865 return (long)tmp;
24867 enum machine_mode mode = GET_MODE (op1);
24868 const struct real_format *fmt;
24869 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24870 rtx adj;
24872 /* load nextafter (0.5, 0.0) */
24873 fmt = REAL_MODE_FORMAT (mode);
24874 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24875 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24877 /* adj = copysign (0.5, op1) */
24878 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24879 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24881 /* adj = op1 + adj */
24882 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24884 /* op0 = (imode)adj */
24885 expand_fix (op0, adj, 0);
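/* Using copysign (nextafter (0.5, 0.0), op1) rather than a plain 0.5
   avoids a double-rounding problem in the addition above.  A rough
   illustration with op1 equal to the largest double below 0.5:

     op1 = 0x1.fffffffffffffp-2
     op1 + 0.5                 rounds up to exactly 1.0, so (long) gives 1
     op1 + nextafter (0.5, 0)  stays strictly below 1.0, so (long) gives 0

   which matches lround (op1) == 0.  */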
24888 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
24889 into OPERAND0. */
24890 void
24891 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24893 /* C code for the stuff we're doing below (for do_floor):
24894 xi = (long)op1;
24895 xi -= (double)xi > op1 ? 1 : 0;
24896 return xi;
24898 enum machine_mode fmode = GET_MODE (op1);
24899 enum machine_mode imode = GET_MODE (op0);
24900 rtx ireg, freg, label, tmp;
24902 /* reg = (long)op1 */
24903 ireg = gen_reg_rtx (imode);
24904 expand_fix (ireg, op1, 0);
24906 /* freg = (double)reg */
24907 freg = gen_reg_rtx (fmode);
24908 expand_float (freg, ireg, 0);
24910 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24911 label = ix86_expand_sse_compare_and_jump (UNLE,
24912 freg, op1, !do_floor);
24913 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24914 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24915 emit_move_insn (ireg, tmp);
24917 emit_label (label);
24918 LABEL_NUSES (label) = 1;
24920 emit_move_insn (op0, ireg);
24923 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24924 result in OPERAND0. */
24925 void
24926 ix86_expand_rint (rtx operand0, rtx operand1)
24928 /* C code for the stuff we're doing below:
24929 xa = fabs (operand1);
24930 if (!isless (xa, 2**52))
24931 return operand1;
24932 xa = xa + 2**52 - 2**52;
24933 return copysign (xa, operand1);
24935 enum machine_mode mode = GET_MODE (operand0);
24936 rtx res, xa, label, TWO52, mask;
24938 res = gen_reg_rtx (mode);
24939 emit_move_insn (res, operand1);
24941 /* xa = abs (operand1) */
24942 xa = ix86_expand_sse_fabs (res, &mask);
24944 /* if (!isless (xa, TWO52)) goto label; */
24945 TWO52 = ix86_gen_TWO52 (mode);
24946 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24948 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24949 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24951 ix86_sse_copysign_to_positive (res, xa, res, mask);
24953 emit_label (label);
24954 LABEL_NUSES (label) = 1;
24956 emit_move_insn (operand0, res);
24959 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24960 into OPERAND0. */
24961 void
24962 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24964 /* C code for the stuff we expand below.
24965 double xa = fabs (x), x2;
24966 if (!isless (xa, TWO52))
24967 return x;
24968 xa = xa + TWO52 - TWO52;
24969 x2 = copysign (xa, x);
24970 Compensate. Floor:
24971 if (x2 > x)
24972 x2 -= 1;
24973 Compensate. Ceil:
24974 if (x2 < x)
24975 x2 -= -1;
24976 return x2;
24978 enum machine_mode mode = GET_MODE (operand0);
24979 rtx xa, TWO52, tmp, label, one, res, mask;
24981 TWO52 = ix86_gen_TWO52 (mode);
24983 /* Temporary for holding the result, initialized to the input
24984 operand to ease control flow. */
24985 res = gen_reg_rtx (mode);
24986 emit_move_insn (res, operand1);
24988 /* xa = abs (operand1) */
24989 xa = ix86_expand_sse_fabs (res, &mask);
24991 /* if (!isless (xa, TWO52)) goto label; */
24992 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24994 /* xa = xa + TWO52 - TWO52; */
24995 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24996 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24998 /* xa = copysign (xa, operand1) */
24999 ix86_sse_copysign_to_positive (xa, xa, res, mask);
25001 /* generate 1.0 or -1.0 */
25002 one = force_reg (mode,
25003 const_double_from_real_value (do_floor
25004 ? dconst1 : dconstm1, mode));
25006 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25007 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25008 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25009 gen_rtx_AND (mode, one, tmp)));
25010 /* We always need to subtract here to preserve signed zero. */
25011 tmp = expand_simple_binop (mode, MINUS,
25012 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25013 emit_move_insn (res, tmp);
25015 emit_label (label);
25016 LABEL_NUSES (label) = 1;
25018 emit_move_insn (operand0, res);
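/* A rough worked example of the compensation step above for do_floor,
   with round-to-nearest-even in the TWO52 addition:

     x = -1.3:  xa = 1.3;  xa + 2**52 - 2**52 = 1.0;  x2 = copysign = -1.0
                x2 > x, so x2 -= 1.0  ->  -2.0 == floor (-1.3)

   and for the ceil variant:

     x = 1.3:   x2 = 1.0;  x2 < x, so x2 -= -1.0  ->  2.0 == ceil (1.3)  */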
25021 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25022 into OPERAND0. */
25023 void
25024 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25026 /* C code for the stuff we expand below.
25027 double xa = fabs (x), x2;
25028 if (!isless (xa, TWO52))
25029 return x;
25030 x2 = (double)(long)x;
25031 Compensate. Floor:
25032 if (x2 > x)
25033 x2 -= 1;
25034 Compensate. Ceil:
25035 if (x2 < x)
25036 x2 += 1;
25037 if (HONOR_SIGNED_ZEROS (mode))
25038 return copysign (x2, x);
25039 return x2;
25041 enum machine_mode mode = GET_MODE (operand0);
25042 rtx xa, xi, TWO52, tmp, label, one, res, mask;
25044 TWO52 = ix86_gen_TWO52 (mode);
25046 /* Temporary for holding the result, initialized to the input
25047 operand to ease control flow. */
25048 res = gen_reg_rtx (mode);
25049 emit_move_insn (res, operand1);
25051 /* xa = abs (operand1) */
25052 xa = ix86_expand_sse_fabs (res, &mask);
25054 /* if (!isless (xa, TWO52)) goto label; */
25055 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25057 /* xa = (double)(long)x */
25058 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25059 expand_fix (xi, res, 0);
25060 expand_float (xa, xi, 0);
25062 /* generate 1.0 */
25063 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25065 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25066 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25067 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25068 gen_rtx_AND (mode, one, tmp)));
25069 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25070 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25071 emit_move_insn (res, tmp);
25073 if (HONOR_SIGNED_ZEROS (mode))
25074 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25076 emit_label (label);
25077 LABEL_NUSES (label) = 1;
25079 emit_move_insn (operand0, res);
25082 /* Expand SSE sequence for computing round from OPERAND1 storing
25083 into OPERAND0. This sequence works without relying on DImode truncation
25084 via cvttsd2siq, which is only available on 64-bit targets. */
25085 void
25086 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25088 /* C code for the stuff we expand below.
25089 double xa = fabs (x), xa2, dxa, x2;
25090 if (!isless (xa, TWO52))
25091 return x;
25092 Using the absolute value and copying back sign makes
25093 -0.0 -> -0.0 correct.
25094 xa2 = xa + TWO52 - TWO52;
25095 Compensate.
25096 dxa = xa2 - xa;
25097 if (dxa <= -0.5)
25098 xa2 += 1;
25099 else if (dxa > 0.5)
25100 xa2 -= 1;
25101 x2 = copysign (xa2, x);
25102 return x2;
25104 enum machine_mode mode = GET_MODE (operand0);
25105 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25107 TWO52 = ix86_gen_TWO52 (mode);
25109 /* Temporary for holding the result, initialized to the input
25110 operand to ease control flow. */
25111 res = gen_reg_rtx (mode);
25112 emit_move_insn (res, operand1);
25114 /* xa = abs (operand1) */
25115 xa = ix86_expand_sse_fabs (res, &mask);
25117 /* if (!isless (xa, TWO52)) goto label; */
25118 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25120 /* xa2 = xa + TWO52 - TWO52; */
25121 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25122 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25124 /* dxa = xa2 - xa; */
25125 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25127 /* generate 0.5, 1.0 and -0.5 */
25128 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25129 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25130 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25131 0, OPTAB_DIRECT);
25133 /* Compensate. */
25134 tmp = gen_reg_rtx (mode);
25135 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25136 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25137 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25138 gen_rtx_AND (mode, one, tmp)));
25139 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25140 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25141 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25142 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25143 gen_rtx_AND (mode, one, tmp)));
25144 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25146 /* res = copysign (xa2, operand1) */
25147 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25149 emit_label (label);
25150 LABEL_NUSES (label) = 1;
25152 emit_move_insn (operand0, res);
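/* A rough worked example of the dxa compensation above, with
   round-to-nearest-even in the TWO52 addition:

     x = 2.5:   xa = 2.5;  xa2 = 2.0;  dxa = -0.5
                dxa <= -0.5, so xa2 += 1.0  ->  3.0;  copysign -> 3.0 == round (2.5)
     x = -3.5:  xa = 3.5;  xa2 = 4.0;  dxa = 0.5
                neither test fires;  copysign -> -4.0 == round (-3.5)  */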
25155 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25156 into OPERAND0. */
25157 void
25158 ix86_expand_trunc (rtx operand0, rtx operand1)
25160 /* C code for SSE variant we expand below.
25161 double xa = fabs (x), x2;
25162 if (!isless (xa, TWO52))
25163 return x;
25164 x2 = (double)(long)x;
25165 if (HONOR_SIGNED_ZEROS (mode))
25166 return copysign (x2, x);
25167 return x2;
25169 enum machine_mode mode = GET_MODE (operand0);
25170 rtx xa, xi, TWO52, label, res, mask;
25172 TWO52 = ix86_gen_TWO52 (mode);
25174 /* Temporary for holding the result, initialized to the input
25175 operand to ease control flow. */
25176 res = gen_reg_rtx (mode);
25177 emit_move_insn (res, operand1);
25179 /* xa = abs (operand1) */
25180 xa = ix86_expand_sse_fabs (res, &mask);
25182 /* if (!isless (xa, TWO52)) goto label; */
25183 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25185 /* x = (double)(long)x */
25186 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25187 expand_fix (xi, res, 0);
25188 expand_float (res, xi, 0);
25190 if (HONOR_SIGNED_ZEROS (mode))
25191 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25193 emit_label (label);
25194 LABEL_NUSES (label) = 1;
25196 emit_move_insn (operand0, res);
25199 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25200 into OPERAND0. */
25201 void
25202 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25204 enum machine_mode mode = GET_MODE (operand0);
25205 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25207 /* C code for SSE variant we expand below.
25208 double xa = fabs (x), xa2, x2;
25209 if (!isless (xa, TWO52))
25210 return x;
25211 xa2 = xa + TWO52 - TWO52;
25212 Compensate:
25213 if (xa2 > xa)
25214 xa2 -= 1.0;
25215 x2 = copysign (xa2, x);
25216 return x2;
25219 TWO52 = ix86_gen_TWO52 (mode);
25221 /* Temporary for holding the result, initialized to the input
25222 operand to ease control flow. */
25223 res = gen_reg_rtx (mode);
25224 emit_move_insn (res, operand1);
25226 /* xa = abs (operand1) */
25227 xa = ix86_expand_sse_fabs (res, &smask);
25229 /* if (!isless (xa, TWO52)) goto label; */
25230 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25232 /* res = xa + TWO52 - TWO52; */
25233 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25234 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25235 emit_move_insn (res, tmp);
25237 /* generate 1.0 */
25238 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25240 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
25241 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25242 emit_insn (gen_rtx_SET (VOIDmode, mask,
25243 gen_rtx_AND (mode, mask, one)));
25244 tmp = expand_simple_binop (mode, MINUS,
25245 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25246 emit_move_insn (res, tmp);
25248 /* res = copysign (res, operand1) */
25249 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25251 emit_label (label);
25252 LABEL_NUSES (label) = 1;
25254 emit_move_insn (operand0, res);
25257 /* Expand SSE sequence for computing round from OPERAND1 storing
25258 into OPERAND0. */
25259 void
25260 ix86_expand_round (rtx operand0, rtx operand1)
25262 /* C code for the stuff we're doing below:
25263 double xa = fabs (x);
25264 if (!isless (xa, TWO52))
25265 return x;
25266 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25267 return copysign (xa, x);
25269 enum machine_mode mode = GET_MODE (operand0);
25270 rtx res, TWO52, xa, label, xi, half, mask;
25271 const struct real_format *fmt;
25272 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25274 /* Temporary for holding the result, initialized to the input
25275 operand to ease control flow. */
25276 res = gen_reg_rtx (mode);
25277 emit_move_insn (res, operand1);
25279 TWO52 = ix86_gen_TWO52 (mode);
25280 xa = ix86_expand_sse_fabs (res, &mask);
25281 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25283 /* load nextafter (0.5, 0.0) */
25284 fmt = REAL_MODE_FORMAT (mode);
25285 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25286 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25288 /* xa = xa + 0.5 */
25289 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25290 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25292 /* xa = (double)(int64_t)xa */
25293 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25294 expand_fix (xi, xa, 0);
25295 expand_float (xa, xi, 0);
25297 /* res = copysign (xa, operand1) */
25298 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25300 emit_label (label);
25301 LABEL_NUSES (label) = 1;
25303 emit_move_insn (operand0, res);
25307 /* Check whether an SSE5 instruction is valid.
25308 OPERANDS is the array of operands.
25309 NUM is the number of operands.
25310 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25311 NUM_MEMORY is the maximum number of memory operands to accept. */
25312 bool
25313 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
25315 int mem_mask;
25316 int mem_count;
25317 int i;
25319 /* Count the number of memory arguments */
25320 mem_mask = 0;
25321 mem_count = 0;
25322 for (i = 0; i < num; i++)
25324 enum machine_mode mode = GET_MODE (operands[i]);
25325 if (register_operand (operands[i], mode))
25328 else if (memory_operand (operands[i], mode))
25330 mem_mask |= (1 << i);
25331 mem_count++;
25334 else
25336 rtx pattern = PATTERN (insn);
25338 /* allow 0 for pcmov */
25339 if (GET_CODE (pattern) != SET
25340 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25341 || i < 2
25342 || operands[i] != CONST0_RTX (mode))
25343 return false;
25347 /* If there were no memory operations, allow the insn */
25348 if (mem_mask == 0)
25349 return true;
25351 /* Do not allow the destination register to be a memory operand. */
25352 else if (mem_mask & (1 << 0))
25353 return false;
25355 /* If there are too many memory operations, disallow the instruction. While
25356 the hardware only allows one memory reference, before register allocation
25357 we sometimes allow two memory operands for some insns so that code like
25358 the following can be optimized:
25360 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25362 or similar cases that are vectorized into using the fmaddss
25363 instruction. */
25364 else if (mem_count > num_memory)
25365 return false;
25367 /* Don't allow more than one memory operation if not optimizing. */
25368 else if (mem_count > 1 && !optimize)
25369 return false;
25371 else if (num == 4 && mem_count == 1)
25373 /* formats (destination is the first argument), example fmaddss:
25374 xmm1, xmm1, xmm2, xmm3/mem
25375 xmm1, xmm1, xmm2/mem, xmm3
25376 xmm1, xmm2, xmm3/mem, xmm1
25377 xmm1, xmm2/mem, xmm3, xmm1 */
25378 if (uses_oc0)
25379 return ((mem_mask == (1 << 1))
25380 || (mem_mask == (1 << 2))
25381 || (mem_mask == (1 << 3)));
25383 /* format, example pmacsdd:
25384 xmm1, xmm2, xmm3/mem, xmm1 */
25385 else
25386 return (mem_mask == (1 << 2));
25389 else if (num == 4 && num_memory == 2)
25391 /* If there are two memory operations, we can load one of the memory ops
25392 into the destination register. This is for optimizing the
25393 multiply/add ops, where the combiner has given both the multiply
25394 and the add insns a memory operand. We have to be careful
25395 that the destination doesn't overlap with the inputs.
25396 rtx op0 = operands[0];
25398 if (reg_mentioned_p (op0, operands[1])
25399 || reg_mentioned_p (op0, operands[2])
25400 || reg_mentioned_p (op0, operands[3]))
25401 return false;
25403 /* formats (destination is the first argument), example fmaddss:
25404 xmm1, xmm1, xmm2, xmm3/mem
25405 xmm1, xmm1, xmm2/mem, xmm3
25406 xmm1, xmm2, xmm3/mem, xmm1
25407 xmm1, xmm2/mem, xmm3, xmm1
25409 For the oc0 case, we will load either operands[1] or operands[3] into
25410 operands[0], so any combination of 2 memory operands is ok. */
25411 if (uses_oc0)
25412 return true;
25414 /* format, example pmacsdd:
25415 xmm1, xmm2, xmm3/mem, xmm1
25417 For the integer multiply/add instructions be more restrictive and
25418 require operands[2] and operands[3] to be the memory operands. */
25419 else
25420 return (mem_mask == ((1 << 2) | (1 << 3)));
25423 else if (num == 3 && num_memory == 1)
25425 /* formats, example protb:
25426 xmm1, xmm2, xmm3/mem
25427 xmm1, xmm2/mem, xmm3 */
25428 if (uses_oc0)
25429 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25431 /* format, example comeq:
25432 xmm1, xmm2, xmm3/mem */
25433 else
25434 return (mem_mask == (1 << 2));
25437 else
25438 gcc_unreachable ();
25440 return false;
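/* A rough illustration of the mem_mask / mem_count encoding used above for
   a four-operand fmaddss-style insn (operand 0 is the destination):

     { xmm1, xmm1, xmm2, mem  }  ->  mem_mask = 1 << 3, mem_count = 1
     { xmm1, xmm2, mem,  xmm1 }  ->  mem_mask = 1 << 2, mem_count = 1
     { mem,  ...              }  ->  rejected, destination in memory  */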
25444 /* Fix up an SSE5 instruction that has 2 memory input references into a form the
25445 hardware will allow, by using the destination register to load one of the
25446 memory operands. Presently this is used by the multiply/add routines to
25447 allow 2 memory references. */
25449 void
25450 ix86_expand_sse5_multiple_memory (rtx operands[],
25451 int num,
25452 enum machine_mode mode)
25454 rtx op0 = operands[0];
25455 if (num != 4
25456 || memory_operand (op0, mode)
25457 || reg_mentioned_p (op0, operands[1])
25458 || reg_mentioned_p (op0, operands[2])
25459 || reg_mentioned_p (op0, operands[3]))
25460 gcc_unreachable ();
25462 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25463 the destination register. */
25464 if (memory_operand (operands[1], mode))
25466 emit_move_insn (op0, operands[1]);
25467 operands[1] = op0;
25469 else if (memory_operand (operands[3], mode))
25471 emit_move_insn (op0, operands[3]);
25472 operands[3] = op0;
25474 else
25475 gcc_unreachable ();
25477 return;
25481 /* Table of valid machine attributes. */
25482 static const struct attribute_spec ix86_attribute_table[] =
25484 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25485 /* Stdcall attribute says callee is responsible for popping arguments
25486 if they are not variable. */
25487 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25488 /* Fastcall attribute says callee is responsible for popping arguments
25489 if they are not variable. */
25490 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25491 /* Cdecl attribute says the callee is a normal C declaration */
25492 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25493 /* Regparm attribute specifies how many integer arguments are to be
25494 passed in registers. */
25495 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25496 /* Sseregparm attribute says we are using x86_64 calling conventions
25497 for FP arguments. */
25498 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25499 /* force_align_arg_pointer says this function realigns the stack at entry. */
25500 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25501 false, true, true, ix86_handle_cconv_attribute },
25502 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25503 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25504 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25505 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25506 #endif
25507 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25508 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25509 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25510 SUBTARGET_ATTRIBUTE_TABLE,
25511 #endif
25512 { NULL, 0, 0, false, false, false, NULL }
25515 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25516 static int
25517 x86_builtin_vectorization_cost (bool runtime_test)
25519 /* If the branch of the runtime test is taken - i.e., the vectorized
25520 version is skipped - this incurs a misprediction cost (because the
25521 vectorized version is expected to be the fall-through). So we subtract
25522 the latency of a mispredicted branch from the costs that are incurred
25523 when the vectorized version is executed.
25525 TODO: The values in individual target tables have to be tuned or new
25526 fields may be needed. For example, on K8, the default branch path is the
25527 not-taken path. If the taken path is predicted correctly, the minimum
25528 penalty of going down the taken-path is 1 cycle. If the taken-path is
25529 not predicted correctly, then the minimum penalty is 10 cycles. */
25531 if (runtime_test)
25533 return (-(ix86_cost->cond_taken_branch_cost));
25535 else
25536 return 0;
25539 /* Initialize the GCC target structure. */
25540 #undef TARGET_ATTRIBUTE_TABLE
25541 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25542 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25543 # undef TARGET_MERGE_DECL_ATTRIBUTES
25544 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25545 #endif
25547 #undef TARGET_COMP_TYPE_ATTRIBUTES
25548 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25550 #undef TARGET_INIT_BUILTINS
25551 #define TARGET_INIT_BUILTINS ix86_init_builtins
25552 #undef TARGET_EXPAND_BUILTIN
25553 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25555 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25556 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25557 ix86_builtin_vectorized_function
25559 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25560 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25562 #undef TARGET_BUILTIN_RECIPROCAL
25563 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25565 #undef TARGET_ASM_FUNCTION_EPILOGUE
25566 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25568 #undef TARGET_ENCODE_SECTION_INFO
25569 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25570 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25571 #else
25572 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25573 #endif
25575 #undef TARGET_ASM_OPEN_PAREN
25576 #define TARGET_ASM_OPEN_PAREN ""
25577 #undef TARGET_ASM_CLOSE_PAREN
25578 #define TARGET_ASM_CLOSE_PAREN ""
25580 #undef TARGET_ASM_ALIGNED_HI_OP
25581 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25582 #undef TARGET_ASM_ALIGNED_SI_OP
25583 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25584 #ifdef ASM_QUAD
25585 #undef TARGET_ASM_ALIGNED_DI_OP
25586 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25587 #endif
25589 #undef TARGET_ASM_UNALIGNED_HI_OP
25590 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25591 #undef TARGET_ASM_UNALIGNED_SI_OP
25592 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25593 #undef TARGET_ASM_UNALIGNED_DI_OP
25594 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25596 #undef TARGET_SCHED_ADJUST_COST
25597 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25598 #undef TARGET_SCHED_ISSUE_RATE
25599 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25600 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25601 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25602 ia32_multipass_dfa_lookahead
25604 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25605 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25607 #ifdef HAVE_AS_TLS
25608 #undef TARGET_HAVE_TLS
25609 #define TARGET_HAVE_TLS true
25610 #endif
25611 #undef TARGET_CANNOT_FORCE_CONST_MEM
25612 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25613 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25614 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25616 #undef TARGET_DELEGITIMIZE_ADDRESS
25617 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25619 #undef TARGET_MS_BITFIELD_LAYOUT_P
25620 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25622 #if TARGET_MACHO
25623 #undef TARGET_BINDS_LOCAL_P
25624 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25625 #endif
25626 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25627 #undef TARGET_BINDS_LOCAL_P
25628 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25629 #endif
25631 #undef TARGET_ASM_OUTPUT_MI_THUNK
25632 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25633 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25634 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25636 #undef TARGET_ASM_FILE_START
25637 #define TARGET_ASM_FILE_START x86_file_start
25639 #undef TARGET_DEFAULT_TARGET_FLAGS
25640 #define TARGET_DEFAULT_TARGET_FLAGS \
25641 (TARGET_DEFAULT \
25642 | TARGET_SUBTARGET_DEFAULT \
25643 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25645 #undef TARGET_HANDLE_OPTION
25646 #define TARGET_HANDLE_OPTION ix86_handle_option
25648 #undef TARGET_RTX_COSTS
25649 #define TARGET_RTX_COSTS ix86_rtx_costs
25650 #undef TARGET_ADDRESS_COST
25651 #define TARGET_ADDRESS_COST ix86_address_cost
25653 #undef TARGET_FIXED_CONDITION_CODE_REGS
25654 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25655 #undef TARGET_CC_MODES_COMPATIBLE
25656 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25658 #undef TARGET_MACHINE_DEPENDENT_REORG
25659 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25661 #undef TARGET_BUILD_BUILTIN_VA_LIST
25662 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25664 #undef TARGET_EXPAND_BUILTIN_VA_START
25665 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25667 #undef TARGET_MD_ASM_CLOBBERS
25668 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25670 #undef TARGET_PROMOTE_PROTOTYPES
25671 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25672 #undef TARGET_STRUCT_VALUE_RTX
25673 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25674 #undef TARGET_SETUP_INCOMING_VARARGS
25675 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25676 #undef TARGET_MUST_PASS_IN_STACK
25677 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25678 #undef TARGET_PASS_BY_REFERENCE
25679 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25680 #undef TARGET_INTERNAL_ARG_POINTER
25681 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25682 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25683 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25684 #undef TARGET_STRICT_ARGUMENT_NAMING
25685 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25688 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25690 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25691 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25693 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25694 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25696 #undef TARGET_C_MODE_FOR_SUFFIX
25697 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25699 #ifdef HAVE_AS_TLS
25700 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25701 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25702 #endif
25704 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25705 #undef TARGET_INSERT_ATTRIBUTES
25706 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25707 #endif
25709 #undef TARGET_MANGLE_TYPE
25710 #define TARGET_MANGLE_TYPE ix86_mangle_type
25712 #undef TARGET_STACK_PROTECT_FAIL
25713 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25715 #undef TARGET_FUNCTION_VALUE
25716 #define TARGET_FUNCTION_VALUE ix86_function_value
25718 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25719 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25721 struct gcc_target targetm = TARGET_INITIALIZER;
25723 #include "gt-i386.h"