/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "c-common.h"
39 #include "except.h"
40 #include "function.h"
41 #include "recog.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "basic-block.h"
46 #include "ggc.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
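
/* For example, MODE_INDEX (SImode) evaluates to 2, selecting the SI column
   of the five-entry multiply and divide cost tables below (QI, HI, SI, DI,
   other); any mode wider than DImode lands in the "other" slot at index 4.  */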

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
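
/* Under that assumption the two scales coincide: COSTS_N_BYTES (2) == 4 ==
   COSTS_N_INSNS (1), so a two-byte add costs the same in the size tables as
   a one-instruction add does in the speed tables.  */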

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
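
/* Reading these initializers (struct stringop_algs is declared in i386.h):
   the first member is the algorithm to use when the block size is unknown
   at compile time; it is followed by {max_size, algorithm} pairs tried in
   order for known sizes, where a max_size of -1 terminates the list and
   covers all larger blocks.  Each cost table below carries one such
   descriptor per pointer width, and DUMMY_STRINGOP_ALGS fills the slot a
   table does not tune.  */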

const
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),  /* cost of an add instruction */
  COSTS_N_BYTES (3),  /* cost of a lea instruction */
  COSTS_N_BYTES (2),  /* variable shift costs */
  COSTS_N_BYTES (3),  /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),  /* cost of movsx */
  COSTS_N_BYTES (3),  /* cost of movzx */
  0,  /* "large" insn */
  2,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {2, 2, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 2},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {2, 2, 2},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  3,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {3, 3},  /* cost of storing MMX registers
              in SImode and DImode */
  3,  /* cost of moving SSE register */
  {3, 3, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {3, 3, 3},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_BYTES (2),  /* cost of FADD and FSUB insns. */
  COSTS_N_BYTES (2),  /* cost of FMUL instruction. */
  COSTS_N_BYTES (2),  /* cost of FDIV instruction. */
  COSTS_N_BYTES (2),  /* cost of FABS instruction. */
  COSTS_N_BYTES (2),  /* cost of FCHS instruction. */
  COSTS_N_BYTES (2),  /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  1,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  1,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (22),  /* cost of FABS instruction. */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)},  /* other */
  1,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  4,  /* size of l1 cache.  486 has 8kB cache
         shared for code and data, so 4kB is
         not really precise.  */
  4,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (3),  /* cost of FABS instruction. */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction. */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  6,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  8,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  8,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (3),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),  /* cost of FABS instruction. */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  256,  /* size of l2 cache */
  32,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (2),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  1,  /* cost for loading QImode using movzbl */
  {1, 1, 1},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {1, 1, 1},  /* cost of storing integer registers */
  1,  /* cost of reg,reg fld/fst */
  {1, 1, 1},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 6, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */

  1,  /* cost of moving MMX register */
  {1, 1},  /* cost of loading MMX registers
              in SImode and DImode */
  {1, 1},  /* cost of storing MMX registers
              in SImode and DImode */
  1,  /* cost of moving SSE register */
  {1, 1, 1},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {1, 1, 1},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  1,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache. */
  128,  /* size of l2 cache. */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),  /* cost of FABS instruction. */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)},  /* other */
  COSTS_N_INSNS (2),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  3,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  6,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  32,  /* size of l2 cache.  Some models
          have integrated l2 cache, but
          optimizing for k6 is not important
          enough to worry about that. */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (2),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (2),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache. */
  256,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  5,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 3, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache. */
  512,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost. */
  2,  /* scalar load_cost. */
  2,  /* scalar_store_cost. */
  5,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  2,  /* vec_align_load_cost. */
  3,  /* vec_unalign_load_cost. */
  3,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  2,  /* cond_not_taken_branch_cost. */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
      /* On K8:
           MOVD reg64, xmmreg  Double  FSTORE 4
           MOVD reg32, xmmreg  Double  FSTORE 4
         On AMDFAM10:
           MOVD reg64, xmmreg  Double  FADD 3
                                       1/1  1/1
           MOVD reg32, xmmreg  Double  FADD 3
                                       1/1  1/1 */
  64,  /* size of l1 cache. */
  512,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost. */
  2,  /* scalar load_cost. */
  2,  /* scalar_store_cost. */
  6,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  2,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  2,  /* vec_store_cost. */
  2,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (3),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (4),  /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  6,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  12,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  10,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  256,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (5),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (7),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  3,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  6,  /* cost of moving MMX register */
  {12, 12},  /* cost of loading MMX registers
                in SImode and DImode */
  {12, 12},  /* cost of storing MMX registers
                in SImode and DImode */
  6,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {12, 12, 12},  /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  8,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  1024,  /* size of l2 cache. */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (3),  /* cost of FABS instruction. */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),  /* HI */
   COSTS_N_INSNS (22),  /* SI */
   COSTS_N_INSNS (22),  /* DI */
   COSTS_N_INSNS (22)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  16,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {6, 6, 6},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {6, 6},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {6, 6, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  2048,  /* size of l2 cache. */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (32),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),  /* cost of FABS instruction. */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (58),  /* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  256,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (8),  /* cost of FABS instruction. */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  512,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  /* Benchmarks show large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (8),  /* cost of FABS instruction. */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction. */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  256,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (8),  /* cost of FABS instruction. */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction. */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

const struct processor_costs *ix86_cost = &pentium_cost;
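
/* Usage sketch: override_options (later in this file) repoints ix86_cost at
   the table matching the -mtune selection, and the RTX cost hooks then read
   fields such as ix86_cost->add, so all of the tables above are reached
   through this single pointer.  Until options are processed, the Pentium
   table serves as the default.  */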

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
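
/* Illustrative sketch of how ix86_tune_features is derived from the mask
   table below (the actual loop lives in override_options later in this
   file):

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   Each entry thus collapses to a boolean saying whether the selected
   processor's bit is set in the corresponding mask.  */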

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro based chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
  | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro based chips and is in conflict
     with partial reg dependencies used by Athlon/P4 based chips, it is
     better to leave it off for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
  | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,
1391 /* X86_TUNE_PROMOTE_HI_REGS */
1392 m_PPRO,
1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396 | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_ADD_ESP_8 */
1399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SUB_ESP_4 */
1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1404 | m_GENERIC,
1406 /* X86_TUNE_SUB_ESP_8 */
1407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411 for DFmode copies */
1412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413 | m_GENERIC | m_GEODE),
1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419 conflict here in between PPro/Pentium4 based chips that thread 128bit
1420 SSE registers as single units versus K8 based chips that divide SSE
1421 registers to two 64bit halves. This knob promotes all store destinations
1422 to be 128bit to allow register renaming on 128bit SSE units, but usually
1423 results in one extra microop on 64bit SSE units. Experimental results
1424 shows that disabling this option on P4 brings over 20% SPECfp regression,
1425 while enabling it on K8 brings roughly 2.4% regression that can be partly
1426 masked by careful scheduling of moves. */
1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1428 | m_AMDFAM10,
1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1431 m_AMDFAM10,
1433 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434 are resolved on SSE register parts instead of whole registers, so we may
1435 maintain just the lower part of scalar values in the proper format,
1436 leaving the upper part undefined. */
1437 m_ATHLON_K8,
1439 /* X86_TUNE_SSE_TYPELESS_STORES */
1440 m_AMD_MULTIPLE,
1442 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443 m_PPRO | m_PENT4 | m_NOCONA,
1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1448 /* X86_TUNE_PROLOGUE_USING_MOVE */
1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1451 /* X86_TUNE_EPILOGUE_USING_MOVE */
1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1454 /* X86_TUNE_SHIFT1 */
1455 ~m_486,
1457 /* X86_TUNE_USE_FFREEP */
1458 m_AMD_MULTIPLE,
1460 /* X86_TUNE_INTER_UNIT_MOVES */
1461 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1464 ~(m_AMDFAM10),
1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467 than 4 branch instructions in the 16 byte window. */
1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1469 | m_GENERIC,
1471 /* X86_TUNE_SCHEDULE */
1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1473 | m_GENERIC,
1475 /* X86_TUNE_USE_BT */
1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1478 /* X86_TUNE_USE_INCDEC */
1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1481 /* X86_TUNE_PAD_RETURNS */
1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1484 /* X86_TUNE_EXT_80387_CONSTANTS */
1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486 | m_CORE2 | m_GENERIC,
1488 /* X86_TUNE_SHORTEN_X87_SSE */
1489 ~m_K8,
1491 /* X86_TUNE_AVOID_VECTOR_DECODE */
1492 m_K8 | m_GENERIC64,
1494 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1495 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1496 ~(m_386 | m_486),
1498 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499 vector path on AMD machines. */
1500 m_K8 | m_GENERIC64 | m_AMDFAM10,
1502 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 machines. */
1504 m_K8 | m_GENERIC64 | m_AMDFAM10,
1506 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1507 than a MOV. */
1508 m_PENT,
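/* Illustrative sketch (editor's note, not part of GCC):

       orl  $-1, %eax              # 3 bytes; sets all bits
       movl $-1, %eax              # 5 bytes

   The OR form is shorter and faster on Pentium, but it reads the old
   register value, so it is only a win on cores that tolerate that
   false dependency.  */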
1510 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511 but one byte longer. */
1512 m_PENT,
1514 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1515 operand that cannot be represented using a modRM byte. The XOR
1516 replacement is long decoded, so this split helps here as well. */
1517 m_K6,
1519 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 from FP to FP. */
1521 m_AMDFAM10 | m_GENERIC,
1523 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524 from integer to FP. */
1525 m_AMDFAM10,
1527 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528 with a subsequent conditional jump instruction into a single
1529 compare-and-branch uop. */
1530 m_CORE2,
1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533 will impact LEA instruction selection. */
1534 m_ATOM,
1537 /* Feature tests against the various architecture variations. */
1538 unsigned char ix86_arch_features[X86_ARCH_LAST];
1540 /* Feature tests against the various architecture variations, used to create
1541 ix86_arch_features based on the processor mask. */
1542 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1544 ~(m_386 | m_486 | m_PENT | m_K6),
1546 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1547 ~m_386,
1549 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1550 ~(m_386 | m_486),
1552 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1553 ~m_386,
1555 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1556 ~m_386,
1559 static const unsigned int x86_accumulate_outgoing_args
1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1561 | m_GENERIC;
1563 static const unsigned int x86_arch_always_fancy_math_387
1564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565 | m_NOCONA | m_CORE2 | m_GENERIC;
1567 static enum stringop_alg stringop_alg = no_stringop;
1569 /* In case the average insn count for a single function invocation is
1570 lower than this constant, emit fast (but longer) prologue and
1571 epilogue code. */
1572 #define FAST_PROLOGUE_INSN_COUNT 20
1574 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1575 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1579 /* Array of the smallest class containing reg number REGNO, indexed by
1580 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1582 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1584 /* ax, dx, cx, bx */
1585 AREG, DREG, CREG, BREG,
1586 /* si, di, bp, sp */
1587 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588 /* FP registers */
1589 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1591 /* arg pointer */
1592 NON_Q_REGS,
1593 /* flags, fpsr, fpcr, frame */
1594 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 /* SSE registers */
1596 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1597 SSE_REGS, SSE_REGS,
1598 /* MMX registers */
1599 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1600 MMX_REGS, MMX_REGS,
1601 /* REX registers */
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604 /* SSE REX registers */
1605 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1606 SSE_REGS, SSE_REGS,
1609 /* The "default" register map used in 32bit mode. */
1611 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1613 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1614 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1615 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1616 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1617 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1619 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1622 /* The "default" register map used in 64bit mode. */
1624 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1626 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1627 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1629 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1630 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1631 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1632 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1635 /* Define the register numbers to be used in Dwarf debugging information.
1636 The SVR4 reference port C compiler uses the following register numbers
1637 in its Dwarf output code:
1638 0 for %eax (gcc regno = 0)
1639 1 for %ecx (gcc regno = 2)
1640 2 for %edx (gcc regno = 1)
1641 3 for %ebx (gcc regno = 3)
1642 4 for %esp (gcc regno = 7)
1643 5 for %ebp (gcc regno = 6)
1644 6 for %esi (gcc regno = 4)
1645 7 for %edi (gcc regno = 5)
1646 The following three DWARF register numbers are never generated by
1647 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648 believes these numbers have these meanings.
1649 8 for %eip (no gcc equivalent)
1650 9 for %eflags (gcc regno = 17)
1651 10 for %trapno (no gcc equivalent)
1652 It is not at all clear how we should number the FP stack registers
1653 for the x86 architecture. If the version of SDB on x86/svr4 were
1654 a bit less brain dead with respect to floating-point then we would
1655 have a precedent to follow with respect to DWARF register numbers
1656 for x86 FP registers, but the SDB on x86/svr4 is so completely
1657 broken with respect to FP registers that it is hardly worth thinking
1658 of it as something to strive for compatibility with.
1659 The version of x86/svr4 SDB I have at the moment does (partially)
1660 seem to believe that DWARF register number 11 is associated with
1661 the x86 register %st(0), but that's about all. Higher DWARF
1662 register numbers don't seem to be associated with anything in
1663 particular, and even for DWARF regno 11, SDB only seems to under-
1664 stand that it should say that a variable lives in %st(0) (when
1665 asked via an `=' command) if we said it was in DWARF regno 11,
1666 but SDB still prints garbage when asked for the value of the
1667 variable in question (via a `/' command).
1668 (Also note that the labels SDB prints for various FP stack regs
1669 when doing an `x' command are all wrong.)
1670 Note that these problems generally don't affect the native SVR4
1671 C compiler because it doesn't allow the use of -O with -g and
1672 because when it is *not* optimizing, it allocates a memory
1673 location for each floating-point variable, and the memory
1674 location is what gets described in the DWARF AT_location
1675 attribute for the variable in question.
1676 Regardless of the severe mental illness of the x86/svr4 SDB, we
1677 do something sensible here and we use the following DWARF
1678 register numbers. Note that these are all stack-top-relative
1679 numbers.
1680 11 for %st(0) (gcc regno = 8)
1681 12 for %st(1) (gcc regno = 9)
1682 13 for %st(2) (gcc regno = 10)
1683 14 for %st(3) (gcc regno = 11)
1684 15 for %st(4) (gcc regno = 12)
1685 16 for %st(5) (gcc regno = 13)
1686 17 for %st(6) (gcc regno = 14)
1687 18 for %st(7) (gcc regno = 15)
1689 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1691 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1692 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1693 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1694 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1695 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1697 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
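/* Illustrative sketch (editor's note, not part of GCC): a debug-info
   emitter indexes this map by gcc's hard register number, e.g.

       int edx_dwarf = svr4_dbx_register_map[1];   // %edx -> 2
       int st0_dwarf = svr4_dbx_register_map[8];   // %st(0) -> 11
*/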
1700 /* Test and compare insns in i386.md store the information needed to
1701 generate branch and scc insns here. */
1703 rtx ix86_compare_op0 = NULL_RTX;
1704 rtx ix86_compare_op1 = NULL_RTX;
1706 /* Define parameter passing and return registers. */
1708 static int const x86_64_int_parameter_registers[6] =
1710 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1713 static int const x86_64_ms_abi_int_parameter_registers[4] =
1715 CX_REG, DX_REG, R8_REG, R9_REG
1718 static int const x86_64_int_return_registers[4] =
1720 AX_REG, DX_REG, DI_REG, SI_REG
1723 /* Define the structure for the machine field in struct function. */
1725 struct GTY(()) stack_local_entry {
1726 unsigned short mode;
1727 unsigned short n;
1728 rtx rtl;
1729 struct stack_local_entry *next;
1732 /* Structure describing stack frame layout.
1733 Stack grows downward:
1735 [arguments]
1736 <- ARG_POINTER
1737 saved pc
1739 saved frame pointer if frame_pointer_needed
1740 <- HARD_FRAME_POINTER
1741 [saved regs]
1743 [padding0]
1745 [saved SSE regs]
1747 [padding1]            \
1749 [va_arg registers]    (
1750                        > to_allocate        <- FRAME_POINTER
1751 [frame]               (
1753 [padding2]            /
1755 struct ix86_frame
1757 int padding0;
1758 int nsseregs;
1759 int nregs;
1760 int padding1;
1761 int va_arg_size;
1762 HOST_WIDE_INT frame;
1763 int padding2;
1764 int outgoing_arguments_size;
1765 int red_zone_size;
1767 HOST_WIDE_INT to_allocate;
1768 /* The offsets relative to ARG_POINTER. */
1769 HOST_WIDE_INT frame_pointer_offset;
1770 HOST_WIDE_INT hard_frame_pointer_offset;
1771 HOST_WIDE_INT stack_pointer_offset;
1773 /* When save_regs_using_mov is set, emit prologue using
1774 move instead of push instructions. */
1775 bool save_regs_using_mov;
1778 /* Code model option. */
1779 enum cmodel ix86_cmodel;
1780 /* Asm dialect. */
1781 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1782 /* TLS dialects. */
1783 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1785 /* Which unit we are generating floating point math for. */
1786 enum fpmath_unit ix86_fpmath;
1788 /* Which cpu are we scheduling for. */
1789 enum attr_cpu ix86_schedule;
1791 /* Which cpu are we optimizing for. */
1792 enum processor_type ix86_tune;
1794 /* Which instruction set architecture to use. */
1795 enum processor_type ix86_arch;
1797 /* True if the SSE prefetch instruction is not a NOP. */
1798 int x86_prefetch_sse;
1800 /* ix86_regparm_string as a number */
1801 static int ix86_regparm;
1803 /* -mstackrealign option */
1804 extern int ix86_force_align_arg_pointer;
1805 static const char ix86_force_align_arg_pointer_string[]
1806 = "force_align_arg_pointer";
1808 static rtx (*ix86_gen_leave) (void);
1809 static rtx (*ix86_gen_pop1) (rtx);
1810 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1812 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1813 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1814 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1815 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1817 /* Preferred alignment for stack boundary in bits. */
1818 unsigned int ix86_preferred_stack_boundary;
1820 /* Alignment for incoming stack boundary in bits specified at
1821 command line. */
1822 static unsigned int ix86_user_incoming_stack_boundary;
1824 /* Default alignment for incoming stack boundary in bits. */
1825 static unsigned int ix86_default_incoming_stack_boundary;
1827 /* Alignment for incoming stack boundary in bits. */
1828 unsigned int ix86_incoming_stack_boundary;
1830 /* The ABI used by the target. */
1831 enum calling_abi ix86_abi;
1833 /* Values 1-5: see jump.c */
1834 int ix86_branch_cost;
1836 /* Calling abi specific va_list type nodes. */
1837 static GTY(()) tree sysv_va_list_type_node;
1838 static GTY(()) tree ms_va_list_type_node;
1840 /* Variables which are this size or smaller are put in the data/bss
1841 or ldata/lbss sections. */
1843 int ix86_section_threshold = 65536;
1845 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1846 char internal_label_prefix[16];
1847 int internal_label_prefix_len;
1849 /* Fence to use after loop using movnt. */
1850 tree x86_mfence;
1852 /* Register class used for passing a given 64bit part of the argument.
1853 These represent classes as documented by the psABI, with the exception
1854 of the SSESF and SSEDF classes, which are basically the SSE class: gcc
1855 uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
1857 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1858 whenever possible (the upper half then contains only padding). */
1859 enum x86_64_reg_class
1861 X86_64_NO_CLASS,
1862 X86_64_INTEGER_CLASS,
1863 X86_64_INTEGERSI_CLASS,
1864 X86_64_SSE_CLASS,
1865 X86_64_SSESF_CLASS,
1866 X86_64_SSEDF_CLASS,
1867 X86_64_SSEUP_CLASS,
1868 X86_64_X87_CLASS,
1869 X86_64_X87UP_CLASS,
1870 X86_64_COMPLEX_X87_CLASS,
1871 X86_64_MEMORY_CLASS
1874 #define MAX_CLASSES 4
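/* Illustrative sketch (editor's note, not part of GCC): under the x86-64
   psABI a 16-byte aggregate such as

       struct s { double d; long l; };

   is classified one eightbyte at a time, giving roughly

       classes[0] = X86_64_SSEDF_CLASS;     // d: SSE, moved as DFmode
       classes[1] = X86_64_INTEGER_CLASS;   // l

   so it is passed in %xmm0 and %rdi when those registers are available;
   aggregates needing more than MAX_CLASSES eightbytes go in memory.  */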
1876 /* Table of constants used by fldpi, fldln2, etc.... */
1877 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1878 static bool ext_80387_constants_init = 0;
1881 static struct machine_function * ix86_init_machine_status (void);
1882 static rtx ix86_function_value (const_tree, const_tree, bool);
1883 static int ix86_function_regparm (const_tree, const_tree);
1884 static void ix86_compute_frame_layout (struct ix86_frame *);
1885 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 rtx, rtx, int);
1887 static void ix86_add_new_builtins (int);
1889 enum ix86_function_specific_strings
1891 IX86_FUNCTION_SPECIFIC_ARCH,
1892 IX86_FUNCTION_SPECIFIC_TUNE,
1893 IX86_FUNCTION_SPECIFIC_FPMATH,
1894 IX86_FUNCTION_SPECIFIC_MAX
1897 static char *ix86_target_string (int, int, const char *, const char *,
1898 const char *, bool);
1899 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1900 static void ix86_function_specific_save (struct cl_target_option *);
1901 static void ix86_function_specific_restore (struct cl_target_option *);
1902 static void ix86_function_specific_print (FILE *, int,
1903 struct cl_target_option *);
1904 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1905 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1906 static bool ix86_can_inline_p (tree, tree);
1907 static void ix86_set_current_function (tree);
1909 static enum calling_abi ix86_function_abi (const_tree);
1912 /* The svr4 ABI for the i386 says that records and unions are returned
1913 in memory. */
1914 #ifndef DEFAULT_PCC_STRUCT_RETURN
1915 #define DEFAULT_PCC_STRUCT_RETURN 1
1916 #endif
1918 /* Whether -mtune= or -march= were specified */
1919 static int ix86_tune_defaulted;
1920 static int ix86_arch_specified;
1922 /* Bit flags that specify the ISA we are compiling for. */
1923 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1925 /* A mask of ix86_isa_flags that includes bit X if X
1926 was set or cleared on the command line. */
1927 static int ix86_isa_flags_explicit;
1929 /* Define a set of ISAs which are available when a given ISA is
1930 enabled. MMX and SSE ISAs are handled separately. */
1932 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1933 #define OPTION_MASK_ISA_3DNOW_SET \
1934 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1936 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1937 #define OPTION_MASK_ISA_SSE2_SET \
1938 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1939 #define OPTION_MASK_ISA_SSE3_SET \
1940 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1941 #define OPTION_MASK_ISA_SSSE3_SET \
1942 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1943 #define OPTION_MASK_ISA_SSE4_1_SET \
1944 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1945 #define OPTION_MASK_ISA_SSE4_2_SET \
1946 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1947 #define OPTION_MASK_ISA_AVX_SET \
1948 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1949 #define OPTION_MASK_ISA_FMA_SET \
1950 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1952 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1953 as -msse4.2. */
1954 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1956 #define OPTION_MASK_ISA_SSE4A_SET \
1957 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1958 #define OPTION_MASK_ISA_SSE5_SET \
1959 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1961 /* AES and PCLMUL need SSE2 because they use xmm registers */
1962 #define OPTION_MASK_ISA_AES_SET \
1963 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1964 #define OPTION_MASK_ISA_PCLMUL_SET \
1965 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1967 #define OPTION_MASK_ISA_ABM_SET \
1968 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1973 /* Define a set of ISAs which aren't available when a given ISA is
1974 disabled. MMX and SSE ISAs are handled separately. */
1976 #define OPTION_MASK_ISA_MMX_UNSET \
1977 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1978 #define OPTION_MASK_ISA_3DNOW_UNSET \
1979 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1982 #define OPTION_MASK_ISA_SSE_UNSET \
1983 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1984 #define OPTION_MASK_ISA_SSE2_UNSET \
1985 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1986 #define OPTION_MASK_ISA_SSE3_UNSET \
1987 (OPTION_MASK_ISA_SSE3 \
1988 | OPTION_MASK_ISA_SSSE3_UNSET \
1989 | OPTION_MASK_ISA_SSE4A_UNSET )
1990 #define OPTION_MASK_ISA_SSSE3_UNSET \
1991 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1992 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1993 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1995 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1996 #define OPTION_MASK_ISA_AVX_UNSET \
1997 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1998 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2000 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2001 as -mno-sse4.1. */
2002 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2004 #define OPTION_MASK_ISA_SSE4A_UNSET \
2005 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2006 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2007 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2008 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2009 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2010 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2011 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2012 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
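/* Illustrative sketch (editor's note, not part of GCC): the SET/UNSET
   macros expand transitively, so one option toggles its whole dependency
   chain:

       ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
       // ...turns on SSE4.2, SSE4.1, SSSE3, SSE3, SSE2 and SSE.

       ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
       // ...-mno-sse2 also clears SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A,
       // SSE5, AVX and FMA, all of which depend on SSE2.
*/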
2014 /* Vectorization library interface and handlers. */
2015 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2016 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2017 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2019 /* Processor target table, indexed by processor number */
2020 struct ptt
2022 const struct processor_costs *cost; /* Processor costs */
2023 const int align_loop; /* Default alignments. */
2024 const int align_loop_max_skip;
2025 const int align_jump;
2026 const int align_jump_max_skip;
2027 const int align_func;
2030 static const struct ptt processor_target_table[PROCESSOR_max] =
2032 {&i386_cost, 4, 3, 4, 3, 4},
2033 {&i486_cost, 16, 15, 16, 15, 16},
2034 {&pentium_cost, 16, 7, 16, 7, 16},
2035 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2036 {&geode_cost, 0, 0, 0, 0, 0},
2037 {&k6_cost, 32, 7, 32, 7, 32},
2038 {&athlon_cost, 16, 7, 16, 7, 16},
2039 {&pentium4_cost, 0, 0, 0, 0, 0},
2040 {&k8_cost, 16, 7, 16, 7, 16},
2041 {&nocona_cost, 0, 0, 0, 0, 0},
2042 {&core2_cost, 16, 10, 16, 10, 16},
2043 {&generic32_cost, 16, 7, 16, 7, 16},
2044 {&generic64_cost, 16, 10, 16, 10, 16},
2045 {&amdfam10_cost, 32, 24, 32, 7, 32},
2046 {&atom_cost, 16, 7, 16, 7, 16}
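/* Illustrative sketch (editor's note, not part of GCC): with -mtune=core2
   and no explicit -falign-* options, override_options later picks up the
   core2 row above, roughly:

       align_loops          = 16;
       align_loops_max_skip = 10;
*/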
2049 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2051 "generic",
2052 "i386",
2053 "i486",
2054 "pentium",
2055 "pentium-mmx",
2056 "pentiumpro",
2057 "pentium2",
2058 "pentium3",
2059 "pentium4",
2060 "pentium-m",
2061 "prescott",
2062 "nocona",
2063 "core2",
2064 "atom",
2065 "geode",
2066 "k6",
2067 "k6-2",
2068 "k6-3",
2069 "athlon",
2070 "athlon-4",
2071 "k8",
2072 "amdfam10"
2075 /* Implement TARGET_HANDLE_OPTION. */
2077 static bool
2078 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2080 switch (code)
2082 case OPT_mmmx:
2083 if (value)
2085 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2088 else
2090 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2093 return true;
2095 case OPT_m3dnow:
2096 if (value)
2098 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2101 else
2103 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2106 return true;
2108 case OPT_m3dnowa:
2109 return false;
2111 case OPT_msse:
2112 if (value)
2114 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2115 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2117 else
2119 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2120 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2122 return true;
2124 case OPT_msse2:
2125 if (value)
2127 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2128 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2130 else
2132 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2133 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2135 return true;
2137 case OPT_msse3:
2138 if (value)
2140 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2141 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2143 else
2145 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2146 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2148 return true;
2150 case OPT_mssse3:
2151 if (value)
2153 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2154 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2156 else
2158 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2159 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2161 return true;
2163 case OPT_msse4_1:
2164 if (value)
2166 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2167 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2169 else
2171 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2172 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2174 return true;
2176 case OPT_msse4_2:
2177 if (value)
2179 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2180 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2182 else
2184 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2185 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2187 return true;
2189 case OPT_mavx:
2190 if (value)
2192 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2193 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2195 else
2197 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2198 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2200 return true;
2202 case OPT_mfma:
2203 if (value)
2205 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2206 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2208 else
2210 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2211 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2213 return true;
2215 case OPT_msse4:
2216 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2217 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2218 return true;
2220 case OPT_mno_sse4:
2221 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2222 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2223 return true;
2225 case OPT_msse4a:
2226 if (value)
2228 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2231 else
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2236 return true;
2238 case OPT_msse5:
2239 if (value)
2241 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2244 else
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2249 return true;
2251 case OPT_mabm:
2252 if (value)
2254 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2255 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2257 else
2259 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2260 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2262 return true;
2264 case OPT_mpopcnt:
2265 if (value)
2267 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2268 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2270 else
2272 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2273 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2275 return true;
2277 case OPT_msahf:
2278 if (value)
2280 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2281 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2283 else
2285 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2286 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2288 return true;
2290 case OPT_mcx16:
2291 if (value)
2293 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2294 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2296 else
2298 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2299 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2301 return true;
2303 case OPT_maes:
2304 if (value)
2306 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2307 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2309 else
2311 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2312 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2314 return true;
2316 case OPT_mpclmul:
2317 if (value)
2319 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2320 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2322 else
2324 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2325 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2327 return true;
2329 default:
2330 return true;
2334 /* Return a string that documents the current -m options. The caller is
2335 responsible for freeing the string. */
2337 static char *
2338 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2339 const char *fpmath, bool add_nl_p)
2341 struct ix86_target_opts
2343 const char *option; /* option string */
2344 int mask; /* isa mask options */
2347 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2348 preceding options are matched first. */
2349 static struct ix86_target_opts isa_opts[] =
2351 { "-m64", OPTION_MASK_ISA_64BIT },
2352 { "-msse5", OPTION_MASK_ISA_SSE5 },
2353 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2354 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2355 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2356 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2357 { "-msse3", OPTION_MASK_ISA_SSE3 },
2358 { "-msse2", OPTION_MASK_ISA_SSE2 },
2359 { "-msse", OPTION_MASK_ISA_SSE },
2360 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2361 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2362 { "-mmmx", OPTION_MASK_ISA_MMX },
2363 { "-mabm", OPTION_MASK_ISA_ABM },
2364 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2365 { "-maes", OPTION_MASK_ISA_AES },
2366 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2369 /* Flag options. */
2370 static struct ix86_target_opts flag_opts[] =
2372 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2373 { "-m80387", MASK_80387 },
2374 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2375 { "-malign-double", MASK_ALIGN_DOUBLE },
2376 { "-mcld", MASK_CLD },
2377 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2378 { "-mieee-fp", MASK_IEEE_FP },
2379 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2380 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2381 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2382 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2383 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2384 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2385 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2386 { "-mno-red-zone", MASK_NO_RED_ZONE },
2387 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2388 { "-mrecip", MASK_RECIP },
2389 { "-mrtd", MASK_RTD },
2390 { "-msseregparm", MASK_SSEREGPARM },
2391 { "-mstack-arg-probe", MASK_STACK_PROBE },
2392 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2395 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2397 char isa_other[40];
2398 char target_other[40];
2399 unsigned num = 0;
2400 unsigned i, j;
2401 char *ret;
2402 char *ptr;
2403 size_t len;
2404 size_t line_len;
2405 size_t sep_len;
2407 memset (opts, '\0', sizeof (opts));
2409 /* Add -march= option. */
2410 if (arch)
2412 opts[num][0] = "-march=";
2413 opts[num++][1] = arch;
2416 /* Add -mtune= option. */
2417 if (tune)
2419 opts[num][0] = "-mtune=";
2420 opts[num++][1] = tune;
2423 /* Pick out the options in isa options. */
2424 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2426 if ((isa & isa_opts[i].mask) != 0)
2428 opts[num++][0] = isa_opts[i].option;
2429 isa &= ~ isa_opts[i].mask;
2433 if (isa && add_nl_p)
2435 opts[num++][0] = isa_other;
2436 sprintf (isa_other, "(other isa: 0x%x)", isa);
2439 /* Add flag options. */
2440 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2442 if ((flags & flag_opts[i].mask) != 0)
2444 opts[num++][0] = flag_opts[i].option;
2445 flags &= ~ flag_opts[i].mask;
2449 if (flags && add_nl_p)
2451 opts[num++][0] = target_other;
2452 sprintf (target_other, "(other flags: 0x%x)", flags);
2455 /* Add -fpmath= option. */
2456 if (fpmath)
2458 opts[num][0] = "-mfpmath=";
2459 opts[num++][1] = fpmath;
2462 /* Any options? */
2463 if (num == 0)
2464 return NULL;
2466 gcc_assert (num < ARRAY_SIZE (opts));
2468 /* Size the string. */
2469 len = 0;
2470 sep_len = (add_nl_p) ? 3 : 1;
2471 for (i = 0; i < num; i++)
2473 len += sep_len;
2474 for (j = 0; j < 2; j++)
2475 if (opts[i][j])
2476 len += strlen (opts[i][j]);
2479 /* Build the string. */
2480 ret = ptr = (char *) xmalloc (len);
2481 line_len = 0;
2483 for (i = 0; i < num; i++)
2485 size_t len2[2];
2487 for (j = 0; j < 2; j++)
2488 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2490 if (i != 0)
2492 *ptr++ = ' ';
2493 line_len++;
2495 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2497 *ptr++ = '\\';
2498 *ptr++ = '\n';
2499 line_len = 0;
2503 for (j = 0; j < 2; j++)
2504 if (opts[i][j])
2506 memcpy (ptr, opts[i][j], len2[j]);
2507 ptr += len2[j];
2508 line_len += len2[j];
2512 *ptr = '\0';
2513 gcc_assert (ret + len >= ptr);
2515 return ret;
2518 /* Function that is callable from the debugger to print the current
2519 options. */
2520 void
2521 ix86_debug_options (void)
2523 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2524 ix86_arch_string, ix86_tune_string,
2525 ix86_fpmath_string, true);
2527 if (opts)
2529 fprintf (stderr, "%s\n\n", opts);
2530 free (opts);
2532 else
2533 fprintf (stderr, "<no options>\n\n");
2535 return;
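/* Illustrative sketch (editor's note, not part of GCC): for a compilation
   with -m64 -march=core2 -mfpmath=sse, the string printed above would look
   roughly like

       -m64 -mssse3 -msse3 -msse2 -msse -mmmx -m80387 \
       -march=core2 -mtune=core2 -mfpmath=sse

   with the exact flag set depending on the subtarget defaults.  */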
2538 /* Sometimes certain combinations of command options do not make
2539 sense on a particular target machine. You can define a macro
2540 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2541 defined, is executed once just after all the command options have
2542 been parsed.
2544 Don't use this macro to turn on various extra optimizations for
2545 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2547 void
2548 override_options (bool main_args_p)
2550 int i;
2551 unsigned int ix86_arch_mask, ix86_tune_mask;
2552 const char *prefix;
2553 const char *suffix;
2554 const char *sw;
2556 /* Comes from final.c -- no real reason to change it. */
2557 #define MAX_CODE_ALIGN 16
2559 enum pta_flags
2561 PTA_SSE = 1 << 0,
2562 PTA_SSE2 = 1 << 1,
2563 PTA_SSE3 = 1 << 2,
2564 PTA_MMX = 1 << 3,
2565 PTA_PREFETCH_SSE = 1 << 4,
2566 PTA_3DNOW = 1 << 5,
2567 PTA_3DNOW_A = 1 << 6,
2568 PTA_64BIT = 1 << 7,
2569 PTA_SSSE3 = 1 << 8,
2570 PTA_CX16 = 1 << 9,
2571 PTA_POPCNT = 1 << 10,
2572 PTA_ABM = 1 << 11,
2573 PTA_SSE4A = 1 << 12,
2574 PTA_NO_SAHF = 1 << 13,
2575 PTA_SSE4_1 = 1 << 14,
2576 PTA_SSE4_2 = 1 << 15,
2577 PTA_SSE5 = 1 << 16,
2578 PTA_AES = 1 << 17,
2579 PTA_PCLMUL = 1 << 18,
2580 PTA_AVX = 1 << 19,
2581 PTA_FMA = 1 << 20
2584 static struct pta
2586 const char *const name; /* processor name or nickname. */
2587 const enum processor_type processor;
2588 const enum attr_cpu schedule;
2589 const unsigned /*enum pta_flags*/ flags;
2591 const processor_alias_table[] =
2593 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2594 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2595 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2596 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2597 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2598 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2599 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2600 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2601 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2602 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2603 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2604 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2605 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2606 PTA_MMX | PTA_SSE},
2607 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2608 PTA_MMX | PTA_SSE},
2609 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2610 PTA_MMX | PTA_SSE | PTA_SSE2},
2611 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2612 PTA_MMX | PTA_SSE | PTA_SSE2},
2613 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2614 PTA_MMX | PTA_SSE | PTA_SSE2},
2615 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2616 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2617 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2618 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2619 | PTA_CX16 | PTA_NO_SAHF},
2620 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2621 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2622 | PTA_SSSE3 | PTA_CX16},
2623 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2624 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2625 | PTA_SSSE3 | PTA_CX16},
2626 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2627 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2628 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2629 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2630 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2631 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2632 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2633 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2634 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2635 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2636 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2637 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2638 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2639 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2640 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2641 {"x86-64", PROCESSOR_K8, CPU_K8,
2642 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2643 {"k8", PROCESSOR_K8, CPU_K8,
2644 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2645 | PTA_SSE2 | PTA_NO_SAHF},
2646 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2647 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2648 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2649 {"opteron", PROCESSOR_K8, CPU_K8,
2650 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2651 | PTA_SSE2 | PTA_NO_SAHF},
2652 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2653 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2654 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2655 {"athlon64", PROCESSOR_K8, CPU_K8,
2656 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2657 | PTA_SSE2 | PTA_NO_SAHF},
2658 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2659 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2660 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2661 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2662 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2663 | PTA_SSE2 | PTA_NO_SAHF},
2664 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2665 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2666 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2667 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2668 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2669 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2670 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2671 0 /* flags are only used for -march switch. */ },
2672 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2673 PTA_64BIT /* flags are only used for -march switch. */ },
2676 int const pta_size = ARRAY_SIZE (processor_alias_table);
2678 /* Set up prefix/suffix so the error messages refer to either the command
2679 line argument, or the attribute(target). */
2680 if (main_args_p)
2682 prefix = "-m";
2683 suffix = "";
2684 sw = "switch";
2686 else
2688 prefix = "option(\"";
2689 suffix = "\")";
2690 sw = "attribute";
2693 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2694 SUBTARGET_OVERRIDE_OPTIONS;
2695 #endif
2697 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2698 SUBSUBTARGET_OVERRIDE_OPTIONS;
2699 #endif
2701 /* -fPIC is the default for 64-bit Mach-O (x86_64-darwin). */
2702 if (TARGET_MACHO && TARGET_64BIT)
2703 flag_pic = 2;
2705 /* Set the default values for switches whose default depends on TARGET_64BIT
2706 in case they weren't overwritten by command line options. */
2707 if (TARGET_64BIT)
2709 /* Mach-O doesn't support omitting the frame pointer for now. */
2710 if (flag_omit_frame_pointer == 2)
2711 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2712 if (flag_asynchronous_unwind_tables == 2)
2713 flag_asynchronous_unwind_tables = 1;
2714 if (flag_pcc_struct_return == 2)
2715 flag_pcc_struct_return = 0;
2717 else
2719 if (flag_omit_frame_pointer == 2)
2720 flag_omit_frame_pointer = 0;
2721 if (flag_asynchronous_unwind_tables == 2)
2722 flag_asynchronous_unwind_tables = 0;
2723 if (flag_pcc_struct_return == 2)
2724 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2727 /* Need to check -mtune=generic first. */
2728 if (ix86_tune_string)
2730 if (!strcmp (ix86_tune_string, "generic")
2731 || !strcmp (ix86_tune_string, "i686")
2732 /* As special support for cross compilers we read -mtune=native
2733 as -mtune=generic. With native compilers we won't see the
2734 -mtune=native, as it was changed by the driver. */
2735 || !strcmp (ix86_tune_string, "native"))
2737 if (TARGET_64BIT)
2738 ix86_tune_string = "generic64";
2739 else
2740 ix86_tune_string = "generic32";
2742 /* If this call is for setting the option attribute, allow the
2743 generic32/generic64 that was previously set. */
2744 else if (!main_args_p
2745 && (!strcmp (ix86_tune_string, "generic32")
2746 || !strcmp (ix86_tune_string, "generic64")))
2748 else if (!strncmp (ix86_tune_string, "generic", 7))
2749 error ("bad value (%s) for %stune=%s %s",
2750 ix86_tune_string, prefix, suffix, sw);
2752 else
2754 if (ix86_arch_string)
2755 ix86_tune_string = ix86_arch_string;
2756 if (!ix86_tune_string)
2758 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2759 ix86_tune_defaulted = 1;
2762 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2763 need to use a sensible tune option. */
2764 if (!strcmp (ix86_tune_string, "generic")
2765 || !strcmp (ix86_tune_string, "x86-64")
2766 || !strcmp (ix86_tune_string, "i686"))
2768 if (TARGET_64BIT)
2769 ix86_tune_string = "generic64";
2770 else
2771 ix86_tune_string = "generic32";
2774 if (ix86_stringop_string)
2776 if (!strcmp (ix86_stringop_string, "rep_byte"))
2777 stringop_alg = rep_prefix_1_byte;
2778 else if (!strcmp (ix86_stringop_string, "libcall"))
2779 stringop_alg = libcall;
2780 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2781 stringop_alg = rep_prefix_4_byte;
2782 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2783 && TARGET_64BIT)
2784 /* rep; movq isn't available in 32-bit code. */
2785 stringop_alg = rep_prefix_8_byte;
2786 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2787 stringop_alg = loop_1_byte;
2788 else if (!strcmp (ix86_stringop_string, "loop"))
2789 stringop_alg = loop;
2790 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2791 stringop_alg = unrolled_loop;
2792 else
2793 error ("bad value (%s) for %sstringop-strategy=%s %s",
2794 ix86_stringop_string, prefix, suffix, sw);
2796 if (!strcmp (ix86_tune_string, "x86-64"))
2797 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2798 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2799 prefix, suffix, prefix, suffix, prefix, suffix);
2801 if (!ix86_arch_string)
2802 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2803 else
2804 ix86_arch_specified = 1;
2806 if (!strcmp (ix86_arch_string, "generic"))
2807 error ("generic CPU can be used only for %stune=%s %s",
2808 prefix, suffix, sw);
2809 if (!strncmp (ix86_arch_string, "generic", 7))
2810 error ("bad value (%s) for %sarch=%s %s",
2811 ix86_arch_string, prefix, suffix, sw);
2813 /* Validate -mabi= value. */
2814 if (ix86_abi_string)
2816 if (strcmp (ix86_abi_string, "sysv") == 0)
2817 ix86_abi = SYSV_ABI;
2818 else if (strcmp (ix86_abi_string, "ms") == 0)
2819 ix86_abi = MS_ABI;
2820 else
2821 error ("unknown ABI (%s) for %sabi=%s %s",
2822 ix86_abi_string, prefix, suffix, sw);
2824 else
2825 ix86_abi = DEFAULT_ABI;
2827 if (ix86_cmodel_string != 0)
2829 if (!strcmp (ix86_cmodel_string, "small"))
2830 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2831 else if (!strcmp (ix86_cmodel_string, "medium"))
2832 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2833 else if (!strcmp (ix86_cmodel_string, "large"))
2834 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2835 else if (flag_pic)
2836 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2837 else if (!strcmp (ix86_cmodel_string, "32"))
2838 ix86_cmodel = CM_32;
2839 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2840 ix86_cmodel = CM_KERNEL;
2841 else
2842 error ("bad value (%s) for %scmodel=%s %s",
2843 ix86_cmodel_string, prefix, suffix, sw);
2845 else
2847 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2848 use of rip-relative addressing. This eliminates fixups that
2849 would otherwise be needed if this object is to be placed in a
2850 DLL, and is essentially just as efficient as direct addressing. */
2851 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2852 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2853 else if (TARGET_64BIT)
2854 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2855 else
2856 ix86_cmodel = CM_32;
2858 if (ix86_asm_string != 0)
2860 if (! TARGET_MACHO
2861 && !strcmp (ix86_asm_string, "intel"))
2862 ix86_asm_dialect = ASM_INTEL;
2863 else if (!strcmp (ix86_asm_string, "att"))
2864 ix86_asm_dialect = ASM_ATT;
2865 else
2866 error ("bad value (%s) for %sasm=%s %s",
2867 ix86_asm_string, prefix, suffix, sw);
2869 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2870 error ("code model %qs not supported in the %s bit mode",
2871 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2872 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2873 sorry ("%i-bit mode not compiled in",
2874 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2876 for (i = 0; i < pta_size; i++)
2877 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2879 ix86_schedule = processor_alias_table[i].schedule;
2880 ix86_arch = processor_alias_table[i].processor;
2881 /* Default cpu tuning to the architecture. */
2882 ix86_tune = ix86_arch;
2884 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2885 error ("CPU you selected does not support x86-64 "
2886 "instruction set");
2888 if (processor_alias_table[i].flags & PTA_MMX
2889 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2890 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2891 if (processor_alias_table[i].flags & PTA_3DNOW
2892 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2893 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2894 if (processor_alias_table[i].flags & PTA_3DNOW_A
2895 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2896 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2897 if (processor_alias_table[i].flags & PTA_SSE
2898 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2899 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2900 if (processor_alias_table[i].flags & PTA_SSE2
2901 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2902 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2903 if (processor_alias_table[i].flags & PTA_SSE3
2904 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2905 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2906 if (processor_alias_table[i].flags & PTA_SSSE3
2907 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2908 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2909 if (processor_alias_table[i].flags & PTA_SSE4_1
2910 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2911 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2912 if (processor_alias_table[i].flags & PTA_SSE4_2
2913 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2914 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2915 if (processor_alias_table[i].flags & PTA_AVX
2916 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2917 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2918 if (processor_alias_table[i].flags & PTA_FMA
2919 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2920 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2921 if (processor_alias_table[i].flags & PTA_SSE4A
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2923 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2924 if (processor_alias_table[i].flags & PTA_SSE5
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2926 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2927 if (processor_alias_table[i].flags & PTA_ABM
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2929 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2930 if (processor_alias_table[i].flags & PTA_CX16
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2932 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2933 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2935 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2936 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2939 if (processor_alias_table[i].flags & PTA_AES
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2941 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2942 if (processor_alias_table[i].flags & PTA_PCLMUL
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2944 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2945 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2946 x86_prefetch_sse = true;
2948 break;
2951 if (i == pta_size)
2952 error ("bad value (%s) for %sarch=%s %s",
2953 ix86_arch_string, prefix, suffix, sw);
2955 ix86_arch_mask = 1u << ix86_arch;
2956 for (i = 0; i < X86_ARCH_LAST; ++i)
2957 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
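/* Illustrative sketch (editor's note, not part of GCC): each feature word
   is tested against the single bit of the selected -march, e.g. for
   -march=core2:

       ix86_arch_mask = 1u << PROCESSOR_CORE2;     // == m_CORE2
       // X86_ARCH_CMOVE is ~(m_386 | m_486 | m_PENT | m_K6), which
       // includes m_CORE2, so ix86_arch_features[X86_ARCH_CMOVE] = 1.
*/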
2959 for (i = 0; i < pta_size; i++)
2960 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2962 ix86_schedule = processor_alias_table[i].schedule;
2963 ix86_tune = processor_alias_table[i].processor;
2964 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2966 if (ix86_tune_defaulted)
2968 ix86_tune_string = "x86-64";
2969 for (i = 0; i < pta_size; i++)
2970 if (! strcmp (ix86_tune_string,
2971 processor_alias_table[i].name))
2972 break;
2973 ix86_schedule = processor_alias_table[i].schedule;
2974 ix86_tune = processor_alias_table[i].processor;
2976 else
2977 error ("CPU you selected does not support x86-64 "
2978 "instruction set");
2980 /* Intel CPUs have always interpreted SSE prefetch instructions as
2981 NOPs; so, we can enable SSE prefetch instructions even when
2982 -mtune (rather than -march) points us to a processor that has them.
2983 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2984 higher processors. */
2985 if (TARGET_CMOVE
2986 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2987 x86_prefetch_sse = true;
2988 break;
2990 if (i == pta_size)
2991 error ("bad value (%s) for %stune=%s %s",
2992 ix86_tune_string, prefix, suffix, sw);
2994 ix86_tune_mask = 1u << ix86_tune;
2995 for (i = 0; i < X86_TUNE_LAST; ++i)
2996 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2998 if (optimize_size)
2999 ix86_cost = &ix86_size_cost;
3000 else
3001 ix86_cost = processor_target_table[ix86_tune].cost;
3003 /* Arrange to set up i386_stack_locals for all functions. */
3004 init_machine_status = ix86_init_machine_status;
3006 /* Validate -mregparm= value. */
3007 if (ix86_regparm_string)
3009 if (TARGET_64BIT)
3010 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3011 i = atoi (ix86_regparm_string);
3012 if (i < 0 || i > REGPARM_MAX)
3013 error ("%sregparm=%d%s is not between 0 and %d",
3014 prefix, i, suffix, REGPARM_MAX);
3015 else
3016 ix86_regparm = i;
3018 if (TARGET_64BIT)
3019 ix86_regparm = REGPARM_MAX;
3021 /* If the user has provided any of the -malign-* options,
3022 warn and use that value only if -falign-* is not set.
3023 Remove this code in GCC 3.2 or later. */
3024 if (ix86_align_loops_string)
3026 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3027 prefix, suffix, suffix);
3028 if (align_loops == 0)
3030 i = atoi (ix86_align_loops_string);
3031 if (i < 0 || i > MAX_CODE_ALIGN)
3032 error ("%salign-loops=%d%s is not between 0 and %d",
3033 prefix, i, suffix, MAX_CODE_ALIGN);
3034 else
3035 align_loops = 1 << i;
3039 if (ix86_align_jumps_string)
3041 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3042 prefix, suffix, suffix);
3043 if (align_jumps == 0)
3045 i = atoi (ix86_align_jumps_string);
3046 if (i < 0 || i > MAX_CODE_ALIGN)
3047 error ("%salign-loops=%d%s is not between 0 and %d",
3048 prefix, i, suffix, MAX_CODE_ALIGN);
3049 else
3050 align_jumps = 1 << i;
3054 if (ix86_align_funcs_string)
3056 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3057 prefix, suffix, suffix);
3058 if (align_functions == 0)
3060 i = atoi (ix86_align_funcs_string);
3061 if (i < 0 || i > MAX_CODE_ALIGN)
3062 error ("%salign-loops=%d%s is not between 0 and %d",
3063 prefix, i, suffix, MAX_CODE_ALIGN);
3064 else
3065 align_functions = 1 << i;
3069 /* Default align_* from the processor table. */
3070 if (align_loops == 0)
3072 align_loops = processor_target_table[ix86_tune].align_loop;
3073 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3075 if (align_jumps == 0)
3077 align_jumps = processor_target_table[ix86_tune].align_jump;
3078 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3080 if (align_functions == 0)
3082 align_functions = processor_target_table[ix86_tune].align_func;
3085 /* Validate -mbranch-cost= value, or provide default. */
3086 ix86_branch_cost = ix86_cost->branch_cost;
3087 if (ix86_branch_cost_string)
3089 i = atoi (ix86_branch_cost_string);
3090 if (i < 0 || i > 5)
3091 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3092 else
3093 ix86_branch_cost = i;
3095 if (ix86_section_threshold_string)
3097 i = atoi (ix86_section_threshold_string);
3098 if (i < 0)
3099 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3100 else
3101 ix86_section_threshold = i;
3104 if (ix86_tls_dialect_string)
3106 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3107 ix86_tls_dialect = TLS_DIALECT_GNU;
3108 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3109 ix86_tls_dialect = TLS_DIALECT_GNU2;
3110 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3111 ix86_tls_dialect = TLS_DIALECT_SUN;
3112 else
3113 error ("bad value (%s) for %stls-dialect=%s %s",
3114 ix86_tls_dialect_string, prefix, suffix, sw);
3117 if (ix87_precision_string)
3119 i = atoi (ix87_precision_string);
3120 if (i != 32 && i != 64 && i != 80)
3121 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3124 if (TARGET_64BIT)
3126 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3128 /* Enable by default the SSE and MMX builtins. Do allow the user to
3129 explicitly disable any of these. In particular, disabling SSE and
3130 MMX for kernel code is extremely useful. */
3131 if (!ix86_arch_specified)
3132 ix86_isa_flags
3133 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3134 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3136 if (TARGET_RTD)
3137 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3139 else
3141 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3143 if (!ix86_arch_specified)
3144 ix86_isa_flags
3145 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3147 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3148 when the programmer takes care to keep the stack from being destroyed. */
3149 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3150 target_flags |= MASK_NO_RED_ZONE;
3153 /* Keep nonleaf frame pointers. */
3154 if (flag_omit_frame_pointer)
3155 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3156 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3157 flag_omit_frame_pointer = 1;
3159 /* If we're doing fast math, we don't care about comparison order
3160 wrt NaNs. This lets us use a shorter comparison sequence. */
3161 if (flag_finite_math_only)
3162 target_flags &= ~MASK_IEEE_FP;
3164 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3165 since the insns won't need emulation. */
3166 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3167 target_flags &= ~MASK_NO_FANCY_MATH_387;
3169 /* Likewise, if the target doesn't have a 387, or we've specified
3170 software floating point, don't use 387 inline intrinsics. */
3171 if (!TARGET_80387)
3172 target_flags |= MASK_NO_FANCY_MATH_387;
3174 /* Turn on MMX builtins for -msse. */
3175 if (TARGET_SSE)
3177 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3178 x86_prefetch_sse = true;
3181 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3182 if (TARGET_SSE4_2 || TARGET_ABM)
3183 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3185 /* Validate -mpreferred-stack-boundary= value or default it to
3186 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3187 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3188 if (ix86_preferred_stack_boundary_string)
3190 i = atoi (ix86_preferred_stack_boundary_string);
3191 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3192 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3193 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3194 else
3195 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
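/* Illustrative note (not part of the original source): with
   -mpreferred-stack-boundary=4 the computation above yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
   stack alignment that the x86-64 psABI requires. */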
3198 /* Set the default value for -mstackrealign. */
3199 if (ix86_force_align_arg_pointer == -1)
3200 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3202 /* Validate -mincoming-stack-boundary= value or default it to
3203 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3204 if (ix86_force_align_arg_pointer)
3205 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3206 else
3207 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3208 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3209 if (ix86_incoming_stack_boundary_string)
3211 i = atoi (ix86_incoming_stack_boundary_string);
3212 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3213 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3214 i, TARGET_64BIT ? 4 : 2);
3215 else
3217 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3218 ix86_incoming_stack_boundary
3219 = ix86_user_incoming_stack_boundary;
3223 /* Accept -msseregparm only if at least SSE support is enabled. */
3224 if (TARGET_SSEREGPARM
3225 && ! TARGET_SSE)
3226 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3228 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3229 if (ix86_fpmath_string != 0)
3231 if (! strcmp (ix86_fpmath_string, "387"))
3232 ix86_fpmath = FPMATH_387;
3233 else if (! strcmp (ix86_fpmath_string, "sse"))
3235 if (!TARGET_SSE)
3237 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3238 ix86_fpmath = FPMATH_387;
3240 else
3241 ix86_fpmath = FPMATH_SSE;
3243 else if (! strcmp (ix86_fpmath_string, "387,sse")
3244 || ! strcmp (ix86_fpmath_string, "387+sse")
3245 || ! strcmp (ix86_fpmath_string, "sse,387")
3246 || ! strcmp (ix86_fpmath_string, "sse+387")
3247 || ! strcmp (ix86_fpmath_string, "both"))
3249 if (!TARGET_SSE)
3251 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3252 ix86_fpmath = FPMATH_387;
3254 else if (!TARGET_80387)
3256 warning (0, "387 instruction set disabled, using SSE arithmetics");
3257 ix86_fpmath = FPMATH_SSE;
3259 else
3260 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3262 else
3263 error ("bad value (%s) for %sfpmath=%s %s",
3264 ix86_fpmath_string, prefix, suffix, sw);
3267 /* If the i387 is disabled, then do not return values in it. */
3268 if (!TARGET_80387)
3269 target_flags &= ~MASK_FLOAT_RETURNS;
3271 /* Use an external vectorized library for vectorizing intrinsics. */
3272 if (ix86_veclibabi_string)
3274 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3275 ix86_veclib_handler = ix86_veclibabi_svml;
3276 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3277 ix86_veclib_handler = ix86_veclibabi_acml;
3278 else
3279 error ("unknown vectorization library ABI type (%s) for "
3280 "%sveclibabi=%s %s", ix86_veclibabi_string,
3281 prefix, suffix, sw);
3284 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3285 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3286 && !optimize_size)
3287 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3289 /* ??? Unwind info is not correct around the CFG unless either a frame
3290 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3291 unwind info generation to be aware of the CFG and propagating states
3292 around edges. */
3293 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3294 || flag_exceptions || flag_non_call_exceptions)
3295 && flag_omit_frame_pointer
3296 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3298 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3299 warning (0, "unwind tables currently require either a frame pointer "
3300 "or %saccumulate-outgoing-args%s for correctness",
3301 prefix, suffix);
3302 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3305 /* If stack probes are required, the space used for large function
3306 arguments on the stack must also be probed, so enable
3307 -maccumulate-outgoing-args so this happens in the prologue. */
3308 if (TARGET_STACK_PROBE
3309 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3311 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3312 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3313 "for correctness", prefix, suffix);
3314 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3317 /* For sane SSE instruction set generation we need the fcomi instruction.
3318 It is safe to enable all CMOVE instructions. */
3319 if (TARGET_SSE)
3320 TARGET_CMOVE = 1;
3322 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3324 char *p;
3325 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3326 p = strchr (internal_label_prefix, 'X');
3327 internal_label_prefix_len = p - internal_label_prefix;
3328 *p = '\0';
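/* Illustrative sketch, assuming the common ELF definition of
   ASM_GENERATE_INTERNAL_LABEL that formats "*.%s%u": the call above
   would produce "*.LX0"; strchr then locates the 'X', so
   internal_label_prefix becomes "*.L" with length 3. */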
3331 /* When a scheduling description is not available, disable the scheduler pass
3332 so it won't slow down compilation and make x87 code slower. */
3333 if (!TARGET_SCHEDULE)
3334 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3336 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3337 set_param_value ("simultaneous-prefetches",
3338 ix86_cost->simultaneous_prefetches);
3339 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3340 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3341 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3342 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3343 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3344 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3346 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3347 can be optimized to ap = __builtin_next_arg (0). */
3348 if (!TARGET_64BIT)
3349 targetm.expand_builtin_va_start = NULL;
3351 if (TARGET_64BIT)
3353 ix86_gen_leave = gen_leave_rex64;
3354 ix86_gen_pop1 = gen_popdi1;
3355 ix86_gen_add3 = gen_adddi3;
3356 ix86_gen_sub3 = gen_subdi3;
3357 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3358 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3359 ix86_gen_monitor = gen_sse3_monitor64;
3360 ix86_gen_andsp = gen_anddi3;
3362 else
3364 ix86_gen_leave = gen_leave;
3365 ix86_gen_pop1 = gen_popsi1;
3366 ix86_gen_add3 = gen_addsi3;
3367 ix86_gen_sub3 = gen_subsi3;
3368 ix86_gen_sub3_carry = gen_subsi3_carry;
3369 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3370 ix86_gen_monitor = gen_sse3_monitor;
3371 ix86_gen_andsp = gen_andsi3;
3374 #ifdef USE_IX86_CLD
3375 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3376 if (!TARGET_64BIT)
3377 target_flags |= MASK_CLD & ~target_flags_explicit;
3378 #endif
3380 /* Save the initial options in case the user uses function-specific options. */
3381 if (main_args_p)
3382 target_option_default_node = target_option_current_node
3383 = build_target_option_node ();
3386 /* Save the current options. */
3388 static void
3389 ix86_function_specific_save (struct cl_target_option *ptr)
3391 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3392 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3393 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3394 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3395 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3397 ptr->arch = ix86_arch;
3398 ptr->schedule = ix86_schedule;
3399 ptr->tune = ix86_tune;
3400 ptr->fpmath = ix86_fpmath;
3401 ptr->branch_cost = ix86_branch_cost;
3402 ptr->tune_defaulted = ix86_tune_defaulted;
3403 ptr->arch_specified = ix86_arch_specified;
3404 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3405 ptr->target_flags_explicit = target_flags_explicit;
3408 /* Restore the current options. */
3410 static void
3411 ix86_function_specific_restore (struct cl_target_option *ptr)
3413 enum processor_type old_tune = ix86_tune;
3414 enum processor_type old_arch = ix86_arch;
3415 unsigned int ix86_arch_mask, ix86_tune_mask;
3416 int i;
3418 ix86_arch = ptr->arch;
3419 ix86_schedule = ptr->schedule;
3420 ix86_tune = ptr->tune;
3421 ix86_fpmath = ptr->fpmath;
3422 ix86_branch_cost = ptr->branch_cost;
3423 ix86_tune_defaulted = ptr->tune_defaulted;
3424 ix86_arch_specified = ptr->arch_specified;
3425 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3426 target_flags_explicit = ptr->target_flags_explicit;
3428 /* Recreate the arch feature tests if the arch changed. */
3429 if (old_arch != ix86_arch)
3431 ix86_arch_mask = 1u << ix86_arch;
3432 for (i = 0; i < X86_ARCH_LAST; ++i)
3433 ix86_arch_features[i]
3434 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3437 /* Recreate the tune optimization tests. */
3438 if (old_tune != ix86_tune)
3440 ix86_tune_mask = 1u << ix86_tune;
3441 for (i = 0; i < X86_TUNE_LAST; ++i)
3442 ix86_tune_features[i]
3443 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3447 /* Print the current options. */
3449 static void
3450 ix86_function_specific_print (FILE *file, int indent,
3451 struct cl_target_option *ptr)
3453 char *target_string
3454 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3455 NULL, NULL, NULL, false);
3457 fprintf (file, "%*sarch = %d (%s)\n",
3458 indent, "",
3459 ptr->arch,
3460 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3461 ? cpu_names[ptr->arch]
3462 : "<unknown>"));
3464 fprintf (file, "%*stune = %d (%s)\n",
3465 indent, "",
3466 ptr->tune,
3467 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3468 ? cpu_names[ptr->tune]
3469 : "<unknown>"));
3471 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3472 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3473 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3474 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3476 if (target_string)
3478 fprintf (file, "%*s%s\n", indent, "", target_string);
3479 free (target_string);
3484 /* Inner function to process the attribute((target(...))): take an argument and
3485 set the current options from the argument. If we have a list, recursively go
3486 over the list. */
3488 static bool
3489 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3491 char *next_optstr;
3492 bool ret = true;
3494 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3495 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3496 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3497 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3499 enum ix86_opt_type
3501 ix86_opt_unknown,
3502 ix86_opt_yes,
3503 ix86_opt_no,
3504 ix86_opt_str,
3505 ix86_opt_isa
3508 static const struct
3510 const char *string;
3511 size_t len;
3512 enum ix86_opt_type type;
3513 int opt;
3514 int mask;
3515 } attrs[] = {
3516 /* isa options */
3517 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3518 IX86_ATTR_ISA ("abm", OPT_mabm),
3519 IX86_ATTR_ISA ("aes", OPT_maes),
3520 IX86_ATTR_ISA ("avx", OPT_mavx),
3521 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3522 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3523 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3524 IX86_ATTR_ISA ("sse", OPT_msse),
3525 IX86_ATTR_ISA ("sse2", OPT_msse2),
3526 IX86_ATTR_ISA ("sse3", OPT_msse3),
3527 IX86_ATTR_ISA ("sse4", OPT_msse4),
3528 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3529 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3530 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3531 IX86_ATTR_ISA ("sse5", OPT_msse5),
3532 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3534 /* string options */
3535 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3536 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3537 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3539 /* flag options */
3540 IX86_ATTR_YES ("cld",
3541 OPT_mcld,
3542 MASK_CLD),
3544 IX86_ATTR_NO ("fancy-math-387",
3545 OPT_mfancy_math_387,
3546 MASK_NO_FANCY_MATH_387),
3548 IX86_ATTR_NO ("fused-madd",
3549 OPT_mfused_madd,
3550 MASK_NO_FUSED_MADD),
3552 IX86_ATTR_YES ("ieee-fp",
3553 OPT_mieee_fp,
3554 MASK_IEEE_FP),
3556 IX86_ATTR_YES ("inline-all-stringops",
3557 OPT_minline_all_stringops,
3558 MASK_INLINE_ALL_STRINGOPS),
3560 IX86_ATTR_YES ("inline-stringops-dynamically",
3561 OPT_minline_stringops_dynamically,
3562 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3564 IX86_ATTR_NO ("align-stringops",
3565 OPT_mno_align_stringops,
3566 MASK_NO_ALIGN_STRINGOPS),
3568 IX86_ATTR_YES ("recip",
3569 OPT_mrecip,
3570 MASK_RECIP),
3574 /* If this is a list, recurse to get the options. */
3575 if (TREE_CODE (args) == TREE_LIST)
3577 bool ret = true;
3579 for (; args; args = TREE_CHAIN (args))
3580 if (TREE_VALUE (args)
3581 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3582 ret = false;
3584 return ret;
3587 else if (TREE_CODE (args) != STRING_CST)
3588 gcc_unreachable ();
3590 /* Handle multiple arguments separated by commas. */
3591 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3593 while (next_optstr && *next_optstr != '\0')
3595 char *p = next_optstr;
3596 char *orig_p = p;
3597 char *comma = strchr (next_optstr, ',');
3598 const char *opt_string;
3599 size_t len, opt_len;
3600 int opt;
3601 bool opt_set_p;
3602 char ch;
3603 unsigned i;
3604 enum ix86_opt_type type = ix86_opt_unknown;
3605 int mask = 0;
3607 if (comma)
3609 *comma = '\0';
3610 len = comma - next_optstr;
3611 next_optstr = comma + 1;
3613 else
3615 len = strlen (p);
3616 next_optstr = NULL;
3619 /* Recognize no-xxx. */
3620 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3622 opt_set_p = false;
3623 p += 3;
3624 len -= 3;
3626 else
3627 opt_set_p = true;
3629 /* Find the option. */
3630 ch = *p;
3631 opt = N_OPTS;
3632 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3634 type = attrs[i].type;
3635 opt_len = attrs[i].len;
3636 if (ch == attrs[i].string[0]
3637 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3638 && memcmp (p, attrs[i].string, opt_len) == 0)
3640 opt = attrs[i].opt;
3641 mask = attrs[i].mask;
3642 opt_string = attrs[i].string;
3643 break;
3647 /* Process the option. */
3648 if (opt == N_OPTS)
3650 error ("attribute(target(\"%s\")) is unknown", orig_p);
3651 ret = false;
3654 else if (type == ix86_opt_isa)
3655 ix86_handle_option (opt, p, opt_set_p);
3657 else if (type == ix86_opt_yes || type == ix86_opt_no)
3659 if (type == ix86_opt_no)
3660 opt_set_p = !opt_set_p;
3662 if (opt_set_p)
3663 target_flags |= mask;
3664 else
3665 target_flags &= ~mask;
3668 else if (type == ix86_opt_str)
3670 if (p_strings[opt])
3672 error ("option(\"%s\") was already specified", opt_string);
3673 ret = false;
3675 else
3676 p_strings[opt] = xstrdup (p + opt_len);
3679 else
3680 gcc_unreachable ();
3683 return ret;
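/* Usage illustration (hypothetical declaration, not from this file):

     int fast_sum (int *v, int n)
       __attribute__((target ("sse4.2,no-ieee-fp")));

   The parser above receives the string "sse4.2,no-ieee-fp". "sse4.2"
   matches an ix86_opt_isa entry and is handed to ix86_handle_option,
   while the "no-" prefix on "ieee-fp" inverts opt_set_p so that
   MASK_IEEE_FP is cleared from target_flags. */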
3686 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3688 tree
3689 ix86_valid_target_attribute_tree (tree args)
3691 const char *orig_arch_string = ix86_arch_string;
3692 const char *orig_tune_string = ix86_tune_string;
3693 const char *orig_fpmath_string = ix86_fpmath_string;
3694 int orig_tune_defaulted = ix86_tune_defaulted;
3695 int orig_arch_specified = ix86_arch_specified;
3696 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3697 tree t = NULL_TREE;
3698 int i;
3699 struct cl_target_option *def
3700 = TREE_TARGET_OPTION (target_option_default_node);
3702 /* Process each of the options on the chain. */
3703 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3704 return NULL_TREE;
3706 /* If the changed options are different from the default, rerun override_options,
3707 and then save the options away. The string options are attribute options,
3708 and will be undone when we copy the save structure. */
3709 if (ix86_isa_flags != def->ix86_isa_flags
3710 || target_flags != def->target_flags
3711 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3712 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3713 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3715 /* If we are using the default tune= or arch=, undo the string assigned,
3716 and use the default. */
3717 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3718 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3719 else if (!orig_arch_specified)
3720 ix86_arch_string = NULL;
3722 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3723 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3724 else if (orig_tune_defaulted)
3725 ix86_tune_string = NULL;
3727 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3728 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3729 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3730 else if (!TARGET_64BIT && TARGET_SSE)
3731 ix86_fpmath_string = "sse,387";
3733 /* Do any overrides, such as arch=xxx or tune=xxx support. */
3734 override_options (false);
3736 /* Add any builtin functions for the new ISA, if any. */
3737 ix86_add_new_builtins (ix86_isa_flags);
3739 /* Save the current options unless we are validating options for
3740 #pragma. */
3741 t = build_target_option_node ();
3743 ix86_arch_string = orig_arch_string;
3744 ix86_tune_string = orig_tune_string;
3745 ix86_fpmath_string = orig_fpmath_string;
3747 /* Free up memory allocated to hold the strings. */
3748 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3749 if (option_strings[i])
3750 free (option_strings[i]);
3753 return t;
3756 /* Hook to validate attribute((target("string"))). */
3758 static bool
3759 ix86_valid_target_attribute_p (tree fndecl,
3760 tree ARG_UNUSED (name),
3761 tree args,
3762 int ARG_UNUSED (flags))
3764 struct cl_target_option cur_target;
3765 bool ret = true;
3766 tree old_optimize = build_optimization_node ();
3767 tree new_target, new_optimize;
3768 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3770 /* If the function changed the optimization levels as well as setting target
3771 options, start with the optimizations specified. */
3772 if (func_optimize && func_optimize != old_optimize)
3773 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3775 /* The target attributes may also change some optimization flags, so update
3776 the optimization options if necessary. */
3777 cl_target_option_save (&cur_target);
3778 new_target = ix86_valid_target_attribute_tree (args);
3779 new_optimize = build_optimization_node ();
3781 if (!new_target)
3782 ret = false;
3784 else if (fndecl)
3786 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3788 if (old_optimize != new_optimize)
3789 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3792 cl_target_option_restore (&cur_target);
3794 if (old_optimize != new_optimize)
3795 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3797 return ret;
3801 /* Hook to determine if one function can safely inline another. */
3803 static bool
3804 ix86_can_inline_p (tree caller, tree callee)
3806 bool ret = false;
3807 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3808 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3810 /* If callee has no option attributes, then it is ok to inline. */
3811 if (!callee_tree)
3812 ret = true;
3814 /* If caller has no option attributes, but callee does, then it is not ok to
3815 inline. */
3816 else if (!caller_tree)
3817 ret = false;
3819 else
3821 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3822 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3824 /* Callee's isa options should be a subset of the caller's, i.e. an SSE5
3825 function can inline an SSE2 function but an SSE2 function can't inline
3826 an SSE5 function. */
3827 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3828 != callee_opts->ix86_isa_flags)
3829 ret = false;
3831 /* See if we have the same non-isa options. */
3832 else if (caller_opts->target_flags != callee_opts->target_flags)
3833 ret = false;
3835 /* See if arch, tune, etc. are the same. */
3836 else if (caller_opts->arch != callee_opts->arch)
3837 ret = false;
3839 else if (caller_opts->tune != callee_opts->tune)
3840 ret = false;
3842 else if (caller_opts->fpmath != callee_opts->fpmath)
3843 ret = false;
3845 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3846 ret = false;
3848 else
3849 ret = true;
3852 return ret;
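/* Illustration (hypothetical functions, not from this file): under the
   subset rule above, a caller declared with target("sse4.2") may inline
   a callee declared with target("sse2"), because the SSE4.2 option also
   enables SSE2 and the callee's ISA flags are therefore a subset of the
   caller's; inlining in the opposite direction is rejected. */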
3856 /* Remember the last target of ix86_set_current_function. */
3857 static GTY(()) tree ix86_previous_fndecl;
3859 /* Establish appropriate back-end context for processing the function
3860 FNDECL. The argument might be NULL to indicate processing at top
3861 level, outside of any function scope. */
3862 static void
3863 ix86_set_current_function (tree fndecl)
3865 /* Only change the context if the function changes. This hook is called
3866 several times in the course of compiling a function, and we don't want to
3867 slow things down too much or call target_reinit when it isn't safe. */
3868 if (fndecl && fndecl != ix86_previous_fndecl)
3870 tree old_tree = (ix86_previous_fndecl
3871 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3872 : NULL_TREE);
3874 tree new_tree = (fndecl
3875 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3876 : NULL_TREE);
3878 ix86_previous_fndecl = fndecl;
3879 if (old_tree == new_tree)
3882 else if (new_tree)
3884 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3885 target_reinit ();
3888 else if (old_tree)
3890 struct cl_target_option *def
3891 = TREE_TARGET_OPTION (target_option_current_node);
3893 cl_target_option_restore (def);
3894 target_reinit ();
3900 /* Return true if this goes in large data/bss. */
3902 static bool
3903 ix86_in_large_data_p (tree exp)
3905 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3906 return false;
3908 /* Functions are never large data. */
3909 if (TREE_CODE (exp) == FUNCTION_DECL)
3910 return false;
3912 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3914 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3915 if (strcmp (section, ".ldata") == 0
3916 || strcmp (section, ".lbss") == 0)
3917 return true;
3918 return false;
3920 else
3922 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3924 /* If this is an incomplete type with size 0, then we can't put it
3925 in data because it might be too big when completed. */
3926 if (!size || size > ix86_section_threshold)
3927 return true;
3930 return false;
3933 /* Switch to the appropriate section for output of DECL.
3934 DECL is either a `VAR_DECL' node or a constant of some sort.
3935 RELOC indicates whether forming the initial value of DECL requires
3936 link-time relocations. */
3938 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3939 ATTRIBUTE_UNUSED;
3941 static section *
3942 x86_64_elf_select_section (tree decl, int reloc,
3943 unsigned HOST_WIDE_INT align)
3945 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3946 && ix86_in_large_data_p (decl))
3948 const char *sname = NULL;
3949 unsigned int flags = SECTION_WRITE;
3950 switch (categorize_decl_for_section (decl, reloc))
3952 case SECCAT_DATA:
3953 sname = ".ldata";
3954 break;
3955 case SECCAT_DATA_REL:
3956 sname = ".ldata.rel";
3957 break;
3958 case SECCAT_DATA_REL_LOCAL:
3959 sname = ".ldata.rel.local";
3960 break;
3961 case SECCAT_DATA_REL_RO:
3962 sname = ".ldata.rel.ro";
3963 break;
3964 case SECCAT_DATA_REL_RO_LOCAL:
3965 sname = ".ldata.rel.ro.local";
3966 break;
3967 case SECCAT_BSS:
3968 sname = ".lbss";
3969 flags |= SECTION_BSS;
3970 break;
3971 case SECCAT_RODATA:
3972 case SECCAT_RODATA_MERGE_STR:
3973 case SECCAT_RODATA_MERGE_STR_INIT:
3974 case SECCAT_RODATA_MERGE_CONST:
3975 sname = ".lrodata";
3976 flags = 0;
3977 break;
3978 case SECCAT_SRODATA:
3979 case SECCAT_SDATA:
3980 case SECCAT_SBSS:
3981 gcc_unreachable ();
3982 case SECCAT_TEXT:
3983 case SECCAT_TDATA:
3984 case SECCAT_TBSS:
3985 /* We don't split these for the medium model. Place them into
3986 default sections and hope for the best. */
3987 break;
3988 case SECCAT_EMUTLS_VAR:
3989 case SECCAT_EMUTLS_TMPL:
3990 gcc_unreachable ();
3992 if (sname)
3994 /* We might get called with string constants, but get_named_section
3995 doesn't like them as they are not DECLs. Also, we need to set
3996 flags in that case. */
3997 if (!DECL_P (decl))
3998 return get_section (sname, flags, NULL);
3999 return get_named_section (decl, sname, reloc);
4002 return default_elf_select_section (decl, reloc, align);
4005 /* Build up a unique section name, expressed as a
4006 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4007 RELOC indicates whether the initial value of EXP requires
4008 link-time relocations. */
4010 static void ATTRIBUTE_UNUSED
4011 x86_64_elf_unique_section (tree decl, int reloc)
4013 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4014 && ix86_in_large_data_p (decl))
4016 const char *prefix = NULL;
4017 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4018 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4020 switch (categorize_decl_for_section (decl, reloc))
4022 case SECCAT_DATA:
4023 case SECCAT_DATA_REL:
4024 case SECCAT_DATA_REL_LOCAL:
4025 case SECCAT_DATA_REL_RO:
4026 case SECCAT_DATA_REL_RO_LOCAL:
4027 prefix = one_only ? ".ld" : ".ldata";
4028 break;
4029 case SECCAT_BSS:
4030 prefix = one_only ? ".lb" : ".lbss";
4031 break;
4032 case SECCAT_RODATA:
4033 case SECCAT_RODATA_MERGE_STR:
4034 case SECCAT_RODATA_MERGE_STR_INIT:
4035 case SECCAT_RODATA_MERGE_CONST:
4036 prefix = one_only ? ".lr" : ".lrodata";
4037 break;
4038 case SECCAT_SRODATA:
4039 case SECCAT_SDATA:
4040 case SECCAT_SBSS:
4041 gcc_unreachable ();
4042 case SECCAT_TEXT:
4043 case SECCAT_TDATA:
4044 case SECCAT_TBSS:
4045 /* We don't split these for the medium model. Place them into
4046 default sections and hope for the best. */
4047 break;
4048 case SECCAT_EMUTLS_VAR:
4049 prefix = targetm.emutls.var_section;
4050 break;
4051 case SECCAT_EMUTLS_TMPL:
4052 prefix = targetm.emutls.tmpl_section;
4053 break;
4055 if (prefix)
4057 const char *name, *linkonce;
4058 char *string;
4060 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4061 name = targetm.strip_name_encoding (name);
4063 /* If we're using one_only, then there needs to be a .gnu.linkonce
4064 prefix to the section name. */
4065 linkonce = one_only ? ".gnu.linkonce" : "";
4067 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4069 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4070 return;
4073 default_unique_section (decl, reloc);
4076 #ifdef COMMON_ASM_OP
4077 /* This says how to output assembler code to declare an
4078 uninitialized external linkage data object.
4080 For medium model x86-64 we need to use the .largecomm directive for
4081 large objects. */
4082 void
4083 x86_elf_aligned_common (FILE *file,
4084 const char *name, unsigned HOST_WIDE_INT size,
4085 int align)
4087 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4088 && size > (unsigned int)ix86_section_threshold)
4089 fprintf (file, ".largecomm\t");
4090 else
4091 fprintf (file, "%s", COMMON_ASM_OP);
4092 assemble_name (file, name);
4093 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4094 size, align / BITS_PER_UNIT);
4096 #endif
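/* Emission sketch (hypothetical object, assuming -mcmodel=medium and the
   default 64K -mlarge-data-threshold): for

     char big[70000];

   with 32-byte alignment the code above prints

     .largecomm big,70000,32

   while smaller common objects fall back to the plain COMMON_ASM_OP
   form. */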
4098 /* Utility function for targets to use in implementing
4099 ASM_OUTPUT_ALIGNED_BSS. */
4101 void
4102 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4103 const char *name, unsigned HOST_WIDE_INT size,
4104 int align)
4106 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4107 && size > (unsigned int)ix86_section_threshold)
4108 switch_to_section (get_named_section (decl, ".lbss", 0));
4109 else
4110 switch_to_section (bss_section);
4111 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4112 #ifdef ASM_DECLARE_OBJECT_NAME
4113 last_assemble_variable_decl = decl;
4114 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4115 #else
4116 /* The standard thing is just to output a label for the object. */
4117 ASM_OUTPUT_LABEL (file, name);
4118 #endif /* ASM_DECLARE_OBJECT_NAME */
4119 ASM_OUTPUT_SKIP (file, size ? size : 1);
4122 void
4123 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4125 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4126 make the problem of not having enough registers even worse. */
4127 #ifdef INSN_SCHEDULING
4128 if (level > 1)
4129 flag_schedule_insns = 0;
4130 #endif
4132 if (TARGET_MACHO)
4133 /* The Darwin libraries never set errno, so we might as well
4134 avoid calling them when that's the only reason we would. */
4135 flag_errno_math = 0;
4137 /* The default values of these switches depend on TARGET_64BIT,
4138 which is not known at this moment. Mark these values with 2 and
4139 let the user override them. In case there is no command line option
4140 specifying them, we will set the defaults in override_options. */
4141 if (optimize >= 1)
4142 flag_omit_frame_pointer = 2;
4143 flag_pcc_struct_return = 2;
4144 flag_asynchronous_unwind_tables = 2;
4145 flag_vect_cost_model = 1;
4146 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4147 SUBTARGET_OPTIMIZATION_OPTIONS;
4148 #endif
4151 /* Decide whether we can make a sibling call to a function. DECL is the
4152 declaration of the function being targeted by the call and EXP is the
4153 CALL_EXPR representing the call. */
4155 static bool
4156 ix86_function_ok_for_sibcall (tree decl, tree exp)
4158 tree func;
4159 rtx a, b;
4161 /* If we are generating position-independent code, we cannot sibcall
4162 optimize any indirect call, or a direct call to a global function,
4163 as the PLT requires %ebx be live. */
4164 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4165 return false;
4167 if (decl)
4168 func = decl;
4169 else
4171 func = TREE_TYPE (CALL_EXPR_FN (exp));
4172 if (POINTER_TYPE_P (func))
4173 func = TREE_TYPE (func);
4176 /* Check that the return value locations are the same. For example,
4177 if we are returning floats on the 80387 register stack, we cannot
4178 make a sibcall from a function that doesn't return a float to a
4179 function that does or, conversely, from a function that does return
4180 a float to a function that doesn't; the necessary stack adjustment
4181 would not be executed. This is also the place we notice
4182 differences in the return value ABI. Note that it is ok for one
4183 of the functions to have void return type as long as the return
4184 value of the other is passed in a register. */
4185 a = ix86_function_value (TREE_TYPE (exp), func, false);
4186 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4187 cfun->decl, false);
4188 if (STACK_REG_P (a) || STACK_REG_P (b))
4190 if (!rtx_equal_p (a, b))
4191 return false;
4193 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4195 else if (!rtx_equal_p (a, b))
4196 return false;
4198 /* If this call is indirect, we'll need to be able to use a call-clobbered
4199 register for the address of the target function. Make sure that all
4200 such registers are not used for passing parameters. */
4201 if (!decl && !TARGET_64BIT)
4203 tree type;
4205 /* We're looking at the CALL_EXPR, we need the type of the function. */
4206 type = CALL_EXPR_FN (exp); /* pointer expression */
4207 type = TREE_TYPE (type); /* pointer type */
4208 type = TREE_TYPE (type); /* function type */
4210 if (ix86_function_regparm (type, NULL) >= 3)
4212 /* ??? Need to count the actual number of registers to be used,
4213 not the possible number of registers. Fix later. */
4214 return false;
4218 /* Dllimport'd functions are also called indirectly. */
4219 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4220 && !TARGET_64BIT
4221 && decl && DECL_DLLIMPORT_P (decl)
4222 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4223 return false;
4225 /* If we need to align the outgoing stack, then sibcalling would
4226 unalign the stack, which may break the called function. */
4227 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4228 return false;
4230 /* Otherwise okay. That also includes certain types of indirect calls. */
4231 return true;
4234 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4235 calling convention attributes;
4236 arguments as in struct attribute_spec.handler. */
4238 static tree
4239 ix86_handle_cconv_attribute (tree *node, tree name,
4240 tree args,
4241 int flags ATTRIBUTE_UNUSED,
4242 bool *no_add_attrs)
4244 if (TREE_CODE (*node) != FUNCTION_TYPE
4245 && TREE_CODE (*node) != METHOD_TYPE
4246 && TREE_CODE (*node) != FIELD_DECL
4247 && TREE_CODE (*node) != TYPE_DECL)
4249 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4250 IDENTIFIER_POINTER (name));
4251 *no_add_attrs = true;
4252 return NULL_TREE;
4255 /* Can combine regparm with all attributes but fastcall. */
4256 if (is_attribute_p ("regparm", name))
4258 tree cst;
4260 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4262 error ("fastcall and regparm attributes are not compatible");
4265 cst = TREE_VALUE (args);
4266 if (TREE_CODE (cst) != INTEGER_CST)
4268 warning (OPT_Wattributes,
4269 "%qs attribute requires an integer constant argument",
4270 IDENTIFIER_POINTER (name));
4271 *no_add_attrs = true;
4273 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4275 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4276 IDENTIFIER_POINTER (name), REGPARM_MAX);
4277 *no_add_attrs = true;
4280 return NULL_TREE;
4283 if (TARGET_64BIT)
4285 /* Do not warn when emulating the MS ABI. */
4286 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node) != MS_ABI)
4287 warning (OPT_Wattributes, "%qs attribute ignored",
4288 IDENTIFIER_POINTER (name));
4289 *no_add_attrs = true;
4290 return NULL_TREE;
4293 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4294 if (is_attribute_p ("fastcall", name))
4296 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4298 error ("fastcall and cdecl attributes are not compatible");
4300 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4302 error ("fastcall and stdcall attributes are not compatible");
4304 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4306 error ("fastcall and regparm attributes are not compatible");
4310 /* Can combine stdcall with fastcall (redundant), regparm and
4311 sseregparm. */
4312 else if (is_attribute_p ("stdcall", name))
4314 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4316 error ("stdcall and cdecl attributes are not compatible");
4318 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4320 error ("stdcall and fastcall attributes are not compatible");
4324 /* Can combine cdecl with regparm and sseregparm. */
4325 else if (is_attribute_p ("cdecl", name))
4327 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4329 error ("stdcall and cdecl attributes are not compatible");
4331 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4333 error ("fastcall and cdecl attributes are not compatible");
4337 /* Can combine sseregparm with all attributes. */
4339 return NULL_TREE;
4342 /* Return 0 if the attributes for two types are incompatible, 1 if they
4343 are compatible, and 2 if they are nearly compatible (which causes a
4344 warning to be generated). */
4346 static int
4347 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4349 /* Check for mismatch of non-default calling convention. */
4350 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4352 if (TREE_CODE (type1) != FUNCTION_TYPE
4353 && TREE_CODE (type1) != METHOD_TYPE)
4354 return 1;
4356 /* Check for mismatched fastcall/regparm types. */
4357 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4358 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4359 || (ix86_function_regparm (type1, NULL)
4360 != ix86_function_regparm (type2, NULL)))
4361 return 0;
4363 /* Check for mismatched sseregparm types. */
4364 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4365 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4366 return 0;
4368 /* Check for mismatched return types (cdecl vs stdcall). */
4369 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4370 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4371 return 0;
4373 return 1;
4376 /* Return the regparm value for a function with the indicated TYPE and DECL.
4377 DECL may be NULL when calling a function indirectly
4378 or considering a libcall. */
4380 static int
4381 ix86_function_regparm (const_tree type, const_tree decl)
4383 tree attr;
4384 int regparm;
4386 static bool error_issued;
4388 if (TARGET_64BIT)
4389 return (ix86_function_type_abi (type) == SYSV_ABI
4390 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4392 regparm = ix86_regparm;
4393 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4394 if (attr)
4396 regparm
4397 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4399 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4401 /* We can't use regparm(3) for nested functions because
4402 these pass the static chain pointer in the %ecx register. */
4403 if (!error_issued && regparm == 3
4404 && decl_function_context (decl)
4405 && !DECL_NO_STATIC_CHAIN (decl))
4407 error ("nested functions are limited to 2 register parameters");
4408 error_issued = true;
4409 return 0;
4413 return regparm;
4416 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4417 return 2;
4419 /* Use register calling convention for local functions when possible. */
4420 if (decl
4421 && TREE_CODE (decl) == FUNCTION_DECL
4422 && optimize
4423 && !profile_flag)
4425 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4426 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4427 if (i && i->local)
4429 int local_regparm, globals = 0, regno;
4430 struct function *f;
4432 /* Make sure no regparm register is taken by a
4433 fixed register variable. */
4434 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4435 if (fixed_regs[local_regparm])
4436 break;
4438 /* We can't use regparm(3) for nested functions as these pass the
4439 static chain pointer in the third argument. */
4440 if (local_regparm == 3
4441 && decl_function_context (decl)
4442 && !DECL_NO_STATIC_CHAIN (decl))
4443 local_regparm = 2;
4445 /* If the function realigns its stack pointer, the prologue will
4446 clobber %ecx. If we've already generated code for the callee,
4447 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4448 scanning the attributes for the self-realigning property. */
4449 f = DECL_STRUCT_FUNCTION (decl);
4450 /* Since the current internal arg pointer won't conflict with
4451 parameter passing regs, there is no need to change stack
4452 realignment or adjust the regparm number.
4454 Each fixed register usage increases register pressure,
4455 so fewer registers should be used for argument passing.
4456 This functionality can be overridden by an explicit
4457 regparm value. */
4458 for (regno = 0; regno <= DI_REG; regno++)
4459 if (fixed_regs[regno])
4460 globals++;
4462 local_regparm
4463 = globals < local_regparm ? local_regparm - globals : 0;
4465 if (local_regparm > regparm)
4466 regparm = local_regparm;
4470 return regparm;
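/* Usage illustration (hypothetical declaration, not from this file):

     int dot (int *a, int *b, int n) __attribute__((regparm (3)));

   makes the value computed above 3, so the first three integer
   arguments are passed in EAX, EDX and ECX instead of on the stack. */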
4473 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4474 DFmode (2) arguments in SSE registers for a function with the
4475 indicated TYPE and DECL. DECL may be NULL when calling function
4476 indirectly or considering a libcall. Otherwise return 0. */
4478 static int
4479 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4481 gcc_assert (!TARGET_64BIT);
4483 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4484 by the sseregparm attribute. */
4485 if (TARGET_SSEREGPARM
4486 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4488 if (!TARGET_SSE)
4490 if (warn)
4492 if (decl)
4493 error ("Calling %qD with attribute sseregparm without "
4494 "SSE/SSE2 enabled", decl);
4495 else
4496 error ("Calling %qT with attribute sseregparm without "
4497 "SSE/SSE2 enabled", type);
4499 return 0;
4502 return 2;
4505 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4506 (and DFmode for SSE2) arguments in SSE registers. */
4507 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4509 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4510 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4511 if (i && i->local)
4512 return TARGET_SSE2 ? 2 : 1;
4515 return 0;
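/* Usage illustration (hypothetical declaration, not from this file):

     double scale (double x, double y) __attribute__((sseregparm));

   With SSE2 enabled this function returns 2, so both DFmode arguments
   are passed in SSE registers rather than on the stack. */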
4518 /* Return true if EAX is live at the start of the function. Used by
4519 ix86_expand_prologue to determine if we need special help before
4520 calling allocate_stack_worker. */
4522 static bool
4523 ix86_eax_live_at_start_p (void)
4525 /* Cheat. Don't bother working forward from ix86_function_regparm
4526 to the function type to whether an actual argument is located in
4527 eax. Instead just look at cfg info, which is still close enough
4528 to correct at this point. This gives false positives for broken
4529 functions that might use uninitialized data that happens to be
4530 allocated in eax, but who cares? */
4531 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4534 /* Value is the number of bytes of arguments automatically
4535 popped when returning from a subroutine call.
4536 FUNDECL is the declaration node of the function (as a tree),
4537 FUNTYPE is the data type of the function (as a tree),
4538 or for a library call it is an identifier node for the subroutine name.
4539 SIZE is the number of bytes of arguments passed on the stack.
4541 On the 80386, the RTD insn may be used to pop them if the number
4542 of args is fixed, but if the number is variable then the caller
4543 must pop them all. RTD can't be used for library calls now
4544 because the library is compiled with the Unix compiler.
4545 Use of RTD is a selectable option, since it is incompatible with
4546 standard Unix calling sequences. If the option is not selected,
4547 the caller must always pop the args.
4549 The attribute stdcall is equivalent to RTD on a per module basis. */
4552 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4554 int rtd;
4556 /* None of the 64-bit ABIs pop arguments. */
4557 if (TARGET_64BIT)
4558 return 0;
4560 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4562 /* Cdecl functions override -mrtd, and never pop the stack. */
4563 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4565 /* Stdcall and fastcall functions will pop the stack if not
4566 variable args. */
4567 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4568 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4569 rtd = 1;
4571 if (rtd && ! stdarg_p (funtype))
4572 return size;
4575 /* Lose any fake structure return argument if it is passed on the stack. */
4576 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4577 && !KEEP_AGGREGATE_RETURN_POINTER)
4579 int nregs = ix86_function_regparm (funtype, fundecl);
4580 if (nregs == 0)
4581 return GET_MODE_SIZE (Pmode);
4584 return 0;
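/* Illustration (hypothetical declaration, not from this file): for

     void __attribute__((stdcall)) f (int a, int b, int c);

   SIZE is 12 and, because stdcall functions pop their own arguments,
   the 12 returned above makes the epilogue use "ret $12" instead of a
   plain "ret". */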
4587 /* Argument support functions. */
4589 /* Return true when register may be used to pass function parameters. */
4590 bool
4591 ix86_function_arg_regno_p (int regno)
4593 int i;
4594 const int *parm_regs;
4596 if (!TARGET_64BIT)
4598 if (TARGET_MACHO)
4599 return (regno < REGPARM_MAX
4600 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4601 else
4602 return (regno < REGPARM_MAX
4603 || (TARGET_MMX && MMX_REGNO_P (regno)
4604 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4605 || (TARGET_SSE && SSE_REGNO_P (regno)
4606 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4609 if (TARGET_MACHO)
4611 if (SSE_REGNO_P (regno) && TARGET_SSE)
4612 return true;
4614 else
4616 if (TARGET_SSE && SSE_REGNO_P (regno)
4617 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4618 return true;
4621 /* TODO: The function should depend on current function ABI but
4622 builtins.c would need updating then. Therefore we use the
4623 default ABI. */
4625 /* RAX is used as hidden argument to va_arg functions. */
4626 if (ix86_abi == SYSV_ABI && regno == AX_REG)
4627 return true;
4629 if (ix86_abi == MS_ABI)
4630 parm_regs = x86_64_ms_abi_int_parameter_registers;
4631 else
4632 parm_regs = x86_64_int_parameter_registers;
4633 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4634 : X86_64_REGPARM_MAX); i++)
4635 if (regno == parm_regs[i])
4636 return true;
4637 return false;
4640 /* Return true if we do not know how to pass TYPE solely in registers. */
4642 static bool
4643 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4645 if (must_pass_in_stack_var_size_or_pad (mode, type))
4646 return true;
4648 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4649 The layout_type routine is crafty and tries to trick us into passing
4650 currently unsupported vector types on the stack by using TImode. */
4651 return (!TARGET_64BIT && mode == TImode
4652 && type && TREE_CODE (type) != VECTOR_TYPE);
4655 /* Return the size, in bytes, of the area reserved for arguments passed
4656 in registers for the function represented by FNDECL, depending on the
4657 ABI format used. */
4659 ix86_reg_parm_stack_space (const_tree fndecl)
4661 enum calling_abi call_abi = SYSV_ABI;
4662 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4663 call_abi = ix86_function_abi (fndecl);
4664 else
4665 call_abi = ix86_function_type_abi (fndecl);
4666 if (call_abi == MS_ABI)
4667 return 32;
4668 return 0;
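/* Note for illustration: the 32 bytes returned for MS_ABI are the
   "shadow space" the Microsoft x64 calling convention reserves on the
   stack for the four register parameters (RCX, RDX, R8, R9); the SysV
   ABI reserves no such area, hence 0. */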
4671 /* Return SYSV_ABI or MS_ABI depending on FNTYPE, specifying the
4672 call abi used. */
4673 enum calling_abi
4674 ix86_function_type_abi (const_tree fntype)
4676 if (TARGET_64BIT && fntype != NULL)
4678 enum calling_abi abi = ix86_abi;
4679 if (abi == SYSV_ABI)
4681 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4682 abi = MS_ABI;
4684 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4685 abi = SYSV_ABI;
4686 return abi;
4688 return ix86_abi;
4691 static enum calling_abi
4692 ix86_function_abi (const_tree fndecl)
4694 if (! fndecl)
4695 return ix86_abi;
4696 return ix86_function_type_abi (TREE_TYPE (fndecl));
4699 /* Return SYSV_ABI or MS_ABI depending on cfun, specifying the
4700 call abi used. */
4701 enum calling_abi
4702 ix86_cfun_abi (void)
4704 if (! cfun || ! TARGET_64BIT)
4705 return ix86_abi;
4706 return cfun->machine->call_abi;
4709 /* regclass.c */
4710 extern void init_regs (void);
4712 /* Implementation of the call abi switching target hook. The call
4713 register sets specific to FNDECL are selected. See also
4714 CONDITIONAL_REGISTER_USAGE for more details. */
4715 void
4716 ix86_call_abi_override (const_tree fndecl)
4718 if (fndecl == NULL_TREE)
4719 cfun->machine->call_abi = ix86_abi;
4720 else
4721 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4724 /* MS and SYSV ABI have different sets of call-used registers. Avoid expensive
4725 re-initialization of init_regs each time we switch function context since
4726 this is needed only during RTL expansion. */
4727 static void
4728 ix86_maybe_switch_abi (void)
4730 if (TARGET_64BIT &&
4731 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4732 reinit_regs ();
4735 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4736 for a call to a function whose data type is FNTYPE.
4737 For a library call, FNTYPE is 0. */
4739 void
4740 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4741 tree fntype, /* tree ptr for function decl */
4742 rtx libname, /* SYMBOL_REF of library name or 0 */
4743 tree fndecl)
4745 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4746 memset (cum, 0, sizeof (*cum));
4748 if (fndecl)
4749 cum->call_abi = ix86_function_abi (fndecl);
4750 else
4751 cum->call_abi = ix86_function_type_abi (fntype);
4752 /* Set up the number of registers to use for passing arguments. */
4754 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4755 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4756 cum->nregs = ix86_regparm;
4757 if (TARGET_64BIT)
4759 if (cum->call_abi != ix86_abi)
4760 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4761 : X64_REGPARM_MAX;
4763 if (TARGET_SSE)
4765 cum->sse_nregs = SSE_REGPARM_MAX;
4766 if (TARGET_64BIT)
4768 if (cum->call_abi != ix86_abi)
4769 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4770 : X64_SSE_REGPARM_MAX;
4773 if (TARGET_MMX)
4774 cum->mmx_nregs = MMX_REGPARM_MAX;
4775 cum->warn_avx = true;
4776 cum->warn_sse = true;
4777 cum->warn_mmx = true;
4779 /* Because the type might mismatch between caller and callee, we need to
4780 use the actual type of the function for local calls.
4781 FIXME: cgraph_analyze can be told to actually record if a function uses
4782 va_start so that for local functions maybe_vaarg can be made aggressive,
4783 helping K&R code.
4784 FIXME: once the type system is fixed, we won't need this code anymore. */
4785 if (i && i->local)
4786 fntype = TREE_TYPE (fndecl);
4787 cum->maybe_vaarg = (fntype
4788 ? (!prototype_p (fntype) || stdarg_p (fntype))
4789 : !libname);
4791 if (!TARGET_64BIT)
4793 /* If there are variable arguments, then we won't pass anything
4794 in registers in 32-bit mode. */
4795 if (stdarg_p (fntype))
4797 cum->nregs = 0;
4798 cum->sse_nregs = 0;
4799 cum->mmx_nregs = 0;
4800 cum->warn_avx = 0;
4801 cum->warn_sse = 0;
4802 cum->warn_mmx = 0;
4803 return;
4806 /* Use ecx and edx registers if the function has the fastcall attribute,
4807 else look for regparm information. */
4808 if (fntype)
4810 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4812 cum->nregs = 2;
4813 cum->fastcall = 1;
4815 else
4816 cum->nregs = ix86_function_regparm (fntype, fndecl);
4819 /* Set up the number of SSE registers used for passing SFmode
4820 and DFmode arguments. Warn for mismatching ABI. */
4821 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4825 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4826 But in the case of vector types, it is some vector mode.
4828 When we have only some of our vector isa extensions enabled, then there
4829 are some modes for which vector_mode_supported_p is false. For these
4830 modes, the generic vector support in gcc will choose some non-vector mode
4831 in order to implement the type. By computing the natural mode, we'll
4832 select the proper ABI location for the operand and not depend on whatever
4833 the middle-end decides to do with these vector types.
4835 The middle-end can't deal with vector types > 16 bytes. In this
4836 case, we return the original mode and warn about the ABI change if
4837 CUM isn't NULL. */
4839 static enum machine_mode
4840 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4842 enum machine_mode mode = TYPE_MODE (type);
4844 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4846 HOST_WIDE_INT size = int_size_in_bytes (type);
4847 if ((size == 8 || size == 16 || size == 32)
4848 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4849 && TYPE_VECTOR_SUBPARTS (type) > 1)
4851 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4853 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4854 mode = MIN_MODE_VECTOR_FLOAT;
4855 else
4856 mode = MIN_MODE_VECTOR_INT;
4858 /* Get the mode which has this inner mode and number of units. */
4859 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4860 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4861 && GET_MODE_INNER (mode) == innermode)
4863 if (size == 32 && !TARGET_AVX)
4865 static bool warnedavx;
4867 if (cum
4868 && !warnedavx
4869 && cum->warn_avx)
4871 warnedavx = true;
4872 warning (0, "AVX vector argument without AVX "
4873 "enabled changes the ABI");
4875 return TYPE_MODE (type);
4877 else
4878 return mode;
4881 gcc_unreachable ();
4885 return mode;
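/* Illustration (hypothetical type, not from this file):

     typedef float v4sf __attribute__((vector_size (16)));

   has a natural mode of V4SFmode: size 16, a REAL_TYPE inner type and
   four subparts, so the loop above finds the vector mode whose inner
   mode and number of units match even when TYPE_MODE fell back to a
   non-vector mode. */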
4888 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4889 this may not agree with the mode that the type system has chosen for the
4890 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4891 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4893 static rtx
4894 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4895 unsigned int regno)
4897 rtx tmp;
4899 if (orig_mode != BLKmode)
4900 tmp = gen_rtx_REG (orig_mode, regno);
4901 else
4903 tmp = gen_rtx_REG (mode, regno);
4904 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4905 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4908 return tmp;
4911 /* x86-64 register passing implementation. See the x86-64 ABI for details.
4912 The goal of this code is to classify each 8 bytes of an incoming argument
4913 by register class and assign registers accordingly. */
4915 /* Return the union class of CLASS1 and CLASS2.
4916 See the x86-64 PS ABI for details. */
4918 static enum x86_64_reg_class
4919 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4921 /* Rule #1: If both classes are equal, this is the resulting class. */
4922 if (class1 == class2)
4923 return class1;
4925 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4926 the other class. */
4927 if (class1 == X86_64_NO_CLASS)
4928 return class2;
4929 if (class2 == X86_64_NO_CLASS)
4930 return class1;
4932 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4933 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4934 return X86_64_MEMORY_CLASS;
4936 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4937 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4938 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4939 return X86_64_INTEGERSI_CLASS;
4940 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4941 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4942 return X86_64_INTEGER_CLASS;
4944 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4945 MEMORY is used. */
4946 if (class1 == X86_64_X87_CLASS
4947 || class1 == X86_64_X87UP_CLASS
4948 || class1 == X86_64_COMPLEX_X87_CLASS
4949 || class2 == X86_64_X87_CLASS
4950 || class2 == X86_64_X87UP_CLASS
4951 || class2 == X86_64_COMPLEX_X87_CLASS)
4952 return X86_64_MEMORY_CLASS;
4954 /* Rule #6: Otherwise class SSE is used. */
4955 return X86_64_SSE_CLASS;
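/* Worked example (hypothetical struct, not from this file): for

     struct s { int i; float f; };

   both fields share one eightbyte; the int classifies as INTEGERSI and
   the float as SSESF, and rule #4 above merges them to INTEGERSI, so
   the whole struct travels in a single general-purpose register. */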
4958 /* Classify the argument of type TYPE and mode MODE.
4959 CLASSES will be filled by the register class used to pass each word
4960 of the operand. The number of words is returned. In case the parameter
4961 should be passed in memory, 0 is returned. As a special case for zero
4962 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4964 BIT_OFFSET is used internally for handling records and specifies the
4965 offset in bits modulo 256 to avoid overflow cases.
4967 See the x86-64 PS ABI for details.
4970 static int
4971 classify_argument (enum machine_mode mode, const_tree type,
4972 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4974 HOST_WIDE_INT bytes =
4975 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4976 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4978 /* Variable sized entities are always passed/returned in memory. */
4979 if (bytes < 0)
4980 return 0;
4982 if (mode != VOIDmode
4983 && targetm.calls.must_pass_in_stack (mode, type))
4984 return 0;
4986 if (type && AGGREGATE_TYPE_P (type))
4988 int i;
4989 tree field;
4990 enum x86_64_reg_class subclasses[MAX_CLASSES];
4992 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4993 if (bytes > 32)
4994 return 0;
4996 for (i = 0; i < words; i++)
4997 classes[i] = X86_64_NO_CLASS;
4999 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5000 signal the memory class, so handle this as a special case. */
5001 if (!words)
5003 classes[0] = X86_64_NO_CLASS;
5004 return 1;
5007 /* Classify each field of record and merge classes. */
5008 switch (TREE_CODE (type))
5010 case RECORD_TYPE:
5011 /* And now merge the fields of the structure. */
5012 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5014 if (TREE_CODE (field) == FIELD_DECL)
5016 int num;
5018 if (TREE_TYPE (field) == error_mark_node)
5019 continue;
5021 /* Bitfields are always classified as integer. Handle them
5022 early, since later code would consider them to be
5023 misaligned integers. */
5024 if (DECL_BIT_FIELD (field))
5026 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5027 i < ((int_bit_position (field) + (bit_offset % 64))
5028 + tree_low_cst (DECL_SIZE (field), 0)
5029 + 63) / 8 / 8; i++)
5030 classes[i] =
5031 merge_classes (X86_64_INTEGER_CLASS,
5032 classes[i]);
5034 else
5036 int pos;
5038 type = TREE_TYPE (field);
5040 /* Flexible array member is ignored. */
5041 if (TYPE_MODE (type) == BLKmode
5042 && TREE_CODE (type) == ARRAY_TYPE
5043 && TYPE_SIZE (type) == NULL_TREE
5044 && TYPE_DOMAIN (type) != NULL_TREE
5045 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5046 == NULL_TREE))
5048 static bool warned;
5050 if (!warned && warn_psabi)
5052 warned = true;
5053 inform (input_location,
5054 "The ABI of passing struct with"
5055 " a flexible array member has"
5056 " changed in GCC 4.4");
5058 continue;
5060 num = classify_argument (TYPE_MODE (type), type,
5061 subclasses,
5062 (int_bit_position (field)
5063 + bit_offset) % 256);
5064 if (!num)
5065 return 0;
5066 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5067 for (i = 0; i < num && (i + pos) < words; i++)
5068 classes[i + pos] =
5069 merge_classes (subclasses[i], classes[i + pos]);
5073 break;
5075 case ARRAY_TYPE:
5076 /* Arrays are handled as small records. */
5078 int num;
5079 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5080 TREE_TYPE (type), subclasses, bit_offset);
5081 if (!num)
5082 return 0;
5084 /* The partial classes are now full classes. */
5085 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5086 subclasses[0] = X86_64_SSE_CLASS;
5087 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5088 && !((bit_offset % 64) == 0 && bytes == 4))
5089 subclasses[0] = X86_64_INTEGER_CLASS;
5091 for (i = 0; i < words; i++)
5092 classes[i] = subclasses[i % num];
5094 break;
5096 case UNION_TYPE:
5097 case QUAL_UNION_TYPE:
5098 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5100 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5102 if (TREE_CODE (field) == FIELD_DECL)
5104 int num;
5106 if (TREE_TYPE (field) == error_mark_node)
5107 continue;
5109 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5110 TREE_TYPE (field), subclasses,
5111 bit_offset);
5112 if (!num)
5113 return 0;
5114 for (i = 0; i < num; i++)
5115 classes[i] = merge_classes (subclasses[i], classes[i]);
5118 break;
5120 default:
5121 gcc_unreachable ();
5124 if (words > 2)
5126 /* When size > 16 bytes, if the first class isn't
5127 X86_64_SSE_CLASS or any of the remaining classes isn't
5128 X86_64_SSEUP_CLASS, everything should be passed in
5129 memory. */
5130 if (classes[0] != X86_64_SSE_CLASS)
5131 return 0;
5133 for (i = 1; i < words; i++)
5134 if (classes[i] != X86_64_SSEUP_CLASS)
5135 return 0;
5138 /* Final merger cleanup. */
5139 for (i = 0; i < words; i++)
5141 /* If one class is MEMORY, everything should be passed in
5142 memory. */
5143 if (classes[i] == X86_64_MEMORY_CLASS)
5144 return 0;
5146 /* The X86_64_SSEUP_CLASS should always be preceded by
5147 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5148 if (classes[i] == X86_64_SSEUP_CLASS
5149 && classes[i - 1] != X86_64_SSE_CLASS
5150 && classes[i - 1] != X86_64_SSEUP_CLASS)
5152 /* The first one should never be X86_64_SSEUP_CLASS. */
5153 gcc_assert (i != 0);
5154 classes[i] = X86_64_SSE_CLASS;
5157 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5158 everything should be passed in memory. */
5159 if (classes[i] == X86_64_X87UP_CLASS
5160 && (classes[i - 1] != X86_64_X87_CLASS))
5162 static bool warned;
5164 /* The first one should never be X86_64_X87UP_CLASS. */
5165 gcc_assert (i != 0);
5166 if (!warned && warn_psabi)
5168 warned = true;
5169 inform (input_location,
5170 "The ABI of passing union with long double"
5171 " has changed in GCC 4.4");
5173 return 0;
5176 return words;
5179 /* Compute the alignment needed. We align all types to their natural
5180 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5181 if (mode != VOIDmode && mode != BLKmode)
5183 int mode_alignment = GET_MODE_BITSIZE (mode);
5185 if (mode == XFmode)
5186 mode_alignment = 128;
5187 else if (mode == XCmode)
5188 mode_alignment = 256;
5189 if (COMPLEX_MODE_P (mode))
5190 mode_alignment /= 2;
5191 /* Misaligned fields are always returned in memory. */
5192 if (bit_offset % mode_alignment)
5193 return 0;
5196 /* For V1xx modes, just use the base mode. */
5197 if (VECTOR_MODE_P (mode) && mode != V1DImode
5198 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5199 mode = GET_MODE_INNER (mode);
5201 /* Classification of atomic types. */
5202 switch (mode)
5204 case SDmode:
5205 case DDmode:
5206 classes[0] = X86_64_SSE_CLASS;
5207 return 1;
5208 case TDmode:
5209 classes[0] = X86_64_SSE_CLASS;
5210 classes[1] = X86_64_SSEUP_CLASS;
5211 return 2;
5212 case DImode:
5213 case SImode:
5214 case HImode:
5215 case QImode:
5216 case CSImode:
5217 case CHImode:
5218 case CQImode:
5220 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5222 if (size <= 32)
5224 classes[0] = X86_64_INTEGERSI_CLASS;
5225 return 1;
5227 else if (size <= 64)
5229 classes[0] = X86_64_INTEGER_CLASS;
5230 return 1;
5232 else if (size <= 64+32)
5234 classes[0] = X86_64_INTEGER_CLASS;
5235 classes[1] = X86_64_INTEGERSI_CLASS;
5236 return 2;
5238 else if (size <= 64+64)
5240 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5241 return 2;
5243 else
5244 gcc_unreachable ();
5246 case CDImode:
5247 case TImode:
5248 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5249 return 2;
5250 case COImode:
5251 case OImode:
5252 /* OImode shouldn't be used directly. */
5253 gcc_unreachable ();
5254 case CTImode:
5255 return 0;
5256 case SFmode:
5257 if (!(bit_offset % 64))
5258 classes[0] = X86_64_SSESF_CLASS;
5259 else
5260 classes[0] = X86_64_SSE_CLASS;
5261 return 1;
5262 case DFmode:
5263 classes[0] = X86_64_SSEDF_CLASS;
5264 return 1;
5265 case XFmode:
5266 classes[0] = X86_64_X87_CLASS;
5267 classes[1] = X86_64_X87UP_CLASS;
5268 return 2;
5269 case TFmode:
5270 classes[0] = X86_64_SSE_CLASS;
5271 classes[1] = X86_64_SSEUP_CLASS;
5272 return 2;
5273 case SCmode:
5274 classes[0] = X86_64_SSE_CLASS;
5275 if (!(bit_offset % 64))
5276 return 1;
5277 else
5279 static bool warned;
5281 if (!warned && warn_psabi)
5283 warned = true;
5284 inform (input_location,
5285 "The ABI of passing structure with complex float"
5286 " member has changed in GCC 4.4");
5288 classes[1] = X86_64_SSESF_CLASS;
5289 return 2;
5291 case DCmode:
5292 classes[0] = X86_64_SSEDF_CLASS;
5293 classes[1] = X86_64_SSEDF_CLASS;
5294 return 2;
5295 case XCmode:
5296 classes[0] = X86_64_COMPLEX_X87_CLASS;
5297 return 1;
5298 case TCmode:
5299 /* This mode is larger than 16 bytes. */
5300 return 0;
5301 case V8SFmode:
5302 case V8SImode:
5303 case V32QImode:
5304 case V16HImode:
5305 case V4DFmode:
5306 case V4DImode:
5307 classes[0] = X86_64_SSE_CLASS;
5308 classes[1] = X86_64_SSEUP_CLASS;
5309 classes[2] = X86_64_SSEUP_CLASS;
5310 classes[3] = X86_64_SSEUP_CLASS;
5311 return 4;
5312 case V4SFmode:
5313 case V4SImode:
5314 case V16QImode:
5315 case V8HImode:
5316 case V2DFmode:
5317 case V2DImode:
5318 classes[0] = X86_64_SSE_CLASS;
5319 classes[1] = X86_64_SSEUP_CLASS;
5320 return 2;
5321 case V1DImode:
5322 case V2SFmode:
5323 case V2SImode:
5324 case V4HImode:
5325 case V8QImode:
5326 classes[0] = X86_64_SSE_CLASS;
5327 return 1;
5328 case BLKmode:
5329 case VOIDmode:
5330 return 0;
5331 default:
5332 gcc_assert (VECTOR_MODE_P (mode));
5334 if (bytes > 16)
5335 return 0;
5337 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5339 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5340 classes[0] = X86_64_INTEGERSI_CLASS;
5341 else
5342 classes[0] = X86_64_INTEGER_CLASS;
5343 classes[1] = X86_64_INTEGER_CLASS;
5344 return 1 + (bytes > 8);
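/* Illustrative examples, not normative: an __m128 (V4SFmode) argument
   classifies as { SSE, SSEUP } and travels in one XMM register, while an
   __m256 (V8SFmode) argument classifies as { SSE, SSEUP, SSEUP, SSEUP }
   and travels in one YMM register.  A 32 byte aggregate avoids memory
   only when it classifies the same way, per the words > 2 check in the
   aggregate path above.  */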
5348 /* Examine the argument and return the number of registers required in
5349 each class. Return 0 iff the parameter should be passed in memory. */
5350 static int
5351 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5352 int *int_nregs, int *sse_nregs)
5354 enum x86_64_reg_class regclass[MAX_CLASSES];
5355 int n = classify_argument (mode, type, regclass, 0);
5357 *int_nregs = 0;
5358 *sse_nregs = 0;
5359 if (!n)
5360 return 0;
5361 for (n--; n >= 0; n--)
5362 switch (regclass[n])
5364 case X86_64_INTEGER_CLASS:
5365 case X86_64_INTEGERSI_CLASS:
5366 (*int_nregs)++;
5367 break;
5368 case X86_64_SSE_CLASS:
5369 case X86_64_SSESF_CLASS:
5370 case X86_64_SSEDF_CLASS:
5371 (*sse_nregs)++;
5372 break;
5373 case X86_64_NO_CLASS:
5374 case X86_64_SSEUP_CLASS:
5375 break;
5376 case X86_64_X87_CLASS:
5377 case X86_64_X87UP_CLASS:
5378 if (!in_return)
5379 return 0;
5380 break;
5381 case X86_64_COMPLEX_X87_CLASS:
5382 return in_return ? 2 : 0;
5383 case X86_64_MEMORY_CLASS:
5384 gcc_unreachable ();
5386 return 1;
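/* Usage sketch: for the hypothetical struct pair used in the
   classify_argument commentary, this sets *int_nregs = 1 and
   *sse_nregs = 1, and the caller compares both counts against the
   registers it still has available.  */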
5389 /* Construct container for the argument used by GCC interface. See
5390 FUNCTION_ARG for the detailed description. */
5392 static rtx
5393 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5394 const_tree type, int in_return, int nintregs, int nsseregs,
5395 const int *intreg, int sse_regno)
5397 /* The following variables hold the static issued_error state. */
5398 static bool issued_sse_arg_error;
5399 static bool issued_sse_ret_error;
5400 static bool issued_x87_ret_error;
5402 enum machine_mode tmpmode;
5403 int bytes =
5404 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5405 enum x86_64_reg_class regclass[MAX_CLASSES];
5406 int n;
5407 int i;
5408 int nexps = 0;
5409 int needed_sseregs, needed_intregs;
5410 rtx exp[MAX_CLASSES];
5411 rtx ret;
5413 n = classify_argument (mode, type, regclass, 0);
5414 if (!n)
5415 return NULL;
5416 if (!examine_argument (mode, type, in_return, &needed_intregs,
5417 &needed_sseregs))
5418 return NULL;
5419 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5420 return NULL;
5422 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5423 some less clueful developer tries to use floating-point anyway. */
5424 if (needed_sseregs && !TARGET_SSE)
5426 if (in_return)
5428 if (!issued_sse_ret_error)
5430 error ("SSE register return with SSE disabled");
5431 issued_sse_ret_error = true;
5434 else if (!issued_sse_arg_error)
5436 error ("SSE register argument with SSE disabled");
5437 issued_sse_arg_error = true;
5439 return NULL;
5442 /* Likewise, error if the ABI requires us to return values in the
5443 x87 registers and the user specified -mno-80387. */
5444 if (!TARGET_80387 && in_return)
5445 for (i = 0; i < n; i++)
5446 if (regclass[i] == X86_64_X87_CLASS
5447 || regclass[i] == X86_64_X87UP_CLASS
5448 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5450 if (!issued_x87_ret_error)
5452 error ("x87 register return with x87 disabled");
5453 issued_x87_ret_error = true;
5455 return NULL;
5458 /* First construct the simple cases. Avoid SCmode, since we want to use
5459 a single register to pass this type. */
5460 if (n == 1 && mode != SCmode)
5461 switch (regclass[0])
5463 case X86_64_INTEGER_CLASS:
5464 case X86_64_INTEGERSI_CLASS:
5465 return gen_rtx_REG (mode, intreg[0]);
5466 case X86_64_SSE_CLASS:
5467 case X86_64_SSESF_CLASS:
5468 case X86_64_SSEDF_CLASS:
5469 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5470 case X86_64_X87_CLASS:
5471 case X86_64_COMPLEX_X87_CLASS:
5472 return gen_rtx_REG (mode, FIRST_STACK_REG);
5473 case X86_64_NO_CLASS:
5474 /* Zero sized array, struct or class. */
5475 return NULL;
5476 default:
5477 gcc_unreachable ();
5479 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5480 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5481 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5482 if (n == 4
5483 && regclass[0] == X86_64_SSE_CLASS
5484 && regclass[1] == X86_64_SSEUP_CLASS
5485 && regclass[2] == X86_64_SSEUP_CLASS
5486 && regclass[3] == X86_64_SSEUP_CLASS
5487 && mode != BLKmode)
5488 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5490 if (n == 2
5491 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5492 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5493 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5494 && regclass[1] == X86_64_INTEGER_CLASS
5495 && (mode == CDImode || mode == TImode || mode == TFmode)
5496 && intreg[0] + 1 == intreg[1])
5497 return gen_rtx_REG (mode, intreg[0]);
5499 /* Otherwise figure out the entries of the PARALLEL. */
5500 for (i = 0; i < n; i++)
5502 int pos;
5504 switch (regclass[i])
5506 case X86_64_NO_CLASS:
5507 break;
5508 case X86_64_INTEGER_CLASS:
5509 case X86_64_INTEGERSI_CLASS:
5510 /* Merge TImodes on aligned occasions here too. */
5511 if (i * 8 + 8 > bytes)
5512 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5513 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5514 tmpmode = SImode;
5515 else
5516 tmpmode = DImode;
5517 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
5518 if (tmpmode == BLKmode)
5519 tmpmode = DImode;
5520 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5521 gen_rtx_REG (tmpmode, *intreg),
5522 GEN_INT (i*8));
5523 intreg++;
5524 break;
5525 case X86_64_SSESF_CLASS:
5526 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5527 gen_rtx_REG (SFmode,
5528 SSE_REGNO (sse_regno)),
5529 GEN_INT (i*8));
5530 sse_regno++;
5531 break;
5532 case X86_64_SSEDF_CLASS:
5533 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5534 gen_rtx_REG (DFmode,
5535 SSE_REGNO (sse_regno)),
5536 GEN_INT (i*8));
5537 sse_regno++;
5538 break;
5539 case X86_64_SSE_CLASS:
5540 pos = i;
5541 switch (n)
5543 case 1:
5544 tmpmode = DImode;
5545 break;
5546 case 2:
5547 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5549 tmpmode = TImode;
5550 i++;
5552 else
5553 tmpmode = DImode;
5554 break;
5555 case 4:
5556 gcc_assert (i == 0
5557 && regclass[1] == X86_64_SSEUP_CLASS
5558 && regclass[2] == X86_64_SSEUP_CLASS
5559 && regclass[3] == X86_64_SSEUP_CLASS);
5560 tmpmode = OImode;
5561 i += 3;
5562 break;
5563 default:
5564 gcc_unreachable ();
5566 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5567 gen_rtx_REG (tmpmode,
5568 SSE_REGNO (sse_regno)),
5569 GEN_INT (pos*8));
5570 sse_regno++;
5571 break;
5572 default:
5573 gcc_unreachable ();
5577 /* Empty aligned struct, union or class. */
5578 if (nexps == 0)
5579 return NULL;
5581 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5582 for (i = 0; i < nexps; i++)
5583 XVECEXP (ret, 0, i) = exp [i];
5584 return ret;
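/* For the hypothetical struct pair classified as { SSEDF, INTEGERSI },
   the container built above is a PARALLEL resembling

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:SI di) (const_int 8))])

   i.e. the double at byte offset 0 in an SSE register and the int at
   byte offset 8 in an integer register; register numbers are
   illustrative and depend on INTREG and SSE_REGNO.  */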
5587 /* Update the data in CUM to advance over an argument of mode MODE
5588 and data type TYPE. (TYPE is null for libcalls where that information
5589 may not be available.) */
5591 static void
5592 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5593 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5595 switch (mode)
5597 default:
5598 break;
5600 case BLKmode:
5601 if (bytes < 0)
5602 break;
5603 /* FALLTHRU */
5605 case DImode:
5606 case SImode:
5607 case HImode:
5608 case QImode:
5609 cum->words += words;
5610 cum->nregs -= words;
5611 cum->regno += words;
5613 if (cum->nregs <= 0)
5615 cum->nregs = 0;
5616 cum->regno = 0;
5618 break;
5620 case OImode:
5621 /* OImode shouldn't be used directly. */
5622 gcc_unreachable ();
5624 case DFmode:
5625 if (cum->float_in_sse < 2)
5626 break;
5627 case SFmode:
5628 if (cum->float_in_sse < 1)
5629 break;
5630 /* FALLTHRU */
5632 case V8SFmode:
5633 case V8SImode:
5634 case V32QImode:
5635 case V16HImode:
5636 case V4DFmode:
5637 case V4DImode:
5638 case TImode:
5639 case V16QImode:
5640 case V8HImode:
5641 case V4SImode:
5642 case V2DImode:
5643 case V4SFmode:
5644 case V2DFmode:
5645 if (!type || !AGGREGATE_TYPE_P (type))
5647 cum->sse_words += words;
5648 cum->sse_nregs -= 1;
5649 cum->sse_regno += 1;
5650 if (cum->sse_nregs <= 0)
5652 cum->sse_nregs = 0;
5653 cum->sse_regno = 0;
5656 break;
5658 case V8QImode:
5659 case V4HImode:
5660 case V2SImode:
5661 case V2SFmode:
5662 case V1DImode:
5663 if (!type || !AGGREGATE_TYPE_P (type))
5665 cum->mmx_words += words;
5666 cum->mmx_nregs -= 1;
5667 cum->mmx_regno += 1;
5668 if (cum->mmx_nregs <= 0)
5670 cum->mmx_nregs = 0;
5671 cum->mmx_regno = 0;
5674 break;
5678 static void
5679 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5680 tree type, HOST_WIDE_INT words, int named)
5682 int int_nregs, sse_nregs;
5684 /* Unnamed 256bit vector mode parameters are passed on stack. */
5685 if (!named && VALID_AVX256_REG_MODE (mode))
5686 return;
5688 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5689 cum->words += words;
5690 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5692 cum->nregs -= int_nregs;
5693 cum->sse_nregs -= sse_nregs;
5694 cum->regno += int_nregs;
5695 cum->sse_regno += sse_nregs;
5697 else
5698 cum->words += words;
5701 static void
5702 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5703 HOST_WIDE_INT words)
5705 /* Otherwise, this should be passed indirect. */
5706 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5708 cum->words += words;
5709 if (cum->nregs > 0)
5711 cum->nregs -= 1;
5712 cum->regno += 1;
5716 void
5717 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5718 tree type, int named)
5720 HOST_WIDE_INT bytes, words;
5722 if (mode == BLKmode)
5723 bytes = int_size_in_bytes (type);
5724 else
5725 bytes = GET_MODE_SIZE (mode);
5726 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5728 if (type)
5729 mode = type_natural_mode (type, NULL);
5731 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5732 function_arg_advance_ms_64 (cum, bytes, words);
5733 else if (TARGET_64BIT)
5734 function_arg_advance_64 (cum, mode, type, words, named);
5735 else
5736 function_arg_advance_32 (cum, mode, type, bytes, words);
5739 /* Define where to put the arguments to a function.
5740 Value is zero to push the argument on the stack,
5741 or a hard register in which to store the argument.
5743 MODE is the argument's machine mode.
5744 TYPE is the data type of the argument (as a tree).
5745 This is null for libcalls where that information may
5746 not be available.
5747 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5748 the preceding args and about the function being called.
5749 NAMED is nonzero if this argument is a named parameter
5750 (otherwise it is an extra parameter matching an ellipsis). */
5752 static rtx
5753 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5754 enum machine_mode orig_mode, tree type,
5755 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5757 static bool warnedsse, warnedmmx;
5759 /* Avoid the AL settings for the Unix64 ABI. */
5760 if (mode == VOIDmode)
5761 return constm1_rtx;
5763 switch (mode)
5765 default:
5766 break;
5768 case BLKmode:
5769 if (bytes < 0)
5770 break;
5771 /* FALLTHRU */
5772 case DImode:
5773 case SImode:
5774 case HImode:
5775 case QImode:
5776 if (words <= cum->nregs)
5778 int regno = cum->regno;
5780 /* Fastcall allocates the first two DWORD (SImode) or
5781 smaller arguments to ECX and EDX if the argument isn't an
5782 aggregate type. */
5783 if (cum->fastcall)
5785 if (mode == BLKmode
5786 || mode == DImode
5787 || (type && AGGREGATE_TYPE_P (type)))
5788 break;
5790 /* ECX, not EAX, is the first allocated register. */
5791 if (regno == AX_REG)
5792 regno = CX_REG;
5794 return gen_rtx_REG (mode, regno);
5796 break;
5798 case DFmode:
5799 if (cum->float_in_sse < 2)
5800 break;
5801 case SFmode:
5802 if (cum->float_in_sse < 1)
5803 break;
5804 /* FALLTHRU */
5805 case TImode:
5806 /* In 32bit, we pass TImode in xmm registers. */
5807 case V16QImode:
5808 case V8HImode:
5809 case V4SImode:
5810 case V2DImode:
5811 case V4SFmode:
5812 case V2DFmode:
5813 if (!type || !AGGREGATE_TYPE_P (type))
5815 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5817 warnedsse = true;
5818 warning (0, "SSE vector argument without SSE enabled "
5819 "changes the ABI");
5821 if (cum->sse_nregs)
5822 return gen_reg_or_parallel (mode, orig_mode,
5823 cum->sse_regno + FIRST_SSE_REG);
5825 break;
5827 case OImode:
5828 /* OImode shouldn't be used directly. */
5829 gcc_unreachable ();
5831 case V8SFmode:
5832 case V8SImode:
5833 case V32QImode:
5834 case V16HImode:
5835 case V4DFmode:
5836 case V4DImode:
5837 if (!type || !AGGREGATE_TYPE_P (type))
5839 if (cum->sse_nregs)
5840 return gen_reg_or_parallel (mode, orig_mode,
5841 cum->sse_regno + FIRST_SSE_REG);
5843 break;
5845 case V8QImode:
5846 case V4HImode:
5847 case V2SImode:
5848 case V2SFmode:
5849 case V1DImode:
5850 if (!type || !AGGREGATE_TYPE_P (type))
5852 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5854 warnedmmx = true;
5855 warning (0, "MMX vector argument without MMX enabled "
5856 "changes the ABI");
5858 if (cum->mmx_nregs)
5859 return gen_reg_or_parallel (mode, orig_mode,
5860 cum->mmx_regno + FIRST_MMX_REG);
5862 break;
5865 return NULL_RTX;
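/* Sketch of the fastcall path above: for a hypothetical

       int __attribute__((fastcall)) f (int a, int b, int c);

   A goes in ECX, B in EDX and C on the stack, since only the first two
   SImode-or-smaller non-aggregate arguments receive registers.  */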
5868 static rtx
5869 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5870 enum machine_mode orig_mode, tree type, int named)
5872 /* Handle a hidden AL argument containing number of registers
5873 for varargs x86-64 functions. */
5874 if (mode == VOIDmode)
5875 return GEN_INT (cum->maybe_vaarg
5876 ? (cum->sse_nregs < 0
5877 ? (cum->call_abi == ix86_abi
5878 ? SSE_REGPARM_MAX
5879 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5880 : X64_SSE_REGPARM_MAX))
5881 : cum->sse_regno)
5882 : -1);
5884 switch (mode)
5886 default:
5887 break;
5889 case V8SFmode:
5890 case V8SImode:
5891 case V32QImode:
5892 case V16HImode:
5893 case V4DFmode:
5894 case V4DImode:
5895 /* Unnamed 256bit vector mode parameters are passed on stack. */
5896 if (!named)
5897 return NULL;
5898 break;
5901 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5902 cum->sse_nregs,
5903 &x86_64_int_parameter_registers [cum->regno],
5904 cum->sse_regno);
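/* The hidden AL value returned above means that for a varargs call such
   as printf ("%f", x) under the SysV ABI, the caller emits something
   like

       movl $1, %eax

   to tell the callee that one SSE register carries arguments, so its
   prologue knows how many XMM registers to spill (see
   setup_incoming_varargs_64 below).  */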
5907 static rtx
5908 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5909 enum machine_mode orig_mode, int named,
5910 HOST_WIDE_INT bytes)
5912 unsigned int regno;
5914 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
5915 We use the value -2 to specify that the current function call is MS ABI. */
5916 if (mode == VOIDmode)
5917 return GEN_INT (-2);
5919 /* If we've run out of registers, it goes on the stack. */
5920 if (cum->nregs == 0)
5921 return NULL_RTX;
5923 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5925 /* Only floating point modes are passed in anything but integer regs. */
5926 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5928 if (named)
5929 regno = cum->regno + FIRST_SSE_REG;
5930 else
5932 rtx t1, t2;
5934 /* Unnamed floating parameters are passed in both the
5935 SSE and integer registers. */
5936 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5937 t2 = gen_rtx_REG (mode, regno);
5938 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5939 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5940 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5943 /* Handle aggregate types passed in registers. */
5944 if (orig_mode == BLKmode)
5946 if (bytes > 0 && bytes <= 8)
5947 mode = (bytes > 4 ? DImode : SImode);
5948 if (mode == BLKmode)
5949 mode = DImode;
5952 return gen_reg_or_parallel (mode, orig_mode, regno);
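/* Illustrative summary of the MS ABI path above: the first four
   arguments occupy RCX, RDX, R8 and R9, with XMM0-XMM3 used instead for
   named floats; an unnamed float is described by the PARALLEL so that
   it lands in both its XMM and its integer slot, matching the Win64
   varargs convention.  */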
5956 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5957 tree type, int named)
5959 enum machine_mode mode = omode;
5960 HOST_WIDE_INT bytes, words;
5962 if (mode == BLKmode)
5963 bytes = int_size_in_bytes (type);
5964 else
5965 bytes = GET_MODE_SIZE (mode);
5966 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5968 /* To simplify the code below, represent vector types with a vector mode
5969 even if MMX/SSE are not active. */
5970 if (type && TREE_CODE (type) == VECTOR_TYPE)
5971 mode = type_natural_mode (type, cum);
5973 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5974 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5975 else if (TARGET_64BIT)
5976 return function_arg_64 (cum, mode, omode, type, named);
5977 else
5978 return function_arg_32 (cum, mode, omode, type, bytes, words);
5981 /* A C expression that indicates when an argument must be passed by
5982 reference. If nonzero for an argument, a copy of that argument is
5983 made in memory and a pointer to the argument is passed instead of
5984 the argument itself. The pointer is passed in whatever way is
5985 appropriate for passing a pointer to that type. */
5987 static bool
5988 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5989 enum machine_mode mode ATTRIBUTE_UNUSED,
5990 const_tree type, bool named ATTRIBUTE_UNUSED)
5992 /* See Windows x64 Software Convention. */
5993 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5995 int msize = (int) GET_MODE_SIZE (mode);
5996 if (type)
5998 /* Arrays are passed by reference. */
5999 if (TREE_CODE (type) == ARRAY_TYPE)
6000 return true;
6002 if (AGGREGATE_TYPE_P (type))
6004 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6005 are passed by reference. */
6006 msize = int_size_in_bytes (type);
6010 /* __m128 is passed by reference. */
6011 switch (msize) {
6012 case 1: case 2: case 4: case 8:
6013 break;
6014 default:
6015 return true;
6018 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6019 return true;
6021 return false;
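/* Sketch of the Win64 rules above: a hypothetical

       struct big { char buf[24]; };

   has a size outside {1, 2, 4, 8} and is therefore passed by reference,
   while an 8 byte struct is passed by value in a single integer
   register.  */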
6024 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6025 ABI. */
6026 static bool
6027 contains_aligned_value_p (tree type)
6029 enum machine_mode mode = TYPE_MODE (type);
6030 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6031 || mode == TDmode
6032 || mode == TFmode
6033 || mode == TCmode)
6034 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6035 return true;
6036 if (TYPE_ALIGN (type) < 128)
6037 return false;
6039 if (AGGREGATE_TYPE_P (type))
6041 /* Walk the aggregates recursively. */
6042 switch (TREE_CODE (type))
6044 case RECORD_TYPE:
6045 case UNION_TYPE:
6046 case QUAL_UNION_TYPE:
6048 tree field;
6050 /* Walk all the structure fields. */
6051 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6053 if (TREE_CODE (field) == FIELD_DECL
6054 && contains_aligned_value_p (TREE_TYPE (field)))
6055 return true;
6057 break;
6060 case ARRAY_TYPE:
6061 /* Just for use if some languages pass arrays by value. */
6062 if (contains_aligned_value_p (TREE_TYPE (type)))
6063 return true;
6064 break;
6066 default:
6067 gcc_unreachable ();
6070 return false;
6073 /* Gives the alignment boundary, in bits, of an argument with the
6074 specified mode and type. */
6077 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6079 int align;
6080 if (type)
6082 /* Since canonical type is used for call, we convert it to
6083 canonical type if needed. */
6084 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6085 type = TYPE_CANONICAL (type);
6086 align = TYPE_ALIGN (type);
6088 else
6089 align = GET_MODE_ALIGNMENT (mode);
6090 if (align < PARM_BOUNDARY)
6091 align = PARM_BOUNDARY;
6092 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6093 natural boundaries. */
6094 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6096 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6097 make an exception for SSE modes since these require 128bit
6098 alignment.
6100 The handling here differs from field_alignment. ICC aligns MMX
6101 arguments to 4 byte boundaries, while structure fields are aligned
6102 to 8 byte boundaries. */
6103 if (!type)
6105 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6106 align = PARM_BOUNDARY;
6108 else
6110 if (!contains_aligned_value_p (type))
6111 align = PARM_BOUNDARY;
6114 if (align > BIGGEST_ALIGNMENT)
6115 align = BIGGEST_ALIGNMENT;
6116 return align;
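/* For example (illustrative): on a 32-bit target a plain int argument
   keeps the 32 bit PARM_BOUNDARY, while an __m128 argument, or an
   aggregate containing one, is bumped to a 128 bit boundary by the
   logic above.  */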
6119 /* Return true if N is a possible register number of function value. */
6121 bool
6122 ix86_function_value_regno_p (int regno)
6124 switch (regno)
6126 case 0:
6127 return true;
6129 case FIRST_FLOAT_REG:
6130 /* TODO: The function should depend on current function ABI but
6131 builtins.c would need updating then. Therefore we use the
6132 default ABI. */
6133 if (TARGET_64BIT && ix86_abi == MS_ABI)
6134 return false;
6135 return TARGET_FLOAT_RETURNS_IN_80387;
6137 case FIRST_SSE_REG:
6138 return TARGET_SSE;
6140 case FIRST_MMX_REG:
6141 if (TARGET_MACHO || TARGET_64BIT)
6142 return false;
6143 return TARGET_MMX;
6146 return false;
6149 /* Define how to find the value returned by a function.
6150 VALTYPE is the data type of the value (as a tree).
6151 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6152 otherwise, FUNC is 0. */
6154 static rtx
6155 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6156 const_tree fntype, const_tree fn)
6158 unsigned int regno;
6160 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6161 we normally prevent this case when mmx is not available. However
6162 some ABIs may require the result to be returned like DImode. */
6163 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6164 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6166 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6167 we prevent this case when sse is not available. However some ABIs
6168 may require the result to be returned like integer TImode. */
6169 else if (mode == TImode
6170 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6171 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6173 /* 32-byte vector modes in %ymm0. */
6174 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6175 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6177 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6178 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6179 regno = FIRST_FLOAT_REG;
6180 else
6181 /* Most things go in %eax. */
6182 regno = AX_REG;
6184 /* Override FP return register with %xmm0 for local functions when
6185 SSE math is enabled or for functions with sseregparm attribute. */
6186 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6188 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6189 if ((sse_level >= 1 && mode == SFmode)
6190 || (sse_level == 2 && mode == DFmode))
6191 regno = FIRST_SSE_REG;
6194 /* OImode shouldn't be used directly. */
6195 gcc_assert (mode != OImode);
6197 return gen_rtx_REG (orig_mode, regno);
6200 static rtx
6201 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6202 const_tree valtype)
6204 rtx ret;
6206 /* Handle libcalls, which don't provide a type node. */
6207 if (valtype == NULL)
6209 switch (mode)
6211 case SFmode:
6212 case SCmode:
6213 case DFmode:
6214 case DCmode:
6215 case TFmode:
6216 case SDmode:
6217 case DDmode:
6218 case TDmode:
6219 return gen_rtx_REG (mode, FIRST_SSE_REG);
6220 case XFmode:
6221 case XCmode:
6222 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6223 case TCmode:
6224 return NULL;
6225 default:
6226 return gen_rtx_REG (mode, AX_REG);
6230 ret = construct_container (mode, orig_mode, valtype, 1,
6231 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6232 x86_64_int_return_registers, 0);
6234 /* For zero sized structures, construct_container returns NULL, but we
6235 need to keep the rest of the compiler happy by returning a meaningful value. */
6236 if (!ret)
6237 ret = gen_rtx_REG (orig_mode, AX_REG);
6239 return ret;
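/* Sketch of typical results: a double libcall value comes back in XMM0,
   a long double (XFmode) in ST(0), and an aggregate classified as
   { SSEDF, INTEGERSI } in a PARALLEL combining XMM0 with the low half
   of RAX, per the container logic above.  */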
6242 static rtx
6243 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6245 unsigned int regno = AX_REG;
6247 if (TARGET_SSE)
6249 switch (GET_MODE_SIZE (mode))
6251 case 16:
6252 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6253 && !COMPLEX_MODE_P (mode))
6254 regno = FIRST_SSE_REG;
6255 break;
6256 case 8:
6257 case 4:
6258 if (mode == SFmode || mode == DFmode)
6259 regno = FIRST_SSE_REG;
6260 break;
6261 default:
6262 break;
6265 return gen_rtx_REG (orig_mode, regno);
6268 static rtx
6269 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6270 enum machine_mode orig_mode, enum machine_mode mode)
6272 const_tree fn, fntype;
6274 fn = NULL_TREE;
6275 if (fntype_or_decl && DECL_P (fntype_or_decl))
6276 fn = fntype_or_decl;
6277 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6279 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6280 return function_value_ms_64 (orig_mode, mode);
6281 else if (TARGET_64BIT)
6282 return function_value_64 (orig_mode, mode, valtype);
6283 else
6284 return function_value_32 (orig_mode, mode, fntype, fn);
6287 static rtx
6288 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6289 bool outgoing ATTRIBUTE_UNUSED)
6291 enum machine_mode mode, orig_mode;
6293 orig_mode = TYPE_MODE (valtype);
6294 mode = type_natural_mode (valtype, NULL);
6295 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6299 ix86_libcall_value (enum machine_mode mode)
6301 return ix86_function_value_1 (NULL, NULL, mode, mode);
6304 /* Return true iff type is returned in memory. */
6306 static int ATTRIBUTE_UNUSED
6307 return_in_memory_32 (const_tree type, enum machine_mode mode)
6309 HOST_WIDE_INT size;
6311 if (mode == BLKmode)
6312 return 1;
6314 size = int_size_in_bytes (type);
6316 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6317 return 0;
6319 if (VECTOR_MODE_P (mode) || mode == TImode)
6321 /* User-created vectors small enough to fit in EAX. */
6322 if (size < 8)
6323 return 0;
6325 /* MMX/3dNow values are returned in MM0,
6326 except when it doesn't exist. */
6327 if (size == 8)
6328 return (TARGET_MMX ? 0 : 1);
6330 /* SSE values are returned in XMM0, except when it doesn't exist. */
6331 if (size == 16)
6332 return (TARGET_SSE ? 0 : 1);
6334 /* AVX values are returned in YMM0, except when it doesn't exist. */
6335 if (size == 32)
6336 return TARGET_AVX ? 0 : 1;
6339 if (mode == XFmode)
6340 return 0;
6342 if (size > 12)
6343 return 1;
6345 /* OImode shouldn't be used directly. */
6346 gcc_assert (mode != OImode);
6348 return 0;
6351 static int ATTRIBUTE_UNUSED
6352 return_in_memory_64 (const_tree type, enum machine_mode mode)
6354 int needed_intregs, needed_sseregs;
6355 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6358 static int ATTRIBUTE_UNUSED
6359 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6361 HOST_WIDE_INT size = int_size_in_bytes (type);
6363 /* __m128 is returned in xmm0. */
6364 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6365 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6366 return 0;
6368 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6369 return (size != 1 && size != 2 && size != 4 && size != 8);
6372 static bool
6373 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6375 #ifdef SUBTARGET_RETURN_IN_MEMORY
6376 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6377 #else
6378 const enum machine_mode mode = type_natural_mode (type, NULL);
6380 if (TARGET_64BIT)
6382 if (ix86_function_type_abi (fntype) == MS_ABI)
6383 return return_in_memory_ms_64 (type, mode);
6384 else
6385 return return_in_memory_64 (type, mode);
6387 else
6388 return return_in_memory_32 (type, mode);
6389 #endif
6392 /* Return false iff TYPE is returned in memory. This version is used
6393 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6394 but differs notably in that when MMX is available, 8-byte vectors
6395 are returned in memory, rather than in MMX registers. */
6397 bool
6398 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6400 int size;
6401 enum machine_mode mode = type_natural_mode (type, NULL);
6403 if (TARGET_64BIT)
6404 return return_in_memory_64 (type, mode);
6406 if (mode == BLKmode)
6407 return 1;
6409 size = int_size_in_bytes (type);
6411 if (VECTOR_MODE_P (mode))
6413 /* Return in memory only if MMX registers *are* available. This
6414 seems backwards, but it is consistent with the existing
6415 Solaris x86 ABI. */
6416 if (size == 8)
6417 return TARGET_MMX;
6418 if (size == 16)
6419 return !TARGET_SSE;
6421 else if (mode == TImode)
6422 return !TARGET_SSE;
6423 else if (mode == XFmode)
6424 return 0;
6426 return size > 12;
6429 /* When returning SSE vector types, we have a choice of either
6430 (1) being abi incompatible with a -march switch, or
6431 (2) generating an error.
6432 Given no good solution, I think the safest thing is one warning.
6433 The user won't be able to use -Werror, but....
6435 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6436 called in response to actually generating a caller or callee that
6437 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6438 via aggregate_value_p for general type probing from tree-ssa. */
6440 static rtx
6441 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6443 static bool warnedsse, warnedmmx;
6445 if (!TARGET_64BIT && type)
6447 /* Look at the return type of the function, not the function type. */
6448 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6450 if (!TARGET_SSE && !warnedsse)
6452 if (mode == TImode
6453 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6455 warnedsse = true;
6456 warning (0, "SSE vector return without SSE enabled "
6457 "changes the ABI");
6461 if (!TARGET_MMX && !warnedmmx)
6463 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6465 warnedmmx = true;
6466 warning (0, "MMX vector return without MMX enabled "
6467 "changes the ABI");
6472 return NULL;
6476 /* Create the va_list data type. */
6478 /* Returns the calling convention specific va_list data type.
6479 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6481 static tree
6482 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6484 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6487 /* For i386 we use a plain pointer to the argument area. */
6487 if (!TARGET_64BIT || abi == MS_ABI)
6488 return build_pointer_type (char_type_node);
6490 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6491 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6493 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6494 unsigned_type_node);
6495 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6496 unsigned_type_node);
6497 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6498 ptr_type_node);
6499 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6500 ptr_type_node);
6502 va_list_gpr_counter_field = f_gpr;
6503 va_list_fpr_counter_field = f_fpr;
6505 DECL_FIELD_CONTEXT (f_gpr) = record;
6506 DECL_FIELD_CONTEXT (f_fpr) = record;
6507 DECL_FIELD_CONTEXT (f_ovf) = record;
6508 DECL_FIELD_CONTEXT (f_sav) = record;
6510 TREE_CHAIN (record) = type_decl;
6511 TYPE_NAME (record) = type_decl;
6512 TYPE_FIELDS (record) = f_gpr;
6513 TREE_CHAIN (f_gpr) = f_fpr;
6514 TREE_CHAIN (f_fpr) = f_ovf;
6515 TREE_CHAIN (f_ovf) = f_sav;
6517 layout_type (record);
6519 /* The correct type is an array type of one element. */
6520 return build_array_type (record, build_index_type (size_zero_node));
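/* The record built above matches the psABI va_list layout, roughly the
   equivalent of

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;

   exposed to the user as a one element array type.  */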
6523 /* Setup the builtin va_list data type and for 64-bit the additional
6524 calling convention specific va_list data types. */
6526 static tree
6527 ix86_build_builtin_va_list (void)
6529 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6531 /* Initialize abi specific va_list builtin types. */
6532 if (TARGET_64BIT)
6534 tree t;
6535 if (ix86_abi == MS_ABI)
6537 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6538 if (TREE_CODE (t) != RECORD_TYPE)
6539 t = build_variant_type_copy (t);
6540 sysv_va_list_type_node = t;
6542 else
6544 t = ret;
6545 if (TREE_CODE (t) != RECORD_TYPE)
6546 t = build_variant_type_copy (t);
6547 sysv_va_list_type_node = t;
6549 if (ix86_abi != MS_ABI)
6551 t = ix86_build_builtin_va_list_abi (MS_ABI);
6552 if (TREE_CODE (t) != RECORD_TYPE)
6553 t = build_variant_type_copy (t);
6554 ms_va_list_type_node = t;
6556 else
6558 t = ret;
6559 if (TREE_CODE (t) != RECORD_TYPE)
6560 t = build_variant_type_copy (t);
6561 ms_va_list_type_node = t;
6565 return ret;
6568 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6570 static void
6571 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6573 rtx save_area, mem;
6574 rtx label;
6575 rtx label_ref;
6576 rtx tmp_reg;
6577 rtx nsse_reg;
6578 alias_set_type set;
6579 int i;
6580 int regparm = ix86_regparm;
6582 if (cum->call_abi != ix86_abi)
6583 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6585 /* GPR size of varargs save area. */
6586 if (cfun->va_list_gpr_size)
6587 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6588 else
6589 ix86_varargs_gpr_size = 0;
6591 /* FPR size of varargs save area. We don't need it if we don't pass
6592 anything in SSE registers. */
6593 if (cum->sse_nregs && cfun->va_list_fpr_size)
6594 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6595 else
6596 ix86_varargs_fpr_size = 0;
6598 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6599 return;
6601 save_area = frame_pointer_rtx;
6602 set = get_varargs_alias_set ();
6604 for (i = cum->regno;
6605 i < regparm
6606 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6607 i++)
6609 mem = gen_rtx_MEM (Pmode,
6610 plus_constant (save_area, i * UNITS_PER_WORD));
6611 MEM_NOTRAP_P (mem) = 1;
6612 set_mem_alias_set (mem, set);
6613 emit_move_insn (mem, gen_rtx_REG (Pmode,
6614 x86_64_int_parameter_registers[i]));
6617 if (ix86_varargs_fpr_size)
6619 /* Now emit code to save the SSE registers. The AX parameter contains the
6620 number of SSE parameter registers used to call this function. We use the
6621 sse_prologue_save insn template, which produces a computed jump across the
6622 SSE saves. We need some preparation work to get this working. */
6624 label = gen_label_rtx ();
6625 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6627 /* Compute the address to jump to:
6628 label - eax*4 + nnamed_sse_arguments*4, or
6629 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6630 tmp_reg = gen_reg_rtx (Pmode);
6631 nsse_reg = gen_reg_rtx (Pmode);
6632 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6633 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6634 gen_rtx_MULT (Pmode, nsse_reg,
6635 GEN_INT (4))));
6637 /* vmovaps is one byte longer than movaps. */
6638 if (TARGET_AVX)
6639 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6640 gen_rtx_PLUS (Pmode, tmp_reg,
6641 nsse_reg)));
6643 if (cum->sse_regno)
6644 emit_move_insn
6645 (nsse_reg,
6646 gen_rtx_CONST (DImode,
6647 gen_rtx_PLUS (DImode,
6648 label_ref,
6649 GEN_INT (cum->sse_regno
6650 * (TARGET_AVX ? 5 : 4)))));
6651 else
6652 emit_move_insn (nsse_reg, label_ref);
6653 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6655 /* Compute the address of the memory block we save into. We always use a
6656 pointer pointing 127 bytes after the first byte to store; this is needed
6657 to keep the instruction size limited to 4 bytes (5 bytes for AVX) with a
6658 one byte displacement. */
6659 tmp_reg = gen_reg_rtx (Pmode);
6660 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6661 plus_constant (save_area,
6662 ix86_varargs_gpr_size + 127)));
6663 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6664 MEM_NOTRAP_P (mem) = 1;
6665 set_mem_alias_set (mem, set);
6666 set_mem_align (mem, BITS_PER_WORD);
6668 /* And finally do the dirty job! */
6669 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6670 GEN_INT (cum->sse_regno), label));
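/* Resulting register save area layout (a sketch): six GPR slots at byte
   offsets 0..47 followed by eight 16 byte XMM slots, so a gp_offset
   below 6*8 or an fp_offset below 6*8 + 8*16 still indexes a saved
   register; compare the constants used in ix86_va_start and
   ix86_gimplify_va_arg below.  */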
6674 static void
6675 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6677 alias_set_type set = get_varargs_alias_set ();
6678 int i;
6680 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6682 rtx reg, mem;
6684 mem = gen_rtx_MEM (Pmode,
6685 plus_constant (virtual_incoming_args_rtx,
6686 i * UNITS_PER_WORD));
6687 MEM_NOTRAP_P (mem) = 1;
6688 set_mem_alias_set (mem, set);
6690 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6691 emit_move_insn (mem, reg);
6695 static void
6696 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6697 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6698 int no_rtl)
6700 CUMULATIVE_ARGS next_cum;
6701 tree fntype;
6703 /* This argument doesn't appear to be used anymore, which is good,
6704 because the old code here didn't suppress rtl generation. */
6705 gcc_assert (!no_rtl);
6707 if (!TARGET_64BIT)
6708 return;
6710 fntype = TREE_TYPE (current_function_decl);
6712 /* For varargs, we do not want to skip the dummy va_dcl argument.
6713 For stdargs, we do want to skip the last named argument. */
6714 next_cum = *cum;
6715 if (stdarg_p (fntype))
6716 function_arg_advance (&next_cum, mode, type, 1);
6718 if (cum->call_abi == MS_ABI)
6719 setup_incoming_varargs_ms_64 (&next_cum);
6720 else
6721 setup_incoming_varargs_64 (&next_cum);
6724 /* Check whether TYPE is a char * kind of va_list. */
6726 static bool
6727 is_va_list_char_pointer (tree type)
6729 tree canonic;
6731 /* For 32-bit it is always true. */
6732 if (!TARGET_64BIT)
6733 return true;
6734 canonic = ix86_canonical_va_list_type (type);
6735 return (canonic == ms_va_list_type_node
6736 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6739 /* Implement va_start. */
6741 static void
6742 ix86_va_start (tree valist, rtx nextarg)
6744 HOST_WIDE_INT words, n_gpr, n_fpr;
6745 tree f_gpr, f_fpr, f_ovf, f_sav;
6746 tree gpr, fpr, ovf, sav, t;
6747 tree type;
6749 /* Only the 64-bit target needs something special. */
6750 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6752 std_expand_builtin_va_start (valist, nextarg);
6753 return;
6756 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6757 f_fpr = TREE_CHAIN (f_gpr);
6758 f_ovf = TREE_CHAIN (f_fpr);
6759 f_sav = TREE_CHAIN (f_ovf);
6761 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6762 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6763 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6764 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6765 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6767 /* Count number of gp and fp argument registers used. */
6768 words = crtl->args.info.words;
6769 n_gpr = crtl->args.info.regno;
6770 n_fpr = crtl->args.info.sse_regno;
6772 if (cfun->va_list_gpr_size)
6774 type = TREE_TYPE (gpr);
6775 t = build2 (MODIFY_EXPR, type,
6776 gpr, build_int_cst (type, n_gpr * 8));
6777 TREE_SIDE_EFFECTS (t) = 1;
6778 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6781 if (TARGET_SSE && cfun->va_list_fpr_size)
6783 type = TREE_TYPE (fpr);
6784 t = build2 (MODIFY_EXPR, type, fpr,
6785 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6786 TREE_SIDE_EFFECTS (t) = 1;
6787 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6790 /* Find the overflow area. */
6791 type = TREE_TYPE (ovf);
6792 t = make_tree (type, crtl->args.internal_arg_pointer);
6793 if (words != 0)
6794 t = build2 (POINTER_PLUS_EXPR, type, t,
6795 size_int (words * UNITS_PER_WORD));
6796 t = build2 (MODIFY_EXPR, type, ovf, t);
6797 TREE_SIDE_EFFECTS (t) = 1;
6798 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6800 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6802 /* Find the register save area.
6803 The function prologue saves it right above the stack frame. */
6804 type = TREE_TYPE (sav);
6805 t = make_tree (type, frame_pointer_rtx);
6806 if (!ix86_varargs_gpr_size)
6807 t = build2 (POINTER_PLUS_EXPR, type, t,
6808 size_int (-8 * X86_64_REGPARM_MAX));
6809 t = build2 (MODIFY_EXPR, type, sav, t);
6810 TREE_SIDE_EFFECTS (t) = 1;
6811 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6815 /* Implement va_arg. */
6817 static tree
6818 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6819 gimple_seq *post_p)
6821 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6822 tree f_gpr, f_fpr, f_ovf, f_sav;
6823 tree gpr, fpr, ovf, sav, t;
6824 int size, rsize;
6825 tree lab_false, lab_over = NULL_TREE;
6826 tree addr, t2;
6827 rtx container;
6828 int indirect_p = 0;
6829 tree ptrtype;
6830 enum machine_mode nat_mode;
6831 int arg_boundary;
6833 /* Only the 64-bit target needs something special. */
6834 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6835 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6837 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6838 f_fpr = TREE_CHAIN (f_gpr);
6839 f_ovf = TREE_CHAIN (f_fpr);
6840 f_sav = TREE_CHAIN (f_ovf);
6842 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6843 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6844 valist = build_va_arg_indirect_ref (valist);
6845 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6846 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6847 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6849 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6850 if (indirect_p)
6851 type = build_pointer_type (type);
6852 size = int_size_in_bytes (type);
6853 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6855 nat_mode = type_natural_mode (type, NULL);
6856 switch (nat_mode)
6858 case V8SFmode:
6859 case V8SImode:
6860 case V32QImode:
6861 case V16HImode:
6862 case V4DFmode:
6863 case V4DImode:
6864 /* Unnamed 256bit vector mode parameters are passed on stack. */
6865 if (ix86_cfun_abi () == SYSV_ABI)
6867 container = NULL;
6868 break;
6871 default:
6872 container = construct_container (nat_mode, TYPE_MODE (type),
6873 type, 0, X86_64_REGPARM_MAX,
6874 X86_64_SSE_REGPARM_MAX, intreg,
6875 0);
6876 break;
6879 /* Pull the value out of the saved registers. */
6881 addr = create_tmp_var (ptr_type_node, "addr");
6882 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6884 if (container)
6886 int needed_intregs, needed_sseregs;
6887 bool need_temp;
6888 tree int_addr, sse_addr;
6890 lab_false = create_artificial_label ();
6891 lab_over = create_artificial_label ();
6893 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6895 need_temp = (!REG_P (container)
6896 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6897 || TYPE_ALIGN (type) > 128));
6899 /* In case we are passing a structure, verify that it is a consecutive
6900 block in the register save area. If not, we need to do moves. */
6901 if (!need_temp && !REG_P (container))
6903 /* Verify that all registers are strictly consecutive. */
6904 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6906 int i;
6908 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6910 rtx slot = XVECEXP (container, 0, i);
6911 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6912 || INTVAL (XEXP (slot, 1)) != i * 16)
6913 need_temp = 1;
6916 else
6918 int i;
6920 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6922 rtx slot = XVECEXP (container, 0, i);
6923 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6924 || INTVAL (XEXP (slot, 1)) != i * 8)
6925 need_temp = 1;
6929 if (!need_temp)
6931 int_addr = addr;
6932 sse_addr = addr;
6934 else
6936 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6937 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6938 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6939 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6942 /* First ensure that we fit completely in registers. */
6943 if (needed_intregs)
6945 t = build_int_cst (TREE_TYPE (gpr),
6946 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6947 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6948 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6949 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6950 gimplify_and_add (t, pre_p);
6952 if (needed_sseregs)
6954 t = build_int_cst (TREE_TYPE (fpr),
6955 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6956 + X86_64_REGPARM_MAX * 8);
6957 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6958 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6959 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6960 gimplify_and_add (t, pre_p);
6963 /* Compute index to start of area used for integer regs. */
6964 if (needed_intregs)
6966 /* int_addr = gpr + sav; */
6967 t = fold_convert (sizetype, gpr);
6968 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6969 gimplify_assign (int_addr, t, pre_p);
6971 if (needed_sseregs)
6973 /* sse_addr = fpr + sav; */
6974 t = fold_convert (sizetype, fpr);
6975 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6976 gimplify_assign (sse_addr, t, pre_p);
6978 if (need_temp)
6980 int i;
6981 tree temp = create_tmp_var (type, "va_arg_tmp");
6983 /* addr = &temp; */
6984 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6985 gimplify_assign (addr, t, pre_p);
6987 for (i = 0; i < XVECLEN (container, 0); i++)
6989 rtx slot = XVECEXP (container, 0, i);
6990 rtx reg = XEXP (slot, 0);
6991 enum machine_mode mode = GET_MODE (reg);
6992 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6993 tree addr_type = build_pointer_type (piece_type);
6994 tree daddr_type = build_pointer_type_for_mode (piece_type,
6995 ptr_mode, true);
6996 tree src_addr, src;
6997 int src_offset;
6998 tree dest_addr, dest;
7000 if (SSE_REGNO_P (REGNO (reg)))
7002 src_addr = sse_addr;
7003 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7005 else
7007 src_addr = int_addr;
7008 src_offset = REGNO (reg) * 8;
7010 src_addr = fold_convert (addr_type, src_addr);
7011 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7012 size_int (src_offset));
7013 src = build_va_arg_indirect_ref (src_addr);
7015 dest_addr = fold_convert (daddr_type, addr);
7016 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7017 size_int (INTVAL (XEXP (slot, 1))));
7018 dest = build_va_arg_indirect_ref (dest_addr);
7020 gimplify_assign (dest, src, pre_p);
7024 if (needed_intregs)
7026 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7027 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7028 gimplify_assign (gpr, t, pre_p);
7031 if (needed_sseregs)
7033 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7034 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7035 gimplify_assign (fpr, t, pre_p);
7038 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7040 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7043 /* ... otherwise out of the overflow area. */
7045 /* When we align a parameter on the stack for the caller, if the
7046 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7047 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee here
7048 with the caller. */
7049 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7050 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7051 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7053 /* Care for on-stack alignment if needed. */
7054 if (arg_boundary <= 64
7055 || integer_zerop (TYPE_SIZE (type)))
7056 t = ovf;
7057 else
7059 HOST_WIDE_INT align = arg_boundary / 8;
7060 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7061 size_int (align - 1));
7062 t = fold_convert (sizetype, t);
7063 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7064 size_int (-align));
7065 t = fold_convert (TREE_TYPE (ovf), t);
7067 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7068 gimplify_assign (addr, t, pre_p);
7070 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7071 size_int (rsize * UNITS_PER_WORD));
7072 gimplify_assign (unshare_expr (ovf), t, pre_p);
7074 if (container)
7075 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7077 ptrtype = build_pointer_type (type);
7078 addr = fold_convert (ptrtype, addr);
7080 if (indirect_p)
7081 addr = build_va_arg_indirect_ref (addr);
7082 return build_va_arg_indirect_ref (addr);
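/* The gimplified sequence behaves roughly like this C sketch for
   va_arg (ap, int), with illustrative field names taken from the
   va_list record:

       if (ap->gp_offset < 6 * 8)
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       else
         {
           addr = ap->overflow_arg_area;
           ap->overflow_arg_area += 8;
         }

   with the analogous fp_offset test for SSE classed arguments.  */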
7085 /* Return nonzero if OPNUM's MEM should be matched
7086 in movabs* patterns. */
7089 ix86_check_movabs (rtx insn, int opnum)
7091 rtx set, mem;
7093 set = PATTERN (insn);
7094 if (GET_CODE (set) == PARALLEL)
7095 set = XVECEXP (set, 0, 0);
7096 gcc_assert (GET_CODE (set) == SET);
7097 mem = XEXP (set, opnum);
7098 while (GET_CODE (mem) == SUBREG)
7099 mem = SUBREG_REG (mem);
7100 gcc_assert (MEM_P (mem));
7101 return (volatile_ok || !MEM_VOLATILE_P (mem));
7104 /* Initialize the table of extra 80387 mathematical constants. */
7106 static void
7107 init_ext_80387_constants (void)
7109 static const char * cst[5] =
7111 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7112 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7113 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7114 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7115 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7117 int i;
7119 for (i = 0; i < 5; i++)
7121 real_from_string (&ext_80387_constants_table[i], cst[i]);
7122 /* Ensure each constant is rounded to XFmode precision. */
7123 real_convert (&ext_80387_constants_table[i],
7124 XFmode, &ext_80387_constants_table[i]);
7127 ext_80387_constants_init = 1;
7130 /* Return true if the constant is something that can be loaded with
7131 a special instruction. */
7134 standard_80387_constant_p (rtx x)
7136 enum machine_mode mode = GET_MODE (x);
7138 REAL_VALUE_TYPE r;
7140 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7141 return -1;
7143 if (x == CONST0_RTX (mode))
7144 return 1;
7145 if (x == CONST1_RTX (mode))
7146 return 2;
7148 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7150 /* For XFmode constants, try to find a special 80387 instruction when
7151 optimizing for size or on those CPUs that benefit from them. */
7152 if (mode == XFmode
7153 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7155 int i;
7157 if (! ext_80387_constants_init)
7158 init_ext_80387_constants ();
7160 for (i = 0; i < 5; i++)
7161 if (real_identical (&r, &ext_80387_constants_table[i]))
7162 return i + 3;
7165 /* A load of the constant -0.0 or -1.0 will be split into an
7166 fldz;fchs or fld1;fchs sequence. */
7167 if (real_isnegzero (&r))
7168 return 8;
7169 if (real_identical (&r, &dconstm1))
7170 return 9;
7172 return 0;
7175 /* Return the opcode of the special instruction to be used to load
7176 the constant X. */
7178 const char *
7179 standard_80387_constant_opcode (rtx x)
7181 switch (standard_80387_constant_p (x))
7183 case 1:
7184 return "fldz";
7185 case 2:
7186 return "fld1";
7187 case 3:
7188 return "fldlg2";
7189 case 4:
7190 return "fldln2";
7191 case 5:
7192 return "fldl2e";
7193 case 6:
7194 return "fldl2t";
7195 case 7:
7196 return "fldpi";
7197 case 8:
7198 case 9:
7199 return "#";
7200 default:
7201 gcc_unreachable ();
7205 /* Return the CONST_DOUBLE representing the 80387 constant that is
7206 loaded by the specified special instruction. The argument IDX
7207 matches the return value from standard_80387_constant_p. */
7210 standard_80387_constant_rtx (int idx)
7212 int i;
7214 if (! ext_80387_constants_init)
7215 init_ext_80387_constants ();
7217 switch (idx)
7219 case 3:
7220 case 4:
7221 case 5:
7222 case 6:
7223 case 7:
7224 i = idx - 3;
7225 break;
7227 default:
7228 gcc_unreachable ();
7231 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7232 XFmode);
7235 /* Return 1 if MODE is a valid mode for SSE.  */
7236 static int
7237 standard_sse_mode_p (enum machine_mode mode)
7239 switch (mode)
7241 case V16QImode:
7242 case V8HImode:
7243 case V4SImode:
7244 case V2DImode:
7245 case V4SFmode:
7246 case V2DFmode:
7247 return 1;
7249 default:
7250 return 0;
7254 /* Return 1 if X is all zeros.  For all ones, return 2 if X is in a
7255    128-bit SSE mode and SSE2 is enabled, or 3 if X is in a 256-bit AVX
7256    mode and AVX is enabled (negated when the needed ISA is missing).  */
7259 standard_sse_constant_p (rtx x)
7261 enum machine_mode mode = GET_MODE (x);
7263 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7264 return 1;
7265 if (vector_all_ones_operand (x, mode))
7267 if (standard_sse_mode_p (mode))
7268 return TARGET_SSE2 ? 2 : -2;
7269 else if (VALID_AVX256_REG_MODE (mode))
7270 return TARGET_AVX ? 3 : -3;
7273 return 0;
7276 /* Return the opcode of the special instruction to be used to load
7277 the constant X. */
7279 const char *
7280 standard_sse_constant_opcode (rtx insn, rtx x)
7282 switch (standard_sse_constant_p (x))
7284 case 1:
7285 switch (get_attr_mode (insn))
7287 case MODE_V4SF:
7288 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7289 case MODE_V2DF:
7290 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7291 case MODE_TI:
7292 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7293 case MODE_V8SF:
7294 return "vxorps\t%x0, %x0, %x0";
7295 case MODE_V4DF:
7296 return "vxorpd\t%x0, %x0, %x0";
7297 case MODE_OI:
7298 return "vpxor\t%x0, %x0, %x0";
7299 default:
7300 gcc_unreachable ();
7302 case 2:
7303 if (TARGET_AVX)
7304 switch (get_attr_mode (insn))
7306 case MODE_V4SF:
7307 case MODE_V2DF:
7308 case MODE_TI:
7309 return "vpcmpeqd\t%0, %0, %0";
7310 break;
7311 default:
7312 gcc_unreachable ();
7314 else
7315 return "pcmpeqd\t%0, %0";
7317 gcc_unreachable ();
7320 /* Returns 1 if OP contains a symbol reference */
7323 symbolic_reference_mentioned_p (rtx op)
7325 const char *fmt;
7326 int i;
7328 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7329 return 1;
7331 fmt = GET_RTX_FORMAT (GET_CODE (op));
7332 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7334 if (fmt[i] == 'E')
7336 int j;
7338 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7339 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7340 return 1;
7343 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7344 return 1;
7347 return 0;
7350 /* Return 1 if it is appropriate to emit `ret' instructions in the
7351 body of a function. Do this only if the epilogue is simple, needing a
7352 couple of insns. Prior to reloading, we can't tell how many registers
7353 must be saved, so return 0 then. Return 0 if there is no frame
7354 marker to de-allocate. */
7357 ix86_can_use_return_insn_p (void)
7359 struct ix86_frame frame;
7361 if (! reload_completed || frame_pointer_needed)
7362 return 0;
7364   /* Don't allow popping more than 32768 bytes of arguments, since
7365      that's all we handle with one instruction.  */
7366 if (crtl->args.pops_args
7367 && crtl->args.size >= 32768)
7368 return 0;
7370 ix86_compute_frame_layout (&frame);
7371 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7374 /* Value should be nonzero if functions must have frame pointers.
7375 Zero means the frame pointer need not be set up (and parms may
7376 be accessed via the stack pointer) in functions that seem suitable. */
7379 ix86_frame_pointer_required (void)
7381 /* If we accessed previous frames, then the generated code expects
7382 to be able to access the saved ebp value in our frame. */
7383 if (cfun->machine->accesses_prev_frame)
7384 return 1;
7386   /* Several x86 OSes need a frame pointer for other reasons,
7387      usually pertaining to setjmp.  */
7388 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7389 return 1;
7391 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7392 the frame pointer by default. Turn it back on now if we've not
7393 got a leaf function. */
7394 if (TARGET_OMIT_LEAF_FRAME_POINTER
7395 && (!current_function_is_leaf
7396 || ix86_current_function_calls_tls_descriptor))
7397 return 1;
7399 if (crtl->profile)
7400 return 1;
7402 return 0;
7405 /* Record that the current function accesses previous call frames. */
7407 void
7408 ix86_setup_frame_addresses (void)
7410 cfun->machine->accesses_prev_frame = 1;
7413 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7414 # define USE_HIDDEN_LINKONCE 1
7415 #else
7416 # define USE_HIDDEN_LINKONCE 0
7417 #endif
7419 static int pic_labels_used;
7421 /* Fills in the label name that should be used for a pc thunk for
7422 the given register. */
7424 static void
7425 get_pc_thunk_name (char name[32], unsigned int regno)
7427 gcc_assert (!TARGET_64BIT);
7429 if (USE_HIDDEN_LINKONCE)
7430 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7431 else
7432 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7436 /* This function emits the pc thunks used for -fpic: each one loads its
7437    target register with the return address of the caller and returns.  */
7439 void
7440 ix86_file_end (void)
7442 rtx xops[2];
7443 int regno;
7445 for (regno = 0; regno < 8; ++regno)
7447 char name[32];
7449 if (! ((pic_labels_used >> regno) & 1))
7450 continue;
7452 get_pc_thunk_name (name, regno);
7454 #if TARGET_MACHO
7455 if (TARGET_MACHO)
7457 switch_to_section (darwin_sections[text_coal_section]);
7458 fputs ("\t.weak_definition\t", asm_out_file);
7459 assemble_name (asm_out_file, name);
7460 fputs ("\n\t.private_extern\t", asm_out_file);
7461 assemble_name (asm_out_file, name);
7462 fputs ("\n", asm_out_file);
7463 ASM_OUTPUT_LABEL (asm_out_file, name);
7465 else
7466 #endif
7467 if (USE_HIDDEN_LINKONCE)
7469 tree decl;
7471 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7472 error_mark_node);
7473 TREE_PUBLIC (decl) = 1;
7474 TREE_STATIC (decl) = 1;
7475 DECL_ONE_ONLY (decl) = 1;
7477 (*targetm.asm_out.unique_section) (decl, 0);
7478 switch_to_section (get_named_section (decl, NULL, 0));
7480 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7481 fputs ("\t.hidden\t", asm_out_file);
7482 assemble_name (asm_out_file, name);
7483 fputc ('\n', asm_out_file);
7484 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7486 else
7488 switch_to_section (text_section);
7489 ASM_OUTPUT_LABEL (asm_out_file, name);
7492 xops[0] = gen_rtx_REG (Pmode, regno);
7493 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7494 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7495 output_asm_insn ("ret", xops);
7498 if (NEED_INDICATE_EXEC_STACK)
7499 file_end_indicate_exec_stack ();
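
/* Editorial sketch: for a used PIC register such as %ebx, the thunk
   emitted above is essentially (AT&T syntax; the label form depends
   on USE_HIDDEN_LINKONCE):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies the caller's return address into the register.  */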
7502 /* Emit code for the SET_GOT patterns. */
7504 const char *
7505 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7507 rtx xops[3];
7509 xops[0] = dest;
7511 if (TARGET_VXWORKS_RTP && flag_pic)
7513 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7514 xops[2] = gen_rtx_MEM (Pmode,
7515 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7516 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7518 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7519 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7520 an unadorned address. */
7521 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7522 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7523 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7524 return "";
7527 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7529 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7531 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7533 if (!flag_pic)
7534 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7535 else
7536 output_asm_insn ("call\t%a2", xops);
7538 #if TARGET_MACHO
7539 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7540 is what will be referenced by the Mach-O PIC subsystem. */
7541 if (!label)
7542 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7543 #endif
7545 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7546 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7548 if (flag_pic)
7549 output_asm_insn ("pop%z0\t%0", xops);
7551 else
7553 char name[32];
7554 get_pc_thunk_name (name, REGNO (dest));
7555 pic_labels_used |= 1 << REGNO (dest);
7557 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7558 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7559 output_asm_insn ("call\t%X2", xops);
7560 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7561 is what will be referenced by the Mach-O PIC subsystem. */
7562 #if TARGET_MACHO
7563 if (!label)
7564 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7565 else
7566 targetm.asm_out.internal_label (asm_out_file, "L",
7567 CODE_LABEL_NUMBER (label));
7568 #endif
7571 if (TARGET_MACHO)
7572 return "";
7574 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7575 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7576 else
7577 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7579 return "";
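
/* Editorial sketch: leaving VxWorks and Mach-O aside, the sequences
   printed above come down to roughly this for %ebx as DEST (the .L2
   label name is hypothetical):

     without deep branch prediction:
	call	.L2
     .L2:
	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+(.-.L2), %ebx

     with deep branch prediction:
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */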
7582 /* Generate a "push" pattern for input ARG.  */
7584 static rtx
7585 gen_push (rtx arg)
7587 return gen_rtx_SET (VOIDmode,
7588 gen_rtx_MEM (Pmode,
7589 gen_rtx_PRE_DEC (Pmode,
7590 stack_pointer_rtx)),
7591 arg);
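
/* Editorial note: for 32-bit Pmode the RTL built above is

	(set (mem:SI (pre_dec:SI (reg:SI sp))) arg)

   i.e. a plain push instruction.  */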
7594 /* Return >= 0 if there is an unused call-clobbered register available
7595 for the entire function. */
7597 static unsigned int
7598 ix86_select_alt_pic_regnum (void)
7600 if (current_function_is_leaf && !crtl->profile
7601 && !ix86_current_function_calls_tls_descriptor)
7603 int i, drap;
7604 /* Can't use the same register for both PIC and DRAP. */
7605 if (crtl->drap_reg)
7606 drap = REGNO (crtl->drap_reg);
7607 else
7608 drap = -1;
7609 for (i = 2; i >= 0; --i)
7610 if (i != drap && !df_regs_ever_live_p (i))
7611 return i;
7614 return INVALID_REGNUM;
7617 /* Return 1 if we need to save REGNO. */
7618 static int
7619 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7621 if (pic_offset_table_rtx
7622 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7623 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7624 || crtl->profile
7625 || crtl->calls_eh_return
7626 || crtl->uses_const_pool))
7628 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7629 return 0;
7630 return 1;
7633 if (crtl->calls_eh_return && maybe_eh_return)
7635 unsigned i;
7636 for (i = 0; ; i++)
7638 unsigned test = EH_RETURN_DATA_REGNO (i);
7639 if (test == INVALID_REGNUM)
7640 break;
7641 if (test == regno)
7642 return 1;
7646 if (crtl->drap_reg
7647 && regno == REGNO (crtl->drap_reg))
7648 return 1;
7650 return (df_regs_ever_live_p (regno)
7651 && !call_used_regs[regno]
7652 && !fixed_regs[regno]
7653 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7656 /* Return the number of saved general purpose registers.  */
7658 static int
7659 ix86_nsaved_regs (void)
7661 int nregs = 0;
7662 int regno;
7664 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7665 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7666 nregs ++;
7667 return nregs;
7670 /* Return the number of saved SSE registers.  */
7672 static int
7673 ix86_nsaved_sseregs (void)
7675 int nregs = 0;
7676 int regno;
7678 if (ix86_cfun_abi () != MS_ABI)
7679 return 0;
7680 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7681 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7682 nregs ++;
7683 return nregs;
7686 /* Given FROM and TO register numbers, say whether this elimination is
7687 allowed. If stack alignment is needed, we can only replace argument
7688 pointer with hard frame pointer, or replace frame pointer with stack
7689 pointer. Otherwise, frame pointer elimination is automatically
7690 handled and all other eliminations are valid. */
7693 ix86_can_eliminate (int from, int to)
7695 if (stack_realign_fp)
7696 return ((from == ARG_POINTER_REGNUM
7697 && to == HARD_FRAME_POINTER_REGNUM)
7698 || (from == FRAME_POINTER_REGNUM
7699 && to == STACK_POINTER_REGNUM));
7700 else
7701 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7704 /* Return the offset between two registers, one to be eliminated, and the other
7705 its replacement, at the start of a routine. */
7707 HOST_WIDE_INT
7708 ix86_initial_elimination_offset (int from, int to)
7710 struct ix86_frame frame;
7711 ix86_compute_frame_layout (&frame);
7713 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7714 return frame.hard_frame_pointer_offset;
7715 else if (from == FRAME_POINTER_REGNUM
7716 && to == HARD_FRAME_POINTER_REGNUM)
7717 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7718 else
7720 gcc_assert (to == STACK_POINTER_REGNUM);
7722 if (from == ARG_POINTER_REGNUM)
7723 return frame.stack_pointer_offset;
7725 gcc_assert (from == FRAME_POINTER_REGNUM);
7726 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7730 /* In a dynamically-aligned function, we can't know the offset from
7731 stack pointer to frame pointer, so we must ensure that setjmp
7732 eliminates fp against the hard fp (%ebp) rather than trying to
7733 index from %esp up to the top of the frame across a gap that is
7734 of unknown (at compile-time) size. */
7735 static rtx
7736 ix86_builtin_setjmp_frame_value (void)
7738 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7741 /* Fill the structure ix86_frame describing the frame of the currently computed function.  */
7743 static void
7744 ix86_compute_frame_layout (struct ix86_frame *frame)
7746 HOST_WIDE_INT total_size;
7747 unsigned int stack_alignment_needed;
7748 HOST_WIDE_INT offset;
7749 unsigned int preferred_alignment;
7750 HOST_WIDE_INT size = get_frame_size ();
7752 frame->nregs = ix86_nsaved_regs ();
7753 frame->nsseregs = ix86_nsaved_sseregs ();
7754 total_size = size;
7756 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7757 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7759   /* The MS ABI seems to require the stack alignment to always be 16,
7760      except for function prologues.  */
7761 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7763 preferred_alignment = 16;
7764 stack_alignment_needed = 16;
7765 crtl->preferred_stack_boundary = 128;
7766 crtl->stack_alignment_needed = 128;
7769 gcc_assert (!size || stack_alignment_needed);
7770 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7771 gcc_assert (preferred_alignment <= stack_alignment_needed);
7773   /* During reload iteration the number of registers saved can change.
7774      Recompute the value as needed.  Do not recompute when the number of
7775      registers didn't change, as reload does multiple calls to the function
7776      and does not expect the decision to change within a single iteration.  */
7777 if (!optimize_function_for_size_p (cfun)
7778 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7780 int count = frame->nregs;
7782 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7783       /* The fast prologue uses moves instead of pushes to save registers.  This
7784          is significantly longer, but also executes faster, as modern hardware
7785          can execute the moves in parallel but can't do that for push/pop.
7787          Be careful about choosing which prologue to emit: when the function
7788          takes many instructions to execute, we may as well use the slow
7789          version; the same holds when the function is known to be outside a
7790          hot spot (known only with profile feedback).  Weight the size of the
7791          function by the number of registers to save, as it is cheap to use
7792          one or two push instructions but very slow to use many of them.  */
7793 if (count)
7794 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7795 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7796 || (flag_branch_probabilities
7797 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7798 cfun->machine->use_fast_prologue_epilogue = false;
7799 else
7800 cfun->machine->use_fast_prologue_epilogue
7801 = !expensive_function_p (count);
7803 if (TARGET_PROLOGUE_USING_MOVE
7804 && cfun->machine->use_fast_prologue_epilogue)
7805 frame->save_regs_using_mov = true;
7806 else
7807 frame->save_regs_using_mov = false;
7810 /* Skip return address and saved base pointer. */
7811 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7813 frame->hard_frame_pointer_offset = offset;
7815   /* Round the offset up to the needed alignment, because the realigned
7816      frame starts from here.  */
7817 if (stack_realign_fp)
7818     offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
7820 /* Register save area */
7821 offset += frame->nregs * UNITS_PER_WORD;
7823 /* Align SSE reg save area. */
7824 if (frame->nsseregs)
7825 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7826 else
7827 frame->padding0 = 0;
7829 /* SSE register save area. */
7830 offset += frame->padding0 + frame->nsseregs * 16;
7832 /* Va-arg area */
7833 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7834 offset += frame->va_arg_size;
7836 /* Align start of frame for local function. */
7837 frame->padding1 = ((offset + stack_alignment_needed - 1)
7838 & -stack_alignment_needed) - offset;
7840 offset += frame->padding1;
7842 /* Frame pointer points here. */
7843 frame->frame_pointer_offset = offset;
7845 offset += size;
7847   /* Add the outgoing arguments area.  It can be skipped if we eliminated
7848      all the function calls as dead code.
7849      Skipping is, however, impossible when the function calls alloca: the
7850      alloca expander assumes that the last crtl->outgoing_args_size bytes
7851      of the stack frame are unused.  */
7852 if (ACCUMULATE_OUTGOING_ARGS
7853 && (!current_function_is_leaf || cfun->calls_alloca
7854 || ix86_current_function_calls_tls_descriptor))
7856 offset += crtl->outgoing_args_size;
7857 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7859 else
7860 frame->outgoing_arguments_size = 0;
7862 /* Align stack boundary. Only needed if we're calling another function
7863 or using alloca. */
7864 if (!current_function_is_leaf || cfun->calls_alloca
7865 || ix86_current_function_calls_tls_descriptor)
7866 frame->padding2 = ((offset + preferred_alignment - 1)
7867 & -preferred_alignment) - offset;
7868 else
7869 frame->padding2 = 0;
7871 offset += frame->padding2;
7873   /* We've reached the end of the stack frame.  */
7874   frame->stack_pointer_offset = offset;
7876   /* The size the prologue needs to allocate.  */
7877 frame->to_allocate =
7878 (size + frame->padding1 + frame->padding2
7879 + frame->outgoing_arguments_size + frame->va_arg_size);
7881 if ((!frame->to_allocate && frame->nregs <= 1)
7882 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7883 frame->save_regs_using_mov = false;
7885 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7886 && current_function_is_leaf
7887 && !ix86_current_function_calls_tls_descriptor)
7889 frame->red_zone_size = frame->to_allocate;
7890 if (frame->save_regs_using_mov)
7891 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7892 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7893 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7895 else
7896 frame->red_zone_size = 0;
7897 frame->to_allocate -= frame->red_zone_size;
7898 frame->stack_pointer_offset -= frame->red_zone_size;
7899 #if 0
7900 fprintf (stderr, "\n");
7901 fprintf (stderr, "size: %ld\n", (long)size);
7902 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7903 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7904 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7905 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7906 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7907 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7908 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7909 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7910 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7911 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7912 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7913 (long)frame->hard_frame_pointer_offset);
7914 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7915 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7916 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7917 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7918 #endif
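
/* Editorial sketch of the layout computed above, from higher to lower
   addresses (areas may be empty; the offsets name fields set in
   struct ix86_frame):

	return address
	saved %ebp (if frame_pointer_needed)	<- hard_frame_pointer_offset
	saved integer registers (nregs words)
	padding0
	saved SSE registers (nsseregs * 16)
	va_arg register save area
	padding1				<- frame_pointer_offset
	local variables (get_frame_size ())
	outgoing arguments
	padding2				<- stack_pointer_offset

   When the red zone applies, red_zone_size is then carved out of
   to_allocate and stack_pointer_offset, as done just above.  */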
7921 /* Emit code to save registers in the prologue. */
7923 static void
7924 ix86_emit_save_regs (void)
7926 unsigned int regno;
7927 rtx insn;
7929 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7930 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7932 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7933 RTX_FRAME_RELATED_P (insn) = 1;
7937 /* Emit code to save registers using MOV insns.  The first register
7938    is stored at POINTER + OFFSET.  */
7939 static void
7940 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7942 unsigned int regno;
7943 rtx insn;
7945 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7946 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7948 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7949 Pmode, offset),
7950 gen_rtx_REG (Pmode, regno));
7951 RTX_FRAME_RELATED_P (insn) = 1;
7952 offset += UNITS_PER_WORD;
7956 /* Emit code to save SSE registers using MOV insns.  The first register
7957    is stored at POINTER + OFFSET.  */
7958 static void
7959 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7961 unsigned int regno;
7962 rtx insn;
7963 rtx mem;
7965 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7966 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7968 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7969 set_mem_align (mem, 128);
7970 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7971 RTX_FRAME_RELATED_P (insn) = 1;
7972 offset += 16;
7976 /* Expand a prologue or epilogue stack adjustment.
7977    The pattern exists to put a dependency on all ebp-based memory accesses.
7978    STYLE should be negative if instructions should be marked as frame related,
7979    zero if the %r11 register is live and cannot be freely used, and positive
7980    otherwise.  */
7982 static void
7983 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7985 rtx insn;
7987 if (! TARGET_64BIT)
7988 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7989 else if (x86_64_immediate_operand (offset, DImode))
7990 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7991 else
7993 rtx r11;
7994       /* r11 is used by indirect sibcall return as well, set before the
7995          epilogue and used after the epilogue.  At the moment an indirect
7996          sibcall shouldn't be used together with huge frame sizes in one
7997          function because of the frame_size check in sibcall.c.  */
7998 gcc_assert (style);
7999 r11 = gen_rtx_REG (DImode, R11_REG);
8000 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8001 if (style < 0)
8002 RTX_FRAME_RELATED_P (insn) = 1;
8003 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8004 offset));
8006 if (style < 0)
8007 RTX_FRAME_RELATED_P (insn) = 1;
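
/* Editorial sketch: on x86-64, when OFFSET does not fit the signed
   32-bit immediate accepted by x86_64_immediate_operand, the fallback
   above first materializes it in %r11, roughly:

	movabsq	$offset, %r11
	addq	%r11, %rsp

   (illustrative; the exact form is the *_rex64_2 pattern).  */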
8010 /* Find an available register to be used as the dynamic realign argument
8011    pointer register.  Such a register will be written in the prologue and
8012    used at the beginning of the body, so it must not be
8013 	1. a parameter passing register;
8014 	2. the GOT pointer.
8015    We reuse the static-chain register if it is available.  Otherwise, we
8016    use DI for i386 and R13 for x86-64.  We chose R13 since it has a
8017    shorter encoding.
8019    Return: the regno of the chosen register.  */
8021 static unsigned int
8022 find_drap_reg (void)
8024 tree decl = cfun->decl;
8026 if (TARGET_64BIT)
8028       /* Use R13 for a nested function or a function that needs a static
8029          chain.  Since a function with a tail call may use any caller-saved
8030          register in its epilogue, DRAP must not use a caller-saved
8031          register in that case.  */
8032 if ((decl_function_context (decl)
8033 && !DECL_NO_STATIC_CHAIN (decl))
8034 || crtl->tail_call_emit)
8035 return R13_REG;
8037 return R10_REG;
8039 else
8041       /* Use DI for a nested function or a function that needs a static
8042          chain.  Since a function with a tail call may use any caller-saved
8043          register in its epilogue, DRAP must not use a caller-saved
8044          register in that case.  */
8045 if ((decl_function_context (decl)
8046 && !DECL_NO_STATIC_CHAIN (decl))
8047 || crtl->tail_call_emit)
8048 return DI_REG;
8050       /* Reuse the static chain register if it isn't used for parameter
8051          passing.  */
8052 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8053 && !lookup_attribute ("fastcall",
8054 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8055 return CX_REG;
8056 else
8057 return DI_REG;
8061 /* Update incoming stack boundary and estimated stack alignment. */
8063 static void
8064 ix86_update_stack_boundary (void)
8066 /* Prefer the one specified at command line. */
8067 ix86_incoming_stack_boundary
8068 = (ix86_user_incoming_stack_boundary
8069 ? ix86_user_incoming_stack_boundary
8070 : ix86_default_incoming_stack_boundary);
8072 /* Incoming stack alignment can be changed on individual functions
8073 via force_align_arg_pointer attribute. We use the smallest
8074 incoming stack boundary. */
8075 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8076 && lookup_attribute (ix86_force_align_arg_pointer_string,
8077 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8078 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8080 /* The incoming stack frame has to be aligned at least at
8081 parm_stack_boundary. */
8082 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8083 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8085   /* The stack at the entry of main is aligned by the runtime.  We use
8086      the smallest incoming stack boundary.  */
8087 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8088 && DECL_NAME (current_function_decl)
8089 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8090 && DECL_FILE_SCOPE_P (current_function_decl))
8091 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8093   /* x86_64 varargs need 16-byte stack alignment for the register save
8094      area.  */
8095 if (TARGET_64BIT
8096 && cfun->stdarg
8097 && crtl->stack_alignment_estimated < 128)
8098 crtl->stack_alignment_estimated = 128;
8101 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8102 needed or an rtx for DRAP otherwise. */
8104 static rtx
8105 ix86_get_drap_rtx (void)
8107 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8108 crtl->need_drap = true;
8110 if (stack_realign_drap)
8112       /* Assign DRAP to vDRAP and return vDRAP.  */
8113 unsigned int regno = find_drap_reg ();
8114 rtx drap_vreg;
8115 rtx arg_ptr;
8116 rtx seq, insn;
8118 arg_ptr = gen_rtx_REG (Pmode, regno);
8119 crtl->drap_reg = arg_ptr;
8121 start_sequence ();
8122 drap_vreg = copy_to_reg (arg_ptr);
8123 seq = get_insns ();
8124 end_sequence ();
8126 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8127 RTX_FRAME_RELATED_P (insn) = 1;
8128 return drap_vreg;
8130 else
8131 return NULL;
8134 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8136 static rtx
8137 ix86_internal_arg_pointer (void)
8139 return virtual_incoming_args_rtx;
8142 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8143 This is called from dwarf2out.c to emit call frame instructions
8144 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8145 static void
8146 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8148 rtx unspec = SET_SRC (pattern);
8149 gcc_assert (GET_CODE (unspec) == UNSPEC);
8151 switch (index)
8153 case UNSPEC_REG_SAVE:
8154 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8155 SET_DEST (pattern));
8156 break;
8157 case UNSPEC_DEF_CFA:
8158 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8159 INTVAL (XVECEXP (unspec, 0, 0)));
8160 break;
8161 default:
8162 gcc_unreachable ();
8166 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8167 to be generated in correct form. */
8168 static void
8169 ix86_finalize_stack_realign_flags (void)
8171   /* Check whether stack realignment is really needed after reload, and
8172      store the result in cfun.  */
8173 unsigned int incoming_stack_boundary
8174 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8175 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8176 unsigned int stack_realign = (incoming_stack_boundary
8177 < (current_function_is_leaf
8178 ? crtl->max_used_stack_slot_alignment
8179 : crtl->stack_alignment_needed));
8181 if (crtl->stack_realign_finalized)
8183       /* After stack_realign_needed is finalized, we can no longer
8184          change it.  */
8185 gcc_assert (crtl->stack_realign_needed == stack_realign);
8187 else
8189 crtl->stack_realign_needed = stack_realign;
8190 crtl->stack_realign_finalized = true;
8194 /* Expand the prologue into a bunch of separate insns. */
8196 void
8197 ix86_expand_prologue (void)
8199 rtx insn;
8200 bool pic_reg_used;
8201 struct ix86_frame frame;
8202 HOST_WIDE_INT allocate;
8204 ix86_finalize_stack_realign_flags ();
8206 /* DRAP should not coexist with stack_realign_fp */
8207 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8209 ix86_compute_frame_layout (&frame);
8211   /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8212      DRAP is needed and stack realignment is really needed after reload.  */
8213 if (crtl->drap_reg && crtl->stack_realign_needed)
8215 rtx x, y;
8216 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8217 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8218 ? 0 : UNITS_PER_WORD);
8220 gcc_assert (stack_realign_drap);
8222 /* Grab the argument pointer. */
8223 x = plus_constant (stack_pointer_rtx,
8224 (UNITS_PER_WORD + param_ptr_offset));
8225 y = crtl->drap_reg;
8227       /* Only need to push the parameter pointer reg if it is a
8228          caller-saved reg.  */
8229 if (!call_used_regs[REGNO (crtl->drap_reg)])
8231 /* Push arg pointer reg */
8232 insn = emit_insn (gen_push (y));
8233 RTX_FRAME_RELATED_P (insn) = 1;
8236 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8237 RTX_FRAME_RELATED_P (insn) = 1;
8239 /* Align the stack. */
8240 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8241 stack_pointer_rtx,
8242 GEN_INT (-align_bytes)));
8243 RTX_FRAME_RELATED_P (insn) = 1;
8245 /* Replicate the return address on the stack so that return
8246 address can be reached via (argp - 1) slot. This is needed
8247 to implement macro RETURN_ADDR_RTX and intrinsic function
8248 expand_builtin_return_addr etc. */
8249 x = crtl->drap_reg;
8250 x = gen_frame_mem (Pmode,
8251 plus_constant (x, -UNITS_PER_WORD));
8252 insn = emit_insn (gen_push (x));
8253 RTX_FRAME_RELATED_P (insn) = 1;
8256 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8257 slower on all targets. Also sdb doesn't like it. */
8259 if (frame_pointer_needed)
8261 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8262 RTX_FRAME_RELATED_P (insn) = 1;
8264 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8265 RTX_FRAME_RELATED_P (insn) = 1;
8268 if (stack_realign_fp)
8270 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8271 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8273 /* Align the stack. */
8274 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8275 stack_pointer_rtx,
8276 GEN_INT (-align_bytes)));
8277 RTX_FRAME_RELATED_P (insn) = 1;
8280 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8282 if (!frame.save_regs_using_mov)
8283 ix86_emit_save_regs ();
8284 else
8285 allocate += frame.nregs * UNITS_PER_WORD;
8287   /* When using the red zone we may start register saving before
8288      allocating the stack frame, saving one cycle of the prologue.
8289      However, avoid doing this if we are going to have to probe the
8290      stack, since at least on x86_64 the stack probe can turn into a
8291      call that clobbers a red zone location.  */
8292 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8293 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8294 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8295 && !crtl->stack_realign_needed)
8296 ? hard_frame_pointer_rtx
8297 : stack_pointer_rtx,
8298 -frame.nregs * UNITS_PER_WORD);
8300 if (allocate == 0)
8302 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8303 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8304 GEN_INT (-allocate), -1);
8305 else
8307 /* Only valid for Win32. */
8308 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8309 bool eax_live;
8310 rtx t;
8312 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8314 if (cfun->machine->call_abi == MS_ABI)
8315 eax_live = false;
8316 else
8317 eax_live = ix86_eax_live_at_start_p ();
8319 if (eax_live)
8321 emit_insn (gen_push (eax));
8322 allocate -= UNITS_PER_WORD;
8325 emit_move_insn (eax, GEN_INT (allocate));
8327 if (TARGET_64BIT)
8328 insn = gen_allocate_stack_worker_64 (eax, eax);
8329 else
8330 insn = gen_allocate_stack_worker_32 (eax, eax);
8331 insn = emit_insn (insn);
8332 RTX_FRAME_RELATED_P (insn) = 1;
8333 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8334 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8335 add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
8337 if (eax_live)
8339 if (frame_pointer_needed)
8340 t = plus_constant (hard_frame_pointer_rtx,
8341 allocate
8342 - frame.to_allocate
8343 - frame.nregs * UNITS_PER_WORD);
8344 else
8345 t = plus_constant (stack_pointer_rtx, allocate);
8346 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8350 if (frame.save_regs_using_mov
8351 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8352 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8354 if (!frame_pointer_needed
8355 || !frame.to_allocate
8356 || crtl->stack_realign_needed)
8357 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8358 frame.to_allocate
8359 + frame.nsseregs * 16 + frame.padding0);
8360 else
8361 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8362 -frame.nregs * UNITS_PER_WORD);
8364 if (!frame_pointer_needed
8365 || !frame.to_allocate
8366 || crtl->stack_realign_needed)
8367 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8368 frame.to_allocate);
8369 else
8370 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8371 - frame.nregs * UNITS_PER_WORD
8372 - frame.nsseregs * 16
8373 - frame.padding0);
8375 pic_reg_used = false;
8376 if (pic_offset_table_rtx
8377 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8378 || crtl->profile))
8380 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8382 if (alt_pic_reg_used != INVALID_REGNUM)
8383 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8385 pic_reg_used = true;
8388 if (pic_reg_used)
8390 if (TARGET_64BIT)
8392 if (ix86_cmodel == CM_LARGE_PIC)
8394 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8395 rtx label = gen_label_rtx ();
8396 emit_label (label);
8397 LABEL_PRESERVE_P (label) = 1;
8398 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8399 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8400 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8401 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8402 pic_offset_table_rtx, tmp_reg));
8404 else
8405 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8407 else
8408 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8411   /* In the pic_reg_used case, make sure that the GOT load isn't deleted
8412      when mcount needs it.  The blockage to avoid call movement across the
8413      mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8414      note.  */
8415 if (crtl->profile && pic_reg_used)
8416 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8418 if (crtl->drap_reg && !crtl->stack_realign_needed)
8420       /* vDRAP is set up, but after reload it turns out stack realignment
8421          isn't necessary; here we emit prologue code to set up DRAP
8422          without the stack realignment adjustment.  */
8423 int drap_bp_offset = UNITS_PER_WORD * 2;
8424 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8425 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8428   /* Prevent instructions from being scheduled into the register save push
8429      sequence when access to the red zone area is done through the frame
8430      pointer.  The offset between the frame pointer and the stack pointer is
8431      calculated relative to the value of the stack pointer at the end of the
8432      function prologue, and moving instructions that access the red zone area
8433      via the frame pointer inside the push sequence violates this assumption.  */
8434 if (frame_pointer_needed && frame.red_zone_size)
8435 emit_insn (gen_memory_blockage ());
8437 /* Emit cld instruction if stringops are used in the function. */
8438 if (TARGET_CLD && ix86_current_function_needs_cld)
8439 emit_insn (gen_cld ());
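
/* Editorial sketch: for a plain 32-bit function with a frame pointer,
   no stack realignment and a small frame, the insns emitted above
   reduce to the classic prologue (N standing for the allocated size):

	pushl	%ebp
	movl	%esp, %ebp
	subl	$N, %esp
	pushl	<call-saved regs>	; or movs, for the fast prologue

   plus the PIC register set-up when pic_reg_used.  */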
8442 /* Emit code to restore saved registers using MOV insns. First register
8443 is restored from POINTER + OFFSET. */
8444 static void
8445 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8446 int maybe_eh_return)
8448 int regno;
8449 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8451 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8452 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8454       /* Ensure that adjust_address won't be forced to produce a pointer
8455          outside the range allowed by the x86-64 instruction set.  */
8456 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8458 rtx r11;
8460 r11 = gen_rtx_REG (DImode, R11_REG);
8461 emit_move_insn (r11, GEN_INT (offset));
8462 emit_insn (gen_adddi3 (r11, r11, pointer));
8463 base_address = gen_rtx_MEM (Pmode, r11);
8464 offset = 0;
8466 emit_move_insn (gen_rtx_REG (Pmode, regno),
8467 adjust_address (base_address, Pmode, offset));
8468 offset += UNITS_PER_WORD;
8472 /* Emit code to restore saved SSE registers using MOV insns.  The first
8473    register is restored from POINTER + OFFSET.  */
8474 static void
8475 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8476 int maybe_eh_return)
8478 int regno;
8479 rtx base_address = gen_rtx_MEM (TImode, pointer);
8480 rtx mem;
8482 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8483 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8485       /* Ensure that adjust_address won't be forced to produce a pointer
8486          outside the range allowed by the x86-64 instruction set.  */
8487 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8489 rtx r11;
8491 r11 = gen_rtx_REG (DImode, R11_REG);
8492 emit_move_insn (r11, GEN_INT (offset));
8493 emit_insn (gen_adddi3 (r11, r11, pointer));
8494 base_address = gen_rtx_MEM (TImode, r11);
8495 offset = 0;
8497 mem = adjust_address (base_address, TImode, offset);
8498 set_mem_align (mem, 128);
8499 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8500 offset += 16;
8504 /* Restore function stack, frame, and registers. */
8506 void
8507 ix86_expand_epilogue (int style)
8509 int regno;
8510 int sp_valid;
8511 struct ix86_frame frame;
8512 HOST_WIDE_INT offset;
8514 ix86_finalize_stack_realign_flags ();
8516 /* When stack is realigned, SP must be valid. */
8517 sp_valid = (!frame_pointer_needed
8518 || current_function_sp_is_unchanging
8519 || stack_realign_fp);
8521 ix86_compute_frame_layout (&frame);
8523 /* See the comment about red zone and frame
8524 pointer usage in ix86_expand_prologue. */
8525 if (frame_pointer_needed && frame.red_zone_size)
8526 emit_insn (gen_memory_blockage ());
8528 /* Calculate start of saved registers relative to ebp. Special care
8529 must be taken for the normal return case of a function using
8530 eh_return: the eax and edx registers are marked as saved, but not
8531 restored along this path. */
8532 offset = frame.nregs;
8533 if (crtl->calls_eh_return && style != 2)
8534 offset -= 2;
8535 offset *= -UNITS_PER_WORD;
8536 offset -= frame.nsseregs * 16 + frame.padding0;
8538   /* If we're only restoring one register and sp is not valid then
8539      use a move instruction to restore the register, since it's
8540      less work than reloading sp and popping the register.
8542      The default code results in a stack adjustment using an add/lea
8543      instruction, while this code results in a LEAVE instruction (or
8544      discrete equivalent), so it is profitable in some other cases as
8545      well, especially when there are no registers to restore.  We also
8546      use this code when TARGET_USE_LEAVE and there is exactly one
8547      register to pop.  This heuristic may need some tuning in the future.  */
8548 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8549 || (TARGET_EPILOGUE_USING_MOVE
8550 && cfun->machine->use_fast_prologue_epilogue
8551 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8552 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8553 || (frame_pointer_needed && TARGET_USE_LEAVE
8554 && cfun->machine->use_fast_prologue_epilogue
8555 && (frame.nregs + frame.nsseregs) == 1)
8556 || crtl->calls_eh_return)
8558       /* Restore registers.  We can use ebp or esp to address the memory
8559          locations.  If both are available, default to ebp, since offsets
8560          are known to be small.  The only exception is esp pointing directly
8561          to the end of the block of saved registers, where we may simplify
8562          the addressing mode.
8564          If we are realigning the stack with bp and sp, the register restores
8565          can't be addressed by bp; sp must be used instead.  */
8567 if (!frame_pointer_needed
8568 || (sp_valid && !frame.to_allocate)
8569 || stack_realign_fp)
8571 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8572 frame.to_allocate, style == 2);
8573 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8574 frame.to_allocate
8575 + frame.nsseregs * 16
8576 + frame.padding0, style == 2);
8578 else
8580 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8581 offset, style == 2);
8582 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8583 offset
8584 + frame.nsseregs * 16
8585 + frame.padding0, style == 2);
8588 /* eh_return epilogues need %ecx added to the stack pointer. */
8589 if (style == 2)
8591 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8593       /* Stack realignment doesn't work with eh_return.  */
8594 gcc_assert (!crtl->stack_realign_needed);
8596 if (frame_pointer_needed)
8598 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8599 tmp = plus_constant (tmp, UNITS_PER_WORD);
8600 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8602 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8603 emit_move_insn (hard_frame_pointer_rtx, tmp);
8605 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8606 const0_rtx, style);
8608 else
8610 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8611 tmp = plus_constant (tmp, (frame.to_allocate
8612 + frame.nregs * UNITS_PER_WORD
8613 + frame.nsseregs * 16
8614 + frame.padding0));
8615 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8618 else if (!frame_pointer_needed)
8619 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8620 GEN_INT (frame.to_allocate
8621 + frame.nregs * UNITS_PER_WORD
8622 + frame.nsseregs * 16
8623 + frame.padding0),
8624 style);
8625 /* If not an i386, mov & pop is faster than "leave". */
8626 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8627 || !cfun->machine->use_fast_prologue_epilogue)
8628 emit_insn ((*ix86_gen_leave) ());
8629 else
8631 pro_epilogue_adjust_stack (stack_pointer_rtx,
8632 hard_frame_pointer_rtx,
8633 const0_rtx, style);
8635 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8638 else
8640       /* The first step is to deallocate the stack frame so that we can
8641          pop the registers.
8643          If we realign the stack with the frame pointer, then the stack
8644          pointer can't be recovered via lea $offset(%bp), %sp, because
8645          there is a padding area between bp and sp for the realignment.
8646          "add $to_allocate, %sp" must be used instead.  */
8647 if (!sp_valid)
8649 gcc_assert (frame_pointer_needed);
8650 gcc_assert (!stack_realign_fp);
8651 pro_epilogue_adjust_stack (stack_pointer_rtx,
8652 hard_frame_pointer_rtx,
8653 GEN_INT (offset), style);
8654 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8655 frame.to_allocate, style == 2);
8656 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8657 GEN_INT (frame.nsseregs * 16), style);
8659 else if (frame.to_allocate || frame.nsseregs)
8661 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8662 frame.to_allocate,
8663 style == 2);
8664 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8665 GEN_INT (frame.to_allocate
8666 + frame.nsseregs * 16
8667 + frame.padding0), style);
8670 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8671 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8672 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8673 if (frame_pointer_needed)
8675 /* Leave results in shorter dependency chains on CPUs that are
8676 able to grok it fast. */
8677 if (TARGET_USE_LEAVE)
8678 emit_insn ((*ix86_gen_leave) ());
8679 else
8681               /* If stack realignment really happened, recovering the
8682                  stack pointer from the hard frame pointer is a must if
8683                  we are not using leave.  */
8684 if (stack_realign_fp)
8685 pro_epilogue_adjust_stack (stack_pointer_rtx,
8686 hard_frame_pointer_rtx,
8687 const0_rtx, style);
8688 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8693 if (crtl->drap_reg && crtl->stack_realign_needed)
8695 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8696 ? 0 : UNITS_PER_WORD);
8697 gcc_assert (stack_realign_drap);
8698 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8699 crtl->drap_reg,
8700 GEN_INT (-(UNITS_PER_WORD
8701 + param_ptr_offset))));
8702 if (!call_used_regs[REGNO (crtl->drap_reg)])
8703 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8707 /* Sibcall epilogues don't want a return instruction. */
8708 if (style == 0)
8709 return;
8711 if (crtl->args.pops_args && crtl->args.size)
8713 rtx popc = GEN_INT (crtl->args.pops_args);
8715       /* The i386 can only pop 64K bytes.  If asked to pop more, pop the
8716          return address, do an explicit add, and jump indirectly to the
8717          caller.  */
8719 if (crtl->args.pops_args >= 65536)
8721 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8723 /* There is no "pascal" calling convention in any 64bit ABI. */
8724 gcc_assert (!TARGET_64BIT);
8726 emit_insn (gen_popsi1 (ecx));
8727 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8728 emit_jump_insn (gen_return_indirect_internal (ecx));
8730 else
8731 emit_jump_insn (gen_return_pop_internal (popc));
8733 else
8734 emit_jump_insn (gen_return_internal ());
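
/* Editorial sketch: for a simple 32-bit function with a frame pointer,
   the epilogue emitted above commonly reduces to either

	leave				addl	$N, %esp
	ret			or	popl	<call-saved regs>
					popl	%ebp
					ret

   with "ret $popc" instead of "ret" when the callee pops its args.  */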
8737 /* Reset from the function's potential modifications. */
8739 static void
8740 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8741 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8743 if (pic_offset_table_rtx)
8744 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8745 #if TARGET_MACHO
8746 /* Mach-O doesn't support labels at the end of objects, so if
8747 it looks like we might want one, insert a NOP. */
8749 rtx insn = get_last_insn ();
8750 while (insn
8751 && NOTE_P (insn)
8752 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8753 insn = PREV_INSN (insn);
8754 if (insn
8755 && (LABEL_P (insn)
8756 || (NOTE_P (insn)
8757 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8758 fputs ("\tnop\n", file);
8760 #endif
8764 /* Extract the parts of an RTL expression that is a valid memory address
8765    for an instruction.  Return 0 if the structure of the address is
8766    grossly off.  Return -1 if the address contains ASHIFT, so it is not
8767    strictly valid but is still used for computing the length of an lea instruction.  */
8770 ix86_decompose_address (rtx addr, struct ix86_address *out)
8772 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8773 rtx base_reg, index_reg;
8774 HOST_WIDE_INT scale = 1;
8775 rtx scale_rtx = NULL_RTX;
8776 int retval = 1;
8777 enum ix86_address_seg seg = SEG_DEFAULT;
8779 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8780 base = addr;
8781 else if (GET_CODE (addr) == PLUS)
8783 rtx addends[4], op;
8784 int n = 0, i;
8786 op = addr;
8789 if (n >= 4)
8790 return 0;
8791 addends[n++] = XEXP (op, 1);
8792 op = XEXP (op, 0);
8794 while (GET_CODE (op) == PLUS);
8795 if (n >= 4)
8796 return 0;
8797 addends[n] = op;
8799 for (i = n; i >= 0; --i)
8801 op = addends[i];
8802 switch (GET_CODE (op))
8804 case MULT:
8805 if (index)
8806 return 0;
8807 index = XEXP (op, 0);
8808 scale_rtx = XEXP (op, 1);
8809 break;
8811 case UNSPEC:
8812 if (XINT (op, 1) == UNSPEC_TP
8813 && TARGET_TLS_DIRECT_SEG_REFS
8814 && seg == SEG_DEFAULT)
8815 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8816 else
8817 return 0;
8818 break;
8820 case REG:
8821 case SUBREG:
8822 if (!base)
8823 base = op;
8824 else if (!index)
8825 index = op;
8826 else
8827 return 0;
8828 break;
8830 case CONST:
8831 case CONST_INT:
8832 case SYMBOL_REF:
8833 case LABEL_REF:
8834 if (disp)
8835 return 0;
8836 disp = op;
8837 break;
8839 default:
8840 return 0;
8844 else if (GET_CODE (addr) == MULT)
8846 index = XEXP (addr, 0); /* index*scale */
8847 scale_rtx = XEXP (addr, 1);
8849 else if (GET_CODE (addr) == ASHIFT)
8851 rtx tmp;
8853 /* We're called for lea too, which implements ashift on occasion. */
8854 index = XEXP (addr, 0);
8855 tmp = XEXP (addr, 1);
8856 if (!CONST_INT_P (tmp))
8857 return 0;
8858 scale = INTVAL (tmp);
8859 if ((unsigned HOST_WIDE_INT) scale > 3)
8860 return 0;
8861 scale = 1 << scale;
8862 retval = -1;
8864 else
8865 disp = addr; /* displacement */
8867 /* Extract the integral value of scale. */
8868 if (scale_rtx)
8870 if (!CONST_INT_P (scale_rtx))
8871 return 0;
8872 scale = INTVAL (scale_rtx);
8875 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8876 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8878   /* Allow the arg pointer and the stack pointer as index if there is no scaling.  */
8879 if (base_reg && index_reg && scale == 1
8880 && (index_reg == arg_pointer_rtx
8881 || index_reg == frame_pointer_rtx
8882 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8884 rtx tmp;
8885 tmp = base, base = index, index = tmp;
8886 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8889 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8890 if ((base_reg == hard_frame_pointer_rtx
8891 || base_reg == frame_pointer_rtx
8892 || base_reg == arg_pointer_rtx) && !disp)
8893 disp = const0_rtx;
8895   /* Special case: on K6, [%esi] forces the instruction to be vector
8896      decoded.  Avoid this by transforming it to [%esi+0].
8897      Reload calls address legitimization without cfun defined, so we need
8898      to test cfun for being non-NULL.  */
8899 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8900 && base_reg && !index_reg && !disp
8901 && REG_P (base_reg)
8902 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8903 disp = const0_rtx;
8905 /* Special case: encode reg+reg instead of reg*2. */
8906 if (!base && index && scale && scale == 2)
8907 base = index, base_reg = index_reg, scale = 1;
8909 /* Special case: scaling cannot be encoded without base or displacement. */
8910 if (!base && !disp && index && scale != 1)
8911 disp = const0_rtx;
8913 out->base = base;
8914 out->index = index;
8915 out->disp = disp;
8916 out->scale = scale;
8917 out->seg = seg;
8919 return retval;
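
/* Editorial example: for the operand 16(%ebx,%ecx,4), i.e. the RTX
   (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx)) (const_int 16)),
   the code above fills OUT with base = %ebx, index = %ecx, scale = 4,
   disp = (const_int 16), seg = SEG_DEFAULT, and returns 1.  */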
8922 /* Return the cost of the memory address X.
8923    For i386, it is better to use a complex address than let gcc copy
8924    the address into a reg and make a new pseudo.  But not if the address
8925    requires two regs - that would mean more pseudos with longer
8926    lifetimes.  */
8927 static int
8928 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8930 struct ix86_address parts;
8931 int cost = 1;
8932 int ok = ix86_decompose_address (x, &parts);
8934 gcc_assert (ok);
8936 if (parts.base && GET_CODE (parts.base) == SUBREG)
8937 parts.base = SUBREG_REG (parts.base);
8938 if (parts.index && GET_CODE (parts.index) == SUBREG)
8939 parts.index = SUBREG_REG (parts.index);
8941 /* Attempt to minimize number of registers in the address. */
8942 if ((parts.base
8943 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8944 || (parts.index
8945 && (!REG_P (parts.index)
8946 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8947 cost++;
8949 if (parts.base
8950 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8951 && parts.index
8952 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8953 && parts.base != parts.index)
8954 cost++;
8956   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
8957      since its predecode logic can't detect the length of such instructions
8958      and decoding degenerates to vector decoded.  Increase the cost of such
8959      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
8960      to split such addresses or even refuse such addresses at all.
8962      The following addressing modes are affected:
8963       [base+scale*index]
8964       [scale*index+disp]
8965       [base+index]
8967      The first and last cases may be avoidable by explicitly coding a zero
8968      displacement in the memory address, but I don't have an AMD-K6 machine
8969      handy to check this theory.  */
8971 if (TARGET_K6
8972 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8973 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8974 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8975 cost += 10;
8977 return cost;
8980 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
8981    this is used to form addresses to local data when -fPIC is in
8982    use.  */
8984 static bool
8985 darwin_local_data_pic (rtx disp)
8987 return (GET_CODE (disp) == UNSPEC
8988 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8991 /* Determine if a given RTX is a valid constant. We already know this
8992 satisfies CONSTANT_P. */
8994 bool
8995 legitimate_constant_p (rtx x)
8997 switch (GET_CODE (x))
8999 case CONST:
9000 x = XEXP (x, 0);
9002 if (GET_CODE (x) == PLUS)
9004 if (!CONST_INT_P (XEXP (x, 1)))
9005 return false;
9006 x = XEXP (x, 0);
9009 if (TARGET_MACHO && darwin_local_data_pic (x))
9010 return true;
9012 /* Only some unspecs are valid as "constants". */
9013 if (GET_CODE (x) == UNSPEC)
9014 switch (XINT (x, 1))
9016 case UNSPEC_GOT:
9017 case UNSPEC_GOTOFF:
9018 case UNSPEC_PLTOFF:
9019 return TARGET_64BIT;
9020 case UNSPEC_TPOFF:
9021 case UNSPEC_NTPOFF:
9022 x = XVECEXP (x, 0, 0);
9023 return (GET_CODE (x) == SYMBOL_REF
9024 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9025 case UNSPEC_DTPOFF:
9026 x = XVECEXP (x, 0, 0);
9027 return (GET_CODE (x) == SYMBOL_REF
9028 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9029 default:
9030 return false;
9033 /* We must have drilled down to a symbol. */
9034 if (GET_CODE (x) == LABEL_REF)
9035 return true;
9036 if (GET_CODE (x) != SYMBOL_REF)
9037 return false;
9038 /* FALLTHRU */
9040 case SYMBOL_REF:
9041 /* TLS symbols are never valid. */
9042 if (SYMBOL_REF_TLS_MODEL (x))
9043 return false;
9045 /* DLLIMPORT symbols are never valid. */
9046 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9047 && SYMBOL_REF_DLLIMPORT_P (x))
9048 return false;
9049 break;
9051 case CONST_DOUBLE:
9052 if (GET_MODE (x) == TImode
9053 && x != CONST0_RTX (TImode)
9054 && !TARGET_64BIT)
9055 return false;
9056 break;
9058 case CONST_VECTOR:
9059 if (!standard_sse_constant_p (x))
9060 return false;
9062 default:
9063 break;
9066 /* Otherwise we handle everything else in the move patterns. */
9067 return true;
9070 /* Determine if it's legal to put X into the constant pool. This
9071 is not possible for the address of thread-local symbols, which
9072 is checked above. */
9074 static bool
9075 ix86_cannot_force_const_mem (rtx x)
9077 /* We can always put integral constants and vectors in memory. */
9078 switch (GET_CODE (x))
9080 case CONST_INT:
9081 case CONST_DOUBLE:
9082 case CONST_VECTOR:
9083 return false;
9085 default:
9086 break;
9088 return !legitimate_constant_p (x);
9091 /* Determine if a given RTX is a valid constant address. */
9093 bool
9094 constant_address_p (rtx x)
9096 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9099 /* Nonzero if the constant value X is a legitimate general operand
9100 when generating PIC code. It is given that flag_pic is on and
9101 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9103 bool
9104 legitimate_pic_operand_p (rtx x)
9106 rtx inner;
9108 switch (GET_CODE (x))
9110 case CONST:
9111 inner = XEXP (x, 0);
9112 if (GET_CODE (inner) == PLUS
9113 && CONST_INT_P (XEXP (inner, 1)))
9114 inner = XEXP (inner, 0);
9116 /* Only some unspecs are valid as "constants". */
9117 if (GET_CODE (inner) == UNSPEC)
9118 switch (XINT (inner, 1))
9120 case UNSPEC_GOT:
9121 case UNSPEC_GOTOFF:
9122 case UNSPEC_PLTOFF:
9123 return TARGET_64BIT;
9124 case UNSPEC_TPOFF:
9125 x = XVECEXP (inner, 0, 0);
9126 return (GET_CODE (x) == SYMBOL_REF
9127 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9128 case UNSPEC_MACHOPIC_OFFSET:
9129 return legitimate_pic_address_disp_p (x);
9130 default:
9131 return false;
9133 /* FALLTHRU */
9135 case SYMBOL_REF:
9136 case LABEL_REF:
9137 return legitimate_pic_address_disp_p (x);
9139 default:
9140 return true;
9144 /* Determine if a given CONST RTX is a valid memory displacement
9145 in PIC mode. */
9148 legitimate_pic_address_disp_p (rtx disp)
9150 bool saw_plus;
9152 /* In 64bit mode we can allow direct addresses of symbols and labels
9153 when they are not dynamic symbols. */
9154 if (TARGET_64BIT)
9156 rtx op0 = disp, op1;
9158 switch (GET_CODE (disp))
9160 case LABEL_REF:
9161 return true;
9163 case CONST:
9164 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9165 break;
9166 op0 = XEXP (XEXP (disp, 0), 0);
9167 op1 = XEXP (XEXP (disp, 0), 1);
9168 if (!CONST_INT_P (op1)
9169 || INTVAL (op1) >= 16*1024*1024
9170 || INTVAL (op1) < -16*1024*1024)
9171 break;
9172 if (GET_CODE (op0) == LABEL_REF)
9173 return true;
9174 if (GET_CODE (op0) != SYMBOL_REF)
9175 break;
9176 /* FALLTHRU */
9178 case SYMBOL_REF:
9179 /* TLS references should always be enclosed in UNSPEC. */
9180 if (SYMBOL_REF_TLS_MODEL (op0))
9181 return false;
9182 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9183 && ix86_cmodel != CM_LARGE_PIC)
9184 return true;
9185 break;
9187 default:
9188 break;
9191 if (GET_CODE (disp) != CONST)
9192 return 0;
9193 disp = XEXP (disp, 0);
9195 if (TARGET_64BIT)
9197 /* It is unsafe to allow PLUS expressions here. This limits the allowed
9198 distance of GOT tables. We should not need these anyway. */
9199 if (GET_CODE (disp) != UNSPEC
9200 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9201 && XINT (disp, 1) != UNSPEC_GOTOFF
9202 && XINT (disp, 1) != UNSPEC_PLTOFF))
9203 return 0;
9205 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9206 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9207 return 0;
9208 return 1;
9211 saw_plus = false;
9212 if (GET_CODE (disp) == PLUS)
9214 if (!CONST_INT_P (XEXP (disp, 1)))
9215 return 0;
9216 disp = XEXP (disp, 0);
9217 saw_plus = true;
9220 if (TARGET_MACHO && darwin_local_data_pic (disp))
9221 return 1;
9223 if (GET_CODE (disp) != UNSPEC)
9224 return 0;
9226 switch (XINT (disp, 1))
9228 case UNSPEC_GOT:
9229 if (saw_plus)
9230 return false;
9231 /* We need to check for both symbols and labels because VxWorks loads
9232 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9233 details. */
9234 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9235 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9236 case UNSPEC_GOTOFF:
9237 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9238 While the ABI also specifies a 32bit relocation, we don't produce
9239 it in the small PIC model at all. */
9240 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9241 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9242 && !TARGET_64BIT)
9243 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9244 return false;
9245 case UNSPEC_GOTTPOFF:
9246 case UNSPEC_GOTNTPOFF:
9247 case UNSPEC_INDNTPOFF:
9248 if (saw_plus)
9249 return false;
9250 disp = XVECEXP (disp, 0, 0);
9251 return (GET_CODE (disp) == SYMBOL_REF
9252 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9253 case UNSPEC_NTPOFF:
9254 disp = XVECEXP (disp, 0, 0);
9255 return (GET_CODE (disp) == SYMBOL_REF
9256 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9257 case UNSPEC_DTPOFF:
9258 disp = XVECEXP (disp, 0, 0);
9259 return (GET_CODE (disp) == SYMBOL_REF
9260 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9263 return 0;
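/* For example (illustrative): in 32bit PIC code the displacement
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is accepted for a
   suitable local symbol, while a bare (symbol_ref "x") is not --
   symbolic displacements must be wrapped in one of the unspecs handled
   above. */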
9266 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9267 memory address for an instruction. The MODE argument is the machine mode
9268 for the MEM expression that wants to use this address.
9270 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9271 convert common non-canonical forms to canonical form so that they will
9272 be recognized. */
9275 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9276 rtx addr, int strict)
9278 struct ix86_address parts;
9279 rtx base, index, disp;
9280 HOST_WIDE_INT scale;
9281 const char *reason = NULL;
9282 rtx reason_rtx = NULL_RTX;
9284 if (ix86_decompose_address (addr, &parts) <= 0)
9286 reason = "decomposition failed";
9287 goto report_error;
9290 base = parts.base;
9291 index = parts.index;
9292 disp = parts.disp;
9293 scale = parts.scale;
9295 /* Validate base register.
9297 Don't allow SUBREG's that span more than a word here. It can lead to spill
9298 failures when the base is one word out of a two word structure, which is
9299 represented internally as a DImode int. */
9301 if (base)
9303 rtx reg;
9304 reason_rtx = base;
9306 if (REG_P (base))
9307 reg = base;
9308 else if (GET_CODE (base) == SUBREG
9309 && REG_P (SUBREG_REG (base))
9310 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9311 <= UNITS_PER_WORD)
9312 reg = SUBREG_REG (base);
9313 else
9315 reason = "base is not a register";
9316 goto report_error;
9319 if (GET_MODE (base) != Pmode)
9321 reason = "base is not in Pmode";
9322 goto report_error;
9325 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9326 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9328 reason = "base is not valid";
9329 goto report_error;
9333 /* Validate index register.
9335 Don't allow SUBREG's that span more than a word here -- same as above. */
9337 if (index)
9339 rtx reg;
9340 reason_rtx = index;
9342 if (REG_P (index))
9343 reg = index;
9344 else if (GET_CODE (index) == SUBREG
9345 && REG_P (SUBREG_REG (index))
9346 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9347 <= UNITS_PER_WORD)
9348 reg = SUBREG_REG (index);
9349 else
9351 reason = "index is not a register";
9352 goto report_error;
9355 if (GET_MODE (index) != Pmode)
9357 reason = "index is not in Pmode";
9358 goto report_error;
9361 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9362 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9364 reason = "index is not valid";
9365 goto report_error;
9369 /* Validate scale factor. */
9370 if (scale != 1)
9372 reason_rtx = GEN_INT (scale);
9373 if (!index)
9375 reason = "scale without index";
9376 goto report_error;
9379 if (scale != 2 && scale != 4 && scale != 8)
9381 reason = "scale is not a valid multiplier";
9382 goto report_error;
9386 /* Validate displacement. */
9387 if (disp)
9389 reason_rtx = disp;
9391 if (GET_CODE (disp) == CONST
9392 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9393 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9394 switch (XINT (XEXP (disp, 0), 1))
9396 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9397 used. While the ABI also specifies 32bit relocations, we don't
9398 produce them at all and use IP-relative addressing instead. */
9399 case UNSPEC_GOT:
9400 case UNSPEC_GOTOFF:
9401 gcc_assert (flag_pic);
9402 if (!TARGET_64BIT)
9403 goto is_legitimate_pic;
9404 reason = "64bit address unspec";
9405 goto report_error;
9407 case UNSPEC_GOTPCREL:
9408 gcc_assert (flag_pic);
9409 goto is_legitimate_pic;
9411 case UNSPEC_GOTTPOFF:
9412 case UNSPEC_GOTNTPOFF:
9413 case UNSPEC_INDNTPOFF:
9414 case UNSPEC_NTPOFF:
9415 case UNSPEC_DTPOFF:
9416 break;
9418 default:
9419 reason = "invalid address unspec";
9420 goto report_error;
9423 else if (SYMBOLIC_CONST (disp)
9424 && (flag_pic
9425 || (TARGET_MACHO
9426 #if TARGET_MACHO
9427 && MACHOPIC_INDIRECT
9428 && !machopic_operand_p (disp)
9429 #endif
9433 is_legitimate_pic:
9434 if (TARGET_64BIT && (index || base))
9436 /* foo@dtpoff(%rX) is ok. */
9437 if (GET_CODE (disp) != CONST
9438 || GET_CODE (XEXP (disp, 0)) != PLUS
9439 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9440 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9441 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9442 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9444 reason = "non-constant pic memory reference";
9445 goto report_error;
9448 else if (! legitimate_pic_address_disp_p (disp))
9450 reason = "displacement is an invalid pic construct";
9451 goto report_error;
9454 /* This code used to verify that a symbolic pic displacement
9455 includes the pic_offset_table_rtx register.
9457 While this is a good idea, unfortunately these constructs may
9458 be created by the "adds using lea" optimization for incorrect
9459 code like:
9461 int a;
9462 int foo(int i)
9464 return *(&a+i);
9467 This code is nonsensical, but results in addressing the
9468 GOT table with a pic_offset_table_rtx base. We can't
9469 just refuse it easily, since it gets matched by the
9470 "addsi3" pattern, which later gets split to lea when the
9471 output register differs from the input. While this
9472 could be handled by a separate addsi pattern for this case
9473 that never results in lea, disabling this test seems to be
9474 the easier and correct fix for the crash. */
9476 else if (GET_CODE (disp) != LABEL_REF
9477 && !CONST_INT_P (disp)
9478 && (GET_CODE (disp) != CONST
9479 || !legitimate_constant_p (disp))
9480 && (GET_CODE (disp) != SYMBOL_REF
9481 || !legitimate_constant_p (disp)))
9483 reason = "displacement is not constant";
9484 goto report_error;
9486 else if (TARGET_64BIT
9487 && !x86_64_immediate_operand (disp, VOIDmode))
9489 reason = "displacement is out of range";
9490 goto report_error;
9494 /* Everything looks valid. */
9495 return TRUE;
9497 report_error:
9498 return FALSE;
9501 /* Return a unique alias set for the GOT. */
9503 static alias_set_type
9504 ix86_GOT_alias_set (void)
9506 static alias_set_type set = -1;
9507 if (set == -1)
9508 set = new_alias_set ();
9509 return set;
9512 /* Return a legitimate reference for ORIG (an address) using the
9513 register REG. If REG is 0, a new pseudo is generated.
9515 There are two types of references that must be handled:
9517 1. Global data references must load the address from the GOT, via
9518 the PIC reg. An insn is emitted to do this load, and the reg is
9519 returned.
9521 2. Static data references, constant pool addresses, and code labels
9522 compute the address as an offset from the GOT, whose base is in
9523 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9524 differentiate them from global data objects. The returned
9525 address is the PIC reg + an unspec constant.
9527 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9528 reg also appears in the address. */
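/* Rough sketch of the two cases for 32bit code (illustrative, symbol
   names hypothetical): a global "x" becomes a load through the GOT,
   approximately movl x@GOT(%ebx), %reg; a local "y" becomes an address
   computed from the pic base, approximately leal y@GOTOFF(%ebx), %reg. */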
9530 static rtx
9531 legitimize_pic_address (rtx orig, rtx reg)
9533 rtx addr = orig;
9534 rtx new_rtx = orig;
9535 rtx base;
9537 #if TARGET_MACHO
9538 if (TARGET_MACHO && !TARGET_64BIT)
9540 if (reg == 0)
9541 reg = gen_reg_rtx (Pmode);
9542 /* Use the generic Mach-O PIC machinery. */
9543 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9545 #endif
9547 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9548 new_rtx = addr;
9549 else if (TARGET_64BIT
9550 && ix86_cmodel != CM_SMALL_PIC
9551 && gotoff_operand (addr, Pmode))
9553 rtx tmpreg;
9554 /* This symbol may be referenced via a displacement from the PIC
9555 base address (@GOTOFF). */
9557 if (reload_in_progress)
9558 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9559 if (GET_CODE (addr) == CONST)
9560 addr = XEXP (addr, 0);
9561 if (GET_CODE (addr) == PLUS)
9563 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9564 UNSPEC_GOTOFF);
9565 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9567 else
9568 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9569 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9570 if (!reg)
9571 tmpreg = gen_reg_rtx (Pmode);
9572 else
9573 tmpreg = reg;
9574 emit_move_insn (tmpreg, new_rtx);
9576 if (reg != 0)
9578 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9579 tmpreg, 1, OPTAB_DIRECT);
9580 new_rtx = reg;
9582 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9584 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9586 /* This symbol may be referenced via a displacement from the PIC
9587 base address (@GOTOFF). */
9589 if (reload_in_progress)
9590 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9591 if (GET_CODE (addr) == CONST)
9592 addr = XEXP (addr, 0);
9593 if (GET_CODE (addr) == PLUS)
9595 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9596 UNSPEC_GOTOFF);
9597 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9599 else
9600 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9601 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9602 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9604 if (reg != 0)
9606 emit_move_insn (reg, new_rtx);
9607 new_rtx = reg;
9610 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9611 /* We can't use @GOTOFF for text labels on VxWorks;
9612 see gotoff_operand. */
9613 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9615 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9617 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9618 return legitimize_dllimport_symbol (addr, true);
9619 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9620 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9621 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9623 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9624 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9628 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9630 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9631 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9632 new_rtx = gen_const_mem (Pmode, new_rtx);
9633 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9635 if (reg == 0)
9636 reg = gen_reg_rtx (Pmode);
9637 /* Use gen_movsi directly; otherwise the address is loaded
9638 into a register for CSE. We don't want to CSE these addresses;
9639 instead we CSE addresses from the GOT table, so skip this. */
9640 emit_insn (gen_movsi (reg, new_rtx));
9641 new_rtx = reg;
9643 else
9645 /* This symbol must be referenced via a load from the
9646 Global Offset Table (@GOT). */
9648 if (reload_in_progress)
9649 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9650 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9651 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9652 if (TARGET_64BIT)
9653 new_rtx = force_reg (Pmode, new_rtx);
9654 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9655 new_rtx = gen_const_mem (Pmode, new_rtx);
9656 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9658 if (reg == 0)
9659 reg = gen_reg_rtx (Pmode);
9660 emit_move_insn (reg, new_rtx);
9661 new_rtx = reg;
9664 else
9666 if (CONST_INT_P (addr)
9667 && !x86_64_immediate_operand (addr, VOIDmode))
9669 if (reg)
9671 emit_move_insn (reg, addr);
9672 new_rtx = reg;
9674 else
9675 new_rtx = force_reg (Pmode, addr);
9677 else if (GET_CODE (addr) == CONST)
9679 addr = XEXP (addr, 0);
9681 /* We must match stuff we generate before. Assume the only
9682 unspecs that can get here are ours. Not that we could do
9683 anything with them anyway.... */
9684 if (GET_CODE (addr) == UNSPEC
9685 || (GET_CODE (addr) == PLUS
9686 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9687 return orig;
9688 gcc_assert (GET_CODE (addr) == PLUS);
9690 if (GET_CODE (addr) == PLUS)
9692 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9694 /* Check first to see if this is a constant offset from a @GOTOFF
9695 symbol reference. */
9696 if (gotoff_operand (op0, Pmode)
9697 && CONST_INT_P (op1))
9699 if (!TARGET_64BIT)
9701 if (reload_in_progress)
9702 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9703 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9704 UNSPEC_GOTOFF);
9705 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9706 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9707 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9709 if (reg != 0)
9711 emit_move_insn (reg, new_rtx);
9712 new_rtx = reg;
9715 else
9717 if (INTVAL (op1) < -16*1024*1024
9718 || INTVAL (op1) >= 16*1024*1024)
9720 if (!x86_64_immediate_operand (op1, Pmode))
9721 op1 = force_reg (Pmode, op1);
9722 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9726 else
9728 base = legitimize_pic_address (XEXP (addr, 0), reg);
9729 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9730 base == reg ? NULL_RTX : reg);
9732 if (CONST_INT_P (new_rtx))
9733 new_rtx = plus_constant (base, INTVAL (new_rtx));
9734 else
9736 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9738 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9739 new_rtx = XEXP (new_rtx, 1);
9741 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9746 return new_rtx;
9749 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9751 static rtx
9752 get_thread_pointer (int to_reg)
9754 rtx tp, reg, insn;
9756 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9757 if (!to_reg)
9758 return tp;
9760 reg = gen_reg_rtx (Pmode);
9761 insn = gen_rtx_SET (VOIDmode, reg, tp);
9762 insn = emit_insn (insn);
9764 return reg;
9767 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9768 false if we expect this to be used for a memory address and true if
9769 we expect to load the address into a register. */
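/* Summary of the models handled below (paraphrase mine): GLOBAL_DYNAMIC
   emits a call to the tls_get_addr helper; LOCAL_DYNAMIC makes one such
   call for the module base and then adds a @DTPOFF offset; INITIAL_EXEC
   loads the offset from the GOT (@GOTTPOFF and friends) and adds the
   thread pointer; LOCAL_EXEC adds a constant @TPOFF/@NTPOFF offset
   directly to the thread pointer. */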
9771 static rtx
9772 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9774 rtx dest, base, off, pic, tp;
9775 int type;
9777 switch (model)
9779 case TLS_MODEL_GLOBAL_DYNAMIC:
9780 dest = gen_reg_rtx (Pmode);
9781 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9783 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9785 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9787 start_sequence ();
9788 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9789 insns = get_insns ();
9790 end_sequence ();
9792 RTL_CONST_CALL_P (insns) = 1;
9793 emit_libcall_block (insns, dest, rax, x);
9795 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9796 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9797 else
9798 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9800 if (TARGET_GNU2_TLS)
9802 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9804 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9806 break;
9808 case TLS_MODEL_LOCAL_DYNAMIC:
9809 base = gen_reg_rtx (Pmode);
9810 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9812 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9814 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9816 start_sequence ();
9817 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9818 insns = get_insns ();
9819 end_sequence ();
9821 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9822 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9823 RTL_CONST_CALL_P (insns) = 1;
9824 emit_libcall_block (insns, base, rax, note);
9826 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9827 emit_insn (gen_tls_local_dynamic_base_64 (base));
9828 else
9829 emit_insn (gen_tls_local_dynamic_base_32 (base));
9831 if (TARGET_GNU2_TLS)
9833 rtx x = ix86_tls_module_base ();
9835 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9836 gen_rtx_MINUS (Pmode, x, tp));
9839 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9840 off = gen_rtx_CONST (Pmode, off);
9842 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9844 if (TARGET_GNU2_TLS)
9846 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9848 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9851 break;
9853 case TLS_MODEL_INITIAL_EXEC:
9854 if (TARGET_64BIT)
9856 pic = NULL;
9857 type = UNSPEC_GOTNTPOFF;
9859 else if (flag_pic)
9861 if (reload_in_progress)
9862 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9863 pic = pic_offset_table_rtx;
9864 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9866 else if (!TARGET_ANY_GNU_TLS)
9868 pic = gen_reg_rtx (Pmode);
9869 emit_insn (gen_set_got (pic));
9870 type = UNSPEC_GOTTPOFF;
9872 else
9874 pic = NULL;
9875 type = UNSPEC_INDNTPOFF;
9878 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9879 off = gen_rtx_CONST (Pmode, off);
9880 if (pic)
9881 off = gen_rtx_PLUS (Pmode, pic, off);
9882 off = gen_const_mem (Pmode, off);
9883 set_mem_alias_set (off, ix86_GOT_alias_set ());
9885 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9887 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9888 off = force_reg (Pmode, off);
9889 return gen_rtx_PLUS (Pmode, base, off);
9891 else
9893 base = get_thread_pointer (true);
9894 dest = gen_reg_rtx (Pmode);
9895 emit_insn (gen_subsi3 (dest, base, off));
9897 break;
9899 case TLS_MODEL_LOCAL_EXEC:
9900 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9901 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9902 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9903 off = gen_rtx_CONST (Pmode, off);
9905 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9907 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9908 return gen_rtx_PLUS (Pmode, base, off);
9910 else
9912 base = get_thread_pointer (true);
9913 dest = gen_reg_rtx (Pmode);
9914 emit_insn (gen_subsi3 (dest, base, off));
9916 break;
9918 default:
9919 gcc_unreachable ();
9922 return dest;
9925 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9926 to symbol DECL. */
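/* Naming note, derived from the code below (example name hypothetical):
   a decl whose stripped assembler name is "foo" maps to "*__imp__foo",
   or to "*__imp_foo" when user_label_prefix is empty or the name starts
   with the fastcall prefix. */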
9928 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9929 htab_t dllimport_map;
9931 static tree
9932 get_dllimport_decl (tree decl)
9934 struct tree_map *h, in;
9935 void **loc;
9936 const char *name;
9937 const char *prefix;
9938 size_t namelen, prefixlen;
9939 char *imp_name;
9940 tree to;
9941 rtx rtl;
9943 if (!dllimport_map)
9944 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9946 in.hash = htab_hash_pointer (decl);
9947 in.base.from = decl;
9948 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9949 h = (struct tree_map *) *loc;
9950 if (h)
9951 return h->to;
9953 *loc = h = GGC_NEW (struct tree_map);
9954 h->hash = in.hash;
9955 h->base.from = decl;
9956 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9957 DECL_ARTIFICIAL (to) = 1;
9958 DECL_IGNORED_P (to) = 1;
9959 DECL_EXTERNAL (to) = 1;
9960 TREE_READONLY (to) = 1;
9962 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9963 name = targetm.strip_name_encoding (name);
9964 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9965 ? "*__imp_" : "*__imp__";
9966 namelen = strlen (name);
9967 prefixlen = strlen (prefix);
9968 imp_name = (char *) alloca (namelen + prefixlen + 1);
9969 memcpy (imp_name, prefix, prefixlen);
9970 memcpy (imp_name + prefixlen, name, namelen + 1);
9972 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9973 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9974 SET_SYMBOL_REF_DECL (rtl, to);
9975 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9977 rtl = gen_const_mem (Pmode, rtl);
9978 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9980 SET_DECL_RTL (to, rtl);
9981 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9983 return to;
9986 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9987 true if we require the result be a register. */
9989 static rtx
9990 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9992 tree imp_decl;
9993 rtx x;
9995 gcc_assert (SYMBOL_REF_DECL (symbol));
9996 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9998 x = DECL_RTL (imp_decl);
9999 if (want_reg)
10000 x = force_reg (Pmode, x);
10001 return x;
10004 /* Try machine-dependent ways of modifying an illegitimate address
10005 to be legitimate. If we find one, return the new, valid address.
10006 This macro is used in only one place: `memory_address' in explow.c.
10008 OLDX is the address as it was before break_out_memory_refs was called.
10009 In some cases it is useful to look at this to decide what needs to be done.
10011 MODE and WIN are passed so that this macro can use
10012 GO_IF_LEGITIMATE_ADDRESS.
10014 It is always safe for this macro to do nothing. It exists to recognize
10015 opportunities to optimize the output.
10017 For the 80386, we handle X+REG by loading X into a register R and
10018 using R+REG. R will go in a general reg and indexing will be used.
10019 However, if REG is a broken-out memory address or multiplication,
10020 nothing needs to be done because REG can certainly go in a general reg.
10022 When -fpic is used, special handling is needed for symbolic references.
10023 See comments by legitimize_pic_address in i386.c for details. */
10026 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
10028 int changed = 0;
10029 unsigned log;
10031 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10032 if (log)
10033 return legitimize_tls_address (x, (enum tls_model) log, false);
10034 if (GET_CODE (x) == CONST
10035 && GET_CODE (XEXP (x, 0)) == PLUS
10036 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10037 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10039 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10040 (enum tls_model) log, false);
10041 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10044 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10046 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10047 return legitimize_dllimport_symbol (x, true);
10048 if (GET_CODE (x) == CONST
10049 && GET_CODE (XEXP (x, 0)) == PLUS
10050 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10051 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10053 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10054 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10058 if (flag_pic && SYMBOLIC_CONST (x))
10059 return legitimize_pic_address (x, 0);
10061 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10062 if (GET_CODE (x) == ASHIFT
10063 && CONST_INT_P (XEXP (x, 1))
10064 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10066 changed = 1;
10067 log = INTVAL (XEXP (x, 1));
10068 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10069 GEN_INT (1 << log));
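/* E.g. (illustrative): (ashift (reg) (const_int 3)) is rewritten above
   into (mult (reg) (const_int 8)), the multiply form that address
   decomposition expects for a scaled index. */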
10072 if (GET_CODE (x) == PLUS)
10074 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10076 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10077 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10078 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10080 changed = 1;
10081 log = INTVAL (XEXP (XEXP (x, 0), 1));
10082 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10083 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10084 GEN_INT (1 << log));
10087 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10088 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10089 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10091 changed = 1;
10092 log = INTVAL (XEXP (XEXP (x, 1), 1));
10093 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10094 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10095 GEN_INT (1 << log));
10098 /* Put multiply first if it isn't already. */
10099 if (GET_CODE (XEXP (x, 1)) == MULT)
10101 rtx tmp = XEXP (x, 0);
10102 XEXP (x, 0) = XEXP (x, 1);
10103 XEXP (x, 1) = tmp;
10104 changed = 1;
10107 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10108 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10109 created by virtual register instantiation, register elimination, and
10110 similar optimizations. */
10111 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10113 changed = 1;
10114 x = gen_rtx_PLUS (Pmode,
10115 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10116 XEXP (XEXP (x, 1), 0)),
10117 XEXP (XEXP (x, 1), 1));
10120 /* Canonicalize
10121 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10122 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10123 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10124 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10125 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10126 && CONSTANT_P (XEXP (x, 1)))
10128 rtx constant;
10129 rtx other = NULL_RTX;
10131 if (CONST_INT_P (XEXP (x, 1)))
10133 constant = XEXP (x, 1);
10134 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10136 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10138 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10139 other = XEXP (x, 1);
10141 else
10142 constant = 0;
10144 if (constant)
10146 changed = 1;
10147 x = gen_rtx_PLUS (Pmode,
10148 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10149 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10150 plus_constant (other, INTVAL (constant)));
10154 if (changed && legitimate_address_p (mode, x, FALSE))
10155 return x;
10157 if (GET_CODE (XEXP (x, 0)) == MULT)
10159 changed = 1;
10160 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10163 if (GET_CODE (XEXP (x, 1)) == MULT)
10165 changed = 1;
10166 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10169 if (changed
10170 && REG_P (XEXP (x, 1))
10171 && REG_P (XEXP (x, 0)))
10172 return x;
10174 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10176 changed = 1;
10177 x = legitimize_pic_address (x, 0);
10180 if (changed && legitimate_address_p (mode, x, FALSE))
10181 return x;
10183 if (REG_P (XEXP (x, 0)))
10185 rtx temp = gen_reg_rtx (Pmode);
10186 rtx val = force_operand (XEXP (x, 1), temp);
10187 if (val != temp)
10188 emit_move_insn (temp, val);
10190 XEXP (x, 1) = temp;
10191 return x;
10194 else if (REG_P (XEXP (x, 1)))
10196 rtx temp = gen_reg_rtx (Pmode);
10197 rtx val = force_operand (XEXP (x, 0), temp);
10198 if (val != temp)
10199 emit_move_insn (temp, val);
10201 XEXP (x, 0) = temp;
10202 return x;
10206 return x;
10209 /* Print an integer constant expression in assembler syntax. Addition
10210 and subtraction are the only arithmetic that may appear in these
10211 expressions. FILE is the stdio stream to write to, X is the rtx, and
10212 CODE is the operand print code from the output string. */
10214 static void
10215 output_pic_addr_const (FILE *file, rtx x, int code)
10217 char buf[256];
10219 switch (GET_CODE (x))
10221 case PC:
10222 gcc_assert (flag_pic);
10223 putc ('.', file);
10224 break;
10226 case SYMBOL_REF:
10227 if (! TARGET_MACHO || TARGET_64BIT)
10228 output_addr_const (file, x);
10229 else
10231 const char *name = XSTR (x, 0);
10233 /* Mark the decl as referenced so that cgraph will
10234 output the function. */
10235 if (SYMBOL_REF_DECL (x))
10236 mark_decl_referenced (SYMBOL_REF_DECL (x));
10238 #if TARGET_MACHO
10239 if (MACHOPIC_INDIRECT
10240 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10241 name = machopic_indirection_name (x, /*stub_p=*/true);
10242 #endif
10243 assemble_name (file, name);
10245 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10246 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10247 fputs ("@PLT", file);
10248 break;
10250 case LABEL_REF:
10251 x = XEXP (x, 0);
10252 /* FALLTHRU */
10253 case CODE_LABEL:
10254 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10255 assemble_name (asm_out_file, buf);
10256 break;
10258 case CONST_INT:
10259 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10260 break;
10262 case CONST:
10263 /* This used to output parentheses around the expression,
10264 but that does not work on the 386 (either ATT or BSD assembler). */
10265 output_pic_addr_const (file, XEXP (x, 0), code);
10266 break;
10268 case CONST_DOUBLE:
10269 if (GET_MODE (x) == VOIDmode)
10271 /* We can use %d if the number is <32 bits and positive. */
10272 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10273 fprintf (file, "0x%lx%08lx",
10274 (unsigned long) CONST_DOUBLE_HIGH (x),
10275 (unsigned long) CONST_DOUBLE_LOW (x));
10276 else
10277 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10279 else
10280 /* We can't handle floating point constants;
10281 PRINT_OPERAND must handle them. */
10282 output_operand_lossage ("floating constant misused");
10283 break;
10285 case PLUS:
10286 /* Some assemblers need integer constants to appear first. */
10287 if (CONST_INT_P (XEXP (x, 0)))
10289 output_pic_addr_const (file, XEXP (x, 0), code);
10290 putc ('+', file);
10291 output_pic_addr_const (file, XEXP (x, 1), code);
10293 else
10295 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10296 output_pic_addr_const (file, XEXP (x, 1), code);
10297 putc ('+', file);
10298 output_pic_addr_const (file, XEXP (x, 0), code);
10300 break;
10302 case MINUS:
10303 if (!TARGET_MACHO)
10304 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10305 output_pic_addr_const (file, XEXP (x, 0), code);
10306 putc ('-', file);
10307 output_pic_addr_const (file, XEXP (x, 1), code);
10308 if (!TARGET_MACHO)
10309 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10310 break;
10312 case UNSPEC:
10313 gcc_assert (XVECLEN (x, 0) == 1);
10314 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10315 switch (XINT (x, 1))
10317 case UNSPEC_GOT:
10318 fputs ("@GOT", file);
10319 break;
10320 case UNSPEC_GOTOFF:
10321 fputs ("@GOTOFF", file);
10322 break;
10323 case UNSPEC_PLTOFF:
10324 fputs ("@PLTOFF", file);
10325 break;
10326 case UNSPEC_GOTPCREL:
10327 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10328 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10329 break;
10330 case UNSPEC_GOTTPOFF:
10331 /* FIXME: This might be @TPOFF in Sun ld too. */
10332 fputs ("@GOTTPOFF", file);
10333 break;
10334 case UNSPEC_TPOFF:
10335 fputs ("@TPOFF", file);
10336 break;
10337 case UNSPEC_NTPOFF:
10338 if (TARGET_64BIT)
10339 fputs ("@TPOFF", file);
10340 else
10341 fputs ("@NTPOFF", file);
10342 break;
10343 case UNSPEC_DTPOFF:
10344 fputs ("@DTPOFF", file);
10345 break;
10346 case UNSPEC_GOTNTPOFF:
10347 if (TARGET_64BIT)
10348 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10349 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10350 else
10351 fputs ("@GOTNTPOFF", file);
10352 break;
10353 case UNSPEC_INDNTPOFF:
10354 fputs ("@INDNTPOFF", file);
10355 break;
10356 #if TARGET_MACHO
10357 case UNSPEC_MACHOPIC_OFFSET:
10358 putc ('-', file);
10359 machopic_output_function_base_name (file);
10360 break;
10361 #endif
10362 default:
10363 output_operand_lossage ("invalid UNSPEC as operand");
10364 break;
10366 break;
10368 default:
10369 output_operand_lossage ("invalid expression as operand");
10373 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10374 We need to emit DTP-relative relocations. */
10376 static void ATTRIBUTE_UNUSED
10377 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10379 fputs (ASM_LONG, file);
10380 output_addr_const (file, x);
10381 fputs ("@DTPOFF", file);
10382 switch (size)
10384 case 4:
10385 break;
10386 case 8:
10387 fputs (", 0", file);
10388 break;
10389 default:
10390 gcc_unreachable ();
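/* Example output, assuming ASM_LONG expands to a ".long" directive
   (illustrative): size 4 emits ".long foo@DTPOFF", while size 8 emits
   ".long foo@DTPOFF, 0" to pad the value to 8 bytes. */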
10394 /* Return true if X is a representation of the PIC register. This copes
10395 with calls from ix86_find_base_term, where the register might have
10396 been replaced by a cselib value. */
10398 static bool
10399 ix86_pic_register_p (rtx x)
10401 if (GET_CODE (x) == VALUE)
10402 return (pic_offset_table_rtx
10403 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10404 else
10405 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10408 /* In the name of slightly smaller debug output, and to cater to
10409 general assembler lossage, recognize PIC+GOTOFF and turn it back
10410 into a direct symbol reference.
10412 On Darwin, this is necessary to avoid a crash, because Darwin
10413 has a different PIC label for each routine but the DWARF debugging
10414 information is not associated with any particular routine, so it's
10415 necessary to remove references to the PIC label from RTL stored by
10416 the DWARF output code. */
10418 static rtx
10419 ix86_delegitimize_address (rtx orig_x)
10421 rtx x = orig_x;
10422 /* reg_addend is NULL or a multiple of some register. */
10423 rtx reg_addend = NULL_RTX;
10424 /* const_addend is NULL or a const_int. */
10425 rtx const_addend = NULL_RTX;
10426 /* This is the result, or NULL. */
10427 rtx result = NULL_RTX;
10429 if (MEM_P (x))
10430 x = XEXP (x, 0);
10432 if (TARGET_64BIT)
10434 if (GET_CODE (x) != CONST
10435 || GET_CODE (XEXP (x, 0)) != UNSPEC
10436 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10437 || !MEM_P (orig_x))
10438 return orig_x;
10439 return XVECEXP (XEXP (x, 0), 0, 0);
10442 if (GET_CODE (x) != PLUS
10443 || GET_CODE (XEXP (x, 1)) != CONST)
10444 return orig_x;
10446 if (ix86_pic_register_p (XEXP (x, 0)))
10447 /* %ebx + GOT/GOTOFF */
10449 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10451 /* %ebx + %reg * scale + GOT/GOTOFF */
10452 reg_addend = XEXP (x, 0);
10453 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10454 reg_addend = XEXP (reg_addend, 1);
10455 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10456 reg_addend = XEXP (reg_addend, 0);
10457 else
10458 return orig_x;
10459 if (!REG_P (reg_addend)
10460 && GET_CODE (reg_addend) != MULT
10461 && GET_CODE (reg_addend) != ASHIFT)
10462 return orig_x;
10464 else
10465 return orig_x;
10467 x = XEXP (XEXP (x, 1), 0);
10468 if (GET_CODE (x) == PLUS
10469 && CONST_INT_P (XEXP (x, 1)))
10471 const_addend = XEXP (x, 1);
10472 x = XEXP (x, 0);
10475 if (GET_CODE (x) == UNSPEC
10476 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10477 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10478 result = XVECEXP (x, 0, 0);
10480 if (TARGET_MACHO && darwin_local_data_pic (x)
10481 && !MEM_P (orig_x))
10482 result = XVECEXP (x, 0, 0);
10484 if (! result)
10485 return orig_x;
10487 if (const_addend)
10488 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10489 if (reg_addend)
10490 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10491 return result;
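/* For example (illustrative): a non-memory address of the form
   (plus (reg ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into a direct reference to (symbol_ref "x") by the
   code above. */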
10494 /* If X is a machine specific address (i.e. a symbol or label being
10495 referenced as a displacement from the GOT implemented using an
10496 UNSPEC), then return the base term. Otherwise return X. */
10499 ix86_find_base_term (rtx x)
10501 rtx term;
10503 if (TARGET_64BIT)
10505 if (GET_CODE (x) != CONST)
10506 return x;
10507 term = XEXP (x, 0);
10508 if (GET_CODE (term) == PLUS
10509 && (CONST_INT_P (XEXP (term, 1))
10510 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10511 term = XEXP (term, 0);
10512 if (GET_CODE (term) != UNSPEC
10513 || XINT (term, 1) != UNSPEC_GOTPCREL)
10514 return x;
10516 return XVECEXP (term, 0, 0);
10519 return ix86_delegitimize_address (x);
10522 static void
10523 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10524 int fp, FILE *file)
10526 const char *suffix;
10528 if (mode == CCFPmode || mode == CCFPUmode)
10530 enum rtx_code second_code, bypass_code;
10531 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10532 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10533 code = ix86_fp_compare_code_to_integer (code);
10534 mode = CCmode;
10536 if (reverse)
10537 code = reverse_condition (code);
10539 switch (code)
10541 case EQ:
10542 switch (mode)
10544 case CCAmode:
10545 suffix = "a";
10546 break;
10548 case CCCmode:
10549 suffix = "c";
10550 break;
10552 case CCOmode:
10553 suffix = "o";
10554 break;
10556 case CCSmode:
10557 suffix = "s";
10558 break;
10560 default:
10561 suffix = "e";
10563 break;
10564 case NE:
10565 switch (mode)
10567 case CCAmode:
10568 suffix = "na";
10569 break;
10571 case CCCmode:
10572 suffix = "nc";
10573 break;
10575 case CCOmode:
10576 suffix = "no";
10577 break;
10579 case CCSmode:
10580 suffix = "ns";
10581 break;
10583 default:
10584 suffix = "ne";
10586 break;
10587 case GT:
10588 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10589 suffix = "g";
10590 break;
10591 case GTU:
10592 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10593 Those same assemblers have the same but opposite lossage on cmov. */
10594 if (mode == CCmode)
10595 suffix = fp ? "nbe" : "a";
10596 else if (mode == CCCmode)
10597 suffix = "b";
10598 else
10599 gcc_unreachable ();
10600 break;
10601 case LT:
10602 switch (mode)
10604 case CCNOmode:
10605 case CCGOCmode:
10606 suffix = "s";
10607 break;
10609 case CCmode:
10610 case CCGCmode:
10611 suffix = "l";
10612 break;
10614 default:
10615 gcc_unreachable ();
10617 break;
10618 case LTU:
10619 gcc_assert (mode == CCmode || mode == CCCmode);
10620 suffix = "b";
10621 break;
10622 case GE:
10623 switch (mode)
10625 case CCNOmode:
10626 case CCGOCmode:
10627 suffix = "ns";
10628 break;
10630 case CCmode:
10631 case CCGCmode:
10632 suffix = "ge";
10633 break;
10635 default:
10636 gcc_unreachable ();
10638 break;
10639 case GEU:
10640 /* ??? As above. */
10641 gcc_assert (mode == CCmode || mode == CCCmode);
10642 suffix = fp ? "nb" : "ae";
10643 break;
10644 case LE:
10645 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10646 suffix = "le";
10647 break;
10648 case LEU:
10649 /* ??? As above. */
10650 if (mode == CCmode)
10651 suffix = "be";
10652 else if (mode == CCCmode)
10653 suffix = fp ? "nb" : "ae";
10654 else
10655 gcc_unreachable ();
10656 break;
10657 case UNORDERED:
10658 suffix = fp ? "u" : "p";
10659 break;
10660 case ORDERED:
10661 suffix = fp ? "nu" : "np";
10662 break;
10663 default:
10664 gcc_unreachable ();
10666 fputs (suffix, file);
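/* E.g. (illustrative): EQ in plain CCmode yields the suffix "e", so a
   conditional set prints as "sete" and a conditional move as "cmove". */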
10669 /* Print the name of register X to FILE based on its machine mode and number.
10670 If CODE is 'w', pretend the mode is HImode.
10671 If CODE is 'b', pretend the mode is QImode.
10672 If CODE is 'k', pretend the mode is SImode.
10673 If CODE is 'q', pretend the mode is DImode.
10674 If CODE is 'x', pretend the mode is V4SFmode.
10675 If CODE is 't', pretend the mode is V8SFmode.
10676 If CODE is 'h', pretend the reg is the 'high' byte register.
10677 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10678 If CODE is 'd', duplicate the operand for AVX instruction.
10681 void
10682 print_reg (rtx x, int code, FILE *file)
10684 const char *reg;
10685 bool duplicated = code == 'd' && TARGET_AVX;
10687 gcc_assert (x == pc_rtx
10688 || (REGNO (x) != ARG_POINTER_REGNUM
10689 && REGNO (x) != FRAME_POINTER_REGNUM
10690 && REGNO (x) != FLAGS_REG
10691 && REGNO (x) != FPSR_REG
10692 && REGNO (x) != FPCR_REG));
10694 if (ASSEMBLER_DIALECT == ASM_ATT)
10695 putc ('%', file);
10697 if (x == pc_rtx)
10699 gcc_assert (TARGET_64BIT);
10700 fputs ("rip", file);
10701 return;
10704 if (code == 'w' || MMX_REG_P (x))
10705 code = 2;
10706 else if (code == 'b')
10707 code = 1;
10708 else if (code == 'k')
10709 code = 4;
10710 else if (code == 'q')
10711 code = 8;
10712 else if (code == 'y')
10713 code = 3;
10714 else if (code == 'h')
10715 code = 0;
10716 else if (code == 'x')
10717 code = 16;
10718 else if (code == 't')
10719 code = 32;
10720 else
10721 code = GET_MODE_SIZE (GET_MODE (x));
10723 /* Irritatingly, AMD extended registers use a different naming convention
10724 from the normal registers. */
10725 if (REX_INT_REG_P (x))
10727 gcc_assert (TARGET_64BIT);
10728 switch (code)
10730 case 0:
10731 error ("extended registers have no high halves");
10732 break;
10733 case 1:
10734 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10735 break;
10736 case 2:
10737 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10738 break;
10739 case 4:
10740 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10741 break;
10742 case 8:
10743 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10744 break;
10745 default:
10746 error ("unsupported operand size for extended register");
10747 break;
10749 return;
10752 reg = NULL;
10753 switch (code)
10755 case 3:
10756 if (STACK_TOP_P (x))
10758 reg = "st(0)";
10759 break;
10761 /* FALLTHRU */
10762 case 8:
10763 case 4:
10764 case 12:
10765 if (! ANY_FP_REG_P (x))
10766 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10767 /* FALLTHRU */
10768 case 16:
10769 case 2:
10770 normal:
10771 reg = hi_reg_name[REGNO (x)];
10772 break;
10773 case 1:
10774 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10775 goto normal;
10776 reg = qi_reg_name[REGNO (x)];
10777 break;
10778 case 0:
10779 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10780 goto normal;
10781 reg = qi_high_reg_name[REGNO (x)];
10782 break;
10783 case 32:
10784 if (SSE_REG_P (x))
10786 gcc_assert (!duplicated);
10787 putc ('y', file);
10788 fputs (hi_reg_name[REGNO (x)] + 1, file);
10789 return;
10791 break;
10792 default:
10793 gcc_unreachable ();
10796 fputs (reg, file);
10797 if (duplicated)
10799 if (ASSEMBLER_DIALECT == ASM_ATT)
10800 fprintf (file, ", %%%s", reg);
10801 else
10802 fprintf (file, ", %s", reg);
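/* For example (assuming the usual i386 register naming tables): printing
   (reg:SI 0), i.e. %eax, with code 'w' selects the HImode name and
   produces "%ax" in AT&T syntax or "ax" in Intel syntax. */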
10806 /* Locate some local-dynamic symbol still in use by this function
10807 so that we can print its name in some tls_local_dynamic_base
10808 pattern. */
10810 static int
10811 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10813 rtx x = *px;
10815 if (GET_CODE (x) == SYMBOL_REF
10816 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10818 cfun->machine->some_ld_name = XSTR (x, 0);
10819 return 1;
10822 return 0;
10825 static const char *
10826 get_some_local_dynamic_name (void)
10828 rtx insn;
10830 if (cfun->machine->some_ld_name)
10831 return cfun->machine->some_ld_name;
10833 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10834 if (INSN_P (insn)
10835 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10836 return cfun->machine->some_ld_name;
10838 gcc_unreachable ();
10841 /* Meaning of CODE:
10842 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10843 C -- print opcode suffix for set/cmov insn.
10844 c -- like C, but print reversed condition
10845 E,e -- likewise, but for compare-and-branch fused insn.
10846 F,f -- likewise, but for floating-point.
10847 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10848 otherwise nothing
10849 R -- print the prefix for register names.
10850 z -- print the opcode suffix for the size of the current operand.
10851 * -- print a star (in certain assembler syntax)
10852 A -- print an absolute memory reference.
10853 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10854 s -- print a shift double count, followed by the assembler's argument
10855 delimiter.
10856 b -- print the QImode name of the register for the indicated operand.
10857 %b0 would print %al if operands[0] is reg 0.
10858 w -- likewise, print the HImode name of the register.
10859 k -- likewise, print the SImode name of the register.
10860 q -- likewise, print the DImode name of the register.
10861 x -- likewise, print the V4SFmode name of the register.
10862 t -- likewise, print the V8SFmode name of the register.
10863 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10864 y -- print "st(0)" instead of "st" as a register.
10865 d -- print duplicated register operand for AVX instruction.
10866 D -- print condition for SSE cmp instruction.
10867 P -- if PIC, print an @PLT suffix.
10868 X -- don't print any sort of PIC '@' suffix for a symbol.
10869 & -- print some in-use local-dynamic symbol name.
10870 H -- print a memory address offset by 8; used for sse high-parts
10871 Y -- print condition for SSE5 com* instruction.
10872 + -- print a branch hint as 'cs' or 'ds' prefix
10873 ; -- print a semicolon (after prefixes, due to a bug in older gas).
10876 void
10877 print_operand (FILE *file, rtx x, int code)
10879 if (code)
10881 switch (code)
10883 case '*':
10884 if (ASSEMBLER_DIALECT == ASM_ATT)
10885 putc ('*', file);
10886 return;
10888 case '&':
10889 assemble_name (file, get_some_local_dynamic_name ());
10890 return;
10892 case 'A':
10893 switch (ASSEMBLER_DIALECT)
10895 case ASM_ATT:
10896 putc ('*', file);
10897 break;
10899 case ASM_INTEL:
10900 /* Intel syntax. For absolute addresses, registers should not
10901 be surrounded by brackets. */
10902 if (!REG_P (x))
10904 putc ('[', file);
10905 PRINT_OPERAND (file, x, 0);
10906 putc (']', file);
10907 return;
10909 break;
10911 default:
10912 gcc_unreachable ();
10915 PRINT_OPERAND (file, x, 0);
10916 return;
10919 case 'L':
10920 if (ASSEMBLER_DIALECT == ASM_ATT)
10921 putc ('l', file);
10922 return;
10924 case 'W':
10925 if (ASSEMBLER_DIALECT == ASM_ATT)
10926 putc ('w', file);
10927 return;
10929 case 'B':
10930 if (ASSEMBLER_DIALECT == ASM_ATT)
10931 putc ('b', file);
10932 return;
10934 case 'Q':
10935 if (ASSEMBLER_DIALECT == ASM_ATT)
10936 putc ('l', file);
10937 return;
10939 case 'S':
10940 if (ASSEMBLER_DIALECT == ASM_ATT)
10941 putc ('s', file);
10942 return;
10944 case 'T':
10945 if (ASSEMBLER_DIALECT == ASM_ATT)
10946 putc ('t', file);
10947 return;
10949 case 'z':
10950 /* 387 opcodes don't get size suffixes if the operands are
10951 registers. */
10952 if (STACK_REG_P (x))
10953 return;
10955 /* Likewise if using Intel opcodes. */
10956 if (ASSEMBLER_DIALECT == ASM_INTEL)
10957 return;
10959 /* Derive the opcode suffix from the size of the operand. */
10960 switch (GET_MODE_SIZE (GET_MODE (x)))
10962 case 1:
10963 putc ('b', file);
10964 return;
10966 case 2:
10967 if (MEM_P (x))
10969 #ifdef HAVE_GAS_FILDS_FISTS
10970 putc ('s', file);
10971 #endif
10972 return;
10974 else
10975 putc ('w', file);
10976 return;
10978 case 4:
10979 if (GET_MODE (x) == SFmode)
10981 putc ('s', file);
10982 return;
10984 else
10985 putc ('l', file);
10986 return;
10988 case 12:
10989 case 16:
10990 putc ('t', file);
10991 return;
10993 case 8:
10994 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10996 if (MEM_P (x))
10998 #ifdef GAS_MNEMONICS
10999 putc ('q', file);
11000 #else
11001 putc ('l', file);
11002 putc ('l', file);
11003 #endif
11005 else
11006 putc ('q', file);
11008 else
11009 putc ('l', file);
11010 return;
11012 default:
11013 gcc_unreachable ();
11016 case 'd':
11017 case 'b':
11018 case 'w':
11019 case 'k':
11020 case 'q':
11021 case 'h':
11022 case 't':
11023 case 'y':
11024 case 'x':
11025 case 'X':
11026 case 'P':
11027 break;
11029 case 's':
11030 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11032 PRINT_OPERAND (file, x, 0);
11033 fputs (", ", file);
11035 return;
11037 case 'D':
11038 /* A little bit of braindamage here. The SSE compare instructions
11039 use completely different names for the comparisons than the
11040 fp conditional moves do. */
11041 if (TARGET_AVX)
11043 switch (GET_CODE (x))
11045 case EQ:
11046 fputs ("eq", file);
11047 break;
11048 case UNEQ:
11049 fputs ("eq_us", file);
11050 break;
11051 case LT:
11052 fputs ("lt", file);
11053 break;
11054 case UNLT:
11055 fputs ("nge", file);
11056 break;
11057 case LE:
11058 fputs ("le", file);
11059 break;
11060 case UNLE:
11061 fputs ("ngt", file);
11062 break;
11063 case UNORDERED:
11064 fputs ("unord", file);
11065 break;
11066 case NE:
11067 fputs ("neq", file);
11068 break;
11069 case LTGT:
11070 fputs ("neq_oq", file);
11071 break;
11072 case GE:
11073 fputs ("ge", file);
11074 break;
11075 case UNGE:
11076 fputs ("nlt", file);
11077 break;
11078 case GT:
11079 fputs ("gt", file);
11080 break;
11081 case UNGT:
11082 fputs ("nle", file);
11083 break;
11084 case ORDERED:
11085 fputs ("ord", file);
11086 break;
11087 default:
11088 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11089 return;
11092 else
11094 switch (GET_CODE (x))
11096 case EQ:
11097 case UNEQ:
11098 fputs ("eq", file);
11099 break;
11100 case LT:
11101 case UNLT:
11102 fputs ("lt", file);
11103 break;
11104 case LE:
11105 case UNLE:
11106 fputs ("le", file);
11107 break;
11108 case UNORDERED:
11109 fputs ("unord", file);
11110 break;
11111 case NE:
11112 case LTGT:
11113 fputs ("neq", file);
11114 break;
11115 case UNGE:
11116 case GE:
11117 fputs ("nlt", file);
11118 break;
11119 case UNGT:
11120 case GT:
11121 fputs ("nle", file);
11122 break;
11123 case ORDERED:
11124 fputs ("ord", file);
11125 break;
11126 default:
11127 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11128 return;
11131 return;
11132 case 'O':
11133 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11134 if (ASSEMBLER_DIALECT == ASM_ATT)
11136 switch (GET_MODE (x))
11138 case HImode: putc ('w', file); break;
11139 case SImode:
11140 case SFmode: putc ('l', file); break;
11141 case DImode:
11142 case DFmode: putc ('q', file); break;
11143 default: gcc_unreachable ();
11145 putc ('.', file);
11147 #endif
11148 return;
11149 case 'C':
11150 if (!COMPARISON_P (x))
11152 output_operand_lossage ("operand is neither a constant nor a "
11153 "condition code, invalid operand code "
11154 "'C'");
11155 return;
11157 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11158 return;
11159 case 'F':
11160 if (!COMPARISON_P (x))
11162 output_operand_lossage ("operand is neither a constant nor a "
11163 "condition code, invalid operand code "
11164 "'F'");
11165 return;
11167 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11168 if (ASSEMBLER_DIALECT == ASM_ATT)
11169 putc ('.', file);
11170 #endif
11171 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11172 return;
11174 /* Like above, but reverse condition */
11175 case 'c':
11176 /* Check to see if argument to %c is really a constant
11177 and not a condition code which needs to be reversed. */
11178 if (!COMPARISON_P (x))
11180 output_operand_lossage ("operand is neither a constant nor a "
11181 "condition code, invalid operand "
11182 "code 'c'");
11183 return;
11185 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11186 return;
11187 case 'f':
11188 if (!COMPARISON_P (x))
11190 output_operand_lossage ("operand is neither a constant nor a "
11191 "condition code, invalid operand "
11192 "code 'f'");
11193 return;
11195 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11196 if (ASSEMBLER_DIALECT == ASM_ATT)
11197 putc ('.', file);
11198 #endif
11199 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11200 return;
11202 case 'E':
11203 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11204 return;
11206 case 'e':
11207 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11208 return;
11210 case 'H':
11211 /* It doesn't actually matter what mode we use here, as we're
11212 only going to use this for printing. */
11213 x = adjust_address_nv (x, DImode, 8);
11214 break;
11216 case '+':
11218 rtx x;
11220 if (!optimize
11221 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11222 return;
11224 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11225 if (x)
11227 int pred_val = INTVAL (XEXP (x, 0));
11229 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11230 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11232 int taken = pred_val > REG_BR_PROB_BASE / 2;
11233 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11235 /* Emit hints only where the default branch prediction
11236 heuristics would fail. */
11237 if (taken != cputaken)
11239 /* We use 3e (DS) prefix for taken branches and
11240 2e (CS) prefix for not taken branches. */
11241 if (taken)
11242 fputs ("ds ; ", file);
11243 else
11244 fputs ("cs ; ", file);
11248 return;
11251 case 'Y':
11252 switch (GET_CODE (x))
11254 case NE:
11255 fputs ("neq", file);
11256 break;
11257 case EQ:
11258 fputs ("eq", file);
11259 break;
11260 case GE:
11261 case GEU:
11262 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11263 break;
11264 case GT:
11265 case GTU:
11266 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11267 break;
11268 case LE:
11269 case LEU:
11270 fputs ("le", file);
11271 break;
11272 case LT:
11273 case LTU:
11274 fputs ("lt", file);
11275 break;
11276 case UNORDERED:
11277 fputs ("unord", file);
11278 break;
11279 case ORDERED:
11280 fputs ("ord", file);
11281 break;
11282 case UNEQ:
11283 fputs ("ueq", file);
11284 break;
11285 case UNGE:
11286 fputs ("nlt", file);
11287 break;
11288 case UNGT:
11289 fputs ("nle", file);
11290 break;
11291 case UNLE:
11292 fputs ("ule", file);
11293 break;
11294 case UNLT:
11295 fputs ("ult", file);
11296 break;
11297 case LTGT:
11298 fputs ("une", file);
11299 break;
11300 default:
11301 output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
11302 return;
11304 return;
11306 case ';':
11307 #if TARGET_MACHO
11308 fputs (" ; ", file);
11309 #else
11310 fputc (' ', file);
11311 #endif
11312 return;
11314 default:
11315 output_operand_lossage ("invalid operand code '%c'", code);
11319 if (REG_P (x))
11320 print_reg (x, code, file);
11322 else if (MEM_P (x))
11324 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11325 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11326 && GET_MODE (x) != BLKmode)
11328 const char * size;
11329 switch (GET_MODE_SIZE (GET_MODE (x)))
11331 case 1: size = "BYTE"; break;
11332 case 2: size = "WORD"; break;
11333 case 4: size = "DWORD"; break;
11334 case 8: size = "QWORD"; break;
11335 case 12: size = "XWORD"; break;
11336 case 16:
11337 if (GET_MODE (x) == XFmode)
11338 size = "XWORD";
11339 else
11340 size = "XMMWORD";
11341 break;
11342 default:
11343 gcc_unreachable ();
11346 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11347 if (code == 'b')
11348 size = "BYTE";
11349 else if (code == 'w')
11350 size = "WORD";
11351 else if (code == 'k')
11352 size = "DWORD";
11354 fputs (size, file);
11355 fputs (" PTR ", file);
11358 x = XEXP (x, 0);
11359 /* Avoid (%rip) for call operands. */
11360 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11361 && !CONST_INT_P (x))
11362 output_addr_const (file, x);
11363 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11364 output_operand_lossage ("invalid constraints for operand");
11365 else
11366 output_address (x);
11369 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11371 REAL_VALUE_TYPE r;
11372 long l;
11374 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11375 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11377 if (ASSEMBLER_DIALECT == ASM_ATT)
11378 putc ('$', file);
11379 fprintf (file, "0x%08lx", (long unsigned int) l);
11382 /* These float cases don't actually occur as immediate operands. */
11383 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11385 char dstr[30];
11387 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11388 fprintf (file, "%s", dstr);
11391 else if (GET_CODE (x) == CONST_DOUBLE
11392 && GET_MODE (x) == XFmode)
11394 char dstr[30];
11396 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11397 fprintf (file, "%s", dstr);
11400 else
11402 /* We have patterns that allow zero sets of memory, for instance.
11403 In 64-bit mode, we should probably support all 8-byte vectors,
11404 since we can in fact encode that into an immediate. */
11405 if (GET_CODE (x) == CONST_VECTOR)
11407 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11408 x = const0_rtx;
11411 if (code != 'P')
11413 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11415 if (ASSEMBLER_DIALECT == ASM_ATT)
11416 putc ('$', file);
11418 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11419 || GET_CODE (x) == LABEL_REF)
11421 if (ASSEMBLER_DIALECT == ASM_ATT)
11422 putc ('$', file);
11423 else
11424 fputs ("OFFSET FLAT:", file);
11427 if (CONST_INT_P (x))
11428 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11429 else if (flag_pic)
11430 output_pic_addr_const (file, x, code);
11431 else
11432 output_addr_const (file, x);
11436 /* Print a memory operand whose address is ADDR. */
11438 void
11439 print_operand_address (FILE *file, rtx addr)
11441 struct ix86_address parts;
11442 rtx base, index, disp;
11443 int scale;
11444 int ok = ix86_decompose_address (addr, &parts);
11446 gcc_assert (ok);
11448 base = parts.base;
11449 index = parts.index;
11450 disp = parts.disp;
11451 scale = parts.scale;
11453 switch (parts.seg)
11455 case SEG_DEFAULT:
11456 break;
11457 case SEG_FS:
11458 case SEG_GS:
11459 if (ASSEMBLER_DIALECT == ASM_ATT)
11460 putc ('%', file);
11461 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11462 break;
11463 default:
11464 gcc_unreachable ();
11467 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
11468 if (TARGET_64BIT && !base && !index)
11470 rtx symbol = disp;
11472 if (GET_CODE (disp) == CONST
11473 && GET_CODE (XEXP (disp, 0)) == PLUS
11474 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11475 symbol = XEXP (XEXP (disp, 0), 0);
11477 if (GET_CODE (symbol) == LABEL_REF
11478 || (GET_CODE (symbol) == SYMBOL_REF
11479 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11480 base = pc_rtx;
11482 if (!base && !index)
11484 /* A displacement-only address requires special attention. */
11486 if (CONST_INT_P (disp))
11488 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11489 fputs ("ds:", file);
11490 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11492 else if (flag_pic)
11493 output_pic_addr_const (file, disp, 0);
11494 else
11495 output_addr_const (file, disp);
11497 else
11499 if (ASSEMBLER_DIALECT == ASM_ATT)
11501 if (disp)
11503 if (flag_pic)
11504 output_pic_addr_const (file, disp, 0);
11505 else if (GET_CODE (disp) == LABEL_REF)
11506 output_asm_label (disp);
11507 else
11508 output_addr_const (file, disp);
11511 putc ('(', file);
11512 if (base)
11513 print_reg (base, 0, file);
11514 if (index)
11516 putc (',', file);
11517 print_reg (index, 0, file);
11518 if (scale != 1)
11519 fprintf (file, ",%d", scale);
11521 putc (')', file);
11523 else
11525 rtx offset = NULL_RTX;
11527 if (disp)
11529 /* Pull out the offset of a symbol; print any symbol itself. */
11530 if (GET_CODE (disp) == CONST
11531 && GET_CODE (XEXP (disp, 0)) == PLUS
11532 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11534 offset = XEXP (XEXP (disp, 0), 1);
11535 disp = gen_rtx_CONST (VOIDmode,
11536 XEXP (XEXP (disp, 0), 0));
11539 if (flag_pic)
11540 output_pic_addr_const (file, disp, 0);
11541 else if (GET_CODE (disp) == LABEL_REF)
11542 output_asm_label (disp);
11543 else if (CONST_INT_P (disp))
11544 offset = disp;
11545 else
11546 output_addr_const (file, disp);
11549 putc ('[', file);
11550 if (base)
11552 print_reg (base, 0, file);
11553 if (offset)
11555 if (INTVAL (offset) >= 0)
11556 putc ('+', file);
11557 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11560 else if (offset)
11561 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11562 else
11563 putc ('0', file);
11565 if (index)
11567 putc ('+', file);
11568 print_reg (index, 0, file);
11569 if (scale != 1)
11570 fprintf (file, "*%d", scale);
11572 putc (']', file);
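/* Worked example of the output above (illustrative operands): for
base register bp, index ax, scale 4 and displacement -8, the AT&T
branch prints "-8(%ebp,%eax,4)" while the Intel branch prints
"[ebp-8+eax*4]". */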
11577 bool
11578 output_addr_const_extra (FILE *file, rtx x)
11580 rtx op;
11582 if (GET_CODE (x) != UNSPEC)
11583 return false;
11585 op = XVECEXP (x, 0, 0);
11586 switch (XINT (x, 1))
11588 case UNSPEC_GOTTPOFF:
11589 output_addr_const (file, op);
11590 /* FIXME: This might be @TPOFF in Sun ld. */
11591 fputs ("@GOTTPOFF", file);
11592 break;
11593 case UNSPEC_TPOFF:
11594 output_addr_const (file, op);
11595 fputs ("@TPOFF", file);
11596 break;
11597 case UNSPEC_NTPOFF:
11598 output_addr_const (file, op);
11599 if (TARGET_64BIT)
11600 fputs ("@TPOFF", file);
11601 else
11602 fputs ("@NTPOFF", file);
11603 break;
11604 case UNSPEC_DTPOFF:
11605 output_addr_const (file, op);
11606 fputs ("@DTPOFF", file);
11607 break;
11608 case UNSPEC_GOTNTPOFF:
11609 output_addr_const (file, op);
11610 if (TARGET_64BIT)
11611 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11612 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11613 else
11614 fputs ("@GOTNTPOFF", file);
11615 break;
11616 case UNSPEC_INDNTPOFF:
11617 output_addr_const (file, op);
11618 fputs ("@INDNTPOFF", file);
11619 break;
11620 #if TARGET_MACHO
11621 case UNSPEC_MACHOPIC_OFFSET:
11622 output_addr_const (file, op);
11623 putc ('-', file);
11624 machopic_output_function_base_name (file);
11625 break;
11626 #endif
11628 default:
11629 return false;
11632 return true;
11635 /* Split one or more DImode RTL references into pairs of SImode
11636 references. The RTL can be REG, offsettable MEM, integer constant, or
11637 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11638 split and "num" is its length. lo_half and hi_half are output arrays
11639 that parallel "operands". */
11641 void
11642 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11644 while (num--)
11646 rtx op = operands[num];
11648 /* simplify_subreg refuses to split volatile memory addresses,
11649 but we still have to handle them. */
11650 if (MEM_P (op))
11652 lo_half[num] = adjust_address (op, SImode, 0);
11653 hi_half[num] = adjust_address (op, SImode, 4);
11655 else
11657 lo_half[num] = simplify_gen_subreg (SImode, op,
11658 GET_MODE (op) == VOIDmode
11659 ? DImode : GET_MODE (op), 0);
11660 hi_half[num] = simplify_gen_subreg (SImode, op,
11661 GET_MODE (op) == VOIDmode
11662 ? DImode : GET_MODE (op), 4);
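/* Hedged usage sketch (a typical machine-description caller, not part
of this file): a post-reload DImode move splitter would do roughly

   rtx lo[2], hi[2];
   split_di (operands, 2, lo, hi);
   emit_move_insn (lo[0], lo[1]);
   emit_move_insn (hi[0], hi[1]);

and likewise with split_ti below for TImode. */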
11666 /* Split one or more TImode RTL references into pairs of DImode
11667 references. The RTL can be REG, offsettable MEM, integer constant, or
11668 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
11669 split and "num" is its length. lo_half and hi_half are output arrays
11670 that parallel "operands". */
11672 void
11673 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11675 while (num--)
11677 rtx op = operands[num];
11679 /* simplify_subreg refuses to split volatile memory addresses, but we
11680 still have to handle them. */
11681 if (MEM_P (op))
11683 lo_half[num] = adjust_address (op, DImode, 0);
11684 hi_half[num] = adjust_address (op, DImode, 8);
11686 else
11688 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11689 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11694 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11695 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11696 is the expression of the binary operation. The output may either be
11697 emitted here, or returned to the caller, like all output_* functions.
11699 There is no guarantee that the operands are the same mode, as they
11700 might be within FLOAT or FLOAT_EXTEND expressions. */
11702 #ifndef SYSV386_COMPAT
11703 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11704 wants to fix the assemblers because that causes incompatibility
11705 with gcc. No-one wants to fix gcc because that causes
11706 incompatibility with assemblers... You can use the option of
11707 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11708 #define SYSV386_COMPAT 1
11709 #endif
11711 const char *
11712 output_387_binary_op (rtx insn, rtx *operands)
11714 static char buf[40];
11715 const char *p;
11716 const char *ssep;
11717 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11719 #ifdef ENABLE_CHECKING
11720 /* Even if we do not want to check the inputs, this documents input
11721 constraints. Which helps in understanding the following code. */
11722 if (STACK_REG_P (operands[0])
11723 && ((REG_P (operands[1])
11724 && REGNO (operands[0]) == REGNO (operands[1])
11725 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11726 || (REG_P (operands[2])
11727 && REGNO (operands[0]) == REGNO (operands[2])
11728 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11729 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11730 ; /* ok */
11731 else
11732 gcc_assert (is_sse);
11733 #endif
11735 switch (GET_CODE (operands[3]))
11737 case PLUS:
11738 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11739 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11740 p = "fiadd";
11741 else
11742 p = "fadd";
11743 ssep = "vadd";
11744 break;
11746 case MINUS:
11747 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11748 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11749 p = "fisub";
11750 else
11751 p = "fsub";
11752 ssep = "vsub";
11753 break;
11755 case MULT:
11756 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11757 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11758 p = "fimul";
11759 else
11760 p = "fmul";
11761 ssep = "vmul";
11762 break;
11764 case DIV:
11765 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11766 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11767 p = "fidiv";
11768 else
11769 p = "fdiv";
11770 ssep = "vdiv";
11771 break;
11773 default:
11774 gcc_unreachable ();
11777 if (is_sse)
11779 if (TARGET_AVX)
11781 strcpy (buf, ssep);
11782 if (GET_MODE (operands[0]) == SFmode)
11783 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11784 else
11785 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11787 else
11789 strcpy (buf, ssep + 1);
11790 if (GET_MODE (operands[0]) == SFmode)
11791 strcat (buf, "ss\t{%2, %0|%0, %2}");
11792 else
11793 strcat (buf, "sd\t{%2, %0|%0, %2}");
11795 return buf;
11797 strcpy (buf, p);
11799 switch (GET_CODE (operands[3]))
11801 case MULT:
11802 case PLUS:
11803 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11805 rtx temp = operands[2];
11806 operands[2] = operands[1];
11807 operands[1] = temp;
11810 /* We now know operands[0] == operands[1]. */
11812 if (MEM_P (operands[2]))
11814 p = "%z2\t%2";
11815 break;
11818 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11820 if (STACK_TOP_P (operands[0]))
11821 /* How is it that we are storing to a dead operand[2]?
11822 Well, presumably operands[1] is dead too. We can't
11823 store the result to st(0) as st(0) gets popped on this
11824 instruction. Instead store to operands[2] (which I
11825 think has to be st(1)). st(1) will be popped later.
11826 gcc <= 2.8.1 didn't have this check and generated
11827 assembly code that the Unixware assembler rejected. */
11828 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11829 else
11830 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11831 break;
11834 if (STACK_TOP_P (operands[0]))
11835 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11836 else
11837 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11838 break;
11840 case MINUS:
11841 case DIV:
11842 if (MEM_P (operands[1]))
11844 p = "r%z1\t%1";
11845 break;
11848 if (MEM_P (operands[2]))
11850 p = "%z2\t%2";
11851 break;
11854 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11856 #if SYSV386_COMPAT
11857 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11858 derived assemblers, confusingly reverse the direction of
11859 the operation for fsub{r} and fdiv{r} when the
11860 destination register is not st(0). The Intel assembler
11861 doesn't have this brain damage. Read !SYSV386_COMPAT to
11862 figure out what the hardware really does. */
11863 if (STACK_TOP_P (operands[0]))
11864 p = "{p\t%0, %2|rp\t%2, %0}";
11865 else
11866 p = "{rp\t%2, %0|p\t%0, %2}";
11867 #else
11868 if (STACK_TOP_P (operands[0]))
11869 /* As above for fmul/fadd, we can't store to st(0). */
11870 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11871 else
11872 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11873 #endif
11874 break;
11877 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11879 #if SYSV386_COMPAT
11880 if (STACK_TOP_P (operands[0]))
11881 p = "{rp\t%0, %1|p\t%1, %0}";
11882 else
11883 p = "{p\t%1, %0|rp\t%0, %1}";
11884 #else
11885 if (STACK_TOP_P (operands[0]))
11886 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11887 else
11888 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11889 #endif
11890 break;
11893 if (STACK_TOP_P (operands[0]))
11895 if (STACK_TOP_P (operands[1]))
11896 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11897 else
11898 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11899 break;
11901 else if (STACK_TOP_P (operands[1]))
11903 #if SYSV386_COMPAT
11904 p = "{\t%1, %0|r\t%0, %1}";
11905 #else
11906 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11907 #endif
11909 else
11911 #if SYSV386_COMPAT
11912 p = "{r\t%2, %0|\t%0, %2}";
11913 #else
11914 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11915 #endif
11917 break;
11919 default:
11920 gcc_unreachable ();
11923 strcat (buf, p);
11924 return buf;
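/* Concrete readings of the templates above (illustrative): a plain
SFmode SSE add returns "addss\t{%2, %0|%0, %2}", and with TARGET_AVX
"vaddss\t{%2, %1, %0|%0, %1, %2}"; the text before '|' is AT&T
syntax, the text after it Intel syntax. */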
11927 /* Return the mode needed for ENTITY in the optimize_mode_switching pass. */
11930 ix86_mode_needed (int entity, rtx insn)
11932 enum attr_i387_cw mode;
11934 /* The mode UNINITIALIZED is used to store the control word after a
11935 function call or ASM pattern. The mode ANY specifies that the function
11936 has no requirements on the control word and makes no changes in the
11937 bits we are interested in. */
11939 if (CALL_P (insn)
11940 || (NONJUMP_INSN_P (insn)
11941 && (asm_noperands (PATTERN (insn)) >= 0
11942 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11943 return I387_CW_UNINITIALIZED;
11945 if (recog_memoized (insn) < 0)
11946 return I387_CW_ANY;
11948 mode = get_attr_i387_cw (insn);
11950 switch (entity)
11952 case I387_TRUNC:
11953 if (mode == I387_CW_TRUNC)
11954 return mode;
11955 break;
11957 case I387_FLOOR:
11958 if (mode == I387_CW_FLOOR)
11959 return mode;
11960 break;
11962 case I387_CEIL:
11963 if (mode == I387_CW_CEIL)
11964 return mode;
11965 break;
11967 case I387_MASK_PM:
11968 if (mode == I387_CW_MASK_PM)
11969 return mode;
11970 break;
11972 default:
11973 gcc_unreachable ();
11976 return I387_CW_ANY;
11979 /* Output code to initialize control word copies used by trunc?f?i and
11980 rounding patterns. CURRENT_MODE is set to current control word,
11981 while NEW_MODE is set to new control word. */
11983 void
11984 emit_i387_cw_initialization (int mode)
11986 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11987 rtx new_mode;
11989 enum ix86_stack_slot slot;
11991 rtx reg = gen_reg_rtx (HImode);
11993 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11994 emit_move_insn (reg, copy_rtx (stored_mode));
11996 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11997 || optimize_function_for_size_p (cfun))
11999 switch (mode)
12001 case I387_CW_TRUNC:
12002 /* round toward zero (truncate) */
12003 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12004 slot = SLOT_CW_TRUNC;
12005 break;
12007 case I387_CW_FLOOR:
12008 /* round down toward -oo */
12009 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12010 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12011 slot = SLOT_CW_FLOOR;
12012 break;
12014 case I387_CW_CEIL:
12015 /* round up toward +oo */
12016 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12017 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12018 slot = SLOT_CW_CEIL;
12019 break;
12021 case I387_CW_MASK_PM:
12022 /* mask precision exception for nearbyint() */
12023 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12024 slot = SLOT_CW_MASK_PM;
12025 break;
12027 default:
12028 gcc_unreachable ();
12031 else
12033 switch (mode)
12035 case I387_CW_TRUNC:
12036 /* round toward zero (truncate) */
12037 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12038 slot = SLOT_CW_TRUNC;
12039 break;
12041 case I387_CW_FLOOR:
12042 /* round down toward -oo */
12043 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12044 slot = SLOT_CW_FLOOR;
12045 break;
12047 case I387_CW_CEIL:
12048 /* round up toward +oo */
12049 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12050 slot = SLOT_CW_CEIL;
12051 break;
12053 case I387_CW_MASK_PM:
12054 /* mask precision exception for nearbyint() */
12055 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12056 slot = SLOT_CW_MASK_PM;
12057 break;
12059 default:
12060 gcc_unreachable ();
12064 gcc_assert (slot < MAX_386_STACK_LOCALS);
12066 new_mode = assign_386_stack_local (HImode, slot);
12067 emit_move_insn (new_mode, reg);
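/* Standalone sketch of the control-word bit twiddling above
   (illustrative helper, not used by the compiler). Bits 10-11 of the
   x87 control word (mask 0x0c00) form the rounding-control field:
   00 = nearest, 01 = down (floor), 10 = up (ceil), 11 = truncate;
   bit 5 (0x0020) masks the precision exception.  */
static unsigned short
x87_cw_set_rounding (unsigned short cw, unsigned int rc)
{
  /* Clear the RC field, then install the two new rounding bits.  */
  return (cw & ~0x0c00) | ((rc & 3) << 10);
}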
12070 /* Output code for INSN to convert a float to a signed int. OPERANDS
12071 are the insn operands. The output may be [HSD]Imode and the input
12072 operand may be [SDX]Fmode. */
12074 const char *
12075 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12077 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12078 int dimode_p = GET_MODE (operands[0]) == DImode;
12079 int round_mode = get_attr_i387_cw (insn);
12081 /* Jump through a hoop or two for DImode, since the hardware has no
12082 non-popping instruction. We used to do this a different way, but
12083 that was somewhat fragile and broke with post-reload splitters. */
12084 if ((dimode_p || fisttp) && !stack_top_dies)
12085 output_asm_insn ("fld\t%y1", operands);
12087 gcc_assert (STACK_TOP_P (operands[1]));
12088 gcc_assert (MEM_P (operands[0]));
12089 gcc_assert (GET_MODE (operands[1]) != TFmode);
12091 if (fisttp)
12092 output_asm_insn ("fisttp%z0\t%0", operands);
12093 else
12095 if (round_mode != I387_CW_ANY)
12096 output_asm_insn ("fldcw\t%3", operands);
12097 if (stack_top_dies || dimode_p)
12098 output_asm_insn ("fistp%z0\t%0", operands);
12099 else
12100 output_asm_insn ("fist%z0\t%0", operands);
12101 if (round_mode != I387_CW_ANY)
12102 output_asm_insn ("fldcw\t%2", operands);
12105 return "";
12108 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12109 have the values zero or one, indicates the ffreep insn's operand
12110 from the OPERANDS array. */
12112 static const char *
12113 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12115 if (TARGET_USE_FFREEP)
12116 #if HAVE_AS_IX86_FFREEP
12117 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12118 #else
12120 static char retval[] = ".word\t0xc_df";
12121 int regno = REGNO (operands[opno]);
12123 gcc_assert (FP_REGNO_P (regno));
12125 retval[9] = '0' + (regno - FIRST_STACK_REG);
12126 return retval;
12128 #endif
12130 return opno ? "fstp\t%y1" : "fstp\t%y0";
12134 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12135 should be used. UNORDERED_P is true when fucom should be used. */
12137 const char *
12138 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12140 int stack_top_dies;
12141 rtx cmp_op0, cmp_op1;
12142 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12144 if (eflags_p)
12146 cmp_op0 = operands[0];
12147 cmp_op1 = operands[1];
12149 else
12151 cmp_op0 = operands[1];
12152 cmp_op1 = operands[2];
12155 if (is_sse)
12157 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12158 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12159 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12160 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12162 if (GET_MODE (operands[0]) == SFmode)
12163 if (unordered_p)
12164 return &ucomiss[TARGET_AVX ? 0 : 1];
12165 else
12166 return &comiss[TARGET_AVX ? 0 : 1];
12167 else
12168 if (unordered_p)
12169 return &ucomisd[TARGET_AVX ? 0 : 1];
12170 else
12171 return &comisd[TARGET_AVX ? 0 : 1];
12174 gcc_assert (STACK_TOP_P (cmp_op0));
12176 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12178 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12180 if (stack_top_dies)
12182 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12183 return output_387_ffreep (operands, 1);
12185 else
12186 return "ftst\n\tfnstsw\t%0";
12189 if (STACK_REG_P (cmp_op1)
12190 && stack_top_dies
12191 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12192 && REGNO (cmp_op1) != FIRST_STACK_REG)
12194 /* If the top of the 387 stack dies, and the other operand
12195 is also a stack register that dies, then this must be an
12196 `fcompp' float compare. */
12198 if (eflags_p)
12200 /* There is no double popping fcomi variant. Fortunately,
12201 eflags is immune from the fstp's cc clobbering. */
12202 if (unordered_p)
12203 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12204 else
12205 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12206 return output_387_ffreep (operands, 0);
12208 else
12210 if (unordered_p)
12211 return "fucompp\n\tfnstsw\t%0";
12212 else
12213 return "fcompp\n\tfnstsw\t%0";
12216 else
12218 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12220 static const char * const alt[16] =
12222 "fcom%z2\t%y2\n\tfnstsw\t%0",
12223 "fcomp%z2\t%y2\n\tfnstsw\t%0",
12224 "fucom%z2\t%y2\n\tfnstsw\t%0",
12225 "fucomp%z2\t%y2\n\tfnstsw\t%0",
12227 "ficom%z2\t%y2\n\tfnstsw\t%0",
12228 "ficomp%z2\t%y2\n\tfnstsw\t%0",
12229 NULL,
12230 NULL,
12232 "fcomi\t{%y1, %0|%0, %y1}",
12233 "fcomip\t{%y1, %0|%0, %y1}",
12234 "fucomi\t{%y1, %0|%0, %y1}",
12235 "fucomip\t{%y1, %0|%0, %y1}",
12237 NULL,
12238 NULL,
12239 NULL,
12240 NULL
12243 int mask;
12244 const char *ret;
12246 mask = eflags_p << 3;
12247 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12248 mask |= unordered_p << 1;
12249 mask |= stack_top_dies;
12251 gcc_assert (mask < 16);
12252 ret = alt[mask];
12253 gcc_assert (ret);
12255 return ret;
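/* Worked example of the encoding above (illustrative): for fcomi-style
eflags output (eflags_p = 1), a floating-point operand (intmode = 0),
an unordered compare (unordered_p = 1) whose top-of-stack operand dies
(stack_top_dies = 1), mask = 8 + 2 + 1 = 11, selecting
"fucomip\t{%y1, %0|%0, %y1}". */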
12259 void
12260 ix86_output_addr_vec_elt (FILE *file, int value)
12262 const char *directive = ASM_LONG;
12264 #ifdef ASM_QUAD
12265 if (TARGET_64BIT)
12266 directive = ASM_QUAD;
12267 #else
12268 gcc_assert (!TARGET_64BIT);
12269 #endif
12271 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12274 void
12275 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12277 const char *directive = ASM_LONG;
12279 #ifdef ASM_QUAD
12280 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12281 directive = ASM_QUAD;
12282 #else
12283 gcc_assert (!TARGET_64BIT);
12284 #endif
12285 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12286 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12287 fprintf (file, "%s%s%d-%s%d\n",
12288 directive, LPREFIX, value, LPREFIX, rel);
12289 else if (HAVE_AS_GOTOFF_IN_DATA)
12290 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12291 #if TARGET_MACHO
12292 else if (TARGET_MACHO)
12294 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12295 machopic_output_function_base_name (file);
12296 fprintf(file, "\n");
12298 #endif
12299 else
12300 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12301 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12304 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12305 for the target. */
12307 void
12308 ix86_expand_clear (rtx dest)
12310 rtx tmp;
12312 /* We play register width games, which are only valid after reload. */
12313 gcc_assert (reload_completed);
12315 /* Avoid HImode and its attendant prefix byte. */
12316 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12317 dest = gen_rtx_REG (SImode, REGNO (dest));
12318 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12320 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12321 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12323 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12324 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12327 emit_insn (tmp);
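/* E.g. clearing %al is widened to SImode and typically comes out as
"xorl %eax, %eax" with an explicit flags clobber in the RTL; the
"mov $0, reg" form is kept only where TARGET_USE_MOV0 makes it
preferable. */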
12330 /* X is an unchanging MEM. If it is a constant pool reference, return
12331 the constant pool rtx, else NULL. */
12334 maybe_get_pool_constant (rtx x)
12336 x = ix86_delegitimize_address (XEXP (x, 0));
12338 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12339 return get_pool_constant (x);
12341 return NULL_RTX;
12344 void
12345 ix86_expand_move (enum machine_mode mode, rtx operands[])
12347 rtx op0, op1;
12348 enum tls_model model;
12350 op0 = operands[0];
12351 op1 = operands[1];
12353 if (GET_CODE (op1) == SYMBOL_REF)
12355 model = SYMBOL_REF_TLS_MODEL (op1);
12356 if (model)
12358 op1 = legitimize_tls_address (op1, model, true);
12359 op1 = force_operand (op1, op0);
12360 if (op1 == op0)
12361 return;
12363 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12364 && SYMBOL_REF_DLLIMPORT_P (op1))
12365 op1 = legitimize_dllimport_symbol (op1, false);
12367 else if (GET_CODE (op1) == CONST
12368 && GET_CODE (XEXP (op1, 0)) == PLUS
12369 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12371 rtx addend = XEXP (XEXP (op1, 0), 1);
12372 rtx symbol = XEXP (XEXP (op1, 0), 0);
12373 rtx tmp = NULL;
12375 model = SYMBOL_REF_TLS_MODEL (symbol);
12376 if (model)
12377 tmp = legitimize_tls_address (symbol, model, true);
12378 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12379 && SYMBOL_REF_DLLIMPORT_P (symbol))
12380 tmp = legitimize_dllimport_symbol (symbol, true);
12382 if (tmp)
12384 tmp = force_operand (tmp, NULL);
12385 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12386 op0, 1, OPTAB_DIRECT);
12387 if (tmp == op0)
12388 return;
12392 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12394 if (TARGET_MACHO && !TARGET_64BIT)
12396 #if TARGET_MACHO
12397 if (MACHOPIC_PURE)
12399 rtx temp = ((reload_in_progress
12400 || ((op0 && REG_P (op0))
12401 && mode == Pmode))
12402 ? op0 : gen_reg_rtx (Pmode));
12403 op1 = machopic_indirect_data_reference (op1, temp);
12404 op1 = machopic_legitimize_pic_address (op1, mode,
12405 temp == op1 ? 0 : temp);
12407 else if (MACHOPIC_INDIRECT)
12408 op1 = machopic_indirect_data_reference (op1, 0);
12409 if (op0 == op1)
12410 return;
12411 #endif
12413 else
12415 if (MEM_P (op0))
12416 op1 = force_reg (Pmode, op1);
12417 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12419 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12420 op1 = legitimize_pic_address (op1, reg);
12421 if (op0 == op1)
12422 return;
12426 else
12428 if (MEM_P (op0)
12429 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12430 || !push_operand (op0, mode))
12431 && MEM_P (op1))
12432 op1 = force_reg (mode, op1);
12434 if (push_operand (op0, mode)
12435 && ! general_no_elim_operand (op1, mode))
12436 op1 = copy_to_mode_reg (mode, op1);
12438 /* Force large constants in 64-bit compilation into a register
12439 to get them CSEd. */
12440 if (can_create_pseudo_p ()
12441 && (mode == DImode) && TARGET_64BIT
12442 && immediate_operand (op1, mode)
12443 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12444 && !register_operand (op0, mode)
12445 && optimize)
12446 op1 = copy_to_mode_reg (mode, op1);
12448 if (can_create_pseudo_p ()
12449 && FLOAT_MODE_P (mode)
12450 && GET_CODE (op1) == CONST_DOUBLE)
12452 /* If we are loading a floating point constant to a register,
12453 force the value to memory now, since we'll get better code
12454 out of the back end. */
12456 op1 = validize_mem (force_const_mem (mode, op1));
12457 if (!register_operand (op0, mode))
12459 rtx temp = gen_reg_rtx (mode);
12460 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12461 emit_move_insn (op0, temp);
12462 return;
12467 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12470 void
12471 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12473 rtx op0 = operands[0], op1 = operands[1];
12474 unsigned int align = GET_MODE_ALIGNMENT (mode);
12476 /* Force constants other than zero into memory. We do not know how
12477 the instructions used to build constants modify the upper 64 bits
12478 of the register, once we have that information we may be able
12479 to handle some of them more efficiently. */
12480 if (can_create_pseudo_p ()
12481 && register_operand (op0, mode)
12482 && (CONSTANT_P (op1)
12483 || (GET_CODE (op1) == SUBREG
12484 && CONSTANT_P (SUBREG_REG (op1))))
12485 && standard_sse_constant_p (op1) <= 0)
12486 op1 = validize_mem (force_const_mem (mode, op1));
12488 /* We need to check memory alignment for SSE mode since attributes
12489 can make operands unaligned. */
12490 if (can_create_pseudo_p ()
12491 && SSE_REG_MODE_P (mode)
12492 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12493 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12495 rtx tmp[2];
12497 /* ix86_expand_vector_move_misalign() does not like constants ... */
12498 if (CONSTANT_P (op1)
12499 || (GET_CODE (op1) == SUBREG
12500 && CONSTANT_P (SUBREG_REG (op1))))
12501 op1 = validize_mem (force_const_mem (mode, op1));
12503 /* ... nor both arguments in memory. */
12504 if (!register_operand (op0, mode)
12505 && !register_operand (op1, mode))
12506 op1 = force_reg (mode, op1);
12508 tmp[0] = op0; tmp[1] = op1;
12509 ix86_expand_vector_move_misalign (mode, tmp);
12510 return;
12513 /* Make operand1 a register if it isn't already. */
12514 if (can_create_pseudo_p ()
12515 && !register_operand (op0, mode)
12516 && !register_operand (op1, mode))
12518 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12519 return;
12522 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12525 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12526 straight to ix86_expand_vector_move. */
12527 /* Code generation for scalar reg-reg moves of single and double precision data:
12528 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12529 movaps reg, reg
12530 else
12531 movss reg, reg
12532 if (x86_sse_partial_reg_dependency == true)
12533 movapd reg, reg
12534 else
12535 movsd reg, reg
12537 Code generation for scalar loads of double precision data:
12538 if (x86_sse_split_regs == true)
12539 movlpd mem, reg (gas syntax)
12540 else
12541 movsd mem, reg
12543 Code generation for unaligned packed loads of single precision data
12544 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12545 if (x86_sse_unaligned_move_optimal)
12546 movups mem, reg
12548 if (x86_sse_partial_reg_dependency == true)
12550 xorps reg, reg
12551 movlps mem, reg
12552 movhps mem+8, reg
12554 else
12556 movlps mem, reg
12557 movhps mem+8, reg
12560 Code generation for unaligned packed loads of double precision data
12561 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12562 if (x86_sse_unaligned_move_optimal)
12563 movupd mem, reg
12565 if (x86_sse_split_regs == true)
12567 movlpd mem, reg
12568 movhpd mem+8, reg
12570 else
12572 movsd mem, reg
12573 movhpd mem+8, reg
12577 void
12578 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12580 rtx op0, op1, m;
12582 op0 = operands[0];
12583 op1 = operands[1];
12585 if (TARGET_AVX)
12587 switch (GET_MODE_CLASS (mode))
12589 case MODE_VECTOR_INT:
12590 case MODE_INT:
12591 switch (GET_MODE_SIZE (mode))
12593 case 16:
12594 op0 = gen_lowpart (V16QImode, op0);
12595 op1 = gen_lowpart (V16QImode, op1);
12596 emit_insn (gen_avx_movdqu (op0, op1));
12597 break;
12598 case 32:
12599 op0 = gen_lowpart (V32QImode, op0);
12600 op1 = gen_lowpart (V32QImode, op1);
12601 emit_insn (gen_avx_movdqu256 (op0, op1));
12602 break;
12603 default:
12604 gcc_unreachable ();
12606 break;
12607 case MODE_VECTOR_FLOAT:
12608 op0 = gen_lowpart (mode, op0);
12609 op1 = gen_lowpart (mode, op1);
12611 switch (mode)
12613 case V4SFmode:
12614 emit_insn (gen_avx_movups (op0, op1));
12615 break;
12616 case V8SFmode:
12617 emit_insn (gen_avx_movups256 (op0, op1));
12618 break;
12619 case V2DFmode:
12620 emit_insn (gen_avx_movupd (op0, op1));
12621 break;
12622 case V4DFmode:
12623 emit_insn (gen_avx_movupd256 (op0, op1));
12624 break;
12625 default:
12626 gcc_unreachable ();
12628 break;
12630 default:
12631 gcc_unreachable ();
12634 return;
12637 if (MEM_P (op1))
12639 /* If we're optimizing for size, movups is the smallest. */
12640 if (optimize_insn_for_size_p ())
12642 op0 = gen_lowpart (V4SFmode, op0);
12643 op1 = gen_lowpart (V4SFmode, op1);
12644 emit_insn (gen_sse_movups (op0, op1));
12645 return;
12648 /* ??? If we have typed data, then it would appear that using
12649 movdqu is the only way to get unaligned data loaded with
12650 integer type. */
12651 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12653 op0 = gen_lowpart (V16QImode, op0);
12654 op1 = gen_lowpart (V16QImode, op1);
12655 emit_insn (gen_sse2_movdqu (op0, op1));
12656 return;
12659 if (TARGET_SSE2 && mode == V2DFmode)
12661 rtx zero;
12663 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12665 op0 = gen_lowpart (V2DFmode, op0);
12666 op1 = gen_lowpart (V2DFmode, op1);
12667 emit_insn (gen_sse2_movupd (op0, op1));
12668 return;
12671 /* When SSE registers are split into halves, we can avoid
12672 writing to the top half twice. */
12673 if (TARGET_SSE_SPLIT_REGS)
12675 emit_clobber (op0);
12676 zero = op0;
12678 else
12680 /* ??? Not sure about the best option for the Intel chips.
12681 The following would seem to satisfy; the register is
12682 entirely cleared, breaking the dependency chain. We
12683 then store to the upper half, with a dependency depth
12684 of one. A rumor has it that Intel recommends two movsd
12685 followed by an unpacklpd, but this is unconfirmed. And
12686 given that the dependency depth of the unpacklpd would
12687 still be one, I'm not sure why this would be better. */
12688 zero = CONST0_RTX (V2DFmode);
12691 m = adjust_address (op1, DFmode, 0);
12692 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12693 m = adjust_address (op1, DFmode, 8);
12694 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12696 else
12698 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12700 op0 = gen_lowpart (V4SFmode, op0);
12701 op1 = gen_lowpart (V4SFmode, op1);
12702 emit_insn (gen_sse_movups (op0, op1));
12703 return;
12706 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12707 emit_move_insn (op0, CONST0_RTX (mode));
12708 else
12709 emit_clobber (op0);
12711 if (mode != V4SFmode)
12712 op0 = gen_lowpart (V4SFmode, op0);
12713 m = adjust_address (op1, V2SFmode, 0);
12714 emit_insn (gen_sse_loadlps (op0, op0, m));
12715 m = adjust_address (op1, V2SFmode, 8);
12716 emit_insn (gen_sse_loadhps (op0, op0, m));
12719 else if (MEM_P (op0))
12721 /* If we're optimizing for size, movups is the smallest. */
12722 if (optimize_insn_for_size_p ())
12724 op0 = gen_lowpart (V4SFmode, op0);
12725 op1 = gen_lowpart (V4SFmode, op1);
12726 emit_insn (gen_sse_movups (op0, op1));
12727 return;
12730 /* ??? Similar to above, only less clear because of quote
12731 typeless stores unquote. */
12732 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12733 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12735 op0 = gen_lowpart (V16QImode, op0);
12736 op1 = gen_lowpart (V16QImode, op1);
12737 emit_insn (gen_sse2_movdqu (op0, op1));
12738 return;
12741 if (TARGET_SSE2 && mode == V2DFmode)
12743 m = adjust_address (op0, DFmode, 0);
12744 emit_insn (gen_sse2_storelpd (m, op1));
12745 m = adjust_address (op0, DFmode, 8);
12746 emit_insn (gen_sse2_storehpd (m, op1));
12748 else
12750 if (mode != V4SFmode)
12751 op1 = gen_lowpart (V4SFmode, op1);
12752 m = adjust_address (op0, V2SFmode, 0);
12753 emit_insn (gen_sse_storelps (m, op1));
12754 m = adjust_address (op0, V2SFmode, 8);
12755 emit_insn (gen_sse_storehps (m, op1));
12758 else
12759 gcc_unreachable ();
12762 /* Expand a push in MODE. This is some mode for which we do not support
12763 proper push instructions, at least from the registers that we expect
12764 the value to live in. */
12766 void
12767 ix86_expand_push (enum machine_mode mode, rtx x)
12769 rtx tmp;
12771 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12772 GEN_INT (-GET_MODE_SIZE (mode)),
12773 stack_pointer_rtx, 1, OPTAB_DIRECT);
12774 if (tmp != stack_pointer_rtx)
12775 emit_move_insn (stack_pointer_rtx, tmp);
12777 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12779 /* When we push an operand onto stack, it has to be aligned at least
12780 at the function argument boundary. However since we don't have
12781 the argument type, we can't determine the actual argument
12782 boundary. */
12783 emit_move_insn (tmp, x);
12786 /* Helper function of ix86_fixup_binary_operands to canonicalize
12787 operand order. Returns true if the operands should be swapped. */
12789 static bool
12790 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12791 rtx operands[])
12793 rtx dst = operands[0];
12794 rtx src1 = operands[1];
12795 rtx src2 = operands[2];
12797 /* If the operation is not commutative, we can't do anything. */
12798 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12799 return false;
12801 /* Highest priority is that src1 should match dst. */
12802 if (rtx_equal_p (dst, src1))
12803 return false;
12804 if (rtx_equal_p (dst, src2))
12805 return true;
12807 /* Next highest priority is that immediate constants come second. */
12808 if (immediate_operand (src2, mode))
12809 return false;
12810 if (immediate_operand (src1, mode))
12811 return true;
12813 /* Lowest priority is that memory references should come second. */
12814 if (MEM_P (src2))
12815 return false;
12816 if (MEM_P (src1))
12817 return true;
12819 return false;
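/* Example of the priorities above (illustrative): for the commutative
"a = b + a" the second test fires (dst matches src2), so the operands
are swapped to "a = a + b" and the two-address add can be emitted
without an extra move. */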
12823 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12824 destination to use for the operation. If different from the true
12825 destination in operands[0], a copy operation will be required. */
12828 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12829 rtx operands[])
12831 rtx dst = operands[0];
12832 rtx src1 = operands[1];
12833 rtx src2 = operands[2];
12835 /* Canonicalize operand order. */
12836 if (ix86_swap_binary_operands_p (code, mode, operands))
12838 rtx temp;
12840 /* It is invalid to swap operands of different modes. */
12841 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12843 temp = src1;
12844 src1 = src2;
12845 src2 = temp;
12848 /* Both source operands cannot be in memory. */
12849 if (MEM_P (src1) && MEM_P (src2))
12851 /* Optimization: Only read from memory once. */
12852 if (rtx_equal_p (src1, src2))
12854 src2 = force_reg (mode, src2);
12855 src1 = src2;
12857 else
12858 src2 = force_reg (mode, src2);
12861 /* If the destination is memory, and we do not have matching source
12862 operands, do things in registers. */
12863 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12864 dst = gen_reg_rtx (mode);
12866 /* Source 1 cannot be a constant. */
12867 if (CONSTANT_P (src1))
12868 src1 = force_reg (mode, src1);
12870 /* Source 1 cannot be a non-matching memory. */
12871 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12872 src1 = force_reg (mode, src1);
12874 operands[1] = src1;
12875 operands[2] = src2;
12876 return dst;
12879 /* Similarly, but assume that the destination has already been
12880 set up properly. */
12882 void
12883 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12884 enum machine_mode mode, rtx operands[])
12886 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12887 gcc_assert (dst == operands[0]);
12890 /* Attempt to expand a binary operator. Make the expansion closer to the
12891 actual machine than just general_operand, which would allow 3 separate
12892 memory references (one output, two inputs) in a single insn. */
12894 void
12895 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12896 rtx operands[])
12898 rtx src1, src2, dst, op, clob;
12900 dst = ix86_fixup_binary_operands (code, mode, operands);
12901 src1 = operands[1];
12902 src2 = operands[2];
12904 /* Emit the instruction. */
12906 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12907 if (reload_in_progress)
12909 /* Reload doesn't know about the flags register, and doesn't know that
12910 it doesn't want to clobber it. We can only do this with PLUS. */
12911 gcc_assert (code == PLUS);
12912 emit_insn (op);
12914 else
12916 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12917 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12920 /* Fix up the destination if needed. */
12921 if (dst != operands[0])
12922 emit_move_insn (operands[0], dst);
12925 /* Return TRUE or FALSE depending on whether the binary operator meets the
12926 appropriate constraints. */
12929 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12930 rtx operands[3])
12932 rtx dst = operands[0];
12933 rtx src1 = operands[1];
12934 rtx src2 = operands[2];
12936 /* Both source operands cannot be in memory. */
12937 if (MEM_P (src1) && MEM_P (src2))
12938 return 0;
12940 /* Canonicalize operand order for commutative operators. */
12941 if (ix86_swap_binary_operands_p (code, mode, operands))
12943 rtx temp = src1;
12944 src1 = src2;
12945 src2 = temp;
12948 /* If the destination is memory, we must have a matching source operand. */
12949 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12950 return 0;
12952 /* Source 1 cannot be a constant. */
12953 if (CONSTANT_P (src1))
12954 return 0;
12956 /* Source 1 cannot be a non-matching memory. */
12957 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12958 return 0;
12960 return 1;
12963 /* Attempt to expand a unary operator. Make the expansion closer to the
12964 actual machine than just general_operand, which would allow 2 separate
12965 memory references (one output, one input) in a single insn. */
12967 void
12968 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12969 rtx operands[])
12971 int matching_memory;
12972 rtx src, dst, op, clob;
12974 dst = operands[0];
12975 src = operands[1];
12977 /* If the destination is memory, and we do not have matching source
12978 operands, do things in registers. */
12979 matching_memory = 0;
12980 if (MEM_P (dst))
12982 if (rtx_equal_p (dst, src))
12983 matching_memory = 1;
12984 else
12985 dst = gen_reg_rtx (mode);
12988 /* When source operand is memory, destination must match. */
12989 if (MEM_P (src) && !matching_memory)
12990 src = force_reg (mode, src);
12992 /* Emit the instruction. */
12994 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12995 if (reload_in_progress || code == NOT)
12997 /* Reload doesn't know about the flags register, and doesn't know that
12998 it doesn't want to clobber it. */
12999 gcc_assert (code == NOT);
13000 emit_insn (op);
13002 else
13004 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13005 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13008 /* Fix up the destination if needed. */
13009 if (dst != operands[0])
13010 emit_move_insn (operands[0], dst);
13013 #define LEA_SEARCH_THRESHOLD 12
13015 /* Search backward for a non-agu definition of register number REGNO1
13016 or register number REGNO2 in INSN's basic block until we
13017 1. pass LEA_SEARCH_THRESHOLD instructions, or
13018 2. reach the BB boundary, or
13019 3. reach an agu definition.
13020 Returns the distance between the non-agu definition point and INSN.
13021 If there is no definition point, returns -1. */
13023 static int
13024 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13025 rtx insn)
13027 basic_block bb = BLOCK_FOR_INSN (insn);
13028 int distance = 0;
13029 df_ref *def_rec;
13030 enum attr_type insn_type;
13032 if (insn != BB_HEAD (bb))
13034 rtx prev = PREV_INSN (insn);
13035 while (prev && distance < LEA_SEARCH_THRESHOLD)
13037 if (INSN_P (prev))
13039 distance++;
13040 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13041 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13042 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13043 && (regno1 == DF_REF_REGNO (*def_rec)
13044 || regno2 == DF_REF_REGNO (*def_rec)))
13046 insn_type = get_attr_type (prev);
13047 if (insn_type != TYPE_LEA)
13048 goto done;
13051 if (prev == BB_HEAD (bb))
13052 break;
13053 prev = PREV_INSN (prev);
13057 if (distance < LEA_SEARCH_THRESHOLD)
13059 edge e;
13060 edge_iterator ei;
13061 bool simple_loop = false;
13063 FOR_EACH_EDGE (e, ei, bb->preds)
13064 if (e->src == bb)
13066 simple_loop = true;
13067 break;
13070 if (simple_loop)
13072 rtx prev = BB_END (bb);
13073 while (prev
13074 && prev != insn
13075 && distance < LEA_SEARCH_THRESHOLD)
13077 if (INSN_P (prev))
13079 distance++;
13080 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13081 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13082 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13083 && (regno1 == DF_REF_REGNO (*def_rec)
13084 || regno2 == DF_REF_REGNO (*def_rec)))
13086 insn_type = get_attr_type (prev);
13087 if (insn_type != TYPE_LEA)
13088 goto done;
13091 prev = PREV_INSN (prev);
13096 distance = -1;
13098 done:
13099 /* get_attr_type may modify recog data. We want to make sure
13100 that recog data is valid for instruction INSN, on which
13101 distance_non_agu_define is called. INSN is unchanged here. */
13102 extract_insn_cached (insn);
13103 return distance;
13106 /* Return the distance between INSN and the next insn that uses
13107 register number REGNO0 in a memory address. Return -1 if no such
13108 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13110 static int
13111 distance_agu_use (unsigned int regno0, rtx insn)
13113 basic_block bb = BLOCK_FOR_INSN (insn);
13114 int distance = 0;
13115 df_ref *def_rec;
13116 df_ref *use_rec;
13118 if (insn != BB_END (bb))
13120 rtx next = NEXT_INSN (insn);
13121 while (next && distance < LEA_SEARCH_THRESHOLD)
13123 if (INSN_P (next))
13125 distance++;
13127 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13128 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13129 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13130 && regno0 == DF_REF_REGNO (*use_rec))
13132 /* Return DISTANCE if OP0 is used in memory
13133 address in NEXT. */
13134 return distance;
13137 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13138 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13139 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13140 && regno0 == DF_REF_REGNO (*def_rec))
13142 /* Return -1 if OP0 is set in NEXT. */
13143 return -1;
13146 if (next == BB_END (bb))
13147 break;
13148 next = NEXT_INSN (next);
13152 if (distance < LEA_SEARCH_THRESHOLD)
13154 edge e;
13155 edge_iterator ei;
13156 bool simple_loop = false;
13158 FOR_EACH_EDGE (e, ei, bb->succs)
13159 if (e->dest == bb)
13161 simple_loop = true;
13162 break;
13165 if (simple_loop)
13167 rtx next = BB_HEAD (bb);
13168 while (next
13169 && next != insn
13170 && distance < LEA_SEARCH_THRESHOLD)
13172 if (INSN_P (next))
13174 distance++;
13176 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13177 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13178 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13179 && regno0 == DF_REF_REGNO (*use_rec))
13181 /* Return DISTANCE if OP0 is used in memory
13182 address in NEXT. */
13183 return distance;
13186 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13187 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13188 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13189 && regno0 == DF_REF_REGNO (*def_rec))
13191 /* Return -1 if OP0 is set in NEXT. */
13192 return -1;
13196 next = NEXT_INSN (next);
13201 return -1;
13204 /* Define this macro to tune LEA priority vs ADD; it takes effect when
13205 there is a dilemma of choosing LEA or ADD.
13206 Negative value: ADD is preferred over LEA.
13207 Zero: Neutral.
13208 Positive value: LEA is preferred over ADD. */
13209 #define IX86_LEA_PRIORITY 2
13211 /* Return true if it is ok to optimize an ADD operation to a LEA
13212 operation to avoid flag register consumption. For processors
13213 like ATOM, if the destination register of the LEA holds an actual
13214 address which will be used soon, LEA is better; otherwise ADD
13215 is better. */
13217 bool
13218 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13219 rtx insn, rtx operands[])
13221 unsigned int regno0 = true_regnum (operands[0]);
13222 unsigned int regno1 = true_regnum (operands[1]);
13223 unsigned int regno2;
13225 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13226 return regno0 != regno1;
13228 regno2 = true_regnum (operands[2]);
13230 /* If a = b + c and a != b and a != c, we must use the lea form. */
13231 if (regno0 != regno1 && regno0 != regno2)
13232 return true;
13233 else
13235 int dist_define, dist_use;
13236 dist_define = distance_non_agu_define (regno1, regno2, insn);
13237 if (dist_define <= 0)
13238 return true;
13240 /* If this insn has both a backward non-agu dependence and a forward
13241 agu dependence, the one with the shorter distance takes effect. */
13242 dist_use = distance_agu_use (regno0, insn);
13243 if (dist_use <= 0
13244 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13245 return false;
13247 return true;
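/* Worked example of the heuristic (illustrative numbers): with
IX86_LEA_PRIORITY = 2, if the inputs were set by a non-AGU insn one
insn back but the result is next used in an address 10 insns ahead,
1 + 2 < 10 and the ADD form wins; if that address use is only 2
insns ahead and the define 3 back, 3 + 2 >= 2 and the LEA form
wins. */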
13251 /* Return true if destination reg of SET_BODY is shift count of
13252 USE_BODY. */
13254 static bool
13255 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13257 rtx set_dest;
13258 rtx shift_rtx;
13259 int i;
13261 /* Retrieve destination of SET_BODY. */
13262 switch (GET_CODE (set_body))
13264 case SET:
13265 set_dest = SET_DEST (set_body);
13266 if (!set_dest || !REG_P (set_dest))
13267 return false;
13268 break;
13269 case PARALLEL:
13270 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13271 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13272 use_body))
13273 return true;
13274 default:
13275 return false;
13276 break;
13279 /* Retrieve shift count of USE_BODY. */
13280 switch (GET_CODE (use_body))
13282 case SET:
13283 shift_rtx = XEXP (use_body, 1);
13284 break;
13285 case PARALLEL:
13286 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13287 if (ix86_dep_by_shift_count_body (set_body,
13288 XVECEXP (use_body, 0, i)))
13289 return true;
13290 default:
13291 return false;
13292 break;
13295 if (shift_rtx
13296 && (GET_CODE (shift_rtx) == ASHIFT
13297 || GET_CODE (shift_rtx) == LSHIFTRT
13298 || GET_CODE (shift_rtx) == ASHIFTRT
13299 || GET_CODE (shift_rtx) == ROTATE
13300 || GET_CODE (shift_rtx) == ROTATERT))
13302 rtx shift_count = XEXP (shift_rtx, 1);
13304 /* Return true if shift count is dest of SET_BODY. */
13305 if (REG_P (shift_count)
13306 && true_regnum (set_dest) == true_regnum (shift_count))
13307 return true;
13310 return false;
13313 /* Return true if destination reg of SET_INSN is shift count of
13314 USE_INSN. */
13316 bool
13317 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13319 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13320 PATTERN (use_insn));
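/* Example (illustrative RTL): with SET_INSN's pattern
(set (reg:QI cx) ...) and USE_INSN's pattern
(set (reg:SI ax) (ashift:SI (reg:SI ax) (reg:QI cx))),
the destination of the first is the shift count of the second, so
the function returns true. */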
13323 /* Return TRUE or FALSE depending on whether the unary operator meets the
13324 appropriate constraints. */
13327 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13328 enum machine_mode mode ATTRIBUTE_UNUSED,
13329 rtx operands[2] ATTRIBUTE_UNUSED)
13331 /* If one of the operands is memory, source and destination must match. */
13332 if ((MEM_P (operands[0])
13333 || MEM_P (operands[1]))
13334 && ! rtx_equal_p (operands[0], operands[1]))
13335 return FALSE;
13336 return TRUE;
13339 /* Post-reload splitter for converting an SF or DFmode value in an
13340 SSE register into an unsigned SImode. */
13342 void
13343 ix86_split_convert_uns_si_sse (rtx operands[])
13345 enum machine_mode vecmode;
13346 rtx value, large, zero_or_two31, input, two31, x;
13348 large = operands[1];
13349 zero_or_two31 = operands[2];
13350 input = operands[3];
13351 two31 = operands[4];
13352 vecmode = GET_MODE (large);
13353 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13355 /* Load up the value into the low element. We must ensure that the other
13356 elements are valid floats -- zero is the easiest such value. */
13357 if (MEM_P (input))
13359 if (vecmode == V4SFmode)
13360 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13361 else
13362 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13364 else
13366 input = gen_rtx_REG (vecmode, REGNO (input));
13367 emit_move_insn (value, CONST0_RTX (vecmode));
13368 if (vecmode == V4SFmode)
13369 emit_insn (gen_sse_movss (value, value, input));
13370 else
13371 emit_insn (gen_sse2_movsd (value, value, input));
13374 emit_move_insn (large, two31);
13375 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13377 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13378 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13380 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13381 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13383 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13384 emit_insn (gen_rtx_SET (VOIDmode, value, x));
13386 large = gen_rtx_REG (V4SImode, REGNO (large));
13387 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13389 x = gen_rtx_REG (V4SImode, REGNO (value));
13390 if (vecmode == V4SFmode)
13391 emit_insn (gen_sse2_cvttps2dq (x, value));
13392 else
13393 emit_insn (gen_sse2_cvttpd2dq (x, value));
13394 value = x;
13396 emit_insn (gen_xorv4si3 (value, value, large));
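/* Standalone scalar model of the vector split above (illustrative
   only, not used by the compiler; assumes a 32-bit unsigned int):
   values below 2^31 convert directly; values at or above have 2^31
   subtracted before the signed conversion and XORed back in
   afterwards.  */
static unsigned int
double_to_uns32_model (double d)
{
  double two31 = 2147483648.0;
  int big = d >= two31;                     /* the LE mask above */
  int t = (int) (d - (big ? two31 : 0.0));  /* cvttp[sd]2dq */
  return (unsigned int) t ^ (big ? 0x80000000u : 0u);
}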
13399 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13400 Expects the 64-bit DImode to be supplied in a pair of integral
13401 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13402 -mfpmath=sse, !optimize_size only. */
13404 void
13405 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13407 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13408 rtx int_xmm, fp_xmm;
13409 rtx biases, exponents;
13410 rtx x;
13412 int_xmm = gen_reg_rtx (V4SImode);
13413 if (TARGET_INTER_UNIT_MOVES)
13414 emit_insn (gen_movdi_to_sse (int_xmm, input));
13415 else if (TARGET_SSE_SPLIT_REGS)
13417 emit_clobber (int_xmm);
13418 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13420 else
13422 x = gen_reg_rtx (V2DImode);
13423 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13424 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13427 x = gen_rtx_CONST_VECTOR (V4SImode,
13428 gen_rtvec (4, GEN_INT (0x43300000UL),
13429 GEN_INT (0x45300000UL),
13430 const0_rtx, const0_rtx));
13431 exponents = validize_mem (force_const_mem (V4SImode, x));
13433 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13434 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13436 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13437 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13438 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13439 (0x1.0p84 + double(fp_value_hi_xmm)).
13440 Note these exponents differ by 32. */
13442 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13444 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13445 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13446 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13447 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13448 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13449 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13450 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13451 biases = validize_mem (force_const_mem (V2DFmode, biases));
13452 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13454 /* Add the upper and lower DFmode values together. */
13455 if (TARGET_SSE3)
13456 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13457 else
13459 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13460 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13461 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13464 ix86_expand_vector_extract (false, target, fp_xmm, 0);
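/* Editorial sketch, not part of GCC: arithmetically the routine above
   computes hi * 2**32 + lo, with each 32-bit half converted exactly and
   a single rounding at the final add.  In scalar C, assuming IEEE
   doubles (the names are illustrative only):  */

static double
sketch_uns64_to_double (unsigned long long u)
{
  double hi = (double) (unsigned int) (u >> 32);        /* exact: < 2**32 */
  double lo = (double) (unsigned int) (u & 0xffffffff); /* exact: < 2**32 */
  return hi * 4294967296.0 + lo;                        /* hi * 2**32 + lo */
}

/* The vector code instead forms 0x1.0p52 + lo and 0x1.0p84 + hi by bit
   concatenation and subtracts the biases, avoiding int->fp conversions.  */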
13467 /* Not used, but eases macroization of patterns. */
13468 void
13469 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13470 rtx input ATTRIBUTE_UNUSED)
13472 gcc_unreachable ();
13475 /* Convert an unsigned SImode value into a DFmode. Currently used only
13476 for SSE, but applicable anywhere. */
13478 void
13479 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13481 REAL_VALUE_TYPE TWO31r;
13482 rtx x, fp;
13484 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13485 NULL, 1, OPTAB_DIRECT);
13487 fp = gen_reg_rtx (DFmode);
13488 emit_insn (gen_floatsidf2 (fp, x));
13490 real_ldexp (&TWO31r, &dconst1, 31);
13491 x = const_double_from_real_value (TWO31r, DFmode);
13493 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13494 if (x != target)
13495 emit_move_insn (target, x);
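/* Editorial sketch, not part of GCC: the PLUS of -2**31 above wraps the
   unsigned input into the signed range so the signed converter can be
   used, and 2**31 is then added back in DFmode.  In scalar C, assuming
   two's complement (names illustrative only):  */

static double
sketch_uns32_to_double (unsigned int u)
{
  int biased = (int) (u - 0x80000000u);  /* u - 2**31, wrapped */
  return (double) biased + 2147483648.0; /* add 2**31 back */
}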
13498 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13499 32-bit mode; otherwise we have a direct convert instruction. */
13501 void
13502 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13504 REAL_VALUE_TYPE TWO32r;
13505 rtx fp_lo, fp_hi, x;
13507 fp_lo = gen_reg_rtx (DFmode);
13508 fp_hi = gen_reg_rtx (DFmode);
13510 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13512 real_ldexp (&TWO32r, &dconst1, 32);
13513 x = const_double_from_real_value (TWO32r, DFmode);
13514 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13516 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13518 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13519 0, OPTAB_DIRECT);
13520 if (x != target)
13521 emit_move_insn (target, x);
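/* Editorial sketch, not part of GCC: the routine above is the identity
   v == hi * 2**32 + (unsigned) lo, with a signed high word and an
   unsigned low word.  In scalar C, assuming an arithmetic right shift
   (names illustrative only):  */

static double
sketch_sign64_to_double (long long v)
{
  double hi = (double) (int) (v >> 32);  /* signed high word */
  double lo = (double) (unsigned int) v; /* unsigned low word */
  return hi * 4294967296.0 + lo;         /* hi * 2**32 is exact */
}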
13524 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13525 For x86_32, -mfpmath=sse, !optimize_size only. */
13526 void
13527 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13529 REAL_VALUE_TYPE ONE16r;
13530 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13532 real_ldexp (&ONE16r, &dconst1, 16);
13533 x = const_double_from_real_value (ONE16r, SFmode);
13534 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13535 NULL, 0, OPTAB_DIRECT);
13536 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13537 NULL, 0, OPTAB_DIRECT);
13538 fp_hi = gen_reg_rtx (SFmode);
13539 fp_lo = gen_reg_rtx (SFmode);
13540 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13541 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13542 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13543 0, OPTAB_DIRECT);
13544 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13545 0, OPTAB_DIRECT);
13546 if (!rtx_equal_p (target, fp_hi))
13547 emit_move_insn (target, fp_hi);
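/* Editorial sketch, not part of GCC: SFmode cannot represent 32 bits
   exactly, so the input is split into 16-bit halves, each of which does
   convert exactly.  In scalar C (names illustrative only):  */

static float
sketch_uns32_to_float (unsigned int u)
{
  float hi = (float) (int) (u >> 16);    /* exact: < 2**16 */
  float lo = (float) (int) (u & 0xffff); /* exact: < 2**16 */
  return hi * 65536.0f + lo;             /* hi * 2**16 is exact */
}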
13550 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13551 then replicate the value for all elements of the vector
13552 register. */
13554 static rtx
13555 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13557 rtvec v;
13558 switch (mode)
13560 case SImode:
13561 gcc_assert (vect);
13562 v = gen_rtvec (4, value, value, value, value);
13563 return gen_rtx_CONST_VECTOR (V4SImode, v);
13565 case DImode:
13566 gcc_assert (vect);
13567 v = gen_rtvec (2, value, value);
13568 return gen_rtx_CONST_VECTOR (V2DImode, v);
13570 case SFmode:
13571 if (vect)
13572 v = gen_rtvec (4, value, value, value, value);
13573 else
13574 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13575 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13576 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13578 case DFmode:
13579 if (vect)
13580 v = gen_rtvec (2, value, value);
13581 else
13582 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13583 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13585 default:
13586 gcc_unreachable ();
13590 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13591 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13592 for an SSE register. If VECT is true, then replicate the mask for
13593 all elements of the vector register. If INVERT is true, then create
13594 a mask excluding the sign bit. */
13596 static rtx
13597 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13599 enum machine_mode vec_mode, imode;
13600 HOST_WIDE_INT hi, lo;
13601 int shift = 63;
13602 rtx v;
13603 rtx mask;
13605 /* Find the sign bit, sign extended to 2*HWI. */
13606 switch (mode)
13608 case SImode:
13609 case SFmode:
13610 imode = SImode;
13611 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13612 lo = 0x80000000, hi = lo < 0;
13613 break;
13615 case DImode:
13616 case DFmode:
13617 imode = DImode;
13618 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13619 if (HOST_BITS_PER_WIDE_INT >= 64)
13620 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13621 else
13622 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13623 break;
13625 case TImode:
13626 case TFmode:
13627 vec_mode = VOIDmode;
13628 if (HOST_BITS_PER_WIDE_INT >= 64)
13630 imode = TImode;
13631 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13633 else
13635 rtvec vec;
13637 imode = DImode;
13638 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13640 if (invert)
13642 lo = ~lo, hi = ~hi;
13643 v = constm1_rtx;
13645 else
13646 v = const0_rtx;
13648 mask = immed_double_const (lo, hi, imode);
13650 vec = gen_rtvec (2, v, mask);
13651 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13652 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13654 return v;
13656 break;
13658 default:
13659 gcc_unreachable ();
13662 if (invert)
13663 lo = ~lo, hi = ~hi;
13665 /* Force this value into the low part of a fp vector constant. */
13666 mask = immed_double_const (lo, hi, imode);
13667 mask = gen_lowpart (mode, mask);
13669 if (vec_mode == VOIDmode)
13670 return force_reg (mode, mask);
13672 v = ix86_build_const_vector (mode, vect, mask);
13673 return force_reg (vec_mode, v);
13676 /* Generate code for floating point ABS or NEG. */
13678 void
13679 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13680 rtx operands[])
13682 rtx mask, set, use, clob, dst, src;
13683 bool use_sse = false;
13684 bool vector_mode = VECTOR_MODE_P (mode);
13685 enum machine_mode elt_mode = mode;
13687 if (vector_mode)
13689 elt_mode = GET_MODE_INNER (mode);
13690 use_sse = true;
13692 else if (mode == TFmode)
13693 use_sse = true;
13694 else if (TARGET_SSE_MATH)
13695 use_sse = SSE_FLOAT_MODE_P (mode);
13697 /* NEG and ABS performed with SSE use bitwise mask operations.
13698 Create the appropriate mask now. */
13699 if (use_sse)
13700 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13701 else
13702 mask = NULL_RTX;
13704 dst = operands[0];
13705 src = operands[1];
13707 if (vector_mode)
13709 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13710 set = gen_rtx_SET (VOIDmode, dst, set);
13711 emit_insn (set);
13713 else
13715 set = gen_rtx_fmt_e (code, mode, src);
13716 set = gen_rtx_SET (VOIDmode, dst, set);
13717 if (mask)
13719 use = gen_rtx_USE (VOIDmode, mask);
13720 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13721 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13722 gen_rtvec (3, set, use, clob)));
13724 else
13725 emit_insn (set);
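/* Editorial sketch, not part of GCC: per element, NEG is an XOR with the
   sign-bit mask and ABS is an AND with the inverted mask.  In scalar C,
   using GNU C union punning and assuming a 32-bit unsigned int (names
   illustrative only):  */

static float
sketch_fnegf (float x)
{
  union { float f; unsigned int i; } u;
  u.f = x;
  u.i ^= 0x80000000u; /* flip the sign bit */
  return u.f;
}

static float
sketch_fabsf (float x)
{
  union { float f; unsigned int i; } u;
  u.f = x;
  u.i &= 0x7fffffffu; /* clear the sign bit: the INVERT form of the mask */
  return u.f;
}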
13729 /* Expand a copysign operation. Special case operand 0 being a constant. */
13731 void
13732 ix86_expand_copysign (rtx operands[])
13734 enum machine_mode mode;
13735 rtx dest, op0, op1, mask, nmask;
13737 dest = operands[0];
13738 op0 = operands[1];
13739 op1 = operands[2];
13741 mode = GET_MODE (dest);
13743 if (GET_CODE (op0) == CONST_DOUBLE)
13745 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13747 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13748 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13750 if (mode == SFmode || mode == DFmode)
13752 enum machine_mode vmode;
13754 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13756 if (op0 == CONST0_RTX (mode))
13757 op0 = CONST0_RTX (vmode);
13758 else
13760 rtvec v;
13762 if (mode == SFmode)
13763 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13764 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13765 else
13766 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13768 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13771 else if (op0 != CONST0_RTX (mode))
13772 op0 = force_reg (mode, op0);
13774 mask = ix86_build_signbit_mask (mode, 0, 0);
13776 if (mode == SFmode)
13777 copysign_insn = gen_copysignsf3_const;
13778 else if (mode == DFmode)
13779 copysign_insn = gen_copysigndf3_const;
13780 else
13781 copysign_insn = gen_copysigntf3_const;
13783 emit_insn (copysign_insn (dest, op0, op1, mask));
13785 else
13787 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13789 nmask = ix86_build_signbit_mask (mode, 0, 1);
13790 mask = ix86_build_signbit_mask (mode, 0, 0);
13792 if (mode == SFmode)
13793 copysign_insn = gen_copysignsf3_var;
13794 else if (mode == DFmode)
13795 copysign_insn = gen_copysigndf3_var;
13796 else
13797 copysign_insn = gen_copysigntf3_var;
13799 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
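/* Editorial sketch, not part of GCC: the mask/nmask pair implements the
   usual bit-level copysign -- magnitude bits from the first operand,
   sign bit from the second.  In scalar C (names illustrative only):  */

static float
sketch_copysignf (float x, float y)
{
  union { float f; unsigned int i; } ux, uy;
  ux.f = x;
  uy.f = y;
  ux.i = (ux.i & 0x7fffffffu)    /* magnitude of x: the nmask part */
         | (uy.i & 0x80000000u); /* sign of y: the mask part */
  return ux.f;
}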
13803 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13804 be a constant, and so has already been expanded into a vector constant. */
13806 void
13807 ix86_split_copysign_const (rtx operands[])
13809 enum machine_mode mode, vmode;
13810 rtx dest, op0, op1, mask, x;
13812 dest = operands[0];
13813 op0 = operands[1];
13814 op1 = operands[2];
13815 mask = operands[3];
13817 mode = GET_MODE (dest);
13818 vmode = GET_MODE (mask);
13820 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13821 x = gen_rtx_AND (vmode, dest, mask);
13822 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13824 if (op0 != CONST0_RTX (vmode))
13826 x = gen_rtx_IOR (vmode, dest, op0);
13827 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13831 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13832 so we have to do two masks. */
13834 void
13835 ix86_split_copysign_var (rtx operands[])
13837 enum machine_mode mode, vmode;
13838 rtx dest, scratch, op0, op1, mask, nmask, x;
13840 dest = operands[0];
13841 scratch = operands[1];
13842 op0 = operands[2];
13843 op1 = operands[3];
13844 nmask = operands[4];
13845 mask = operands[5];
13847 mode = GET_MODE (dest);
13848 vmode = GET_MODE (mask);
13850 if (rtx_equal_p (op0, op1))
13852 /* Shouldn't happen often (it's useless, obviously), but when it does
13853 we'd generate incorrect code if we continue below. */
13854 emit_move_insn (dest, op0);
13855 return;
13858 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13860 gcc_assert (REGNO (op1) == REGNO (scratch));
13862 x = gen_rtx_AND (vmode, scratch, mask);
13863 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13865 dest = mask;
13866 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13867 x = gen_rtx_NOT (vmode, dest);
13868 x = gen_rtx_AND (vmode, x, op0);
13869 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13871 else
13873 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13875 x = gen_rtx_AND (vmode, scratch, mask);
13877 else /* alternative 2,4 */
13879 gcc_assert (REGNO (mask) == REGNO (scratch));
13880 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13881 x = gen_rtx_AND (vmode, scratch, op1);
13883 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13885 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13887 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13888 x = gen_rtx_AND (vmode, dest, nmask);
13890 else /* alternative 3,4 */
13892 gcc_assert (REGNO (nmask) == REGNO (dest));
13893 dest = nmask;
13894 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13895 x = gen_rtx_AND (vmode, dest, op0);
13897 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13900 x = gen_rtx_IOR (vmode, dest, scratch);
13901 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13904 /* Return TRUE or FALSE depending on whether the first SET in INSN
13905 has source and destination with matching CC modes, and whether the
13906 CC mode is at least as constrained as REQ_MODE. */
13908 int
13909 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13911 rtx set;
13912 enum machine_mode set_mode;
13914 set = PATTERN (insn);
13915 if (GET_CODE (set) == PARALLEL)
13916 set = XVECEXP (set, 0, 0);
13917 gcc_assert (GET_CODE (set) == SET);
13918 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13920 set_mode = GET_MODE (SET_DEST (set));
13921 switch (set_mode)
13923 case CCNOmode:
13924 if (req_mode != CCNOmode
13925 && (req_mode != CCmode
13926 || XEXP (SET_SRC (set), 1) != const0_rtx))
13927 return 0;
13928 break;
13929 case CCmode:
13930 if (req_mode == CCGCmode)
13931 return 0;
13932 /* FALLTHRU */
13933 case CCGCmode:
13934 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13935 return 0;
13936 /* FALLTHRU */
13937 case CCGOCmode:
13938 if (req_mode == CCZmode)
13939 return 0;
13940 /* FALLTHRU */
13941 case CCAmode:
13942 case CCCmode:
13943 case CCOmode:
13944 case CCSmode:
13945 case CCZmode:
13946 break;
13948 default:
13949 gcc_unreachable ();
13952 return (GET_MODE (SET_SRC (set)) == set_mode);
13955 /* Generate insn patterns to do an integer compare of OPERANDS. */
13957 static rtx
13958 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13960 enum machine_mode cmpmode;
13961 rtx tmp, flags;
13963 cmpmode = SELECT_CC_MODE (code, op0, op1);
13964 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13966 /* This is very simple, but making the interface the same as in the
13967 FP case makes the rest of the code easier. */
13968 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13969 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13971 /* Return the test that should be put into the flags user, i.e.
13972 the bcc, scc, or cmov instruction. */
13973 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13976 /* Figure out whether to use ordered or unordered fp comparisons.
13977 Return the appropriate mode to use. */
13979 enum machine_mode
13980 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13982 /* ??? In order to make all comparisons reversible, we do all comparisons
13983 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13984 between all forms of trapping and nontrapping comparisons, we can make
13985 inequality comparisons trapping again, since that results in better code
13986 when using FCOM based compares. */
13987 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
13990 enum machine_mode
13991 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13993 enum machine_mode mode = GET_MODE (op0);
13995 if (SCALAR_FLOAT_MODE_P (mode))
13997 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13998 return ix86_fp_compare_mode (code);
14001 switch (code)
14003 /* Only zero flag is needed. */
14004 case EQ: /* ZF=0 */
14005 case NE: /* ZF!=0 */
14006 return CCZmode;
14007 /* Codes needing carry flag. */
14008 case GEU: /* CF=0 */
14009 case LTU: /* CF=1 */
14010 /* Detect overflow checks. They need just the carry flag. */
14011 if (GET_CODE (op0) == PLUS
14012 && rtx_equal_p (op1, XEXP (op0, 0)))
14013 return CCCmode;
14014 else
14015 return CCmode;
14016 case GTU: /* CF=0 & ZF=0 */
14017 case LEU: /* CF=1 | ZF=1 */
14018 /* Detect overflow checks. They need just the carry flag. */
14019 if (GET_CODE (op0) == MINUS
14020 && rtx_equal_p (op1, XEXP (op0, 0)))
14021 return CCCmode;
14022 else
14023 return CCmode;
14024 /* Codes possibly doable only with sign flag when
14025 comparing against zero. */
14026 case GE: /* SF=OF or SF=0 */
14027 case LT: /* SF<>OF or SF=1 */
14028 if (op1 == const0_rtx)
14029 return CCGOCmode;
14030 else
14031 /* For other cases the carry flag is not required. */
14032 return CCGCmode;
14033 /* Codes doable only with the sign flag when comparing
14034 against zero, but for which we lack a jump instruction,
14035 so we need to use relational tests against the overflow
14036 flag, which thus needs to be zero. */
14037 case GT: /* ZF=0 & SF=OF */
14038 case LE: /* ZF=1 | SF<>OF */
14039 if (op1 == const0_rtx)
14040 return CCNOmode;
14041 else
14042 return CCGCmode;
14043 /* The strcmp pattern does (use flags), and combine may ask us
14044 for the proper mode. */
14045 case USE:
14046 return CCmode;
14047 default:
14048 gcc_unreachable ();
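/* Editorial examples of the mapping above: a general "x < y" needs
   CCGCmode, "x < 0" can use CCGOCmode, and the overflow idiom
   "a + b < a" (LTU where op0 is a PLUS whose first operand is op1)
   needs only the carry flag, hence CCCmode.  */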
14052 /* Return the fixed registers used for condition codes. */
14054 static bool
14055 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14057 *p1 = FLAGS_REG;
14058 *p2 = FPSR_REG;
14059 return true;
14062 /* If two condition code modes are compatible, return a condition code
14063 mode which is compatible with both. Otherwise, return
14064 VOIDmode. */
14066 static enum machine_mode
14067 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14069 if (m1 == m2)
14070 return m1;
14072 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14073 return VOIDmode;
14075 if ((m1 == CCGCmode && m2 == CCGOCmode)
14076 || (m1 == CCGOCmode && m2 == CCGCmode))
14077 return CCGCmode;
14079 switch (m1)
14081 default:
14082 gcc_unreachable ();
14084 case CCmode:
14085 case CCGCmode:
14086 case CCGOCmode:
14087 case CCNOmode:
14088 case CCAmode:
14089 case CCCmode:
14090 case CCOmode:
14091 case CCSmode:
14092 case CCZmode:
14093 switch (m2)
14095 default:
14096 return VOIDmode;
14098 case CCmode:
14099 case CCGCmode:
14100 case CCGOCmode:
14101 case CCNOmode:
14102 case CCAmode:
14103 case CCCmode:
14104 case CCOmode:
14105 case CCSmode:
14106 case CCZmode:
14107 return CCmode;
14110 case CCFPmode:
14111 case CCFPUmode:
14112 /* These are only compatible with themselves, which we already
14113 checked above. */
14114 return VOIDmode;
14118 /* Split comparison code CODE into comparisons we can do using branch
14119 instructions. BYPASS_CODE is the comparison code for the branch that
14120 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
14121 is not required, its value is set to UNKNOWN.
14122 We never require more than two branches. */
14124 void
14125 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14126 enum rtx_code *first_code,
14127 enum rtx_code *second_code)
14129 *first_code = code;
14130 *bypass_code = UNKNOWN;
14131 *second_code = UNKNOWN;
14133 /* The fcomi comparison sets flags as follows:
14135 cmp   ZF  PF  CF
14136 >      0   0   0
14137 <      0   0   1
14138 =      1   0   0
14139 un     1   1   1  */
14141 switch (code)
14143 case GT: /* GTU - CF=0 & ZF=0 */
14144 case GE: /* GEU - CF=0 */
14145 case ORDERED: /* PF=0 */
14146 case UNORDERED: /* PF=1 */
14147 case UNEQ: /* EQ - ZF=1 */
14148 case UNLT: /* LTU - CF=1 */
14149 case UNLE: /* LEU - CF=1 | ZF=1 */
14150 case LTGT: /* EQ - ZF=0 */
14151 break;
14152 case LT: /* LTU - CF=1 - fails on unordered */
14153 *first_code = UNLT;
14154 *bypass_code = UNORDERED;
14155 break;
14156 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14157 *first_code = UNLE;
14158 *bypass_code = UNORDERED;
14159 break;
14160 case EQ: /* EQ - ZF=1 - fails on unordered */
14161 *first_code = UNEQ;
14162 *bypass_code = UNORDERED;
14163 break;
14164 case NE: /* NE - ZF=0 - fails on unordered */
14165 *first_code = LTGT;
14166 *second_code = UNORDERED;
14167 break;
14168 case UNGE: /* GEU - CF=0 - fails on unordered */
14169 *first_code = GE;
14170 *second_code = UNORDERED;
14171 break;
14172 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14173 *first_code = GT;
14174 *second_code = UNORDERED;
14175 break;
14176 default:
14177 gcc_unreachable ();
14179 if (!TARGET_IEEE_FP)
14181 *second_code = UNKNOWN;
14182 *bypass_code = UNKNOWN;
14186 /* Return the cost of a comparison done with fcom + arithmetic operations
14187 on AX. All of the following functions use the number of instructions as
14188 the cost metric. In the future this should be tweaked to compute bytes for
14189 optimize_size and take into account instruction performance on various CPUs. */
14190 static int
14191 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14193 if (!TARGET_IEEE_FP)
14194 return 4;
14195 /* The cost of code output by ix86_expand_fp_compare. */
14196 switch (code)
14198 case UNLE:
14199 case UNLT:
14200 case LTGT:
14201 case GT:
14202 case GE:
14203 case UNORDERED:
14204 case ORDERED:
14205 case UNEQ:
14206 return 4;
14208 case LT:
14209 case NE:
14210 case EQ:
14211 case UNGE:
14212 return 5;
14214 case LE:
14215 case UNGT:
14216 return 6;
14218 default:
14219 gcc_unreachable ();
14223 /* Return cost of comparison done using fcomi operation.
14224 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14225 static int
14226 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14228 enum rtx_code bypass_code, first_code, second_code;
14229 /* Return arbitrarily high cost when instruction is not supported - this
14230 prevents gcc from using it. */
14231 if (!TARGET_CMOVE)
14232 return 1024;
14233 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14234 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14237 /* Return cost of comparison done using sahf operation.
14238 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14239 static int
14240 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14242 enum rtx_code bypass_code, first_code, second_code;
14243 /* Return arbitrarily high cost when instruction is not preferred - this
14244 prevents gcc from using it. */
14245 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14246 return 1024;
14247 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14248 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14251 /* Compute cost of the comparison done using any method.
14252 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14253 static int
14254 ix86_fp_comparison_cost (enum rtx_code code)
14256 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14257 int min;
14259 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14260 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14262 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14263 if (min > sahf_cost)
14264 min = sahf_cost;
14265 if (min > fcomi_cost)
14266 min = fcomi_cost;
14267 return min;
14270 /* Return true if we should use an FCOMI instruction for this
14271 fp comparison. */
14273 int
14274 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14276 enum rtx_code swapped_code = swap_condition (code);
14278 return ((ix86_fp_comparison_cost (code)
14279 == ix86_fp_comparison_fcomi_cost (code))
14280 || (ix86_fp_comparison_cost (swapped_code)
14281 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14284 /* Swap, force into registers, or otherwise massage the two operands
14285 to a fp comparison. The operands are updated in place; the new
14286 comparison code is returned. */
14288 static enum rtx_code
14289 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14291 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14292 rtx op0 = *pop0, op1 = *pop1;
14293 enum machine_mode op_mode = GET_MODE (op0);
14294 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14296 /* All of the unordered compare instructions only work on registers.
14297 The same is true of the fcomi compare instructions. The XFmode
14298 compare instructions require registers except when comparing
14299 against zero or when converting operand 1 from fixed point to
14300 floating point. */
14302 if (!is_sse
14303 && (fpcmp_mode == CCFPUmode
14304 || (op_mode == XFmode
14305 && ! (standard_80387_constant_p (op0) == 1
14306 || standard_80387_constant_p (op1) == 1)
14307 && GET_CODE (op1) != FLOAT)
14308 || ix86_use_fcomi_compare (code)))
14310 op0 = force_reg (op_mode, op0);
14311 op1 = force_reg (op_mode, op1);
14313 else
14315 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14316 things around if they appear profitable, otherwise force op0
14317 into a register. */
14319 if (standard_80387_constant_p (op0) == 0
14320 || (MEM_P (op0)
14321 && ! (standard_80387_constant_p (op1) == 0
14322 || MEM_P (op1))))
14324 rtx tmp;
14325 tmp = op0, op0 = op1, op1 = tmp;
14326 code = swap_condition (code);
14329 if (!REG_P (op0))
14330 op0 = force_reg (op_mode, op0);
14332 if (CONSTANT_P (op1))
14334 int tmp = standard_80387_constant_p (op1);
14335 if (tmp == 0)
14336 op1 = validize_mem (force_const_mem (op_mode, op1));
14337 else if (tmp == 1)
14339 if (TARGET_CMOVE)
14340 op1 = force_reg (op_mode, op1);
14342 else
14343 op1 = force_reg (op_mode, op1);
14347 /* Try to rearrange the comparison to make it cheaper. */
14348 if (ix86_fp_comparison_cost (code)
14349 > ix86_fp_comparison_cost (swap_condition (code))
14350 && (REG_P (op1) || can_create_pseudo_p ()))
14352 rtx tmp;
14353 tmp = op0, op0 = op1, op1 = tmp;
14354 code = swap_condition (code);
14355 if (!REG_P (op0))
14356 op0 = force_reg (op_mode, op0);
14359 *pop0 = op0;
14360 *pop1 = op1;
14361 return code;
14364 /* Convert the comparison codes we use to represent FP comparisons into
14365 the integer codes that will result in a proper branch. Return UNKNOWN
14366 if no such code is available. */
14368 enum rtx_code
14369 ix86_fp_compare_code_to_integer (enum rtx_code code)
14371 switch (code)
14373 case GT:
14374 return GTU;
14375 case GE:
14376 return GEU;
14377 case ORDERED:
14378 case UNORDERED:
14379 return code;
14381 case UNEQ:
14382 return EQ;
14384 case UNLT:
14385 return LTU;
14387 case UNLE:
14388 return LEU;
14390 case LTGT:
14391 return NE;
14393 default:
14394 return UNKNOWN;
14398 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14400 static rtx
14401 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14402 rtx *second_test, rtx *bypass_test)
14404 enum machine_mode fpcmp_mode, intcmp_mode;
14405 rtx tmp, tmp2;
14406 int cost = ix86_fp_comparison_cost (code);
14407 enum rtx_code bypass_code, first_code, second_code;
14409 fpcmp_mode = ix86_fp_compare_mode (code);
14410 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14412 if (second_test)
14413 *second_test = NULL_RTX;
14414 if (bypass_test)
14415 *bypass_test = NULL_RTX;
14417 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14419 /* Do fcomi/sahf based test when profitable. */
14420 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14421 && (bypass_code == UNKNOWN || bypass_test)
14422 && (second_code == UNKNOWN || second_test))
14424 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14425 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14426 tmp);
14427 if (TARGET_CMOVE)
14428 emit_insn (tmp);
14429 else
14431 gcc_assert (TARGET_SAHF);
14433 if (!scratch)
14434 scratch = gen_reg_rtx (HImode);
14435 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14437 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14440 /* The FP codes work out to act like unsigned. */
14441 intcmp_mode = fpcmp_mode;
14442 code = first_code;
14443 if (bypass_code != UNKNOWN)
14444 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14445 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14446 const0_rtx);
14447 if (second_code != UNKNOWN)
14448 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14449 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14450 const0_rtx);
14452 else
14454 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14455 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14456 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14457 if (!scratch)
14458 scratch = gen_reg_rtx (HImode);
14459 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14461 /* In the unordered case, we have to check C2 for NaNs, which
14462 doesn't happen to work out to anything nice combination-wise.
14463 So do some bit twiddling on the value we've got in AH to come
14464 up with an appropriate set of condition codes. */
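/* Editorial note: after fnstsw the x87 condition bits sit in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40; 0x45 is their union and 0x44 is
   C2|C3.  For a compare, C0 means "less" and C3 "equal", while C2 is
   set (together with C0 and C3) only for unordered operands.  The
   constants tested below are combinations of exactly these bits.  */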
14466 intcmp_mode = CCNOmode;
14467 switch (code)
14469 case GT:
14470 case UNGT:
14471 if (code == GT || !TARGET_IEEE_FP)
14473 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14474 code = EQ;
14476 else
14478 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14479 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14480 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14481 intcmp_mode = CCmode;
14482 code = GEU;
14484 break;
14485 case LT:
14486 case UNLT:
14487 if (code == LT && TARGET_IEEE_FP)
14489 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14490 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14491 intcmp_mode = CCmode;
14492 code = EQ;
14494 else
14496 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14497 code = NE;
14499 break;
14500 case GE:
14501 case UNGE:
14502 if (code == GE || !TARGET_IEEE_FP)
14504 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14505 code = EQ;
14507 else
14509 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14510 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14511 GEN_INT (0x01)));
14512 code = NE;
14514 break;
14515 case LE:
14516 case UNLE:
14517 if (code == LE && TARGET_IEEE_FP)
14519 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14520 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14521 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14522 intcmp_mode = CCmode;
14523 code = LTU;
14525 else
14527 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14528 code = NE;
14530 break;
14531 case EQ:
14532 case UNEQ:
14533 if (code == EQ && TARGET_IEEE_FP)
14535 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14536 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14537 intcmp_mode = CCmode;
14538 code = EQ;
14540 else
14542 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14543 code = NE;
14546 break;
14547 case NE:
14548 case LTGT:
14549 if (code == NE && TARGET_IEEE_FP)
14551 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14552 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14553 GEN_INT (0x40)));
14554 code = NE;
14556 else
14558 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14559 code = EQ;
14561 break;
14563 case UNORDERED:
14564 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14565 code = NE;
14566 break;
14567 case ORDERED:
14568 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14569 code = EQ;
14570 break;
14572 default:
14573 gcc_unreachable ();
14577 /* Return the test that should be put into the flags user, i.e.
14578 the bcc, scc, or cmov instruction. */
14579 return gen_rtx_fmt_ee (code, VOIDmode,
14580 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14581 const0_rtx);
14584 rtx
14585 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14587 rtx op0, op1, ret;
14588 op0 = ix86_compare_op0;
14589 op1 = ix86_compare_op1;
14591 if (second_test)
14592 *second_test = NULL_RTX;
14593 if (bypass_test)
14594 *bypass_test = NULL_RTX;
14596 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14597 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14599 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14601 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14602 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14603 second_test, bypass_test);
14605 else
14606 ret = ix86_expand_int_compare (code, op0, op1);
14608 return ret;
14611 /* Return true if CODE will result in a nontrivial jump sequence. */
14612 bool
14613 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14615 enum rtx_code bypass_code, first_code, second_code;
14616 if (!TARGET_CMOVE)
14617 return true;
14618 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14619 return bypass_code != UNKNOWN || second_code != UNKNOWN;
14622 void
14623 ix86_expand_branch (enum rtx_code code, rtx label)
14625 rtx tmp;
14627 switch (GET_MODE (ix86_compare_op0))
14629 case QImode:
14630 case HImode:
14631 case SImode:
14632 simple:
14633 tmp = ix86_expand_compare (code, NULL, NULL);
14634 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14635 gen_rtx_LABEL_REF (VOIDmode, label),
14636 pc_rtx);
14637 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14638 return;
14640 case SFmode:
14641 case DFmode:
14642 case XFmode:
14644 rtvec vec;
14645 int use_fcomi;
14646 enum rtx_code bypass_code, first_code, second_code;
14648 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14649 &ix86_compare_op1);
14651 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14653 /* Check whether we will use the natural sequence with one jump. If
14654 so, we can expand the jump early. Otherwise delay expansion by
14655 creating a compound insn so as not to confuse optimizers. */
14656 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14658 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14659 gen_rtx_LABEL_REF (VOIDmode, label),
14660 pc_rtx, NULL_RTX, NULL_RTX);
14662 else
14664 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14665 ix86_compare_op0, ix86_compare_op1);
14666 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14667 gen_rtx_LABEL_REF (VOIDmode, label),
14668 pc_rtx);
14669 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14671 use_fcomi = ix86_use_fcomi_compare (code);
14672 vec = rtvec_alloc (3 + !use_fcomi);
14673 RTVEC_ELT (vec, 0) = tmp;
14674 RTVEC_ELT (vec, 1)
14675 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14676 RTVEC_ELT (vec, 2)
14677 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14678 if (! use_fcomi)
14679 RTVEC_ELT (vec, 3)
14680 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14682 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14684 return;
14687 case DImode:
14688 if (TARGET_64BIT)
14689 goto simple;
14690 case TImode:
14691 /* Expand a double-word (DImode/TImode) branch into multiple compare+branch. */
14693 rtx lo[2], hi[2], label2;
14694 enum rtx_code code1, code2, code3;
14695 enum machine_mode submode;
14697 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14699 tmp = ix86_compare_op0;
14700 ix86_compare_op0 = ix86_compare_op1;
14701 ix86_compare_op1 = tmp;
14702 code = swap_condition (code);
14704 if (GET_MODE (ix86_compare_op0) == DImode)
14706 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14707 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14708 submode = SImode;
14710 else
14712 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14713 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14714 submode = DImode;
14717 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14718 avoid two branches. This costs one extra insn, so disable when
14719 optimizing for size. */
14721 if ((code == EQ || code == NE)
14722 && (!optimize_insn_for_size_p ()
14723 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14725 rtx xor0, xor1;
14727 xor1 = hi[0];
14728 if (hi[1] != const0_rtx)
14729 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14730 NULL_RTX, 0, OPTAB_WIDEN);
14732 xor0 = lo[0];
14733 if (lo[1] != const0_rtx)
14734 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14735 NULL_RTX, 0, OPTAB_WIDEN);
14737 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14738 NULL_RTX, 0, OPTAB_WIDEN);
14740 ix86_compare_op0 = tmp;
14741 ix86_compare_op1 = const0_rtx;
14742 ix86_expand_branch (code, label);
14743 return;
14746 /* Otherwise, if we are doing a less-than or greater-or-equal-than
14747 comparison, op1 is a constant, and the low word is zero, then we
14748 can just examine the high word. Similarly for a low word of -1
14749 and less-or-equal-than or greater-than. */
14751 if (CONST_INT_P (hi[1]))
14752 switch (code)
14754 case LT: case LTU: case GE: case GEU:
14755 if (lo[1] == const0_rtx)
14757 ix86_compare_op0 = hi[0];
14758 ix86_compare_op1 = hi[1];
14759 ix86_expand_branch (code, label);
14760 return;
14762 break;
14763 case LE: case LEU: case GT: case GTU:
14764 if (lo[1] == constm1_rtx)
14766 ix86_compare_op0 = hi[0];
14767 ix86_compare_op1 = hi[1];
14768 ix86_expand_branch (code, label);
14769 return;
14771 break;
14772 default:
14773 break;
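/* Editorial example: for a 64-bit "x < 0x300000000" the low word of
   the constant is zero, so the test reduces to hi(x) < 3; for
   "x > 0x2ffffffff" the low word is all ones, so it reduces to
   hi(x) > 2.  */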
14776 /* Otherwise, we need two or three jumps. */
14778 label2 = gen_label_rtx ();
14780 code1 = code;
14781 code2 = swap_condition (code);
14782 code3 = unsigned_condition (code);
14784 switch (code)
14786 case LT: case GT: case LTU: case GTU:
14787 break;
14789 case LE: code1 = LT; code2 = GT; break;
14790 case GE: code1 = GT; code2 = LT; break;
14791 case LEU: code1 = LTU; code2 = GTU; break;
14792 case GEU: code1 = GTU; code2 = LTU; break;
14794 case EQ: code1 = UNKNOWN; code2 = NE; break;
14795 case NE: code2 = UNKNOWN; break;
14797 default:
14798 gcc_unreachable ();
14801 /*
14802 * a < b =>
14803 * if (hi(a) < hi(b)) goto true;
14804 * if (hi(a) > hi(b)) goto false;
14805 * if (lo(a) < lo(b)) goto true;
14806 * false:
14807 */
14809 ix86_compare_op0 = hi[0];
14810 ix86_compare_op1 = hi[1];
14812 if (code1 != UNKNOWN)
14813 ix86_expand_branch (code1, label);
14814 if (code2 != UNKNOWN)
14815 ix86_expand_branch (code2, label2);
14817 ix86_compare_op0 = lo[0];
14818 ix86_compare_op1 = lo[1];
14819 ix86_expand_branch (code3, label);
14821 if (code2 != UNKNOWN)
14822 emit_label (label2);
14823 return;
14826 default:
14827 /* If we have already emitted a compare insn, go straight to simple.
14828 ix86_expand_compare won't emit anything if ix86_compare_emitted
14829 is non-NULL. */
14830 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14831 goto simple;
14835 /* Split branch based on floating point condition. */
14836 void
14837 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14838 rtx target1, rtx target2, rtx tmp, rtx pushed)
14840 rtx second, bypass;
14841 rtx label = NULL_RTX;
14842 rtx condition;
14843 int bypass_probability = -1, second_probability = -1, probability = -1;
14844 rtx i;
14846 if (target2 != pc_rtx)
14848 rtx tmp = target2;
14849 code = reverse_condition_maybe_unordered (code);
14850 target2 = target1;
14851 target1 = tmp;
14854 condition = ix86_expand_fp_compare (code, op1, op2,
14855 tmp, &second, &bypass);
14857 /* Remove pushed operand from stack. */
14858 if (pushed)
14859 ix86_free_from_memory (GET_MODE (pushed));
14861 if (split_branch_probability >= 0)
14863 /* Distribute the probabilities across the jumps.
14864 Assume that BYPASS and SECOND always test
14865 for UNORDERED. */
14866 probability = split_branch_probability;
14868 /* A value of 1 is low enough that the probability does not need
14869 to be updated. Later we may run some experiments and see
14870 if unordered values are more frequent in practice. */
14871 if (bypass)
14872 bypass_probability = 1;
14873 if (second)
14874 second_probability = 1;
14876 if (bypass != NULL_RTX)
14878 label = gen_label_rtx ();
14879 i = emit_jump_insn (gen_rtx_SET
14880 (VOIDmode, pc_rtx,
14881 gen_rtx_IF_THEN_ELSE (VOIDmode,
14882 bypass,
14883 gen_rtx_LABEL_REF (VOIDmode,
14884 label),
14885 pc_rtx)));
14886 if (bypass_probability >= 0)
14887 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
14889 i = emit_jump_insn (gen_rtx_SET
14890 (VOIDmode, pc_rtx,
14891 gen_rtx_IF_THEN_ELSE (VOIDmode,
14892 condition, target1, target2)));
14893 if (probability >= 0)
14894 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
14895 if (second != NULL_RTX)
14897 i = emit_jump_insn (gen_rtx_SET
14898 (VOIDmode, pc_rtx,
14899 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14900 target2)));
14901 if (second_probability >= 0)
14902 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
14904 if (label != NULL_RTX)
14905 emit_label (label);
14908 int
14909 ix86_expand_setcc (enum rtx_code code, rtx dest)
14911 rtx ret, tmp, tmpreg, equiv;
14912 rtx second_test, bypass_test;
14914 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14915 return 0; /* FAIL */
14917 gcc_assert (GET_MODE (dest) == QImode);
14919 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14920 PUT_MODE (ret, QImode);
14922 tmp = dest;
14923 tmpreg = dest;
14925 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14926 if (bypass_test || second_test)
14928 rtx test = second_test;
14929 int bypass = 0;
14930 rtx tmp2 = gen_reg_rtx (QImode);
14931 if (bypass_test)
14933 gcc_assert (!second_test);
14934 test = bypass_test;
14935 bypass = 1;
14936 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14938 PUT_MODE (test, QImode);
14939 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14941 if (bypass)
14942 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14943 else
14944 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14947 /* Attach a REG_EQUAL note describing the comparison result. */
14948 if (ix86_compare_op0 && ix86_compare_op1)
14950 equiv = simplify_gen_relational (code, QImode,
14951 GET_MODE (ix86_compare_op0),
14952 ix86_compare_op0, ix86_compare_op1);
14953 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14956 return 1; /* DONE */
14959 /* Expand a comparison setting or clearing the carry flag. Return true
14960 when successful, and set *POP to the comparison operation. */
14961 static bool
14962 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14964 enum machine_mode mode =
14965 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14967 /* Do not handle DImode compares that go through the special path. */
14968 if (mode == (TARGET_64BIT ? TImode : DImode))
14969 return false;
14971 if (SCALAR_FLOAT_MODE_P (mode))
14973 rtx second_test = NULL, bypass_test = NULL;
14974 rtx compare_op, compare_seq;
14976 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14978 /* Shortcut: the following common codes never translate
14979 into carry flag compares. */
14980 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14981 || code == ORDERED || code == UNORDERED)
14982 return false;
14984 /* These comparisons require the zero flag; swap the operands so that they no longer do. */
14985 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14986 && !TARGET_IEEE_FP)
14988 rtx tmp = op0;
14989 op0 = op1;
14990 op1 = tmp;
14991 code = swap_condition (code);
14994 /* Try to expand the comparison and verify that we end up with a
14995 carry-flag-based comparison. This fails only when we decide
14996 to expand the comparison using arithmetic, which is not a
14997 common scenario. */
14998 start_sequence ();
14999 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15000 &second_test, &bypass_test);
15001 compare_seq = get_insns ();
15002 end_sequence ();
15004 if (second_test || bypass_test)
15005 return false;
15007 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15008 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15009 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15010 else
15011 code = GET_CODE (compare_op);
15013 if (code != LTU && code != GEU)
15014 return false;
15016 emit_insn (compare_seq);
15017 *pop = compare_op;
15018 return true;
15021 if (!INTEGRAL_MODE_P (mode))
15022 return false;
15024 switch (code)
15026 case LTU:
15027 case GEU:
15028 break;
15030 /* Convert a==0 into (unsigned)a<1. */
15031 case EQ:
15032 case NE:
15033 if (op1 != const0_rtx)
15034 return false;
15035 op1 = const1_rtx;
15036 code = (code == EQ ? LTU : GEU);
15037 break;
15039 /* Convert a>b into b<a or a>=b+1. */
15040 case GTU:
15041 case LEU:
15042 if (CONST_INT_P (op1))
15044 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15045 /* Bail out on overflow. We could still swap the operands, but that
15046 would force loading of the constant into a register. */
15047 if (op1 == const0_rtx
15048 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15049 return false;
15050 code = (code == GTU ? GEU : LTU);
15052 else
15054 rtx tmp = op1;
15055 op1 = op0;
15056 op0 = tmp;
15057 code = (code == GTU ? LTU : GEU);
15059 break;
15061 /* Convert a>=0 into (unsigned)a<0x80000000. */
15062 case LT:
15063 case GE:
15064 if (mode == DImode || op1 != const0_rtx)
15065 return false;
15066 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15067 code = (code == LT ? GEU : LTU);
15068 break;
15069 case LE:
15070 case GT:
15071 if (mode == DImode || op1 != constm1_rtx)
15072 return false;
15073 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15074 code = (code == LE ? GEU : LTU);
15075 break;
15077 default:
15078 return false;
15080 /* Swapping operands may cause the constant to appear as the first operand. */
15081 if (!nonimmediate_operand (op0, VOIDmode))
15083 if (!can_create_pseudo_p ())
15084 return false;
15085 op0 = force_reg (mode, op0);
15087 ix86_compare_op0 = op0;
15088 ix86_compare_op1 = op1;
15089 *pop = ix86_expand_compare (code, NULL, NULL);
15090 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15091 return true;
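/* Editorial examples of the rewrites above, in two's complement:
   "a == 0" becomes "(unsigned) a < 1" (LTU); "a >u 4" becomes
   "a >=u 5" (GEU, constant plus one); "a >= 0" becomes
   "(unsigned) a < 0x80000000" (LTU).  Each of these can be answered
   from the carry flag alone after a cmp.  */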
15094 int
15095 ix86_expand_int_movcc (rtx operands[])
15097 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15098 rtx compare_seq, compare_op;
15099 rtx second_test, bypass_test;
15100 enum machine_mode mode = GET_MODE (operands[0]);
15101 bool sign_bit_compare_p = false;
15103 start_sequence ();
15104 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15105 compare_seq = get_insns ();
15106 end_sequence ();
15108 compare_code = GET_CODE (compare_op);
15110 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15111 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15112 sign_bit_compare_p = true;
15114 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15115 HImode insns, we'd be swallowed in word prefix ops. */
15117 if ((mode != HImode || TARGET_FAST_PREFIX)
15118 && (mode != (TARGET_64BIT ? TImode : DImode))
15119 && CONST_INT_P (operands[2])
15120 && CONST_INT_P (operands[3]))
15122 rtx out = operands[0];
15123 HOST_WIDE_INT ct = INTVAL (operands[2]);
15124 HOST_WIDE_INT cf = INTVAL (operands[3]);
15125 HOST_WIDE_INT diff;
15127 diff = ct - cf;
15128 /* Sign bit compares are better done using shifts than by using
15129 sbb. */
15130 if (sign_bit_compare_p
15131 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15132 ix86_compare_op1, &compare_op))
15134 /* Detect overlap between destination and compare sources. */
15135 rtx tmp = out;
15137 if (!sign_bit_compare_p)
15139 bool fpcmp = false;
15141 compare_code = GET_CODE (compare_op);
15143 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15144 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15146 fpcmp = true;
15147 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15150 /* To simplify the rest of the code, restrict to the GEU case. */
15151 if (compare_code == LTU)
15153 HOST_WIDE_INT tmp = ct;
15154 ct = cf;
15155 cf = tmp;
15156 compare_code = reverse_condition (compare_code);
15157 code = reverse_condition (code);
15159 else
15161 if (fpcmp)
15162 PUT_CODE (compare_op,
15163 reverse_condition_maybe_unordered
15164 (GET_CODE (compare_op)));
15165 else
15166 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15168 diff = ct - cf;
15170 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15171 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15172 tmp = gen_reg_rtx (mode);
15174 if (mode == DImode)
15175 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15176 else
15177 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15179 else
15181 if (code == GT || code == GE)
15182 code = reverse_condition (code);
15183 else
15185 HOST_WIDE_INT tmp = ct;
15186 ct = cf;
15187 cf = tmp;
15188 diff = ct - cf;
15190 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15191 ix86_compare_op1, VOIDmode, 0, -1);
15194 if (diff == 1)
15196 /*
15197 * cmpl op0,op1
15198 * sbbl dest,dest
15199 * [addl dest, ct]
15201 * Size 5 - 8.
15202 */
15203 if (ct)
15204 tmp = expand_simple_binop (mode, PLUS,
15205 tmp, GEN_INT (ct),
15206 copy_rtx (tmp), 1, OPTAB_DIRECT);
15208 else if (cf == -1)
15210 /*
15211 * cmpl op0,op1
15212 * sbbl dest,dest
15213 * orl $ct, dest
15215 * Size 8.
15216 */
15217 tmp = expand_simple_binop (mode, IOR,
15218 tmp, GEN_INT (ct),
15219 copy_rtx (tmp), 1, OPTAB_DIRECT);
15221 else if (diff == -1 && ct)
15223 /*
15224 * cmpl op0,op1
15225 * sbbl dest,dest
15226 * notl dest
15227 * [addl dest, cf]
15229 * Size 8 - 11.
15230 */
15231 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15232 if (cf)
15233 tmp = expand_simple_binop (mode, PLUS,
15234 copy_rtx (tmp), GEN_INT (cf),
15235 copy_rtx (tmp), 1, OPTAB_DIRECT);
15237 else
15239 /*
15240 * cmpl op0,op1
15241 * sbbl dest,dest
15242 * [notl dest]
15243 * andl cf - ct, dest
15244 * [addl dest, ct]
15246 * Size 8 - 11.
15247 */
15249 if (cf == 0)
15251 cf = ct;
15252 ct = 0;
15253 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15256 tmp = expand_simple_binop (mode, AND,
15257 copy_rtx (tmp),
15258 gen_int_mode (cf - ct, mode),
15259 copy_rtx (tmp), 1, OPTAB_DIRECT);
15260 if (ct)
15261 tmp = expand_simple_binop (mode, PLUS,
15262 copy_rtx (tmp), GEN_INT (ct),
15263 copy_rtx (tmp), 1, OPTAB_DIRECT);
15266 if (!rtx_equal_p (tmp, out))
15267 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15269 return 1; /* DONE */
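/* Editorial sketch of the sbb idiom above: "cmpl; sbbl dest,dest"
   leaves dest equal to -1 when the unsigned compare set the carry
   flag and 0 otherwise, so
     dest = (((a < b) ? -1 : 0) & (ct - cf)) + cf;
   selects ct or cf without a branch.  The diff == 1, cf == -1 and
   diff == -1 cases above are the same idea with parts of the mask
   arithmetic folded into a single add, or or not insn.  */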
15272 if (diff < 0)
15274 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15276 HOST_WIDE_INT tmp;
15277 tmp = ct, ct = cf, cf = tmp;
15278 diff = -diff;
15280 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15282 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15284 /* We may be reversing an unordered compare to a normal compare, which
15285 is not valid in general (we may convert a non-trapping condition
15286 into a trapping one); however, on i386 we currently emit all
15287 comparisons unordered. */
15288 compare_code = reverse_condition_maybe_unordered (compare_code);
15289 code = reverse_condition_maybe_unordered (code);
15291 else
15293 compare_code = reverse_condition (compare_code);
15294 code = reverse_condition (code);
15298 compare_code = UNKNOWN;
15299 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15300 && CONST_INT_P (ix86_compare_op1))
15302 if (ix86_compare_op1 == const0_rtx
15303 && (code == LT || code == GE))
15304 compare_code = code;
15305 else if (ix86_compare_op1 == constm1_rtx)
15307 if (code == LE)
15308 compare_code = LT;
15309 else if (code == GT)
15310 compare_code = GE;
15314 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15315 if (compare_code != UNKNOWN
15316 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15317 && (cf == -1 || ct == -1))
15319 /* If the lea code below could be used, optimize only
15320 if it results in a 2-insn sequence. */
15322 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15323 || diff == 3 || diff == 5 || diff == 9)
15324 || (compare_code == LT && ct == -1)
15325 || (compare_code == GE && cf == -1))
15327 /*
15328 * notl op1 (if necessary)
15329 * sarl $31, op1
15330 * orl cf, op1
15331 */
15332 if (ct != -1)
15334 cf = ct;
15335 ct = -1;
15336 code = reverse_condition (code);
15339 out = emit_store_flag (out, code, ix86_compare_op0,
15340 ix86_compare_op1, VOIDmode, 0, -1);
15342 out = expand_simple_binop (mode, IOR,
15343 out, GEN_INT (cf),
15344 out, 1, OPTAB_DIRECT);
15345 if (out != operands[0])
15346 emit_move_insn (operands[0], out);
15348 return 1; /* DONE */
15353 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15354 || diff == 3 || diff == 5 || diff == 9)
15355 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15356 && (mode != DImode
15357 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15359 /*
15360 * xorl dest,dest
15361 * cmpl op1,op2
15362 * setcc dest
15363 * lea cf(dest*(ct-cf)),dest
15365 * Size 14.
15367 * This also catches the degenerate setcc-only case.
15368 */
15370 rtx tmp;
15371 int nops;
15373 out = emit_store_flag (out, code, ix86_compare_op0,
15374 ix86_compare_op1, VOIDmode, 0, 1);
15376 nops = 0;
15377 /* On x86_64 the lea instruction operates on Pmode, so we need
15378 to get the arithmetic done in the proper mode to match. */
15379 if (diff == 1)
15380 tmp = copy_rtx (out);
15381 else
15383 rtx out1;
15384 out1 = copy_rtx (out);
15385 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15386 nops++;
15387 if (diff & 1)
15389 tmp = gen_rtx_PLUS (mode, tmp, out1);
15390 nops++;
15393 if (cf != 0)
15395 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15396 nops++;
15398 if (!rtx_equal_p (tmp, out))
15400 if (nops == 1)
15401 out = force_operand (tmp, copy_rtx (out));
15402 else
15403 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15405 if (!rtx_equal_p (out, operands[0]))
15406 emit_move_insn (operands[0], copy_rtx (out));
15408 return 1; /* DONE */
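/* Editorial sketch of the lea idiom above: with flag = (cond ? 1 : 0)
   from setcc, the lea computes dest = cf + flag * diff, and diff =
   ct - cf is restricted to 1, 2, 3, 4, 5, 8 or 9 -- exactly the
   base/index/scale combinations a single lea can encode.  */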
15411 /*
15412 * General case:            Jumpful:
15413 *   xorl dest,dest           cmpl op1, op2
15414 *   cmpl op1, op2            movl ct, dest
15415 *   setcc dest               jcc 1f
15416 *   decl dest                movl cf, dest
15417 *   andl (cf-ct),dest      1:
15418 *   addl ct,dest
15420 * Size 20.                 Size 14.
15422 * This is reasonably steep, but branch mispredict costs are
15423 * high on modern cpus, so consider failing only if optimizing
15424 * for space.
15425 */
15427 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15428 && BRANCH_COST (optimize_insn_for_speed_p (),
15429 false) >= 2)
15431 if (cf == 0)
15433 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15435 cf = ct;
15436 ct = 0;
15438 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15440 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15442 /* We may be reversing an unordered compare to a normal compare,
15443 which is not valid in general (we may convert a non-trapping
15444 condition into a trapping one); however, on i386 we currently
15445 emit all comparisons unordered. */
15446 code = reverse_condition_maybe_unordered (code);
15448 else
15450 code = reverse_condition (code);
15451 if (compare_code != UNKNOWN)
15452 compare_code = reverse_condition (compare_code);
15456 if (compare_code != UNKNOWN)
15458 /* notl op1 (if needed)
15459 sarl $31, op1
15460 andl (cf-ct), op1
15461 addl ct, op1
15463 For x < 0 (resp. x <= -1) there will be no notl,
15464 so if possible swap the constants to get rid of the
15465 complement.
15466 True/false will be -1/0 while code below (store flag
15467 followed by decrement) is 0/-1, so the constants need
15468 to be exchanged once more. */
15470 if (compare_code == GE || !cf)
15472 code = reverse_condition (code);
15473 compare_code = LT;
15475 else
15477 HOST_WIDE_INT tmp = cf;
15478 cf = ct;
15479 ct = tmp;
15482 out = emit_store_flag (out, code, ix86_compare_op0,
15483 ix86_compare_op1, VOIDmode, 0, -1);
15485 else
15487 out = emit_store_flag (out, code, ix86_compare_op0,
15488 ix86_compare_op1, VOIDmode, 0, 1);
15490 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15491 copy_rtx (out), 1, OPTAB_DIRECT);
15494 out = expand_simple_binop (mode, AND, copy_rtx (out),
15495 gen_int_mode (cf - ct, mode),
15496 copy_rtx (out), 1, OPTAB_DIRECT);
15497 if (ct)
15498 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15499 copy_rtx (out), 1, OPTAB_DIRECT);
15500 if (!rtx_equal_p (out, operands[0]))
15501 emit_move_insn (operands[0], copy_rtx (out));
15503 return 1; /* DONE */
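/* Editorial sketch of the general branchless sequence above:
     flag = cond ? 1 : 0;              setcc
     mask = flag - 1;                  decl: 0 or -1
     dest = (mask & (cf - ct)) + ct;   andl; addl
   which yields ct when the condition holds and cf otherwise.  */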
15507 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15509 /* Try a few things more with specific constants and a variable. */
15511 optab op;
15512 rtx var, orig_out, out, tmp;
15514 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15515 return 0; /* FAIL */
15517 /* If one of the two operands is an interesting constant, load a
15518 constant with the above and mask it in with a logical operation. */
15520 if (CONST_INT_P (operands[2]))
15522 var = operands[3];
15523 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15524 operands[3] = constm1_rtx, op = and_optab;
15525 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15526 operands[3] = const0_rtx, op = ior_optab;
15527 else
15528 return 0; /* FAIL */
15530 else if (CONST_INT_P (operands[3]))
15532 var = operands[2];
15533 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15534 operands[2] = constm1_rtx, op = and_optab;
15535 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15536 operands[2] = const0_rtx, op = ior_optab;
15537 else
15538 return 0; /* FAIL */
15540 else
15541 return 0; /* FAIL */
15543 orig_out = operands[0];
15544 tmp = gen_reg_rtx (mode);
15545 operands[0] = tmp;
15547 /* Recurse to get the constant loaded. */
15548 if (ix86_expand_int_movcc (operands) == 0)
15549 return 0; /* FAIL */
15551 /* Mask in the interesting variable. */
15552 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15553 OPTAB_WIDEN);
15554 if (!rtx_equal_p (out, orig_out))
15555 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15557 return 1; /* DONE */
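/* [Editor's note, not part of the original file] The masking trick
   above in scalar C, for the case where the constant arm is 0; the
   recursive movcc materializes the -1/0 constant:

     static int mask_in_variable_sketch (int cond, int var)
     {
       int tmp = cond ? 0 : -1;   // loaded by the recursive movcc
       return var & tmp;          // == (cond ? 0 : var); the IOR form
                                  // handles the -1 constant likewise
     }
*/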
15561 * For comparison with above,
15563 * movl cf,dest
15564 * movl ct,tmp
15565 * cmpl op1,op2
15566 * cmovcc tmp,dest
15568 * Size 15.
15571 if (! nonimmediate_operand (operands[2], mode))
15572 operands[2] = force_reg (mode, operands[2]);
15573 if (! nonimmediate_operand (operands[3], mode))
15574 operands[3] = force_reg (mode, operands[3]);
15576 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15578 rtx tmp = gen_reg_rtx (mode);
15579 emit_move_insn (tmp, operands[3]);
15580 operands[3] = tmp;
15582 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15584 rtx tmp = gen_reg_rtx (mode);
15585 emit_move_insn (tmp, operands[2]);
15586 operands[2] = tmp;
15589 if (! register_operand (operands[2], VOIDmode)
15590 && (mode == QImode
15591 || ! register_operand (operands[3], VOIDmode)))
15592 operands[2] = force_reg (mode, operands[2]);
15594 if (mode == QImode
15595 && ! register_operand (operands[3], VOIDmode))
15596 operands[3] = force_reg (mode, operands[3]);
15598 emit_insn (compare_seq);
15599 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15600 gen_rtx_IF_THEN_ELSE (mode,
15601 compare_op, operands[2],
15602 operands[3])));
15603 if (bypass_test)
15604 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15605 gen_rtx_IF_THEN_ELSE (mode,
15606 bypass_test,
15607 copy_rtx (operands[3]),
15608 copy_rtx (operands[0]))));
15609 if (second_test)
15610 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15611 gen_rtx_IF_THEN_ELSE (mode,
15612 second_test,
15613 copy_rtx (operands[2]),
15614 copy_rtx (operands[0]))));
15616 return 1; /* DONE */
15619 /* Swap, force into registers, or otherwise massage the two operands
15620 to an sse comparison with a mask result. Thus we differ a bit from
15621 ix86_prepare_fp_compare_args which expects to produce a flags result.
15623 The DEST operand exists to help determine whether to commute commutative
15624 operators. The POP0/POP1 operands are updated in place. The new
15625 comparison code is returned, or UNKNOWN if not implementable. */
15627 static enum rtx_code
15628 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15629 rtx *pop0, rtx *pop1)
15631 rtx tmp;
15633 switch (code)
15635 case LTGT:
15636 case UNEQ:
15637 /* We have no LTGT as an operator. We could implement it with
15638 NE & ORDERED, but this requires an extra temporary. It's
15639 not clear that it's worth it. */
15640 return UNKNOWN;
15642 case LT:
15643 case LE:
15644 case UNGT:
15645 case UNGE:
15646 /* These are supported directly. */
15647 break;
15649 case EQ:
15650 case NE:
15651 case UNORDERED:
15652 case ORDERED:
15653 /* For commutative operators, try to canonicalize the destination
15654 operand to be first in the comparison - this helps reload to
15655 avoid extra moves. */
15656 if (!dest || !rtx_equal_p (dest, *pop1))
15657 break;
15658 /* FALLTHRU */
15660 case GE:
15661 case GT:
15662 case UNLE:
15663 case UNLT:
15664 /* These are not supported directly. Swap the comparison operands
15665 to transform into something that is supported. */
15666 tmp = *pop0;
15667 *pop0 = *pop1;
15668 *pop1 = tmp;
15669 code = swap_condition (code);
15670 break;
15672 default:
15673 gcc_unreachable ();
15676 return code;
15679 /* Detect conditional moves that exactly match min/max operational
15680 semantics. Note that this is IEEE safe, as long as we don't
15681 interchange the operands.
15683 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15684 and TRUE if the operation is successful and instructions are emitted. */
15686 static bool
15687 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15688 rtx cmp_op1, rtx if_true, rtx if_false)
15690 enum machine_mode mode;
15691 bool is_min;
15692 rtx tmp;
15694 if (code == LT)
15696 else if (code == UNGE)
15698 tmp = if_true;
15699 if_true = if_false;
15700 if_false = tmp;
15702 else
15703 return false;
15705 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15706 is_min = true;
15707 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15708 is_min = false;
15709 else
15710 return false;
15712 mode = GET_MODE (dest);
15714 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15715 but MODE may be a vector mode and thus not appropriate. */
15716 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15718 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15719 rtvec v;
15721 if_true = force_reg (mode, if_true);
15722 v = gen_rtvec (2, if_true, if_false);
15723 tmp = gen_rtx_UNSPEC (mode, v, u);
15725 else
15727 code = is_min ? SMIN : SMAX;
15728 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15731 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15732 return true;
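/* [Editor's note, not part of the original file] Scalar shape of the
   pattern matched above: a < b ? a : b is a min with
   cmp_op0 == if_true.  SSE min/max are not commutative when NaNs or
   signed zeros are involved, which is why the IEEE-safe path above
   preserves the operand order via the UNSPEC.  */
static double
fp_min_pattern_sketch (double a, double b)
{
  return a < b ? a : b;		/* matches the is_min case */
}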
15735 /* Expand an sse vector comparison. Return the register with the result. */
15737 static rtx
15738 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15739 rtx op_true, rtx op_false)
15741 enum machine_mode mode = GET_MODE (dest);
15742 rtx x;
15744 cmp_op0 = force_reg (mode, cmp_op0);
15745 if (!nonimmediate_operand (cmp_op1, mode))
15746 cmp_op1 = force_reg (mode, cmp_op1);
15748 if (optimize
15749 || reg_overlap_mentioned_p (dest, op_true)
15750 || reg_overlap_mentioned_p (dest, op_false))
15751 dest = gen_reg_rtx (mode);
15753 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15754 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15756 return dest;
15759 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15760 operations. This is used for both scalar and vector conditional moves. */
15762 static void
15763 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15765 enum machine_mode mode = GET_MODE (dest);
15766 rtx t2, t3, x;
15768 if (op_false == CONST0_RTX (mode))
15770 op_true = force_reg (mode, op_true);
15771 x = gen_rtx_AND (mode, cmp, op_true);
15772 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15774 else if (op_true == CONST0_RTX (mode))
15776 op_false = force_reg (mode, op_false);
15777 x = gen_rtx_NOT (mode, cmp);
15778 x = gen_rtx_AND (mode, x, op_false);
15779 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15781 else if (TARGET_SSE5)
15783 rtx pcmov = gen_rtx_SET (mode, dest,
15784 gen_rtx_IF_THEN_ELSE (mode, cmp,
15785 op_true,
15786 op_false));
15787 emit_insn (pcmov);
15789 else
15791 op_true = force_reg (mode, op_true);
15792 op_false = force_reg (mode, op_false);
15794 t2 = gen_reg_rtx (mode);
15795 if (optimize)
15796 t3 = gen_reg_rtx (mode);
15797 else
15798 t3 = dest;
15800 x = gen_rtx_AND (mode, op_true, cmp);
15801 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15803 x = gen_rtx_NOT (mode, cmp);
15804 x = gen_rtx_AND (mode, x, op_false);
15805 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15807 x = gen_rtx_IOR (mode, t3, t2);
15808 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
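/* [Editor's note, not part of the original file] The general blend
   emitted above, written as a scalar sketch over one 32-bit lane,
   where CMP is all-ones or all-zeros per element:  */
static unsigned int
sse_blend_sketch (unsigned int cmp, unsigned int op_true,
		  unsigned int op_false)
{
  return (cmp & op_true) | (~cmp & op_false);	/* pand/pandn/por */
}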
15812 /* Expand a floating-point conditional move. Return true if successful. */
15814 int
15815 ix86_expand_fp_movcc (rtx operands[])
15817 enum machine_mode mode = GET_MODE (operands[0]);
15818 enum rtx_code code = GET_CODE (operands[1]);
15819 rtx tmp, compare_op, second_test, bypass_test;
15821 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15823 enum machine_mode cmode;
15825 /* Since we've no cmove for sse registers, don't force bad register
15826 allocation just to gain access to it. Deny movcc when the
15827 comparison mode doesn't match the move mode. */
15828 cmode = GET_MODE (ix86_compare_op0);
15829 if (cmode == VOIDmode)
15830 cmode = GET_MODE (ix86_compare_op1);
15831 if (cmode != mode)
15832 return 0;
15834 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15835 &ix86_compare_op0,
15836 &ix86_compare_op1);
15837 if (code == UNKNOWN)
15838 return 0;
15840 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15841 ix86_compare_op1, operands[2],
15842 operands[3]))
15843 return 1;
15845 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15846 ix86_compare_op1, operands[2], operands[3]);
15847 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15848 return 1;
15851 /* The floating point conditional move instructions don't directly
15852 support conditions resulting from a signed integer comparison. */
15854 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15859 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15861 gcc_assert (!second_test && !bypass_test);
15862 tmp = gen_reg_rtx (QImode);
15863 ix86_expand_setcc (code, tmp);
15864 code = NE;
15865 ix86_compare_op0 = tmp;
15866 ix86_compare_op1 = const0_rtx;
15867 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15869 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15871 tmp = gen_reg_rtx (mode);
15872 emit_move_insn (tmp, operands[3]);
15873 operands[3] = tmp;
15875 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15877 tmp = gen_reg_rtx (mode);
15878 emit_move_insn (tmp, operands[2]);
15879 operands[2] = tmp;
15882 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15883 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15884 operands[2], operands[3])));
15885 if (bypass_test)
15886 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15887 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15888 operands[3], operands[0])));
15889 if (second_test)
15890 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15891 gen_rtx_IF_THEN_ELSE (mode, second_test,
15892 operands[2], operands[0])));
15894 return 1;
15897 /* Expand a floating-point vector conditional move; a vcond operation
15898 rather than a movcc operation. */
15900 bool
15901 ix86_expand_fp_vcond (rtx operands[])
15903 enum rtx_code code = GET_CODE (operands[3]);
15904 rtx cmp;
15906 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15907 &operands[4], &operands[5]);
15908 if (code == UNKNOWN)
15909 return false;
15911 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15912 operands[5], operands[1], operands[2]))
15913 return true;
15915 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15916 operands[1], operands[2]);
15917 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15918 return true;
15921 /* Expand a signed/unsigned integral vector conditional move. */
15923 bool
15924 ix86_expand_int_vcond (rtx operands[])
15926 enum machine_mode mode = GET_MODE (operands[0]);
15927 enum rtx_code code = GET_CODE (operands[3]);
15928 bool negate = false;
15929 rtx x, cop0, cop1;
15931 cop0 = operands[4];
15932 cop1 = operands[5];
15934 /* SSE5 supports all of the comparisons on all vector int types. */
15935 if (!TARGET_SSE5)
15937 /* Canonicalize the comparison to EQ, GT, GTU. */
15938 switch (code)
15940 case EQ:
15941 case GT:
15942 case GTU:
15943 break;
15945 case NE:
15946 case LE:
15947 case LEU:
15948 code = reverse_condition (code);
15949 negate = true;
15950 break;
15952 case GE:
15953 case GEU:
15954 code = reverse_condition (code);
15955 negate = true;
15956 /* FALLTHRU */
15958 case LT:
15959 case LTU:
15960 code = swap_condition (code);
15961 x = cop0, cop0 = cop1, cop1 = x;
15962 break;
15964 default:
15965 gcc_unreachable ();
15968 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15969 if (mode == V2DImode)
15971 switch (code)
15973 case EQ:
15974 /* SSE4.1 supports EQ. */
15975 if (!TARGET_SSE4_1)
15976 return false;
15977 break;
15979 case GT:
15980 case GTU:
15981 /* SSE4.2 supports GT/GTU. */
15982 if (!TARGET_SSE4_2)
15983 return false;
15984 break;
15986 default:
15987 gcc_unreachable ();
15991 /* Unsigned parallel compare is not supported by the hardware. Play some
15992 tricks to turn this into a signed comparison against 0. */
15993 if (code == GTU)
15995 cop0 = force_reg (mode, cop0);
15997 switch (mode)
15999 case V4SImode:
16000 case V2DImode:
16002 rtx t1, t2, mask;
16004 /* Perform a parallel modulo subtraction. */
16005 t1 = gen_reg_rtx (mode);
16006 emit_insn ((mode == V4SImode
16007 ? gen_subv4si3
16008 : gen_subv2di3) (t1, cop0, cop1));
16010 /* Extract the original sign bit of op0. */
16011 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16012 true, false);
16013 t2 = gen_reg_rtx (mode);
16014 emit_insn ((mode == V4SImode
16015 ? gen_andv4si3
16016 : gen_andv2di3) (t2, cop0, mask));
16018 /* XOR it back into the result of the subtraction. This results
16019 in the sign bit set iff we saw unsigned underflow. */
16020 x = gen_reg_rtx (mode);
16021 emit_insn ((mode == V4SImode
16022 ? gen_xorv4si3
16023 : gen_xorv2di3) (x, t1, t2));
16025 code = GT;
16027 break;
16029 case V16QImode:
16030 case V8HImode:
16031 /* Perform a parallel unsigned saturating subtraction. */
16032 x = gen_reg_rtx (mode);
16033 emit_insn (gen_rtx_SET (VOIDmode, x,
16034 gen_rtx_US_MINUS (mode, cop0, cop1)));
16036 code = EQ;
16037 negate = !negate;
16038 break;
16040 default:
16041 gcc_unreachable ();
16044 cop0 = x;
16045 cop1 = CONST0_RTX (mode);
16049 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16050 operands[1+negate], operands[2-negate]);
16052 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16053 operands[2-negate]);
16054 return true;
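/* [Editor's note, not part of the original file] Scalar sketch of the
   V16QI/V8HI trick above: an unsigned saturating subtraction is zero
   exactly when a <= b, so the expander compares it to zero with EQ and
   flips the selected arms via NEGATE; the net effect is the a > b test
   sketched here.  */
static int
gtu_via_ussub_sketch (unsigned char a, unsigned char b)
{
  unsigned char sat = a > b ? (unsigned char) (a - b) : 0; /* psubusb */
  return sat != 0;		/* EQ against 0, then negated */
}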
16057 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16058 true if we should do zero extension, else sign extension. HIGH_P is
16059 true if we want the N/2 high elements, else the low elements. */
16061 void
16062 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16064 enum machine_mode imode = GET_MODE (operands[1]);
16065 rtx (*unpack)(rtx, rtx, rtx);
16066 rtx se, dest;
16068 switch (imode)
16070 case V16QImode:
16071 if (high_p)
16072 unpack = gen_vec_interleave_highv16qi;
16073 else
16074 unpack = gen_vec_interleave_lowv16qi;
16075 break;
16076 case V8HImode:
16077 if (high_p)
16078 unpack = gen_vec_interleave_highv8hi;
16079 else
16080 unpack = gen_vec_interleave_lowv8hi;
16081 break;
16082 case V4SImode:
16083 if (high_p)
16084 unpack = gen_vec_interleave_highv4si;
16085 else
16086 unpack = gen_vec_interleave_lowv4si;
16087 break;
16088 default:
16089 gcc_unreachable ();
16092 dest = gen_lowpart (imode, operands[0]);
16094 if (unsigned_p)
16095 se = force_reg (imode, CONST0_RTX (imode));
16096 else
16097 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16098 operands[1], pc_rtx, pc_rtx);
16100 emit_insn (unpack (dest, operands[1], se));
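/* [Editor's note, not part of the original file] Why interleaving
   extends: pairing each element with zero (zero extension), or with
   the all-ones/all-zeros sign mask computed by the GT comparison
   above (sign extension), produces the double-width value.  Scalar
   sketch for 16-bit to 32-bit:  */
static int
interleave_extend_sketch (short x, int unsigned_p)
{
  unsigned short lo = (unsigned short) x;
  unsigned short hi = unsigned_p ? 0 : (0 > x ? 0xFFFF : 0); /* `se' */
  return (int) ((unsigned int) lo | ((unsigned int) hi << 16));
}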
16103 /* This function performs the same task as ix86_expand_sse_unpack,
16104 but with SSE4.1 instructions. */
16106 void
16107 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16109 enum machine_mode imode = GET_MODE (operands[1]);
16110 rtx (*unpack)(rtx, rtx);
16111 rtx src, dest;
16113 switch (imode)
16115 case V16QImode:
16116 if (unsigned_p)
16117 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16118 else
16119 unpack = gen_sse4_1_extendv8qiv8hi2;
16120 break;
16121 case V8HImode:
16122 if (unsigned_p)
16123 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16124 else
16125 unpack = gen_sse4_1_extendv4hiv4si2;
16126 break;
16127 case V4SImode:
16128 if (unsigned_p)
16129 unpack = gen_sse4_1_zero_extendv2siv2di2;
16130 else
16131 unpack = gen_sse4_1_extendv2siv2di2;
16132 break;
16133 default:
16134 gcc_unreachable ();
16137 dest = operands[0];
16138 if (high_p)
16140 /* Shift higher 8 bytes to lower 8 bytes. */
16141 src = gen_reg_rtx (imode);
16142 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16143 gen_lowpart (TImode, operands[1]),
16144 GEN_INT (64)));
16146 else
16147 src = operands[1];
16149 emit_insn (unpack (dest, src));
16152 /* This function performs the same task as ix86_expand_sse_unpack,
16153 but with SSE5 instructions. */
16155 void
16156 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16158 enum machine_mode imode = GET_MODE (operands[1]);
16159 int pperm_bytes[16];
16160 int i;
16161 int h = (high_p) ? 8 : 0;
16162 int h2;
16163 int sign_extend;
16164 rtvec v = rtvec_alloc (16);
16165 rtvec vs;
16166 rtx x, p;
16167 rtx op0 = operands[0], op1 = operands[1];
16169 switch (imode)
16171 case V16QImode:
16172 vs = rtvec_alloc (8);
16173 h2 = (high_p) ? 8 : 0;
16174 for (i = 0; i < 8; i++)
16176 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16177 pperm_bytes[2*i+1] = ((unsigned_p)
16178 ? PPERM_ZERO
16179 : PPERM_SIGN | PPERM_SRC2 | i | h);
16182 for (i = 0; i < 16; i++)
16183 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16185 for (i = 0; i < 8; i++)
16186 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16188 p = gen_rtx_PARALLEL (VOIDmode, vs);
16189 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16190 if (unsigned_p)
16191 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16192 else
16193 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
16194 break;
16196 case V8HImode:
16197 vs = rtvec_alloc (4);
16198 h2 = (high_p) ? 4 : 0;
16199 for (i = 0; i < 4; i++)
16201 sign_extend = ((unsigned_p)
16202 ? PPERM_ZERO
16203 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16204 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16205 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16206 pperm_bytes[4*i+2] = sign_extend;
16207 pperm_bytes[4*i+3] = sign_extend;
16210 for (i = 0; i < 16; i++)
16211 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16213 for (i = 0; i < 4; i++)
16214 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16216 p = gen_rtx_PARALLEL (VOIDmode, vs);
16217 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16218 if (unsigned_p)
16219 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16220 else
16221 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
16222 break;
16224 case V4SImode:
16225 vs = rtvec_alloc (2);
16226 h2 = (high_p) ? 2 : 0;
16227 for (i = 0; i < 2; i++)
16229 sign_extend = ((unsigned_p)
16230 ? PPERM_ZERO
16231 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16232 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16233 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16234 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16235 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16236 pperm_bytes[8*i+4] = sign_extend;
16237 pperm_bytes[8*i+5] = sign_extend;
16238 pperm_bytes[8*i+6] = sign_extend;
16239 pperm_bytes[8*i+7] = sign_extend;
16242 for (i = 0; i < 16; i++)
16243 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16245 for (i = 0; i < 2; i++)
16246 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16248 p = gen_rtx_PARALLEL (VOIDmode, vs);
16249 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16250 if (unsigned_p)
16251 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16252 else
16253 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16254 break;
16256 default:
16257 gcc_unreachable ();
16260 return;
16263 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16264 next narrower integer vector type */
16265 void
16266 ix86_expand_sse5_pack (rtx operands[3])
16268 enum machine_mode imode = GET_MODE (operands[0]);
16269 int pperm_bytes[16];
16270 int i;
16271 rtvec v = rtvec_alloc (16);
16272 rtx x;
16273 rtx op0 = operands[0];
16274 rtx op1 = operands[1];
16275 rtx op2 = operands[2];
16277 switch (imode)
16279 case V16QImode:
16280 for (i = 0; i < 8; i++)
16282 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16283 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16286 for (i = 0; i < 16; i++)
16287 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16289 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16290 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16291 break;
16293 case V8HImode:
16294 for (i = 0; i < 4; i++)
16296 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16297 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16298 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16299 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16302 for (i = 0; i < 16; i++)
16303 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16305 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16306 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16307 break;
16309 case V4SImode:
16310 for (i = 0; i < 2; i++)
16312 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16313 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16314 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16315 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16316 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16317 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16318 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16319 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16322 for (i = 0; i < 16; i++)
16323 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16325 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16326 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16327 break;
16329 default:
16330 gcc_unreachable ();
16333 return;
16336 /* Expand conditional increment or decrement using adc/sbb instructions.
16337 The default case using setcc followed by the conditional move can be
16338 done by generic code. */
16339 int
16340 ix86_expand_int_addcc (rtx operands[])
16342 enum rtx_code code = GET_CODE (operands[1]);
16343 rtx compare_op;
16344 rtx val = const0_rtx;
16345 bool fpcmp = false;
16346 enum machine_mode mode = GET_MODE (operands[0]);
16348 if (operands[3] != const1_rtx
16349 && operands[3] != constm1_rtx)
16350 return 0;
16351 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16352 ix86_compare_op1, &compare_op))
16353 return 0;
16354 code = GET_CODE (compare_op);
16356 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16357 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16359 fpcmp = true;
16360 code = ix86_fp_compare_code_to_integer (code);
16363 if (code != LTU)
16365 val = constm1_rtx;
16366 if (fpcmp)
16367 PUT_CODE (compare_op,
16368 reverse_condition_maybe_unordered
16369 (GET_CODE (compare_op)));
16370 else
16371 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16373 PUT_MODE (compare_op, mode);
16375 /* Construct either adc or sbb insn. */
16376 if ((code == LTU) == (operands[3] == constm1_rtx))
16378 switch (GET_MODE (operands[0]))
16380 case QImode:
16381 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16382 break;
16383 case HImode:
16384 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16385 break;
16386 case SImode:
16387 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16388 break;
16389 case DImode:
16390 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16391 break;
16392 default:
16393 gcc_unreachable ();
16396 else
16398 switch (GET_MODE (operands[0]))
16400 case QImode:
16401 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16402 break;
16403 case HImode:
16404 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16405 break;
16406 case SImode:
16407 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16408 break;
16409 case DImode:
16410 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16411 break;
16412 default:
16413 gcc_unreachable ();
16416 return 1; /* DONE */
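/* [Editor's note, not part of the original file] The adc/sbb idiom
   above as a scalar sketch: once the compare leaves the condition in
   the carry flag, x + 0 + CF performs the conditional increment with
   no branch (sbb performs the decrement likewise).  */
static unsigned int
addcc_sketch (unsigned int x, unsigned int a, unsigned int b)
{
  unsigned int carry = a < b;	/* CF after the unsigned compare */
  return x + carry;		/* adc x, 0 */
}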
16420 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16421 works for floating point parameters and non-offsettable memories.
16422 For pushes, it returns just stack offsets; the values will be saved
16423 in the right order. Maximally four parts are generated. */
16425 static int
16426 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16428 int size;
16430 if (!TARGET_64BIT)
16431 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16432 else
16433 size = (GET_MODE_SIZE (mode) + 4) / 8;
16435 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16436 gcc_assert (size >= 2 && size <= 4);
16438 /* Optimize constant pool reference to immediates. This is used by fp
16439 moves, that force all constants to memory to allow combining. */
16440 if (MEM_P (operand) && MEM_READONLY_P (operand))
16442 rtx tmp = maybe_get_pool_constant (operand);
16443 if (tmp)
16444 operand = tmp;
16447 if (MEM_P (operand) && !offsettable_memref_p (operand))
16449 /* The only non-offsettable memories we handle are pushes. */
16450 int ok = push_operand (operand, VOIDmode);
16452 gcc_assert (ok);
16454 operand = copy_rtx (operand);
16455 PUT_MODE (operand, Pmode);
16456 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16457 return size;
16460 if (GET_CODE (operand) == CONST_VECTOR)
16462 enum machine_mode imode = int_mode_for_mode (mode);
16463 /* Caution: if we looked through a constant pool memory above,
16464 the operand may actually have a different mode now. That's
16465 ok, since we want to pun this all the way back to an integer. */
16466 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16467 gcc_assert (operand != NULL);
16468 mode = imode;
16471 if (!TARGET_64BIT)
16473 if (mode == DImode)
16474 split_di (&operand, 1, &parts[0], &parts[1]);
16475 else
16477 int i;
16479 if (REG_P (operand))
16481 gcc_assert (reload_completed);
16482 for (i = 0; i < size; i++)
16483 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16485 else if (offsettable_memref_p (operand))
16487 operand = adjust_address (operand, SImode, 0);
16488 parts[0] = operand;
16489 for (i = 1; i < size; i++)
16490 parts[i] = adjust_address (operand, SImode, 4 * i);
16492 else if (GET_CODE (operand) == CONST_DOUBLE)
16494 REAL_VALUE_TYPE r;
16495 long l[4];
16497 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16498 switch (mode)
16500 case TFmode:
16501 real_to_target (l, &r, mode);
16502 parts[3] = gen_int_mode (l[3], SImode);
16503 parts[2] = gen_int_mode (l[2], SImode);
16504 break;
16505 case XFmode:
16506 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16507 parts[2] = gen_int_mode (l[2], SImode);
16508 break;
16509 case DFmode:
16510 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16511 break;
16512 default:
16513 gcc_unreachable ();
16515 parts[1] = gen_int_mode (l[1], SImode);
16516 parts[0] = gen_int_mode (l[0], SImode);
16518 else
16519 gcc_unreachable ();
16522 else
16524 if (mode == TImode)
16525 split_ti (&operand, 1, &parts[0], &parts[1]);
16526 if (mode == XFmode || mode == TFmode)
16528 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16529 if (REG_P (operand))
16531 gcc_assert (reload_completed);
16532 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16533 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16535 else if (offsettable_memref_p (operand))
16537 operand = adjust_address (operand, DImode, 0);
16538 parts[0] = operand;
16539 parts[1] = adjust_address (operand, upper_mode, 8);
16541 else if (GET_CODE (operand) == CONST_DOUBLE)
16543 REAL_VALUE_TYPE r;
16544 long l[4];
16546 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16547 real_to_target (l, &r, mode);
16549 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16550 if (HOST_BITS_PER_WIDE_INT >= 64)
16551 parts[0]
16552 = gen_int_mode
16553 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16554 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16555 DImode);
16556 else
16557 parts[0] = immed_double_const (l[0], l[1], DImode);
16559 if (upper_mode == SImode)
16560 parts[1] = gen_int_mode (l[2], SImode);
16561 else if (HOST_BITS_PER_WIDE_INT >= 64)
16562 parts[1]
16563 = gen_int_mode
16564 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16565 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16566 DImode);
16567 else
16568 parts[1] = immed_double_const (l[2], l[3], DImode);
16570 else
16571 gcc_unreachable ();
16575 return size;
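/* [Editor's note, not part of the original file] Sketch of the 32-bit
   DImode case above: the value is split into two SImode words, low
   part first.  */
static void
split_di_sketch (unsigned long long x, unsigned int parts[2])
{
  parts[0] = (unsigned int) x;		/* low SImode word */
  parts[1] = (unsigned int) (x >> 32);	/* high SImode word */
}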
16578 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16579 All required insns are emitted directly (the function itself
16580 returns no value). Operands 2-5 receive the destination parts in the
16581 correct order; operands 6-9 receive the source parts.  */
16583 void
16584 ix86_split_long_move (rtx operands[])
16586 rtx part[2][4];
16587 int nparts, i, j;
16588 int push = 0;
16589 int collisions = 0;
16590 enum machine_mode mode = GET_MODE (operands[0]);
16591 bool collisionparts[4];
16593 /* The DFmode expanders may ask us to move a double.
16594 For a 64bit target this is a single move. By hiding that fact
16595 here we simplify the i386.md splitters. */
16596 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16598 /* Optimize constant pool reference to immediates. This is used by
16599 fp moves, that force all constants to memory to allow combining. */
16601 if (MEM_P (operands[1])
16602 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16603 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16604 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16605 if (push_operand (operands[0], VOIDmode))
16607 operands[0] = copy_rtx (operands[0]);
16608 PUT_MODE (operands[0], Pmode);
16610 else
16611 operands[0] = gen_lowpart (DImode, operands[0]);
16612 operands[1] = gen_lowpart (DImode, operands[1]);
16613 emit_move_insn (operands[0], operands[1]);
16614 return;
16617 /* The only non-offsettable memory we handle is a push. */
16618 if (push_operand (operands[0], VOIDmode))
16619 push = 1;
16620 else
16621 gcc_assert (!MEM_P (operands[0])
16622 || offsettable_memref_p (operands[0]));
16624 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16625 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16627 /* When emitting a push, be careful with source operands on the stack. */
16628 if (push && MEM_P (operands[1])
16629 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16630 for (i = 0; i < nparts - 1; i++)
16631 part[1][i] = change_address (part[1][i],
16632 GET_MODE (part[1][i]),
16633 XEXP (part[1][i + 1], 0));
16635 /* We need to do the copy in the right order in case an address
16636 register of the source overlaps the destination. */
16637 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16639 rtx tmp;
16641 for (i = 0; i < nparts; i++)
16643 collisionparts[i]
16644 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16645 if (collisionparts[i])
16646 collisions++;
16649 /* Collision in the middle part can be handled by reordering. */
16650 if (collisions == 1 && nparts == 3 && collisionparts [1])
16652 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16653 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16655 else if (collisions == 1
16656 && nparts == 4
16657 && (collisionparts [1] || collisionparts [2]))
16659 if (collisionparts [1])
16661 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16662 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16664 else
16666 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16667 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16671 /* If there are more collisions, we can't handle it by reordering.
16672 Do an lea to the last part and use only one colliding move. */
16673 else if (collisions > 1)
16675 rtx base;
16677 collisions = 1;
16679 base = part[0][nparts - 1];
16681 /* Handle the case when the last part isn't valid for lea.
16682 Happens in 64-bit mode storing the 12-byte XFmode. */
16683 if (GET_MODE (base) != Pmode)
16684 base = gen_rtx_REG (Pmode, REGNO (base));
16686 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16687 part[1][0] = replace_equiv_address (part[1][0], base);
16688 for (i = 1; i < nparts; i++)
16690 tmp = plus_constant (base, UNITS_PER_WORD * i);
16691 part[1][i] = replace_equiv_address (part[1][i], tmp);
16696 if (push)
16698 if (!TARGET_64BIT)
16700 if (nparts == 3)
16702 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16703 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16704 emit_move_insn (part[0][2], part[1][2]);
16706 else if (nparts == 4)
16708 emit_move_insn (part[0][3], part[1][3]);
16709 emit_move_insn (part[0][2], part[1][2]);
16712 else
16714 /* In 64bit mode we don't have a 32bit push available. In case this is
16715 a register, it is OK - we will just use the larger counterpart. We also
16716 retype memory - these come from an attempt to avoid the REX prefix on
16717 moving the second half of a TFmode value. */
16718 if (GET_MODE (part[1][1]) == SImode)
16720 switch (GET_CODE (part[1][1]))
16722 case MEM:
16723 part[1][1] = adjust_address (part[1][1], DImode, 0);
16724 break;
16726 case REG:
16727 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16728 break;
16730 default:
16731 gcc_unreachable ();
16734 if (GET_MODE (part[1][0]) == SImode)
16735 part[1][0] = part[1][1];
16738 emit_move_insn (part[0][1], part[1][1]);
16739 emit_move_insn (part[0][0], part[1][0]);
16740 return;
16743 /* Choose correct order to not overwrite the source before it is copied. */
16744 if ((REG_P (part[0][0])
16745 && REG_P (part[1][1])
16746 && (REGNO (part[0][0]) == REGNO (part[1][1])
16747 || (nparts == 3
16748 && REGNO (part[0][0]) == REGNO (part[1][2]))
16749 || (nparts == 4
16750 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16751 || (collisions > 0
16752 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16754 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16756 operands[2 + i] = part[0][j];
16757 operands[6 + i] = part[1][j];
16760 else
16762 for (i = 0; i < nparts; i++)
16764 operands[2 + i] = part[0][i];
16765 operands[6 + i] = part[1][i];
16769 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16770 if (optimize_insn_for_size_p ())
16772 for (j = 0; j < nparts - 1; j++)
16773 if (CONST_INT_P (operands[6 + j])
16774 && operands[6 + j] != const0_rtx
16775 && REG_P (operands[2 + j]))
16776 for (i = j; i < nparts - 1; i++)
16777 if (CONST_INT_P (operands[7 + i])
16778 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16779 operands[7 + i] = operands[2 + j];
16782 for (i = 0; i < nparts; i++)
16783 emit_move_insn (operands[2 + i], operands[6 + i]);
16785 return;
16788 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16789 left shift by a constant, either using a single shift or
16790 a sequence of add instructions. */
16792 static void
16793 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16795 if (count == 1)
16797 emit_insn ((mode == DImode
16798 ? gen_addsi3
16799 : gen_adddi3) (operand, operand, operand));
16801 else if (!optimize_insn_for_size_p ()
16802 && count * ix86_cost->add <= ix86_cost->shift_const)
16804 int i;
16805 for (i = 0; i < count; i++)
16807 emit_insn ((mode == DImode
16808 ? gen_addsi3
16809 : gen_adddi3) (operand, operand, operand));
16812 else
16813 emit_insn ((mode == DImode
16814 ? gen_ashlsi3
16815 : gen_ashldi3) (operand, operand, GEN_INT (count)));
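/* [Editor's note, not part of the original file] Each add doubles the
   operand, so COUNT adds equal a left shift by COUNT; the cost test
   above picks whichever form is cheaper on the target.  */
static unsigned int
shl_by_adds_sketch (unsigned int x, int count)
{
  int i;
  for (i = 0; i < count; i++)
    x += x;			/* one add == shift left by 1 */
  return x;			/* == x << count */
}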
16818 void
16819 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16821 rtx low[2], high[2];
16822 int count;
16823 const int single_width = mode == DImode ? 32 : 64;
16825 if (CONST_INT_P (operands[2]))
16827 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16828 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16830 if (count >= single_width)
16832 emit_move_insn (high[0], low[1]);
16833 emit_move_insn (low[0], const0_rtx);
16835 if (count > single_width)
16836 ix86_expand_ashl_const (high[0], count - single_width, mode);
16838 else
16840 if (!rtx_equal_p (operands[0], operands[1]))
16841 emit_move_insn (operands[0], operands[1]);
16842 emit_insn ((mode == DImode
16843 ? gen_x86_shld
16844 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16845 ix86_expand_ashl_const (low[0], count, mode);
16847 return;
16850 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16852 if (operands[1] == const1_rtx)
16854 /* Assuming we've chosen QImode-capable registers, 1 << N
16855 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16856 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16858 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16860 ix86_expand_clear (low[0]);
16861 ix86_expand_clear (high[0]);
16862 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16864 d = gen_lowpart (QImode, low[0]);
16865 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16866 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16867 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16869 d = gen_lowpart (QImode, high[0]);
16870 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16871 s = gen_rtx_NE (QImode, flags, const0_rtx);
16872 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16875 /* Otherwise, we can get the same results by manually performing
16876 a bit extract operation on bit 5/6, and then performing the two
16877 shifts. The two methods of getting 0/1 into low/high are exactly
16878 the same size. Avoiding the shift in the bit extract case helps
16879 pentium4 a bit; no one else seems to care much either way. */
16880 else
16882 rtx x;
16884 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16885 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16886 else
16887 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16888 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16890 emit_insn ((mode == DImode
16891 ? gen_lshrsi3
16892 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16893 emit_insn ((mode == DImode
16894 ? gen_andsi3
16895 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16896 emit_move_insn (low[0], high[0]);
16897 emit_insn ((mode == DImode
16898 ? gen_xorsi3
16899 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16902 emit_insn ((mode == DImode
16903 ? gen_ashlsi3
16904 : gen_ashldi3) (low[0], low[0], operands[2]));
16905 emit_insn ((mode == DImode
16906 ? gen_ashlsi3
16907 : gen_ashldi3) (high[0], high[0], operands[2]));
16908 return;
16911 if (operands[1] == constm1_rtx)
16913 /* For -1 << N, we can avoid the shld instruction, because we
16914 know that we're shifting 0...31/63 ones into a -1. */
16915 emit_move_insn (low[0], constm1_rtx);
16916 if (optimize_insn_for_size_p ())
16917 emit_move_insn (high[0], low[0]);
16918 else
16919 emit_move_insn (high[0], constm1_rtx);
16921 else
16923 if (!rtx_equal_p (operands[0], operands[1]))
16924 emit_move_insn (operands[0], operands[1]);
16926 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16927 emit_insn ((mode == DImode
16928 ? gen_x86_shld
16929 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16932 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16934 if (TARGET_CMOVE && scratch)
16936 ix86_expand_clear (scratch);
16937 emit_insn ((mode == DImode
16938 ? gen_x86_shift_adj_1
16939 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16940 scratch));
16942 else
16943 emit_insn ((mode == DImode
16944 ? gen_x86_shift_adj_2
16945 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
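/* [Editor's note, not part of the original file] C sketch of the
   double-word left shift built above for DImode on 32-bit targets:
   shld covers counts 1-31, and the adjust step covers counts >= 32 by
   moving the low word into the high word.  */
static void
dword_shl_sketch (unsigned int *lo, unsigned int *hi, unsigned int n)
{
  n &= 63;
  if (n >= 32)
    {				/* the x86_shift_adj step */
      *hi = *lo << (n - 32);
      *lo = 0;
    }
  else if (n > 0)
    {
      *hi = (*hi << n) | (*lo >> (32 - n));	/* shld */
      *lo <<= n;
    }
}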
16948 void
16949 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16951 rtx low[2], high[2];
16952 int count;
16953 const int single_width = mode == DImode ? 32 : 64;
16955 if (CONST_INT_P (operands[2]))
16957 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16958 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16960 if (count == single_width * 2 - 1)
16962 emit_move_insn (high[0], high[1]);
16963 emit_insn ((mode == DImode
16964 ? gen_ashrsi3
16965 : gen_ashrdi3) (high[0], high[0],
16966 GEN_INT (single_width - 1)));
16967 emit_move_insn (low[0], high[0]);
16970 else if (count >= single_width)
16972 emit_move_insn (low[0], high[1]);
16973 emit_move_insn (high[0], low[0]);
16974 emit_insn ((mode == DImode
16975 ? gen_ashrsi3
16976 : gen_ashrdi3) (high[0], high[0],
16977 GEN_INT (single_width - 1)));
16978 if (count > single_width)
16979 emit_insn ((mode == DImode
16980 ? gen_ashrsi3
16981 : gen_ashrdi3) (low[0], low[0],
16982 GEN_INT (count - single_width)));
16984 else
16986 if (!rtx_equal_p (operands[0], operands[1]))
16987 emit_move_insn (operands[0], operands[1]);
16988 emit_insn ((mode == DImode
16989 ? gen_x86_shrd
16990 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16991 emit_insn ((mode == DImode
16992 ? gen_ashrsi3
16993 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
16996 else
16998 if (!rtx_equal_p (operands[0], operands[1]))
16999 emit_move_insn (operands[0], operands[1]);
17001 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17003 emit_insn ((mode == DImode
17004 ? gen_x86_shrd
17005 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17006 emit_insn ((mode == DImode
17007 ? gen_ashrsi3
17008 : gen_ashrdi3) (high[0], high[0], operands[2]));
17010 if (TARGET_CMOVE && scratch)
17012 emit_move_insn (scratch, high[0]);
17013 emit_insn ((mode == DImode
17014 ? gen_ashrsi3
17015 : gen_ashrdi3) (scratch, scratch,
17016 GEN_INT (single_width - 1)));
17017 emit_insn ((mode == DImode
17018 ? gen_x86_shift_adj_1
17019 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17020 scratch));
17022 else
17023 emit_insn ((mode == DImode
17024 ? gen_x86_shift_adj_3
17025 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
17029 void
17030 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17032 rtx low[2], high[2];
17033 int count;
17034 const int single_width = mode == DImode ? 32 : 64;
17036 if (CONST_INT_P (operands[2]))
17038 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17039 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17041 if (count >= single_width)
17043 emit_move_insn (low[0], high[1]);
17044 ix86_expand_clear (high[0]);
17046 if (count > single_width)
17047 emit_insn ((mode == DImode
17048 ? gen_lshrsi3
17049 : gen_lshrdi3) (low[0], low[0],
17050 GEN_INT (count - single_width)));
17052 else
17054 if (!rtx_equal_p (operands[0], operands[1]))
17055 emit_move_insn (operands[0], operands[1]);
17056 emit_insn ((mode == DImode
17057 ? gen_x86_shrd
17058 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17059 emit_insn ((mode == DImode
17060 ? gen_lshrsi3
17061 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17064 else
17066 if (!rtx_equal_p (operands[0], operands[1]))
17067 emit_move_insn (operands[0], operands[1]);
17069 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17071 emit_insn ((mode == DImode
17072 ? gen_x86_shrd
17073 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17074 emit_insn ((mode == DImode
17075 ? gen_lshrsi3
17076 : gen_lshrdi3) (high[0], high[0], operands[2]));
17078 /* Heh. By reversing the arguments, we can reuse this pattern. */
17079 if (TARGET_CMOVE && scratch)
17081 ix86_expand_clear (scratch);
17082 emit_insn ((mode == DImode
17083 ? gen_x86_shift_adj_1
17084 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17085 scratch));
17087 else
17088 emit_insn ((mode == DImode
17089 ? gen_x86_shift_adj_2
17090 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17094 /* Predict just emitted jump instruction to be taken with probability PROB. */
17095 static void
17096 predict_jump (int prob)
17098 rtx insn = get_last_insn ();
17099 gcc_assert (JUMP_P (insn));
17100 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17103 /* Helper function for the string operations below. Test whether
17104 VARIABLE is aligned to VALUE bytes. If so, jump to the label. */
17105 static rtx
17106 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17108 rtx label = gen_label_rtx ();
17109 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17110 if (GET_MODE (variable) == DImode)
17111 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17112 else
17113 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17114 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17115 1, label);
17116 if (epilogue)
17117 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17118 else
17119 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17120 return label;
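/* [Editor's note, not part of the original file] Sketch of the emitted
   test: the returned label is the target of a jump taken when the
   tested bits of VARIABLE are clear.  */
static int
aligntest_sketch (unsigned long variable, int value)
{
  return (variable & value) == 0;	/* nonzero -> branch taken */
}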
17123 /* Decrease COUNTREG by VALUE. */
17124 static void
17125 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17127 if (GET_MODE (countreg) == DImode)
17128 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17129 else
17130 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17133 /* Zero extend a possibly SImode EXP to a Pmode register. */
17134 rtx
17135 ix86_zero_extend_to_Pmode (rtx exp)
17137 rtx r;
17138 if (GET_MODE (exp) == VOIDmode)
17139 return force_reg (Pmode, exp);
17140 if (GET_MODE (exp) == Pmode)
17141 return copy_to_mode_reg (Pmode, exp);
17142 r = gen_reg_rtx (Pmode);
17143 emit_insn (gen_zero_extendsidi2 (r, exp));
17144 return r;
17147 /* Divide COUNTREG by SCALE. */
17148 static rtx
17149 scale_counter (rtx countreg, int scale)
17151 rtx sc;
17152 rtx piece_size_mask;
17154 if (scale == 1)
17155 return countreg;
17156 if (CONST_INT_P (countreg))
17157 return GEN_INT (INTVAL (countreg) / scale);
17158 gcc_assert (REG_P (countreg));
17160 piece_size_mask = GEN_INT (scale - 1);
17161 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17162 GEN_INT (exact_log2 (scale)),
17163 NULL, 1, OPTAB_DIRECT);
17164 return sc;
17167 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17168 DImode for constant loop counts. */
17170 static enum machine_mode
17171 counter_mode (rtx count_exp)
17173 if (GET_MODE (count_exp) != VOIDmode)
17174 return GET_MODE (count_exp);
17175 if (GET_CODE (count_exp) != CONST_INT)
17176 return Pmode;
17177 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17178 return DImode;
17179 return SImode;
17182 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
17183 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times; the
17184 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
17185 the equivalent loop to set memory by VALUE (supposed to be in MODE).
17187 The size is rounded down to a whole number of chunks moved at once.
17188 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
17191 static void
17192 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17193 rtx destptr, rtx srcptr, rtx value,
17194 rtx count, enum machine_mode mode, int unroll,
17195 int expected_size)
17197 rtx out_label, top_label, iter, tmp;
17198 enum machine_mode iter_mode = counter_mode (count);
17199 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17200 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17201 rtx size;
17202 rtx x_addr;
17203 rtx y_addr;
17204 int i;
17206 top_label = gen_label_rtx ();
17207 out_label = gen_label_rtx ();
17208 iter = gen_reg_rtx (iter_mode);
17210 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17211 NULL, 1, OPTAB_DIRECT);
17212 /* Those two should combine. */
17213 if (piece_size == const1_rtx)
17215 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17216 true, out_label);
17217 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17219 emit_move_insn (iter, const0_rtx);
17221 emit_label (top_label);
17223 tmp = convert_modes (Pmode, iter_mode, iter, true);
17224 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17225 destmem = change_address (destmem, mode, x_addr);
17227 if (srcmem)
17229 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17230 srcmem = change_address (srcmem, mode, y_addr);
17232 /* When unrolling for chips that reorder memory reads and writes,
17233 we can save registers by using a single temporary.
17234 Also, using 4 temporaries is overkill in 32bit mode. */
17235 if (!TARGET_64BIT && 0)
17237 for (i = 0; i < unroll; i++)
17239 if (i)
17241 destmem =
17242 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17243 srcmem =
17244 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17246 emit_move_insn (destmem, srcmem);
17249 else
17251 rtx tmpreg[4];
17252 gcc_assert (unroll <= 4);
17253 for (i = 0; i < unroll; i++)
17255 tmpreg[i] = gen_reg_rtx (mode);
17256 if (i)
17258 srcmem =
17259 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17261 emit_move_insn (tmpreg[i], srcmem);
17263 for (i = 0; i < unroll; i++)
17265 if (i)
17267 destmem =
17268 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17270 emit_move_insn (destmem, tmpreg[i]);
17274 else
17275 for (i = 0; i < unroll; i++)
17277 if (i)
17278 destmem =
17279 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17280 emit_move_insn (destmem, value);
17283 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17284 true, OPTAB_LIB_WIDEN);
17285 if (tmp != iter)
17286 emit_move_insn (iter, tmp);
17288 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17289 true, top_label);
17290 if (expected_size != -1)
17292 expected_size /= GET_MODE_SIZE (mode) * unroll;
17293 if (expected_size == 0)
17294 predict_jump (0);
17295 else if (expected_size > REG_BR_PROB_BASE)
17296 predict_jump (REG_BR_PROB_BASE - 1);
17297 else
17298 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17300 else
17301 predict_jump (REG_BR_PROB_BASE * 80 / 100);
17302 iter = ix86_zero_extend_to_Pmode (iter);
17303 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17304 true, OPTAB_LIB_WIDEN);
17305 if (tmp != destptr)
17306 emit_move_insn (destptr, tmp);
17307 if (srcptr)
17309 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17310 true, OPTAB_LIB_WIDEN);
17311 if (tmp != srcptr)
17312 emit_move_insn (srcptr, tmp);
17314 emit_label (out_label);
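/* [Editor's note, not part of the original file] C sketch of the loop
   shape emitted above for the copy case, assuming 4-byte chunks and
   UNROLL == 2: the size is rounded down to whole chunks, and all loads
   of an iteration are issued before the stores.  */
static void
copy_loop_sketch (char *dst, const char *src, unsigned long count)
{
  unsigned long size = count & ~7UL;	/* piece_size_mask */
  unsigned long iter;
  for (iter = 0; iter < size; iter += 8)
    {
      unsigned int t0, t1;
      __builtin_memcpy (&t0, src + iter, 4);
      __builtin_memcpy (&t1, src + iter + 4, 4);
      __builtin_memcpy (dst + iter, &t0, 4);
      __builtin_memcpy (dst + iter + 4, &t1, 4);
    }
  /* The real expander then advances destptr/srcptr by SIZE.  */
}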
17317 /* Output "rep; mov" instruction.
17318 Arguments have the same meaning as for the previous function. */
17319 static void
17320 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17321 rtx destptr, rtx srcptr,
17322 rtx count,
17323 enum machine_mode mode)
17325 rtx destexp;
17326 rtx srcexp;
17327 rtx countreg;
17329 /* If the size is known, it is shorter to use rep movs. */
17330 if (mode == QImode && CONST_INT_P (count)
17331 && !(INTVAL (count) & 3))
17332 mode = SImode;
17334 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17335 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17336 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17337 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17338 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17339 if (mode != QImode)
17341 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17342 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17343 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17344 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17345 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17346 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17348 else
17350 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17351 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17353 if (CONST_INT_P (count))
17355 count = GEN_INT (INTVAL (count)
17356 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17357 destmem = shallow_copy_rtx (destmem);
17358 srcmem = shallow_copy_rtx (srcmem);
17359 set_mem_size (destmem, count);
17360 set_mem_size (srcmem, count);
17362 else
17364 if (MEM_SIZE (destmem))
17365 set_mem_size (destmem, NULL_RTX);
17366 if (MEM_SIZE (srcmem))
17367 set_mem_size (srcmem, NULL_RTX);
17369 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17370 destexp, srcexp));
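/* [Editor's note, not part of the original file] Sketch of what the
   emitted "rep; movs" performs for SImode chunks: the byte count is
   scaled down by scale_counter, and the pointers advance as part of
   the instruction.  */
static void
rep_movs_sketch (unsigned int *dst, const unsigned int *src,
		 unsigned long nbytes)
{
  unsigned long n = nbytes / 4;		/* scale_counter for SImode */
  while (n--)
    *dst++ = *src++;			/* rep movsd */
}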
17373 /* Output "rep; stos" instruction.
17374 Arguments have the same meaning as for the previous function. */
17375 static void
17376 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17377 rtx count, enum machine_mode mode,
17378 rtx orig_value)
17380 rtx destexp;
17381 rtx countreg;
17383 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17384 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17385 value = force_reg (mode, gen_lowpart (mode, value));
17386 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17387 if (mode != QImode)
17389 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17390 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17391 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17393 else
17394 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17395 if (orig_value == const0_rtx && CONST_INT_P (count))
17397 count = GEN_INT (INTVAL (count)
17398 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17399 destmem = shallow_copy_rtx (destmem);
17400 set_mem_size (destmem, count);
17402 else if (MEM_SIZE (destmem))
17403 set_mem_size (destmem, NULL_RTX);
17404 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17407 static void
17408 emit_strmov (rtx destmem, rtx srcmem,
17409 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17411 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17412 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17413 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17416 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17417 static void
17418 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17419 rtx destptr, rtx srcptr, rtx count, int max_size)
17421 rtx src, dest;
17422 if (CONST_INT_P (count))
17424 HOST_WIDE_INT countval = INTVAL (count);
17425 int offset = 0;
17427 if ((countval & 0x10) && max_size > 16)
17429 if (TARGET_64BIT)
17431 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17432 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17434 else
17435 gcc_unreachable ();
17436 offset += 16;
17438 if ((countval & 0x08) && max_size > 8)
17440 if (TARGET_64BIT)
17441 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17442 else
17444 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17445 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17447 offset += 8;
17449 if ((countval & 0x04) && max_size > 4)
17451 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17452 offset += 4;
17454 if ((countval & 0x02) && max_size > 2)
17456 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17457 offset += 2;
17459 if ((countval & 0x01) && max_size > 1)
17461 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17462 offset += 1;
17464 return;
17466 if (max_size > 8)
17468 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17469 count, 1, OPTAB_DIRECT);
17470 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17471 count, QImode, 1, 4);
17472 return;
17475 /* When there are stringops, we can cheaply increase dest and src
17476 pointers. Otherwise we save code size by maintaining offset (zero is
17477 readily available from the preceding rep operation) and using x86
17478 addressing modes. */
17479 if (TARGET_SINGLE_STRINGOP)
17481 if (max_size > 4)
17483 rtx label = ix86_expand_aligntest (count, 4, true);
17484 src = change_address (srcmem, SImode, srcptr);
17485 dest = change_address (destmem, SImode, destptr);
17486 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17487 emit_label (label);
17488 LABEL_NUSES (label) = 1;
17490 if (max_size > 2)
17492 rtx label = ix86_expand_aligntest (count, 2, true);
17493 src = change_address (srcmem, HImode, srcptr);
17494 dest = change_address (destmem, HImode, destptr);
17495 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17496 emit_label (label);
17497 LABEL_NUSES (label) = 1;
17499 if (max_size > 1)
17501 rtx label = ix86_expand_aligntest (count, 1, true);
17502 src = change_address (srcmem, QImode, srcptr);
17503 dest = change_address (destmem, QImode, destptr);
17504 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17505 emit_label (label);
17506 LABEL_NUSES (label) = 1;
17509 else
17511 rtx offset = force_reg (Pmode, const0_rtx);
17512 rtx tmp;
17514 if (max_size > 4)
17516 rtx label = ix86_expand_aligntest (count, 4, true);
17517 src = change_address (srcmem, SImode, srcptr);
17518 dest = change_address (destmem, SImode, destptr);
17519 emit_move_insn (dest, src);
17520 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17521 true, OPTAB_LIB_WIDEN);
17522 if (tmp != offset)
17523 emit_move_insn (offset, tmp);
17524 emit_label (label);
17525 LABEL_NUSES (label) = 1;
17527 if (max_size > 2)
17529 rtx label = ix86_expand_aligntest (count, 2, true);
17530 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17531 src = change_address (srcmem, HImode, tmp);
17532 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17533 dest = change_address (destmem, HImode, tmp);
17534 emit_move_insn (dest, src);
17535 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17536 true, OPTAB_LIB_WIDEN);
17537 if (tmp != offset)
17538 emit_move_insn (offset, tmp);
17539 emit_label (label);
17540 LABEL_NUSES (label) = 1;
17542 if (max_size > 1)
17544 rtx label = ix86_expand_aligntest (count, 1, true);
17545 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17546 src = change_address (srcmem, QImode, tmp);
17547 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17548 dest = change_address (destmem, QImode, tmp);
17549 emit_move_insn (dest, src);
17550 emit_label (label);
17551 LABEL_NUSES (label) = 1;
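/* Illustrative sketch (not part of GCC): with a compile-time COUNT the
   epilogue above degenerates into a fixed sequence of moves selected by
   the low bits of the remainder, one move per set bit.  Hypothetical C
   model for a 32-bit target with max_size == 8; the word-sized copies
   tolerate misalignment, as x86 accesses do: */
static void
movmem_epilogue_sketch (char *dst, const char *src, unsigned int remainder)
{
  unsigned int off = 0;
  if (remainder & 4)
    {
      *(unsigned int *) (dst + off) = *(const unsigned int *) (src + off);
      off += 4;
    }
  if (remainder & 2)
    {
      *(unsigned short *) (dst + off) = *(const unsigned short *) (src + off);
      off += 2;
    }
  if (remainder & 1)
    dst[off] = src[off];
}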
17556 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17557 static void
17558 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17559 rtx count, int max_size)
17561 count =
17562 expand_simple_binop (counter_mode (count), AND, count,
17563 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17564 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17565 gen_lowpart (QImode, value), count, QImode,
17566 1, max_size / 2);
17569 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17570 static void
17571 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17573 rtx dest;
17575 if (CONST_INT_P (count))
17577 HOST_WIDE_INT countval = INTVAL (count);
17578 int offset = 0;
17580 if ((countval & 0x10) && max_size > 16)
17582 if (TARGET_64BIT)
17584 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17585 emit_insn (gen_strset (destptr, dest, value));
17586 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17587 emit_insn (gen_strset (destptr, dest, value));
17589 else
17590 gcc_unreachable ();
17591 offset += 16;
17593 if ((countval & 0x08) && max_size > 8)
17595 if (TARGET_64BIT)
17597 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17598 emit_insn (gen_strset (destptr, dest, value));
17600 else
17602 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17603 emit_insn (gen_strset (destptr, dest, value));
17604 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17605 emit_insn (gen_strset (destptr, dest, value));
17607 offset += 8;
17609 if ((countval & 0x04) && max_size > 4)
17611 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17612 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17613 offset += 4;
17615 if ((countval & 0x02) && max_size > 2)
17617 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17618 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17619 offset += 2;
17621 if ((countval & 0x01) && max_size > 1)
17623 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17624 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17625 offset += 1;
17627 return;
17629 if (max_size > 32)
17631 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17632 return;
17634 if (max_size > 16)
17636 rtx label = ix86_expand_aligntest (count, 16, true);
17637 if (TARGET_64BIT)
17639 dest = change_address (destmem, DImode, destptr);
17640 emit_insn (gen_strset (destptr, dest, value));
17641 emit_insn (gen_strset (destptr, dest, value));
17643 else
17645 dest = change_address (destmem, SImode, destptr);
17646 emit_insn (gen_strset (destptr, dest, value));
17647 emit_insn (gen_strset (destptr, dest, value));
17648 emit_insn (gen_strset (destptr, dest, value));
17649 emit_insn (gen_strset (destptr, dest, value));
17651 emit_label (label);
17652 LABEL_NUSES (label) = 1;
17654 if (max_size > 8)
17656 rtx label = ix86_expand_aligntest (count, 8, true);
17657 if (TARGET_64BIT)
17659 dest = change_address (destmem, DImode, destptr);
17660 emit_insn (gen_strset (destptr, dest, value));
17662 else
17664 dest = change_address (destmem, SImode, destptr);
17665 emit_insn (gen_strset (destptr, dest, value));
17666 emit_insn (gen_strset (destptr, dest, value));
17668 emit_label (label);
17669 LABEL_NUSES (label) = 1;
17671 if (max_size > 4)
17673 rtx label = ix86_expand_aligntest (count, 4, true);
17674 dest = change_address (destmem, SImode, destptr);
17675 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17676 emit_label (label);
17677 LABEL_NUSES (label) = 1;
17679 if (max_size > 2)
17681 rtx label = ix86_expand_aligntest (count, 2, true);
17682 dest = change_address (destmem, HImode, destptr);
17683 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17684 emit_label (label);
17685 LABEL_NUSES (label) = 1;
17687 if (max_size > 1)
17689 rtx label = ix86_expand_aligntest (count, 1, true);
17690 dest = change_address (destmem, QImode, destptr);
17691 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17692 emit_label (label);
17693 LABEL_NUSES (label) = 1;
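/* Illustrative sketch (not part of GCC): for a variable COUNT the
   epilogue above emits a jump tree rather than a loop; each store is
   guarded by a test of one bit of COUNT, which is what
   ix86_expand_aligntest provides.  Hypothetical C model for
   max_size == 8, with VALUE4 the byte-broadcast fill value: */
static char *
setmem_epilogue_sketch (char *dst, unsigned int value4, unsigned long count)
{
  if (count & 4)
    {
      *(unsigned int *) dst = value4;
      dst += 4;
    }
  if (count & 2)
    {
      *(unsigned short *) dst = (unsigned short) value4;
      dst += 2;
    }
  if (count & 1)
    *dst = (char) value4;
  return dst;
}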
17697 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
17698 to DESIRED_ALIGNMENT. */
17699 static void
17700 expand_movmem_prologue (rtx destmem, rtx srcmem,
17701 rtx destptr, rtx srcptr, rtx count,
17702 int align, int desired_alignment)
17704 if (align <= 1 && desired_alignment > 1)
17706 rtx label = ix86_expand_aligntest (destptr, 1, false);
17707 srcmem = change_address (srcmem, QImode, srcptr);
17708 destmem = change_address (destmem, QImode, destptr);
17709 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17710 ix86_adjust_counter (count, 1);
17711 emit_label (label);
17712 LABEL_NUSES (label) = 1;
17714 if (align <= 2 && desired_alignment > 2)
17716 rtx label = ix86_expand_aligntest (destptr, 2, false);
17717 srcmem = change_address (srcmem, HImode, srcptr);
17718 destmem = change_address (destmem, HImode, destptr);
17719 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17720 ix86_adjust_counter (count, 2);
17721 emit_label (label);
17722 LABEL_NUSES (label) = 1;
17724 if (align <= 4 && desired_alignment > 4)
17726 rtx label = ix86_expand_aligntest (destptr, 4, false);
17727 srcmem = change_address (srcmem, SImode, srcptr);
17728 destmem = change_address (destmem, SImode, destptr);
17729 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17730 ix86_adjust_counter (count, 4);
17731 emit_label (label);
17732 LABEL_NUSES (label) = 1;
17734 gcc_assert (desired_alignment <= 8);
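/* Illustrative sketch (not part of GCC): the prologue above peels at
   most one byte, one halfword and one word, each guarded by a test of
   the destination address, until DST reaches the desired alignment.
   Hypothetical C model (assumes pointers fit in unsigned long and that
   misaligned loads from SRC are tolerated, as on x86): */
static char *
movmem_prologue_sketch (char *dst, const char **srcp, unsigned long *countp,
                        int align, int desired_align)
{
  const char *src = *srcp;
  if (align <= 1 && desired_align > 1 && ((unsigned long) dst & 1))
    {
      *dst++ = *src++;
      --*countp;
    }
  if (align <= 2 && desired_align > 2 && ((unsigned long) dst & 2))
    {
      *(unsigned short *) dst = *(const unsigned short *) src;
      dst += 2; src += 2; *countp -= 2;
    }
  if (align <= 4 && desired_align > 4 && ((unsigned long) dst & 4))
    {
      *(unsigned int *) dst = *(const unsigned int *) src;
      dst += 4; src += 4; *countp -= 4;
    }
  *srcp = src;
  return dst;
}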
17737 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
17738 ALIGN_BYTES is how many bytes need to be copied. */
17739 static rtx
17740 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17741 int desired_align, int align_bytes)
17743 rtx src = *srcp;
17744 rtx src_size, dst_size;
17745 int off = 0;
17746 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17747 if (src_align_bytes >= 0)
17748 src_align_bytes = desired_align - src_align_bytes;
17749 src_size = MEM_SIZE (src);
17750 dst_size = MEM_SIZE (dst);
17751 if (align_bytes & 1)
17753 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17754 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17755 off = 1;
17756 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17758 if (align_bytes & 2)
17760 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17761 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17762 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17763 set_mem_align (dst, 2 * BITS_PER_UNIT);
17764 if (src_align_bytes >= 0
17765 && (src_align_bytes & 1) == (align_bytes & 1)
17766 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17767 set_mem_align (src, 2 * BITS_PER_UNIT);
17768 off = 2;
17769 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17771 if (align_bytes & 4)
17773 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17774 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17775 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17776 set_mem_align (dst, 4 * BITS_PER_UNIT);
17777 if (src_align_bytes >= 0)
17779 unsigned int src_align = 0;
17780 if ((src_align_bytes & 3) == (align_bytes & 3))
17781 src_align = 4;
17782 else if ((src_align_bytes & 1) == (align_bytes & 1))
17783 src_align = 2;
17784 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17785 set_mem_align (src, src_align * BITS_PER_UNIT);
17787 off = 4;
17788 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17790 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17791 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17792 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17793 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17794 if (src_align_bytes >= 0)
17796 unsigned int src_align = 0;
17797 if ((src_align_bytes & 7) == (align_bytes & 7))
17798 src_align = 8;
17799 else if ((src_align_bytes & 3) == (align_bytes & 3))
17800 src_align = 4;
17801 else if ((src_align_bytes & 1) == (align_bytes & 1))
17802 src_align = 2;
17803 if (src_align > (unsigned int) desired_align)
17804 src_align = desired_align;
17805 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17806 set_mem_align (src, src_align * BITS_PER_UNIT);
17808 if (dst_size)
17809 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17810 if (src_size)
17811 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17812 *srcp = src;
17813 return dst;
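/* Worked example (hypothetical numbers, not from the source): suppose
   desired_align == 8, DST needs align_bytes == 3 more bytes to become
   aligned and SRC would need src_align_bytes == 7.  After the 1-byte
   and 2-byte moves above, DST is 8-byte aligned.  SRC is known 4-byte
   aligned because (7 & 3) == (3 & 3), but not 8-byte aligned since
   (7 & 7) != (3 & 7), so its recorded alignment is raised only to 4.  */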
17816 /* Store enough to DEST to align DEST, known to be aligned by ALIGN,
17817 to DESIRED_ALIGNMENT. */
17818 static void
17819 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17820 int align, int desired_alignment)
17822 if (align <= 1 && desired_alignment > 1)
17824 rtx label = ix86_expand_aligntest (destptr, 1, false);
17825 destmem = change_address (destmem, QImode, destptr);
17826 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17827 ix86_adjust_counter (count, 1);
17828 emit_label (label);
17829 LABEL_NUSES (label) = 1;
17831 if (align <= 2 && desired_alignment > 2)
17833 rtx label = ix86_expand_aligntest (destptr, 2, false);
17834 destmem = change_address (destmem, HImode, destptr);
17835 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17836 ix86_adjust_counter (count, 2);
17837 emit_label (label);
17838 LABEL_NUSES (label) = 1;
17840 if (align <= 4 && desired_alignment > 4)
17842 rtx label = ix86_expand_aligntest (destptr, 4, false);
17843 destmem = change_address (destmem, SImode, destptr);
17844 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17845 ix86_adjust_counter (count, 4);
17846 emit_label (label);
17847 LABEL_NUSES (label) = 1;
17849 gcc_assert (desired_alignment <= 8);
17852 /* Store enough to DST to align DST, known to be aligned by ALIGN,
17853 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
17854 static rtx
17855 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17856 int desired_align, int align_bytes)
17858 int off = 0;
17859 rtx dst_size = MEM_SIZE (dst);
17860 if (align_bytes & 1)
17862 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17863 off = 1;
17864 emit_insn (gen_strset (destreg, dst,
17865 gen_lowpart (QImode, value)));
17867 if (align_bytes & 2)
17869 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17870 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17871 set_mem_align (dst, 2 * BITS_PER_UNIT);
17872 off = 2;
17873 emit_insn (gen_strset (destreg, dst,
17874 gen_lowpart (HImode, value)));
17876 if (align_bytes & 4)
17878 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17879 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17880 set_mem_align (dst, 4 * BITS_PER_UNIT);
17881 off = 4;
17882 emit_insn (gen_strset (destreg, dst,
17883 gen_lowpart (SImode, value)));
17885 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17886 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17887 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17888 if (dst_size)
17889 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17890 return dst;
17893 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17894 static enum stringop_alg
17895 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17896 int *dynamic_check)
17898 const struct stringop_algs * algs;
17899 bool optimize_for_speed;
17900 /* Algorithms using the rep prefix want at least edi and ecx;
17901 additionally, memset wants eax and memcpy wants esi. Don't
17902 consider such algorithms if the user has appropriated those
17903 registers for their own purposes. */
17904 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17905 || (memset
17906 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17908 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17909 || (alg != rep_prefix_1_byte \
17910 && alg != rep_prefix_4_byte \
17911 && alg != rep_prefix_8_byte))
17912 const struct processor_costs *cost;
17914 /* Even if the string operation call is cold, we still might spend a lot
17915 of time processing large blocks. */
17916 if (optimize_function_for_size_p (cfun)
17917 || (optimize_insn_for_size_p ()
17918 && expected_size != -1 && expected_size < 256))
17919 optimize_for_speed = false;
17920 else
17921 optimize_for_speed = true;
17923 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17925 *dynamic_check = -1;
17926 if (memset)
17927 algs = &cost->memset[TARGET_64BIT != 0];
17928 else
17929 algs = &cost->memcpy[TARGET_64BIT != 0];
17930 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17931 return stringop_alg;
17932 /* rep; movq or rep; movl is the smallest variant. */
17933 else if (!optimize_for_speed)
17935 if (!count || (count & 3))
17936 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17937 else
17938 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17940 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
17942 else if (expected_size != -1 && expected_size < 4)
17943 return loop_1_byte;
17944 else if (expected_size != -1)
17946 unsigned int i;
17947 enum stringop_alg alg = libcall;
17948 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17950 /* We get here if the algorithms that were not libcall-based
17951 were rep-prefix based and we are unable to use rep prefixes
17952 based on global register usage. Break out of the loop and
17953 use the heuristic below. */
17954 if (algs->size[i].max == 0)
17955 break;
17956 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17958 enum stringop_alg candidate = algs->size[i].alg;
17960 if (candidate != libcall && ALG_USABLE_P (candidate))
17961 alg = candidate;
17962 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
17963 last non-libcall inline algorithm. */
17964 if (TARGET_INLINE_ALL_STRINGOPS)
17966 /* When the current size is best copied by a libcall
17967 but we are still forced to inline, run the heuristic below
17968 that will pick code for medium-sized blocks. */
17969 if (alg != libcall)
17970 return alg;
17971 break;
17973 else if (ALG_USABLE_P (candidate))
17974 return candidate;
17977 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17979 /* When asked to inline the call anyway, try to pick a meaningful choice.
17980 We look for the maximal size of block that is faster to copy by hand and
17981 take blocks of at most that size, guessing that the average size will
17982 be roughly half of the block.
17984 If this turns out to be bad, we might simply specify the preferred
17985 choice in ix86_costs. */
17986 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17987 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17989 int max = -1;
17990 enum stringop_alg alg;
17991 int i;
17992 bool any_alg_usable_p = true;
17994 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17996 enum stringop_alg candidate = algs->size[i].alg;
17997 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17999 if (candidate != libcall && candidate
18000 && ALG_USABLE_P (candidate))
18001 max = algs->size[i].max;
18003 /* If there aren't any usable algorithms, then recursing on
18004 smaller sizes isn't going to find anything. Just return the
18005 simple byte-at-a-time copy loop. */
18006 if (!any_alg_usable_p)
18008 /* Pick something reasonable. */
18009 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18010 *dynamic_check = 128;
18011 return loop_1_byte;
18013 if (max == -1)
18014 max = 4096;
18015 alg = decide_alg (count, max / 2, memset, dynamic_check);
18016 gcc_assert (*dynamic_check == -1);
18017 gcc_assert (alg != libcall);
18018 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18019 *dynamic_check = max;
18020 return alg;
18022 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18023 #undef ALG_USABLE_P
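/* Illustrative example (hypothetical numbers, not a real cost table):
   a stringop_algs entry lists (max size, algorithm) pairs that
   decide_alg walks in order, with max == -1 meaning "everything
   larger":

     static const struct stringop_algs example
       = {libcall,                      -- unknown size
          {{24, loop},                  -- blocks <= 24 bytes
           {128, rep_prefix_4_byte},    -- blocks <= 128 bytes
           {-1, libcall}}};             -- everything larger

   With expected_size == 100 the size loop above would return
   rep_prefix_4_byte, provided rep prefixes are usable.  */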
18026 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18027 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18028 static int
18029 decide_alignment (int align,
18030 enum stringop_alg alg,
18031 int expected_size)
18033 int desired_align = 0;
18034 switch (alg)
18036 case no_stringop:
18037 gcc_unreachable ();
18038 case loop:
18039 case unrolled_loop:
18040 desired_align = GET_MODE_SIZE (Pmode);
18041 break;
18042 case rep_prefix_8_byte:
18043 desired_align = 8;
18044 break;
18045 case rep_prefix_4_byte:
18046 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
18047 copying a whole cache line at once. */
18048 if (TARGET_PENTIUMPRO)
18049 desired_align = 8;
18050 else
18051 desired_align = 4;
18052 break;
18053 case rep_prefix_1_byte:
18054 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
18055 copying a whole cache line at once. */
18056 if (TARGET_PENTIUMPRO)
18057 desired_align = 8;
18058 else
18059 desired_align = 1;
18060 break;
18061 case loop_1_byte:
18062 desired_align = 1;
18063 break;
18064 case libcall:
18065 return 0;
18068 if (optimize_size)
18069 desired_align = 1;
18070 if (desired_align < align)
18071 desired_align = align;
18072 if (expected_size != -1 && expected_size < 4)
18073 desired_align = align;
18074 return desired_align;
18077 /* Return the smallest power of 2 greater than VAL. */
18078 static int
18079 smallest_pow2_greater_than (int val)
18081 int ret = 1;
18082 while (ret <= val)
18083 ret <<= 1;
18084 return ret;
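/* For example, smallest_pow2_greater_than (0) == 1 and
   smallest_pow2_greater_than (7) == 8; because the loop condition is
   <=, smallest_pow2_greater_than (8) == 16, so the result is strictly
   greater than VAL, which the epilogue sizing below relies on.  */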
18087 /* Expand string move (memcpy) operation. Use i386 string operations when
18088 profitable. expand_setmem contains similar code. The code depends upon
18089 architecture, block size and alignment, but always has the same
18090 overall structure:
18092 1) Prologue guard: Conditional that jumps up to epilogues for small
18093 blocks that can be handled by epilogue alone. This is faster but
18094 also needed for correctness, since the prologue assumes the block is larger
18095 than the desired alignment.
18097 Optional dynamic check for size and libcall for large
18098 blocks is emitted here too, with -minline-stringops-dynamically.
18100 2) Prologue: copy first few bytes in order to get destination aligned
18101 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18102 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18103 We emit either a jump tree on power of two sized blocks, or a byte loop.
18105 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18106 with the specified algorithm.
18108 4) Epilogue: code copying tail of the block that is too small to be
18109 handled by main body (or up to size guarded by prologue guard). */
18111 int
18112 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18113 rtx expected_align_exp, rtx expected_size_exp)
18115 rtx destreg;
18116 rtx srcreg;
18117 rtx label = NULL;
18118 rtx tmp;
18119 rtx jump_around_label = NULL;
18120 HOST_WIDE_INT align = 1;
18121 unsigned HOST_WIDE_INT count = 0;
18122 HOST_WIDE_INT expected_size = -1;
18123 int size_needed = 0, epilogue_size_needed;
18124 int desired_align = 0, align_bytes = 0;
18125 enum stringop_alg alg;
18126 int dynamic_check;
18127 bool need_zero_guard = false;
18129 if (CONST_INT_P (align_exp))
18130 align = INTVAL (align_exp);
18131 /* i386 can do misaligned access at a reasonably increased cost. */
18132 if (CONST_INT_P (expected_align_exp)
18133 && INTVAL (expected_align_exp) > align)
18134 align = INTVAL (expected_align_exp);
18135 /* ALIGN is the minimum of destination and source alignment, but we care here
18136 just about destination alignment. */
18137 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18138 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18140 if (CONST_INT_P (count_exp))
18141 count = expected_size = INTVAL (count_exp);
18142 if (CONST_INT_P (expected_size_exp) && count == 0)
18143 expected_size = INTVAL (expected_size_exp);
18145 /* Make sure we don't need to care about overflow later on. */
18146 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18147 return 0;
18149 /* Step 0: Decide on preferred algorithm, desired alignment and
18150 size of chunks to be copied by main loop. */
18152 alg = decide_alg (count, expected_size, false, &dynamic_check);
18153 desired_align = decide_alignment (align, alg, expected_size);
18155 if (!TARGET_ALIGN_STRINGOPS)
18156 align = desired_align;
18158 if (alg == libcall)
18159 return 0;
18160 gcc_assert (alg != no_stringop);
18161 if (!count)
18162 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18163 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18164 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
18165 switch (alg)
18167 case libcall:
18168 case no_stringop:
18169 gcc_unreachable ();
18170 case loop:
18171 need_zero_guard = true;
18172 size_needed = GET_MODE_SIZE (Pmode);
18173 break;
18174 case unrolled_loop:
18175 need_zero_guard = true;
18176 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18177 break;
18178 case rep_prefix_8_byte:
18179 size_needed = 8;
18180 break;
18181 case rep_prefix_4_byte:
18182 size_needed = 4;
18183 break;
18184 case rep_prefix_1_byte:
18185 size_needed = 1;
18186 break;
18187 case loop_1_byte:
18188 need_zero_guard = true;
18189 size_needed = 1;
18190 break;
18193 epilogue_size_needed = size_needed;
18195 /* Step 1: Prologue guard. */
18197 /* Alignment code needs count to be in a register. */
18198 if (CONST_INT_P (count_exp) && desired_align > align)
18200 if (INTVAL (count_exp) > desired_align
18201 && INTVAL (count_exp) > size_needed)
18203 align_bytes
18204 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18205 if (align_bytes <= 0)
18206 align_bytes = 0;
18207 else
18208 align_bytes = desired_align - align_bytes;
18210 if (align_bytes == 0)
18211 count_exp = force_reg (counter_mode (count_exp), count_exp);
18213 gcc_assert (desired_align >= 1 && align >= 1);
18215 /* Ensure that alignment prologue won't copy past end of block. */
18216 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18218 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18219 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18220 Make sure it is a power of 2. */
18221 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18223 if (count)
18225 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18227 /* If main algorithm works on QImode, no epilogue is needed.
18228 For small sizes just don't align anything. */
18229 if (size_needed == 1)
18230 desired_align = align;
18231 else
18232 goto epilogue;
18235 else
18237 label = gen_label_rtx ();
18238 emit_cmp_and_jump_insns (count_exp,
18239 GEN_INT (epilogue_size_needed),
18240 LTU, 0, counter_mode (count_exp), 1, label);
18241 if (expected_size == -1 || expected_size < epilogue_size_needed)
18242 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18243 else
18244 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18248 /* Emit code to decide at runtime whether a library call or inline code
18249 should be used. */
18250 if (dynamic_check != -1)
18252 if (CONST_INT_P (count_exp))
18254 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18256 emit_block_move_via_libcall (dst, src, count_exp, false);
18257 count_exp = const0_rtx;
18258 goto epilogue;
18261 else
18263 rtx hot_label = gen_label_rtx ();
18264 jump_around_label = gen_label_rtx ();
18265 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18266 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18267 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18268 emit_block_move_via_libcall (dst, src, count_exp, false);
18269 emit_jump (jump_around_label);
18270 emit_label (hot_label);
18274 /* Step 2: Alignment prologue. */
18276 if (desired_align > align)
18278 if (align_bytes == 0)
18280 /* Except for the first move in the epilogue, we no longer know
18281 the constant offset in aliasing info. It doesn't seem worth
18282 the pain to maintain it for the first move, so throw away
18283 the info early. */
18284 src = change_address (src, BLKmode, srcreg);
18285 dst = change_address (dst, BLKmode, destreg);
18286 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18287 desired_align);
18289 else
18291 /* If we know how many bytes need to be stored before dst is
18292 sufficiently aligned, maintain aliasing info accurately. */
18293 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18294 desired_align, align_bytes);
18295 count_exp = plus_constant (count_exp, -align_bytes);
18296 count -= align_bytes;
18298 if (need_zero_guard
18299 && (count < (unsigned HOST_WIDE_INT) size_needed
18300 || (align_bytes == 0
18301 && count < ((unsigned HOST_WIDE_INT) size_needed
18302 + desired_align - align))))
18304 /* It is possible that we copied enough so the main loop will not
18305 execute. */
18306 gcc_assert (size_needed > 1);
18307 if (label == NULL_RTX)
18308 label = gen_label_rtx ();
18309 emit_cmp_and_jump_insns (count_exp,
18310 GEN_INT (size_needed),
18311 LTU, 0, counter_mode (count_exp), 1, label);
18312 if (expected_size == -1
18313 || expected_size < (desired_align - align) / 2 + size_needed)
18314 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18315 else
18316 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18319 if (label && size_needed == 1)
18321 emit_label (label);
18322 LABEL_NUSES (label) = 1;
18323 label = NULL;
18324 epilogue_size_needed = 1;
18326 else if (label == NULL_RTX)
18327 epilogue_size_needed = size_needed;
18329 /* Step 3: Main loop. */
18331 switch (alg)
18333 case libcall:
18334 case no_stringop:
18335 gcc_unreachable ();
18336 case loop_1_byte:
18337 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18338 count_exp, QImode, 1, expected_size);
18339 break;
18340 case loop:
18341 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18342 count_exp, Pmode, 1, expected_size);
18343 break;
18344 case unrolled_loop:
18345 /* Unroll only by a factor of 2 in 32bit mode, since we don't have enough
18346 registers for 4 temporaries anyway. */
18347 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18348 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18349 expected_size);
18350 break;
18351 case rep_prefix_8_byte:
18352 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18353 DImode);
18354 break;
18355 case rep_prefix_4_byte:
18356 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18357 SImode);
18358 break;
18359 case rep_prefix_1_byte:
18360 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18361 QImode);
18362 break;
18364 /* Properly adjust the offsets of src and dest memory for aliasing. */
18365 if (CONST_INT_P (count_exp))
18367 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18368 (count / size_needed) * size_needed);
18369 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18370 (count / size_needed) * size_needed);
18372 else
18374 src = change_address (src, BLKmode, srcreg);
18375 dst = change_address (dst, BLKmode, destreg);
18378 /* Step 4: Epilogue to copy the remaining bytes. */
18379 epilogue:
18380 if (label)
18382 /* When the main loop is done, COUNT_EXP might hold original count,
18383 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18384 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18385 bytes. Compensate if needed. */
18387 if (size_needed < epilogue_size_needed)
18389 tmp =
18390 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18391 GEN_INT (size_needed - 1), count_exp, 1,
18392 OPTAB_DIRECT);
18393 if (tmp != count_exp)
18394 emit_move_insn (count_exp, tmp);
18396 emit_label (label);
18397 LABEL_NUSES (label) = 1;
18400 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18401 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18402 epilogue_size_needed);
18403 if (jump_around_label)
18404 emit_label (jump_around_label);
18405 return 1;
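/* Illustrative sketch (not part of GCC): for a variable COUNT the code
   emitted by ix86_expand_movmem has roughly the following shape.
   Hypothetical C model with a 4-byte chunk standing in for the selected
   algorithm; the word copy tolerates a misaligned SRC, as x86 loads do: */
static void
movmem_shape_sketch (char *dst, const char *src, unsigned long count)
{
  /* Step 1: prologue guard - small blocks go straight to the epilogue. */
  if (count >= 16)
    {
      /* Step 2: alignment prologue for DST. */
      while (((unsigned long) dst & 3) != 0)
        {
          *dst++ = *src++;
          count--;
        }
      /* Step 3: main body, one chunk per iteration. */
      while (count >= 4)
        {
          *(unsigned int *) dst = *(const unsigned int *) src;
          dst += 4;
          src += 4;
          count -= 4;
        }
    }
  /* Step 4: epilogue copies the remaining count & 3 bytes, or the whole
     block when the guard skipped steps 2 and 3. */
  while (count--)
    *dst++ = *src++;
}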
18408 /* Helper function for memset. For QImode value 0xXY produce
18409 0xXYXYXYXY of the width specified by MODE. This is essentially
18410 a * 0x01010101, but we can do slightly better than
18411 synth_mult by unwinding the sequence by hand on CPUs with
18412 slow multiply. */
18413 static rtx
18414 promote_duplicated_reg (enum machine_mode mode, rtx val)
18416 enum machine_mode valmode = GET_MODE (val);
18417 rtx tmp;
18418 int nops = mode == DImode ? 3 : 2;
18420 gcc_assert (mode == SImode || mode == DImode);
18421 if (val == const0_rtx)
18422 return copy_to_mode_reg (mode, const0_rtx);
18423 if (CONST_INT_P (val))
18425 HOST_WIDE_INT v = INTVAL (val) & 255;
18427 v |= v << 8;
18428 v |= v << 16;
18429 if (mode == DImode)
18430 v |= (v << 16) << 16;
18431 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18434 if (valmode == VOIDmode)
18435 valmode = QImode;
18436 if (valmode != QImode)
18437 val = gen_lowpart (QImode, val);
18438 if (mode == QImode)
18439 return val;
18440 if (!TARGET_PARTIAL_REG_STALL)
18441 nops--;
18442 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18443 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18444 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18445 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18447 rtx reg = convert_modes (mode, QImode, val, true);
18448 tmp = promote_duplicated_reg (mode, const1_rtx);
18449 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18450 OPTAB_DIRECT);
18452 else
18454 rtx reg = convert_modes (mode, QImode, val, true);
18456 if (!TARGET_PARTIAL_REG_STALL)
18457 if (mode == SImode)
18458 emit_insn (gen_movsi_insv_1 (reg, reg));
18459 else
18460 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18461 else
18463 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18464 NULL, 1, OPTAB_DIRECT);
18465 reg =
18466 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18468 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18469 NULL, 1, OPTAB_DIRECT);
18470 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18471 if (mode == SImode)
18472 return reg;
18473 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18474 NULL, 1, OPTAB_DIRECT);
18475 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18476 return reg;
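/* Illustrative example (not part of GCC): both strategies above compute
   the same byte broadcast; for val == 0xAB in SImode,
   0xAB * 0x01010101 == 0xABABABAB.  Hypothetical C model of the
   shift-and-or fallback: */
static unsigned int
broadcast_byte_sketch (unsigned char val)
{
  unsigned int v = val;  /* 0x000000AB */
  v |= v << 8;           /* 0x0000ABAB */
  v |= v << 16;          /* 0xABABABAB */
  return v;
}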
18480 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
18481 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
18482 alignment from ALIGN to DESIRED_ALIGN. */
18483 static rtx
18484 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18486 rtx promoted_val;
18488 if (TARGET_64BIT
18489 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18490 promoted_val = promote_duplicated_reg (DImode, val);
18491 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18492 promoted_val = promote_duplicated_reg (SImode, val);
18493 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18494 promoted_val = promote_duplicated_reg (HImode, val);
18495 else
18496 promoted_val = val;
18498 return promoted_val;
18501 /* Expand string set operation (memset). Use i386 string operations when
18502 profitable. See the expand_movmem comment for an explanation of the individual
18503 steps performed. */
18504 int
18505 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18506 rtx expected_align_exp, rtx expected_size_exp)
18508 rtx destreg;
18509 rtx label = NULL;
18510 rtx tmp;
18511 rtx jump_around_label = NULL;
18512 HOST_WIDE_INT align = 1;
18513 unsigned HOST_WIDE_INT count = 0;
18514 HOST_WIDE_INT expected_size = -1;
18515 int size_needed = 0, epilogue_size_needed;
18516 int desired_align = 0, align_bytes = 0;
18517 enum stringop_alg alg;
18518 rtx promoted_val = NULL;
18519 bool force_loopy_epilogue = false;
18520 int dynamic_check;
18521 bool need_zero_guard = false;
18523 if (CONST_INT_P (align_exp))
18524 align = INTVAL (align_exp);
18525 /* i386 can do misaligned access at a reasonably increased cost. */
18526 if (CONST_INT_P (expected_align_exp)
18527 && INTVAL (expected_align_exp) > align)
18528 align = INTVAL (expected_align_exp);
18529 if (CONST_INT_P (count_exp))
18530 count = expected_size = INTVAL (count_exp);
18531 if (CONST_INT_P (expected_size_exp) && count == 0)
18532 expected_size = INTVAL (expected_size_exp);
18534 /* Make sure we don't need to care about overflow later on. */
18535 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18536 return 0;
18538 /* Step 0: Decide on preferred algorithm, desired alignment and
18539 size of chunks to be copied by main loop. */
18541 alg = decide_alg (count, expected_size, true, &dynamic_check);
18542 desired_align = decide_alignment (align, alg, expected_size);
18544 if (!TARGET_ALIGN_STRINGOPS)
18545 align = desired_align;
18547 if (alg == libcall)
18548 return 0;
18549 gcc_assert (alg != no_stringop);
18550 if (!count)
18551 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18552 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18553 switch (alg)
18555 case libcall:
18556 case no_stringop:
18557 gcc_unreachable ();
18558 case loop:
18559 need_zero_guard = true;
18560 size_needed = GET_MODE_SIZE (Pmode);
18561 break;
18562 case unrolled_loop:
18563 need_zero_guard = true;
18564 size_needed = GET_MODE_SIZE (Pmode) * 4;
18565 break;
18566 case rep_prefix_8_byte:
18567 size_needed = 8;
18568 break;
18569 case rep_prefix_4_byte:
18570 size_needed = 4;
18571 break;
18572 case rep_prefix_1_byte:
18573 size_needed = 1;
18574 break;
18575 case loop_1_byte:
18576 need_zero_guard = true;
18577 size_needed = 1;
18578 break;
18580 epilogue_size_needed = size_needed;
18582 /* Step 1: Prologue guard. */
18584 /* Alignment code needs count to be in a register. */
18585 if (CONST_INT_P (count_exp) && desired_align > align)
18587 if (INTVAL (count_exp) > desired_align
18588 && INTVAL (count_exp) > size_needed)
18590 align_bytes
18591 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18592 if (align_bytes <= 0)
18593 align_bytes = 0;
18594 else
18595 align_bytes = desired_align - align_bytes;
18597 if (align_bytes == 0)
18599 enum machine_mode mode = SImode;
18600 if (TARGET_64BIT && (count & ~0xffffffff))
18601 mode = DImode;
18602 count_exp = force_reg (mode, count_exp);
18605 /* Do the cheap promotion to allow better CSE across the
18606 main loop and epilogue (i.e. one load of the big constant in
18607 front of all code). */
18608 if (CONST_INT_P (val_exp))
18609 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18610 desired_align, align);
18611 /* Ensure that alignment prologue won't copy past end of block. */
18612 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18614 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18615 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18616 Make sure it is a power of 2. */
18617 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18619 /* To improve performance on small blocks, we jump around the VAL
18620 promotion. This means that if the promoted VAL is not constant,
18621 we might not use it in the epilogue and have to use the byte
18622 loop variant. */
18623 if (epilogue_size_needed > 2 && !promoted_val)
18624 force_loopy_epilogue = true;
18625 if (count)
18627 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18629 /* If main algorithm works on QImode, no epilogue is needed.
18630 For small sizes just don't align anything. */
18631 if (size_needed == 1)
18632 desired_align = align;
18633 else
18634 goto epilogue;
18637 else
18639 label = gen_label_rtx ();
18640 emit_cmp_and_jump_insns (count_exp,
18641 GEN_INT (epilogue_size_needed),
18642 LTU, 0, counter_mode (count_exp), 1, label);
18643 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18644 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18645 else
18646 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18649 if (dynamic_check != -1)
18651 rtx hot_label = gen_label_rtx ();
18652 jump_around_label = gen_label_rtx ();
18653 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18654 LEU, 0, counter_mode (count_exp), 1, hot_label);
18655 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18656 set_storage_via_libcall (dst, count_exp, val_exp, false);
18657 emit_jump (jump_around_label);
18658 emit_label (hot_label);
18661 /* Step 2: Alignment prologue. */
18663 /* Do the expensive promotion once we have branched off the small blocks. */
18664 if (!promoted_val)
18665 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18666 desired_align, align);
18667 gcc_assert (desired_align >= 1 && align >= 1);
18669 if (desired_align > align)
18671 if (align_bytes == 0)
18673 /* Except for the first move in the epilogue, we no longer know
18674 the constant offset in aliasing info. It doesn't seem worth
18675 the pain to maintain it for the first move, so throw away
18676 the info early. */
18677 dst = change_address (dst, BLKmode, destreg);
18678 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18679 desired_align);
18681 else
18683 /* If we know how many bytes need to be stored before dst is
18684 sufficiently aligned, maintain aliasing info accurately. */
18685 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18686 desired_align, align_bytes);
18687 count_exp = plus_constant (count_exp, -align_bytes);
18688 count -= align_bytes;
18690 if (need_zero_guard
18691 && (count < (unsigned HOST_WIDE_INT) size_needed
18692 || (align_bytes == 0
18693 && count < ((unsigned HOST_WIDE_INT) size_needed
18694 + desired_align - align))))
18696 /* It is possible that we copied enough so the main loop will not
18697 execute. */
18698 gcc_assert (size_needed > 1);
18699 if (label == NULL_RTX)
18700 label = gen_label_rtx ();
18701 emit_cmp_and_jump_insns (count_exp,
18702 GEN_INT (size_needed),
18703 LTU, 0, counter_mode (count_exp), 1, label);
18704 if (expected_size == -1
18705 || expected_size < (desired_align - align) / 2 + size_needed)
18706 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18707 else
18708 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18711 if (label && size_needed == 1)
18713 emit_label (label);
18714 LABEL_NUSES (label) = 1;
18715 label = NULL;
18716 promoted_val = val_exp;
18717 epilogue_size_needed = 1;
18719 else if (label == NULL_RTX)
18720 epilogue_size_needed = size_needed;
18722 /* Step 3: Main loop. */
18724 switch (alg)
18726 case libcall:
18727 case no_stringop:
18728 gcc_unreachable ();
18729 case loop_1_byte:
18730 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18731 count_exp, QImode, 1, expected_size);
18732 break;
18733 case loop:
18734 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18735 count_exp, Pmode, 1, expected_size);
18736 break;
18737 case unrolled_loop:
18738 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18739 count_exp, Pmode, 4, expected_size);
18740 break;
18741 case rep_prefix_8_byte:
18742 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18743 DImode, val_exp);
18744 break;
18745 case rep_prefix_4_byte:
18746 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18747 SImode, val_exp);
18748 break;
18749 case rep_prefix_1_byte:
18750 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18751 QImode, val_exp);
18752 break;
18754 /* Properly adjust the offset of dest memory for aliasing. */
18755 if (CONST_INT_P (count_exp))
18756 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18757 (count / size_needed) * size_needed);
18758 else
18759 dst = change_address (dst, BLKmode, destreg);
18761 /* Step 4: Epilogue to copy the remaining bytes. */
18763 if (label)
18765 /* When the main loop is done, COUNT_EXP might hold original count,
18766 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18767 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18768 bytes. Compensate if needed. */
18770 if (size_needed < epilogue_size_needed)
18772 tmp =
18773 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18774 GEN_INT (size_needed - 1), count_exp, 1,
18775 OPTAB_DIRECT);
18776 if (tmp != count_exp)
18777 emit_move_insn (count_exp, tmp);
18779 emit_label (label);
18780 LABEL_NUSES (label) = 1;
18782 epilogue:
18783 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18785 if (force_loopy_epilogue)
18786 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18787 epilogue_size_needed);
18788 else
18789 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18790 epilogue_size_needed);
18792 if (jump_around_label)
18793 emit_label (jump_around_label);
18794 return 1;
18797 /* Expand the appropriate insns for doing strlen if not just doing
18798 repnz; scasb
18800 out = result, initialized with the start address
18801 align_rtx = alignment of the address.
18802 scratch = scratch register, initialized with the start address when
18803 not aligned, otherwise undefined
18805 This is just the body. It needs the initializations mentioned above and
18806 some address computing at the end. These things are done in i386.md. */
18808 static void
18809 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18811 int align;
18812 rtx tmp;
18813 rtx align_2_label = NULL_RTX;
18814 rtx align_3_label = NULL_RTX;
18815 rtx align_4_label = gen_label_rtx ();
18816 rtx end_0_label = gen_label_rtx ();
18817 rtx mem;
18818 rtx tmpreg = gen_reg_rtx (SImode);
18819 rtx scratch = gen_reg_rtx (SImode);
18820 rtx cmp;
18822 align = 0;
18823 if (CONST_INT_P (align_rtx))
18824 align = INTVAL (align_rtx);
18826 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18828 /* Is there a known alignment and is it less than 4? */
18829 if (align < 4)
18831 rtx scratch1 = gen_reg_rtx (Pmode);
18832 emit_move_insn (scratch1, out);
18833 /* Is there a known alignment and is it not 2? */
18834 if (align != 2)
18836 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18837 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18839 /* Leave just the 3 lower bits. */
18840 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18841 NULL_RTX, 0, OPTAB_WIDEN);
18843 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18844 Pmode, 1, align_4_label);
18845 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18846 Pmode, 1, align_2_label);
18847 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18848 Pmode, 1, align_3_label);
18850 else
18852 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18853 check whether it is aligned to 4 bytes. */
18855 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18856 NULL_RTX, 0, OPTAB_WIDEN);
18858 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18859 Pmode, 1, align_4_label);
18862 mem = change_address (src, QImode, out);
18864 /* Now compare the bytes. */
18866 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
18867 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18868 QImode, 1, end_0_label);
18870 /* Increment the address. */
18871 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18873 /* Not needed with an alignment of 2 */
18874 if (align != 2)
18876 emit_label (align_2_label);
18878 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18879 end_0_label);
18881 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18883 emit_label (align_3_label);
18886 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18887 end_0_label);
18889 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18892 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
18893 align this loop: it only makes the program bigger and does not
18894 help to speed it up. */
18895 emit_label (align_4_label);
18897 mem = change_address (src, SImode, out);
18898 emit_move_insn (scratch, mem);
18899 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18901 /* This formula yields a nonzero result iff one of the bytes is zero.
18902 This saves three branches inside the loop and many cycles. */
18904 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18905 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18906 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18907 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18908 gen_int_mode (0x80808080, SImode)));
18909 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18910 align_4_label);
18912 if (TARGET_CMOVE)
18914 rtx reg = gen_reg_rtx (SImode);
18915 rtx reg2 = gen_reg_rtx (Pmode);
18916 emit_move_insn (reg, tmpreg);
18917 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18919 /* If zero is not in the first two bytes, move two bytes forward. */
18920 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18921 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18922 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18923 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18924 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18925 reg,
18926 tmpreg)));
18927 /* Emit lea manually to avoid clobbering of flags. */
18928 emit_insn (gen_rtx_SET (SImode, reg2,
18929 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18931 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18932 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18933 emit_insn (gen_rtx_SET (VOIDmode, out,
18934 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18935 reg2,
18936 out)));
18939 else
18941 rtx end_2_label = gen_label_rtx ();
18942 /* Is zero in the first two bytes? */
18944 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18945 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18946 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18947 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18948 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18949 pc_rtx);
18950 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18951 JUMP_LABEL (tmp) = end_2_label;
18953 /* Not in the first two. Move two bytes forward. */
18954 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18955 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18957 emit_label (end_2_label);
18961 /* Avoid branch in fixing the byte. */
18962 tmpreg = gen_lowpart (QImode, tmpreg);
18963 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18964 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18965 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18967 emit_label (end_0_label);
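/* Illustrative example (not part of GCC): a C model of the zero-byte
   test emitted above.  Subtracting 0x01010101 can set bit 7 of a byte
   only when that byte wraps below zero or already had bit 7 set;
   masking with ~X rejects the latter, so the whole expression is
   nonzero exactly when some byte of X is zero.  */
static int
has_zero_byte_sketch (unsigned int x)
{
  return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}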
18970 /* Expand strlen. */
18972 int
18973 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18975 rtx addr, scratch1, scratch2, scratch3, scratch4;
18977 /* The generic case of the strlen expander is long. Avoid its
18978 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
18980 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18981 && !TARGET_INLINE_ALL_STRINGOPS
18982 && !optimize_insn_for_size_p ()
18983 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18984 return 0;
18986 addr = force_reg (Pmode, XEXP (src, 0));
18987 scratch1 = gen_reg_rtx (Pmode);
18989 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18990 && !optimize_insn_for_size_p ())
18992 /* Well, it seems that some optimizer does not combine a call like
18993 foo(strlen(bar), strlen(bar));
18994 when the move and the subtraction are done here. It does calculate
18995 the length just once when these instructions are done inside of
18996 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
18997 often used and I use one fewer register for the lifetime of
18998 output_strlen_unroll(), this is better. */
19000 emit_move_insn (out, addr);
19002 ix86_expand_strlensi_unroll_1 (out, src, align);
19004 /* strlensi_unroll_1 returns the address of the zero at the end of
19005 the string, like memchr(), so compute the length by subtracting
19006 the start address. */
19007 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19009 else
19011 rtx unspec;
19013 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19014 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19015 return false;
19017 scratch2 = gen_reg_rtx (Pmode);
19018 scratch3 = gen_reg_rtx (Pmode);
19019 scratch4 = force_reg (Pmode, constm1_rtx);
19021 emit_move_insn (scratch3, addr);
19022 eoschar = force_reg (QImode, eoschar);
19024 src = replace_equiv_address_nv (src, scratch3);
19026 /* If .md starts supporting :P, this can be done in .md. */
19027 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19028 scratch4), UNSPEC_SCAS);
19029 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19030 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19031 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19033 return 1;
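/* Illustrative example (not part of GCC): the repnz; scasb path starts
   the count register at -1 and decrements it once per byte scanned,
   including the terminating zero, so for a final counter C the length
   is ~C - 1, which is what the one_cmpl + add -1 pair above computes.
   Hypothetical C model: */
static unsigned long
scasb_length_sketch (const char *s)
{
  long counter = -1;
  const char *p = s;
  do
    counter--;          /* one decrement per byte scanned */
  while (*p++ != 0);
  return ~counter - 1;  /* e.g. "ab": counter ends at -4, ~(-4) - 1 == 2 */
}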
19036 /* For a given symbol (function), construct code to compute the address of
19037 its PLT entry in the large x86-64 PIC model. */
19038 static rtx
19039 construct_plt_address (rtx symbol)
19041 rtx tmp = gen_reg_rtx (Pmode);
19042 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19044 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19045 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19047 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19048 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19049 return tmp;
19052 void
19053 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19054 rtx callarg2,
19055 rtx pop, int sibcall)
19057 rtx use = NULL, call;
19059 if (pop == const0_rtx)
19060 pop = NULL;
19061 gcc_assert (!TARGET_64BIT || !pop);
19063 if (TARGET_MACHO && !TARGET_64BIT)
19065 #if TARGET_MACHO
19066 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19067 fnaddr = machopic_indirect_call_target (fnaddr);
19068 #endif
19070 else
19072 /* Static functions and indirect calls don't need the pic register. */
19073 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19074 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19075 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19076 use_reg (&use, pic_offset_table_rtx);
19079 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19081 rtx al = gen_rtx_REG (QImode, AX_REG);
19082 emit_move_insn (al, callarg2);
19083 use_reg (&use, al);
19086 if (ix86_cmodel == CM_LARGE_PIC
19087 && GET_CODE (fnaddr) == MEM
19088 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19089 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19090 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19091 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19093 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19094 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19096 if (sibcall && TARGET_64BIT
19097 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19099 rtx addr;
19100 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19101 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19102 emit_move_insn (fnaddr, addr);
19103 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19106 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19107 if (retval)
19108 call = gen_rtx_SET (VOIDmode, retval, call);
19109 if (pop)
19111 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19112 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19113 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19115 if (TARGET_64BIT
19116 && ix86_cfun_abi () == MS_ABI
19117 && (!callarg2 || INTVAL (callarg2) != -2))
19119 /* We need to represent that SI and DI registers are clobbered
19120 by SYSV calls. */
19121 static int clobbered_registers[] = {
19122 XMM6_REG, XMM7_REG, XMM8_REG,
19123 XMM9_REG, XMM10_REG, XMM11_REG,
19124 XMM12_REG, XMM13_REG, XMM14_REG,
19125 XMM15_REG, SI_REG, DI_REG
19127 unsigned int i;
19128 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19129 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19130 UNSPEC_MS_TO_SYSV_CALL);
19132 vec[0] = call;
19133 vec[1] = unspec;
19134 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19135 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19136 ? TImode : DImode,
19137 gen_rtx_REG
19138 (SSE_REGNO_P (clobbered_registers[i])
19139 ? TImode : DImode,
19140 clobbered_registers[i]));
19142 call = gen_rtx_PARALLEL (VOIDmode,
19143 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19144 + 2, vec));
19147 call = emit_call_insn (call);
19148 if (use)
19149 CALL_INSN_FUNCTION_USAGE (call) = use;
19153 /* Clear stack slot assignments remembered from previous functions.
19154 This is called from INIT_EXPANDERS once before RTL is emitted for each
19155 function. */
19157 static struct machine_function *
19158 ix86_init_machine_status (void)
19160 struct machine_function *f;
19162 f = GGC_CNEW (struct machine_function);
19163 f->use_fast_prologue_epilogue_nregs = -1;
19164 f->tls_descriptor_call_expanded_p = 0;
19165 f->call_abi = ix86_abi;
19167 return f;
19170 /* Return a MEM corresponding to a stack slot with mode MODE.
19171 Allocate a new slot if necessary.
19173 The RTL for a function can have several slots available: N is
19174 which slot to use. */
19176 rtx
19177 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19179 struct stack_local_entry *s;
19181 gcc_assert (n < MAX_386_STACK_LOCALS);
19183 /* Virtual slot is valid only before vregs are instantiated. */
19184 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19186 for (s = ix86_stack_locals; s; s = s->next)
19187 if (s->mode == mode && s->n == n)
19188 return copy_rtx (s->rtl);
19190 s = (struct stack_local_entry *)
19191 ggc_alloc (sizeof (struct stack_local_entry));
19192 s->n = n;
19193 s->mode = mode;
19194 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19196 s->next = ix86_stack_locals;
19197 ix86_stack_locals = s;
19198 return s->rtl;
19201 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19203 static GTY(()) rtx ix86_tls_symbol;
19204 static rtx
19205 ix86_tls_get_addr (void)
19208 if (!ix86_tls_symbol)
19210 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19211 (TARGET_ANY_GNU_TLS
19212 && !TARGET_64BIT)
19213 ? "___tls_get_addr"
19214 : "__tls_get_addr");
19217 return ix86_tls_symbol;
19220 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19222 static GTY(()) rtx ix86_tls_module_base_symbol;
19223 static rtx
19224 ix86_tls_module_base (void)
19227 if (!ix86_tls_module_base_symbol)
19229 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19230 "_TLS_MODULE_BASE_");
19231 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19232 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19235 return ix86_tls_module_base_symbol;
19238 /* Calculate the length of the memory address in the instruction
19239 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19241 int
19242 memory_address_length (rtx addr)
19244 struct ix86_address parts;
19245 rtx base, index, disp;
19246 int len;
19247 int ok;
19249 if (GET_CODE (addr) == PRE_DEC
19250 || GET_CODE (addr) == POST_INC
19251 || GET_CODE (addr) == PRE_MODIFY
19252 || GET_CODE (addr) == POST_MODIFY)
19253 return 0;
19255 ok = ix86_decompose_address (addr, &parts);
19256 gcc_assert (ok);
19258 if (parts.base && GET_CODE (parts.base) == SUBREG)
19259 parts.base = SUBREG_REG (parts.base);
19260 if (parts.index && GET_CODE (parts.index) == SUBREG)
19261 parts.index = SUBREG_REG (parts.index);
19263 base = parts.base;
19264 index = parts.index;
19265 disp = parts.disp;
19266 len = 0;
19268 /* Rule of thumb:
19269 - esp as the base always wants an index,
19270 - ebp as the base always wants a displacement. */
19272 /* Register Indirect. */
19273 if (base && !index && !disp)
19275 /* esp (for its index) and ebp (for its displacement) need
19276 the two-byte modrm form. */
19277 if (addr == stack_pointer_rtx
19278 || addr == arg_pointer_rtx
19279 || addr == frame_pointer_rtx
19280 || addr == hard_frame_pointer_rtx)
19281 len = 1;
19284 /* Direct Addressing. */
19285 else if (disp && !base && !index)
19286 len = 4;
19288 else
19290 /* Find the length of the displacement constant. */
19291 if (disp)
19293 if (base && satisfies_constraint_K (disp))
19294 len = 1;
19295 else
19296 len = 4;
19298 /* ebp always wants a displacement. */
19299 else if (base == hard_frame_pointer_rtx)
19300 len = 1;
19302 /* An index requires the two-byte modrm form.... */
19303 if (index
19304 /* ...like esp, which always wants an index. */
19305 || base == stack_pointer_rtx
19306 || base == arg_pointer_rtx
19307 || base == frame_pointer_rtx)
19308 len += 1;
19311 return len;
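/* Worked examples (illustrative, not part of the original source; assuming
   32-bit operands) of how the rules above translate into extra bytes beyond
   the modrm, opcode, and prefixes:
       (%eax)            -> 0  (bare register indirect)
       (%esp)            -> 1  (SIB byte forced by esp as the base)
       (%ebp)            -> 1  (disp8 of 0 forced by ebp as the base)
       4(%eax)           -> 1  (disp8; 4 satisfies constraint K)
       0x12345678(%eax)  -> 4  (disp32)
       4(%eax,%ebx,2)    -> 2  (disp8 plus the SIB byte for the index)
       0x12345678        -> 4  (direct addressing, disp32 only)  */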
19314 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
19315 is set, expect that the insn has an 8-bit immediate alternative. */
19316 int
19317 ix86_attr_length_immediate_default (rtx insn, int shortform)
19319 int len = 0;
19320 int i;
19321 extract_insn_cached (insn);
19322 for (i = recog_data.n_operands - 1; i >= 0; --i)
19323 if (CONSTANT_P (recog_data.operand[i]))
19325 gcc_assert (!len);
19326 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19327 len = 1;
19328 else
19330 switch (get_attr_mode (insn))
19332 case MODE_QI:
19333 len += 1;
19334 break;
19335 case MODE_HI:
19336 len += 2;
19337 break;
19338 case MODE_SI:
19339 len += 4;
19340 break;
19341 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
19342 case MODE_DI:
19343 len += 4;
19344 break;
19345 default:
19346 fatal_insn ("unknown insn mode", insn);
19350 return len;
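/* Worked example (illustrative, not part of the original source): with
   SHORTFORM set, "add $4, %eax" can use the sign-extended imm8 form (4
   satisfies constraint K), so the immediate contributes 1 byte; "add
   $0x12345, %eax" needs the full imm32 and contributes 4 bytes, as does a
   DImode add, whose immediate is likewise a 32-bit sign-extended field.  */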
19352 /* Compute default value for "length_address" attribute. */
19353 int
19354 ix86_attr_length_address_default (rtx insn)
19356 int i;
19358 if (get_attr_type (insn) == TYPE_LEA)
19360 rtx set = PATTERN (insn);
19362 if (GET_CODE (set) == PARALLEL)
19363 set = XVECEXP (set, 0, 0);
19365 gcc_assert (GET_CODE (set) == SET);
19367 return memory_address_length (SET_SRC (set));
19370 extract_insn_cached (insn);
19371 for (i = recog_data.n_operands - 1; i >= 0; --i)
19372 if (MEM_P (recog_data.operand[i]))
19374 return memory_address_length (XEXP (recog_data.operand[i], 0));
19375 break;
19377 return 0;
19380 /* Compute the default value for the "length_vex" attribute. It includes
19381 the 2- or 3-byte VEX prefix and 1 opcode byte. */
19383 int
19384 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19385 int has_vex_w)
19387 int i;
19389 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit requires
19390 the 3-byte VEX prefix. */
19391 if (!has_0f_opcode || has_vex_w)
19392 return 3 + 1;
19394 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
19395 if (!TARGET_64BIT)
19396 return 2 + 1;
19398 extract_insn_cached (insn);
19400 for (i = recog_data.n_operands - 1; i >= 0; --i)
19401 if (REG_P (recog_data.operand[i]))
19403 /* The REX.W bit requires the 3-byte VEX prefix. */
19404 if (GET_MODE (recog_data.operand[i]) == DImode)
19405 return 3 + 1;
19407 else
19409 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
19410 if (MEM_P (recog_data.operand[i])
19411 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19412 return 3 + 1;
19415 return 2 + 1;
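/* Worked example (illustrative, not part of the original source):
   "vaddps (%rax), %xmm1, %xmm0" encodes as c5 f0 58 00 -- a 2-byte VEX
   prefix plus opcode, so the attribute is 2 + 1 = 3. "vaddps (%r8),
   %xmm1, %xmm0" mentions an extended register in its memory operand and
   needs REX.B, forcing the 3-byte prefix: c4 c1 70 58 00, so the
   attribute is 3 + 1 = 4. The modrm and address bytes are accounted for
   separately, not here.  */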
19418 /* Return the maximum number of instructions a CPU can issue. */
19420 static int
19421 ix86_issue_rate (void)
19423 switch (ix86_tune)
19425 case PROCESSOR_PENTIUM:
19426 case PROCESSOR_ATOM:
19427 case PROCESSOR_K6:
19428 return 2;
19430 case PROCESSOR_PENTIUMPRO:
19431 case PROCESSOR_PENTIUM4:
19432 case PROCESSOR_ATHLON:
19433 case PROCESSOR_K8:
19434 case PROCESSOR_AMDFAM10:
19435 case PROCESSOR_NOCONA:
19436 case PROCESSOR_GENERIC32:
19437 case PROCESSOR_GENERIC64:
19438 return 3;
19440 case PROCESSOR_CORE2:
19441 return 4;
19443 default:
19444 return 1;
19448 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
19449 by DEP_INSN and nothing else set by DEP_INSN. */
19451 static int
19452 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19454 rtx set, set2;
19456 /* Simplify the test for uninteresting insns. */
19457 if (insn_type != TYPE_SETCC
19458 && insn_type != TYPE_ICMOV
19459 && insn_type != TYPE_FCMOV
19460 && insn_type != TYPE_IBR)
19461 return 0;
19463 if ((set = single_set (dep_insn)) != 0)
19465 set = SET_DEST (set);
19466 set2 = NULL_RTX;
19468 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19469 && XVECLEN (PATTERN (dep_insn), 0) == 2
19470 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19471 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19473 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19474 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19476 else
19477 return 0;
19479 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19480 return 0;
19482 /* This test is true if the dependent insn reads the flags but
19483 not any other potentially set register. */
19484 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19485 return 0;
19487 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19488 return 0;
19490 return 1;
19493 /* Return true iff USE_INSN has a memory address with operands set by
19494 SET_INSN. */
19496 bool
19497 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19499 int i;
19500 extract_insn_cached (use_insn);
19501 for (i = recog_data.n_operands - 1; i >= 0; --i)
19502 if (MEM_P (recog_data.operand[i]))
19504 rtx addr = XEXP (recog_data.operand[i], 0);
19505 return modified_in_p (addr, set_insn) != 0;
19507 return false;
19510 static int
19511 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19513 enum attr_type insn_type, dep_insn_type;
19514 enum attr_memory memory;
19515 rtx set, set2;
19516 int dep_insn_code_number;
19518 /* Anti and output dependencies have zero cost on all CPUs. */
19519 if (REG_NOTE_KIND (link) != 0)
19520 return 0;
19522 dep_insn_code_number = recog_memoized (dep_insn);
19524 /* If we can't recognize the insns, we can't really do anything. */
19525 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19526 return cost;
19528 insn_type = get_attr_type (insn);
19529 dep_insn_type = get_attr_type (dep_insn);
19531 switch (ix86_tune)
19533 case PROCESSOR_PENTIUM:
19534 /* Address Generation Interlock adds a cycle of latency. */
19535 if (insn_type == TYPE_LEA)
19537 rtx addr = PATTERN (insn);
19539 if (GET_CODE (addr) == PARALLEL)
19540 addr = XVECEXP (addr, 0, 0);
19542 gcc_assert (GET_CODE (addr) == SET);
19544 addr = SET_SRC (addr);
19545 if (modified_in_p (addr, dep_insn))
19546 cost += 1;
19548 else if (ix86_agi_dependent (dep_insn, insn))
19549 cost += 1;
19551 /* ??? Compares pair with jump/setcc. */
19552 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19553 cost = 0;
19555 /* Floating point stores require the value to be ready one cycle earlier. */
19556 if (insn_type == TYPE_FMOV
19557 && get_attr_memory (insn) == MEMORY_STORE
19558 && !ix86_agi_dependent (dep_insn, insn))
19559 cost += 1;
19560 break;
19562 case PROCESSOR_PENTIUMPRO:
19563 memory = get_attr_memory (insn);
19565 /* INT->FP conversion is expensive. */
19566 if (get_attr_fp_int_src (dep_insn))
19567 cost += 5;
19569 /* There is one extra cycle of latency between an FP op and a store. */
19570 if (insn_type == TYPE_FMOV
19571 && (set = single_set (dep_insn)) != NULL_RTX
19572 && (set2 = single_set (insn)) != NULL_RTX
19573 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19574 && MEM_P (SET_DEST (set2)))
19575 cost += 1;
19577 /* Show the ability of the reorder buffer to hide the latency of a load by
19578 executing it in parallel with the previous instruction when the previous
19579 instruction is not needed to compute the address. */
19580 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19581 && !ix86_agi_dependent (dep_insn, insn))
19583 /* Claim moves to take one cycle, as the core can issue one load
19584 at a time and the next load can start a cycle later. */
19585 if (dep_insn_type == TYPE_IMOV
19586 || dep_insn_type == TYPE_FMOV)
19587 cost = 1;
19588 else if (cost > 1)
19589 cost--;
19591 break;
19593 case PROCESSOR_K6:
19594 memory = get_attr_memory (insn);
19596 /* The esp dependency is resolved before the instruction is really
19597 finished. */
19598 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19599 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19600 return 1;
19602 /* INT->FP conversion is expensive. */
19603 if (get_attr_fp_int_src (dep_insn))
19604 cost += 5;
19606 /* Show the ability of the reorder buffer to hide the latency of a load by
19607 executing it in parallel with the previous instruction when the previous
19608 instruction is not needed to compute the address. */
19609 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19610 && !ix86_agi_dependent (dep_insn, insn))
19612 /* Claim moves to take one cycle, as the core can issue one load
19613 at a time and the next load can start a cycle later. */
19614 if (dep_insn_type == TYPE_IMOV
19615 || dep_insn_type == TYPE_FMOV)
19616 cost = 1;
19617 else if (cost > 2)
19618 cost -= 2;
19619 else
19620 cost = 1;
19622 break;
19624 case PROCESSOR_ATHLON:
19625 case PROCESSOR_K8:
19626 case PROCESSOR_AMDFAM10:
19627 case PROCESSOR_ATOM:
19628 case PROCESSOR_GENERIC32:
19629 case PROCESSOR_GENERIC64:
19630 memory = get_attr_memory (insn);
19632 /* Show the ability of the reorder buffer to hide the latency of a load by
19633 executing it in parallel with the previous instruction when the previous
19634 instruction is not needed to compute the address. */
19635 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19636 && !ix86_agi_dependent (dep_insn, insn))
19638 enum attr_unit unit = get_attr_unit (insn);
19639 int loadcost = 3;
19641 /* Because of the difference between the length of integer and
19642 floating unit pipeline preparation stages, the memory operands
19643 for floating point are cheaper.
19645 ??? For Athlon the difference is most probably 2. */
19646 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19647 loadcost = 3;
19648 else
19649 loadcost = TARGET_ATHLON ? 2 : 0;
19651 if (cost >= loadcost)
19652 cost -= loadcost;
19653 else
19654 cost = 0;
19657 default:
19658 break;
19661 return cost;
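/* Illustration (not part of the original source): the Pentium AGI case
   above models sequences such as
       addl $4, %eax
       movl (%eax), %ebx
   where the load's address depends on a register written by the previous
   insn; the address-generation interlock costs an extra cycle, hence the
   cost += 1 adjustment.  */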
19664 /* How many alternative schedules to try. This should be as wide as the
19665 scheduling freedom in the DFA, but no wider. Making this value too
19666 large results in extra work for the scheduler. */
19668 static int
19669 ia32_multipass_dfa_lookahead (void)
19671 switch (ix86_tune)
19673 case PROCESSOR_PENTIUM:
19674 return 2;
19676 case PROCESSOR_PENTIUMPRO:
19677 case PROCESSOR_K6:
19678 return 1;
19680 default:
19681 return 0;
19686 /* Compute the alignment given to a constant that is being placed in memory.
19687 EXP is the constant and ALIGN is the alignment that the object would
19688 ordinarily have.
19689 The value of this function is used instead of that alignment to align
19690 the object. */
19692 int
19693 ix86_constant_alignment (tree exp, int align)
19695 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19696 || TREE_CODE (exp) == INTEGER_CST)
19698 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19699 return 64;
19700 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19701 return 128;
19703 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19704 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19705 return BITS_PER_WORD;
19707 return align;
19710 /* Compute the alignment for a static variable.
19711 TYPE is the data type, and ALIGN is the alignment that
19712 the object would ordinarily have. The value of this function is used
19713 instead of that alignment to align the object. */
19715 int
19716 ix86_data_alignment (tree type, int align)
19718 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19720 if (AGGREGATE_TYPE_P (type)
19721 && TYPE_SIZE (type)
19722 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19723 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19724 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19725 && align < max_align)
19726 align = max_align;
19728 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
19729 to a 16-byte boundary. */
19730 if (TARGET_64BIT)
19732 if (AGGREGATE_TYPE_P (type)
19733 && TYPE_SIZE (type)
19734 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19735 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19736 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19737 return 128;
19740 if (TREE_CODE (type) == ARRAY_TYPE)
19742 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19743 return 64;
19744 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19745 return 128;
19747 else if (TREE_CODE (type) == COMPLEX_TYPE)
19750 if (TYPE_MODE (type) == DCmode && align < 64)
19751 return 64;
19752 if ((TYPE_MODE (type) == XCmode
19753 || TYPE_MODE (type) == TCmode) && align < 128)
19754 return 128;
19756 else if ((TREE_CODE (type) == RECORD_TYPE
19757 || TREE_CODE (type) == UNION_TYPE
19758 || TREE_CODE (type) == QUAL_UNION_TYPE)
19759 && TYPE_FIELDS (type))
19761 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19762 return 64;
19763 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19764 return 128;
19766 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19767 || TREE_CODE (type) == INTEGER_TYPE)
19769 if (TYPE_MODE (type) == DFmode && align < 64)
19770 return 64;
19771 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19772 return 128;
19775 return align;
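/* Illustration (not part of the original source): on x86-64, a 32-byte
   "static double a[4]" falls into the aggregate-of-128-bits-or-more case
   above and is bumped to 128-bit alignment, while a lone DFmode
   "static double d" only gets the 64-bit minimum from the ARRAY/REAL_TYPE
   rules.  */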
19778 /* Compute the alignment for a local variable or a stack slot. EXP is
19779 the data type or decl itself, MODE is the widest mode available and
19780 ALIGN is the alignment that the object would ordinarily have. The
19781 value of this macro is used instead of that alignment to align the
19782 object. */
19784 unsigned int
19785 ix86_local_alignment (tree exp, enum machine_mode mode,
19786 unsigned int align)
19788 tree type, decl;
19790 if (exp && DECL_P (exp))
19792 type = TREE_TYPE (exp);
19793 decl = exp;
19795 else
19797 type = exp;
19798 decl = NULL;
19801 /* Don't do dynamic stack realignment for long long objects with
19802 -mpreferred-stack-boundary=2. */
19803 if (!TARGET_64BIT
19804 && align == 64
19805 && ix86_preferred_stack_boundary < 64
19806 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19807 && (!type || !TYPE_USER_ALIGN (type))
19808 && (!decl || !DECL_USER_ALIGN (decl)))
19809 align = 32;
19811 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
19812 register in MODE. We will return the larger of the XF and DF
19813 alignments. */
19814 if (!type)
19816 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19817 align = GET_MODE_ALIGNMENT (DFmode);
19818 return align;
19821 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
19822 to a 16-byte boundary. */
19823 if (TARGET_64BIT)
19825 if (AGGREGATE_TYPE_P (type)
19826 && TYPE_SIZE (type)
19827 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19828 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19829 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19830 return 128;
19832 if (TREE_CODE (type) == ARRAY_TYPE)
19834 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19835 return 64;
19836 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19837 return 128;
19839 else if (TREE_CODE (type) == COMPLEX_TYPE)
19841 if (TYPE_MODE (type) == DCmode && align < 64)
19842 return 64;
19843 if ((TYPE_MODE (type) == XCmode
19844 || TYPE_MODE (type) == TCmode) && align < 128)
19845 return 128;
19847 else if ((TREE_CODE (type) == RECORD_TYPE
19848 || TREE_CODE (type) == UNION_TYPE
19849 || TREE_CODE (type) == QUAL_UNION_TYPE)
19850 && TYPE_FIELDS (type))
19852 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19853 return 64;
19854 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19855 return 128;
19857 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19858 || TREE_CODE (type) == INTEGER_TYPE)
19861 if (TYPE_MODE (type) == DFmode && align < 64)
19862 return 64;
19863 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19864 return 128;
19866 return align;
19869 /* Emit RTL insns to initialize the variable parts of a trampoline.
19870 FNADDR is an RTX for the address of the function's pure code.
19871 CXT is an RTX for the static chain value for the function. */
19872 void
19873 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19875 if (!TARGET_64BIT)
19877 /* Compute offset from the end of the jmp to the target function. */
19878 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19879 plus_constant (tramp, 10),
19880 NULL_RTX, 1, OPTAB_DIRECT);
19881 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19882 gen_int_mode (0xb9, QImode));
19883 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19884 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19885 gen_int_mode (0xe9, QImode));
19886 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19888 else
19890 int offset = 0;
19891 /* Try to load the address using the shorter movl instead of movabs.
19892 We may want to support movq for kernel mode, but the kernel does not
19893 use trampolines at the moment. */
19894 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19896 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19897 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19898 gen_int_mode (0xbb41, HImode));
19899 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19900 gen_lowpart (SImode, fnaddr));
19901 offset += 6;
19903 else
19905 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19906 gen_int_mode (0xbb49, HImode));
19907 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19908 fnaddr);
19909 offset += 10;
19911 /* Load the static chain into r10 using movabs. */
19912 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19913 gen_int_mode (0xba49, HImode));
19914 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19915 cxt);
19916 offset += 10;
19917 /* Jump to r11. */
19918 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19919 gen_int_mode (0xff49, HImode));
19920 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
19921 gen_int_mode (0xe3, QImode));
19922 offset += 3;
19923 gcc_assert (offset <= TRAMPOLINE_SIZE);
19926 #ifdef ENABLE_EXECUTE_STACK
19927 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19928 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19929 #endif
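/* Illustration (not part of the original source): the code above lays out
   the trampoline as follows. 32-bit (10 bytes):
       b9 <cxt:4>     movl  $cxt, %ecx
       e9 <disp:4>    jmp   fnaddr       (disp = fnaddr - (tramp + 10))
   64-bit: fnaddr is loaded into %r11 with either "41 bb imm32" (movl,
   6 bytes, when the address zero-extends) or "49 bb imm64" (movabs,
   10 bytes); the static chain goes into %r10 via "49 ba imm64"; and
   "49 ff e3" performs jmp *%r11.  */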
19932 /* Codes for all the SSE/MMX builtins. */
19933 enum ix86_builtins
19935 IX86_BUILTIN_ADDPS,
19936 IX86_BUILTIN_ADDSS,
19937 IX86_BUILTIN_DIVPS,
19938 IX86_BUILTIN_DIVSS,
19939 IX86_BUILTIN_MULPS,
19940 IX86_BUILTIN_MULSS,
19941 IX86_BUILTIN_SUBPS,
19942 IX86_BUILTIN_SUBSS,
19944 IX86_BUILTIN_CMPEQPS,
19945 IX86_BUILTIN_CMPLTPS,
19946 IX86_BUILTIN_CMPLEPS,
19947 IX86_BUILTIN_CMPGTPS,
19948 IX86_BUILTIN_CMPGEPS,
19949 IX86_BUILTIN_CMPNEQPS,
19950 IX86_BUILTIN_CMPNLTPS,
19951 IX86_BUILTIN_CMPNLEPS,
19952 IX86_BUILTIN_CMPNGTPS,
19953 IX86_BUILTIN_CMPNGEPS,
19954 IX86_BUILTIN_CMPORDPS,
19955 IX86_BUILTIN_CMPUNORDPS,
19956 IX86_BUILTIN_CMPEQSS,
19957 IX86_BUILTIN_CMPLTSS,
19958 IX86_BUILTIN_CMPLESS,
19959 IX86_BUILTIN_CMPNEQSS,
19960 IX86_BUILTIN_CMPNLTSS,
19961 IX86_BUILTIN_CMPNLESS,
19962 IX86_BUILTIN_CMPNGTSS,
19963 IX86_BUILTIN_CMPNGESS,
19964 IX86_BUILTIN_CMPORDSS,
19965 IX86_BUILTIN_CMPUNORDSS,
19967 IX86_BUILTIN_COMIEQSS,
19968 IX86_BUILTIN_COMILTSS,
19969 IX86_BUILTIN_COMILESS,
19970 IX86_BUILTIN_COMIGTSS,
19971 IX86_BUILTIN_COMIGESS,
19972 IX86_BUILTIN_COMINEQSS,
19973 IX86_BUILTIN_UCOMIEQSS,
19974 IX86_BUILTIN_UCOMILTSS,
19975 IX86_BUILTIN_UCOMILESS,
19976 IX86_BUILTIN_UCOMIGTSS,
19977 IX86_BUILTIN_UCOMIGESS,
19978 IX86_BUILTIN_UCOMINEQSS,
19980 IX86_BUILTIN_CVTPI2PS,
19981 IX86_BUILTIN_CVTPS2PI,
19982 IX86_BUILTIN_CVTSI2SS,
19983 IX86_BUILTIN_CVTSI642SS,
19984 IX86_BUILTIN_CVTSS2SI,
19985 IX86_BUILTIN_CVTSS2SI64,
19986 IX86_BUILTIN_CVTTPS2PI,
19987 IX86_BUILTIN_CVTTSS2SI,
19988 IX86_BUILTIN_CVTTSS2SI64,
19990 IX86_BUILTIN_MAXPS,
19991 IX86_BUILTIN_MAXSS,
19992 IX86_BUILTIN_MINPS,
19993 IX86_BUILTIN_MINSS,
19995 IX86_BUILTIN_LOADUPS,
19996 IX86_BUILTIN_STOREUPS,
19997 IX86_BUILTIN_MOVSS,
19999 IX86_BUILTIN_MOVHLPS,
20000 IX86_BUILTIN_MOVLHPS,
20001 IX86_BUILTIN_LOADHPS,
20002 IX86_BUILTIN_LOADLPS,
20003 IX86_BUILTIN_STOREHPS,
20004 IX86_BUILTIN_STORELPS,
20006 IX86_BUILTIN_MASKMOVQ,
20007 IX86_BUILTIN_MOVMSKPS,
20008 IX86_BUILTIN_PMOVMSKB,
20010 IX86_BUILTIN_MOVNTPS,
20011 IX86_BUILTIN_MOVNTQ,
20013 IX86_BUILTIN_LOADDQU,
20014 IX86_BUILTIN_STOREDQU,
20016 IX86_BUILTIN_PACKSSWB,
20017 IX86_BUILTIN_PACKSSDW,
20018 IX86_BUILTIN_PACKUSWB,
20020 IX86_BUILTIN_PADDB,
20021 IX86_BUILTIN_PADDW,
20022 IX86_BUILTIN_PADDD,
20023 IX86_BUILTIN_PADDQ,
20024 IX86_BUILTIN_PADDSB,
20025 IX86_BUILTIN_PADDSW,
20026 IX86_BUILTIN_PADDUSB,
20027 IX86_BUILTIN_PADDUSW,
20028 IX86_BUILTIN_PSUBB,
20029 IX86_BUILTIN_PSUBW,
20030 IX86_BUILTIN_PSUBD,
20031 IX86_BUILTIN_PSUBQ,
20032 IX86_BUILTIN_PSUBSB,
20033 IX86_BUILTIN_PSUBSW,
20034 IX86_BUILTIN_PSUBUSB,
20035 IX86_BUILTIN_PSUBUSW,
20037 IX86_BUILTIN_PAND,
20038 IX86_BUILTIN_PANDN,
20039 IX86_BUILTIN_POR,
20040 IX86_BUILTIN_PXOR,
20042 IX86_BUILTIN_PAVGB,
20043 IX86_BUILTIN_PAVGW,
20045 IX86_BUILTIN_PCMPEQB,
20046 IX86_BUILTIN_PCMPEQW,
20047 IX86_BUILTIN_PCMPEQD,
20048 IX86_BUILTIN_PCMPGTB,
20049 IX86_BUILTIN_PCMPGTW,
20050 IX86_BUILTIN_PCMPGTD,
20052 IX86_BUILTIN_PMADDWD,
20054 IX86_BUILTIN_PMAXSW,
20055 IX86_BUILTIN_PMAXUB,
20056 IX86_BUILTIN_PMINSW,
20057 IX86_BUILTIN_PMINUB,
20059 IX86_BUILTIN_PMULHUW,
20060 IX86_BUILTIN_PMULHW,
20061 IX86_BUILTIN_PMULLW,
20063 IX86_BUILTIN_PSADBW,
20064 IX86_BUILTIN_PSHUFW,
20066 IX86_BUILTIN_PSLLW,
20067 IX86_BUILTIN_PSLLD,
20068 IX86_BUILTIN_PSLLQ,
20069 IX86_BUILTIN_PSRAW,
20070 IX86_BUILTIN_PSRAD,
20071 IX86_BUILTIN_PSRLW,
20072 IX86_BUILTIN_PSRLD,
20073 IX86_BUILTIN_PSRLQ,
20074 IX86_BUILTIN_PSLLWI,
20075 IX86_BUILTIN_PSLLDI,
20076 IX86_BUILTIN_PSLLQI,
20077 IX86_BUILTIN_PSRAWI,
20078 IX86_BUILTIN_PSRADI,
20079 IX86_BUILTIN_PSRLWI,
20080 IX86_BUILTIN_PSRLDI,
20081 IX86_BUILTIN_PSRLQI,
20083 IX86_BUILTIN_PUNPCKHBW,
20084 IX86_BUILTIN_PUNPCKHWD,
20085 IX86_BUILTIN_PUNPCKHDQ,
20086 IX86_BUILTIN_PUNPCKLBW,
20087 IX86_BUILTIN_PUNPCKLWD,
20088 IX86_BUILTIN_PUNPCKLDQ,
20090 IX86_BUILTIN_SHUFPS,
20092 IX86_BUILTIN_RCPPS,
20093 IX86_BUILTIN_RCPSS,
20094 IX86_BUILTIN_RSQRTPS,
20095 IX86_BUILTIN_RSQRTPS_NR,
20096 IX86_BUILTIN_RSQRTSS,
20097 IX86_BUILTIN_RSQRTF,
20098 IX86_BUILTIN_SQRTPS,
20099 IX86_BUILTIN_SQRTPS_NR,
20100 IX86_BUILTIN_SQRTSS,
20102 IX86_BUILTIN_UNPCKHPS,
20103 IX86_BUILTIN_UNPCKLPS,
20105 IX86_BUILTIN_ANDPS,
20106 IX86_BUILTIN_ANDNPS,
20107 IX86_BUILTIN_ORPS,
20108 IX86_BUILTIN_XORPS,
20110 IX86_BUILTIN_EMMS,
20111 IX86_BUILTIN_LDMXCSR,
20112 IX86_BUILTIN_STMXCSR,
20113 IX86_BUILTIN_SFENCE,
20115 /* 3DNow! Original */
20116 IX86_BUILTIN_FEMMS,
20117 IX86_BUILTIN_PAVGUSB,
20118 IX86_BUILTIN_PF2ID,
20119 IX86_BUILTIN_PFACC,
20120 IX86_BUILTIN_PFADD,
20121 IX86_BUILTIN_PFCMPEQ,
20122 IX86_BUILTIN_PFCMPGE,
20123 IX86_BUILTIN_PFCMPGT,
20124 IX86_BUILTIN_PFMAX,
20125 IX86_BUILTIN_PFMIN,
20126 IX86_BUILTIN_PFMUL,
20127 IX86_BUILTIN_PFRCP,
20128 IX86_BUILTIN_PFRCPIT1,
20129 IX86_BUILTIN_PFRCPIT2,
20130 IX86_BUILTIN_PFRSQIT1,
20131 IX86_BUILTIN_PFRSQRT,
20132 IX86_BUILTIN_PFSUB,
20133 IX86_BUILTIN_PFSUBR,
20134 IX86_BUILTIN_PI2FD,
20135 IX86_BUILTIN_PMULHRW,
20137 /* 3DNow! Athlon Extensions */
20138 IX86_BUILTIN_PF2IW,
20139 IX86_BUILTIN_PFNACC,
20140 IX86_BUILTIN_PFPNACC,
20141 IX86_BUILTIN_PI2FW,
20142 IX86_BUILTIN_PSWAPDSI,
20143 IX86_BUILTIN_PSWAPDSF,
20145 /* SSE2 */
20146 IX86_BUILTIN_ADDPD,
20147 IX86_BUILTIN_ADDSD,
20148 IX86_BUILTIN_DIVPD,
20149 IX86_BUILTIN_DIVSD,
20150 IX86_BUILTIN_MULPD,
20151 IX86_BUILTIN_MULSD,
20152 IX86_BUILTIN_SUBPD,
20153 IX86_BUILTIN_SUBSD,
20155 IX86_BUILTIN_CMPEQPD,
20156 IX86_BUILTIN_CMPLTPD,
20157 IX86_BUILTIN_CMPLEPD,
20158 IX86_BUILTIN_CMPGTPD,
20159 IX86_BUILTIN_CMPGEPD,
20160 IX86_BUILTIN_CMPNEQPD,
20161 IX86_BUILTIN_CMPNLTPD,
20162 IX86_BUILTIN_CMPNLEPD,
20163 IX86_BUILTIN_CMPNGTPD,
20164 IX86_BUILTIN_CMPNGEPD,
20165 IX86_BUILTIN_CMPORDPD,
20166 IX86_BUILTIN_CMPUNORDPD,
20167 IX86_BUILTIN_CMPEQSD,
20168 IX86_BUILTIN_CMPLTSD,
20169 IX86_BUILTIN_CMPLESD,
20170 IX86_BUILTIN_CMPNEQSD,
20171 IX86_BUILTIN_CMPNLTSD,
20172 IX86_BUILTIN_CMPNLESD,
20173 IX86_BUILTIN_CMPORDSD,
20174 IX86_BUILTIN_CMPUNORDSD,
20176 IX86_BUILTIN_COMIEQSD,
20177 IX86_BUILTIN_COMILTSD,
20178 IX86_BUILTIN_COMILESD,
20179 IX86_BUILTIN_COMIGTSD,
20180 IX86_BUILTIN_COMIGESD,
20181 IX86_BUILTIN_COMINEQSD,
20182 IX86_BUILTIN_UCOMIEQSD,
20183 IX86_BUILTIN_UCOMILTSD,
20184 IX86_BUILTIN_UCOMILESD,
20185 IX86_BUILTIN_UCOMIGTSD,
20186 IX86_BUILTIN_UCOMIGESD,
20187 IX86_BUILTIN_UCOMINEQSD,
20189 IX86_BUILTIN_MAXPD,
20190 IX86_BUILTIN_MAXSD,
20191 IX86_BUILTIN_MINPD,
20192 IX86_BUILTIN_MINSD,
20194 IX86_BUILTIN_ANDPD,
20195 IX86_BUILTIN_ANDNPD,
20196 IX86_BUILTIN_ORPD,
20197 IX86_BUILTIN_XORPD,
20199 IX86_BUILTIN_SQRTPD,
20200 IX86_BUILTIN_SQRTSD,
20202 IX86_BUILTIN_UNPCKHPD,
20203 IX86_BUILTIN_UNPCKLPD,
20205 IX86_BUILTIN_SHUFPD,
20207 IX86_BUILTIN_LOADUPD,
20208 IX86_BUILTIN_STOREUPD,
20209 IX86_BUILTIN_MOVSD,
20211 IX86_BUILTIN_LOADHPD,
20212 IX86_BUILTIN_LOADLPD,
20214 IX86_BUILTIN_CVTDQ2PD,
20215 IX86_BUILTIN_CVTDQ2PS,
20217 IX86_BUILTIN_CVTPD2DQ,
20218 IX86_BUILTIN_CVTPD2PI,
20219 IX86_BUILTIN_CVTPD2PS,
20220 IX86_BUILTIN_CVTTPD2DQ,
20221 IX86_BUILTIN_CVTTPD2PI,
20223 IX86_BUILTIN_CVTPI2PD,
20224 IX86_BUILTIN_CVTSI2SD,
20225 IX86_BUILTIN_CVTSI642SD,
20227 IX86_BUILTIN_CVTSD2SI,
20228 IX86_BUILTIN_CVTSD2SI64,
20229 IX86_BUILTIN_CVTSD2SS,
20230 IX86_BUILTIN_CVTSS2SD,
20231 IX86_BUILTIN_CVTTSD2SI,
20232 IX86_BUILTIN_CVTTSD2SI64,
20234 IX86_BUILTIN_CVTPS2DQ,
20235 IX86_BUILTIN_CVTPS2PD,
20236 IX86_BUILTIN_CVTTPS2DQ,
20238 IX86_BUILTIN_MOVNTI,
20239 IX86_BUILTIN_MOVNTPD,
20240 IX86_BUILTIN_MOVNTDQ,
20242 IX86_BUILTIN_MOVQ128,
20244 /* SSE2 MMX */
20245 IX86_BUILTIN_MASKMOVDQU,
20246 IX86_BUILTIN_MOVMSKPD,
20247 IX86_BUILTIN_PMOVMSKB128,
20249 IX86_BUILTIN_PACKSSWB128,
20250 IX86_BUILTIN_PACKSSDW128,
20251 IX86_BUILTIN_PACKUSWB128,
20253 IX86_BUILTIN_PADDB128,
20254 IX86_BUILTIN_PADDW128,
20255 IX86_BUILTIN_PADDD128,
20256 IX86_BUILTIN_PADDQ128,
20257 IX86_BUILTIN_PADDSB128,
20258 IX86_BUILTIN_PADDSW128,
20259 IX86_BUILTIN_PADDUSB128,
20260 IX86_BUILTIN_PADDUSW128,
20261 IX86_BUILTIN_PSUBB128,
20262 IX86_BUILTIN_PSUBW128,
20263 IX86_BUILTIN_PSUBD128,
20264 IX86_BUILTIN_PSUBQ128,
20265 IX86_BUILTIN_PSUBSB128,
20266 IX86_BUILTIN_PSUBSW128,
20267 IX86_BUILTIN_PSUBUSB128,
20268 IX86_BUILTIN_PSUBUSW128,
20270 IX86_BUILTIN_PAND128,
20271 IX86_BUILTIN_PANDN128,
20272 IX86_BUILTIN_POR128,
20273 IX86_BUILTIN_PXOR128,
20275 IX86_BUILTIN_PAVGB128,
20276 IX86_BUILTIN_PAVGW128,
20278 IX86_BUILTIN_PCMPEQB128,
20279 IX86_BUILTIN_PCMPEQW128,
20280 IX86_BUILTIN_PCMPEQD128,
20281 IX86_BUILTIN_PCMPGTB128,
20282 IX86_BUILTIN_PCMPGTW128,
20283 IX86_BUILTIN_PCMPGTD128,
20285 IX86_BUILTIN_PMADDWD128,
20287 IX86_BUILTIN_PMAXSW128,
20288 IX86_BUILTIN_PMAXUB128,
20289 IX86_BUILTIN_PMINSW128,
20290 IX86_BUILTIN_PMINUB128,
20292 IX86_BUILTIN_PMULUDQ,
20293 IX86_BUILTIN_PMULUDQ128,
20294 IX86_BUILTIN_PMULHUW128,
20295 IX86_BUILTIN_PMULHW128,
20296 IX86_BUILTIN_PMULLW128,
20298 IX86_BUILTIN_PSADBW128,
20299 IX86_BUILTIN_PSHUFHW,
20300 IX86_BUILTIN_PSHUFLW,
20301 IX86_BUILTIN_PSHUFD,
20303 IX86_BUILTIN_PSLLDQI128,
20304 IX86_BUILTIN_PSLLWI128,
20305 IX86_BUILTIN_PSLLDI128,
20306 IX86_BUILTIN_PSLLQI128,
20307 IX86_BUILTIN_PSRAWI128,
20308 IX86_BUILTIN_PSRADI128,
20309 IX86_BUILTIN_PSRLDQI128,
20310 IX86_BUILTIN_PSRLWI128,
20311 IX86_BUILTIN_PSRLDI128,
20312 IX86_BUILTIN_PSRLQI128,
20314 IX86_BUILTIN_PSLLDQ128,
20315 IX86_BUILTIN_PSLLW128,
20316 IX86_BUILTIN_PSLLD128,
20317 IX86_BUILTIN_PSLLQ128,
20318 IX86_BUILTIN_PSRAW128,
20319 IX86_BUILTIN_PSRAD128,
20320 IX86_BUILTIN_PSRLW128,
20321 IX86_BUILTIN_PSRLD128,
20322 IX86_BUILTIN_PSRLQ128,
20324 IX86_BUILTIN_PUNPCKHBW128,
20325 IX86_BUILTIN_PUNPCKHWD128,
20326 IX86_BUILTIN_PUNPCKHDQ128,
20327 IX86_BUILTIN_PUNPCKHQDQ128,
20328 IX86_BUILTIN_PUNPCKLBW128,
20329 IX86_BUILTIN_PUNPCKLWD128,
20330 IX86_BUILTIN_PUNPCKLDQ128,
20331 IX86_BUILTIN_PUNPCKLQDQ128,
20333 IX86_BUILTIN_CLFLUSH,
20334 IX86_BUILTIN_MFENCE,
20335 IX86_BUILTIN_LFENCE,
20337 /* SSE3. */
20338 IX86_BUILTIN_ADDSUBPS,
20339 IX86_BUILTIN_HADDPS,
20340 IX86_BUILTIN_HSUBPS,
20341 IX86_BUILTIN_MOVSHDUP,
20342 IX86_BUILTIN_MOVSLDUP,
20343 IX86_BUILTIN_ADDSUBPD,
20344 IX86_BUILTIN_HADDPD,
20345 IX86_BUILTIN_HSUBPD,
20346 IX86_BUILTIN_LDDQU,
20348 IX86_BUILTIN_MONITOR,
20349 IX86_BUILTIN_MWAIT,
20351 /* SSSE3. */
20352 IX86_BUILTIN_PHADDW,
20353 IX86_BUILTIN_PHADDD,
20354 IX86_BUILTIN_PHADDSW,
20355 IX86_BUILTIN_PHSUBW,
20356 IX86_BUILTIN_PHSUBD,
20357 IX86_BUILTIN_PHSUBSW,
20358 IX86_BUILTIN_PMADDUBSW,
20359 IX86_BUILTIN_PMULHRSW,
20360 IX86_BUILTIN_PSHUFB,
20361 IX86_BUILTIN_PSIGNB,
20362 IX86_BUILTIN_PSIGNW,
20363 IX86_BUILTIN_PSIGND,
20364 IX86_BUILTIN_PALIGNR,
20365 IX86_BUILTIN_PABSB,
20366 IX86_BUILTIN_PABSW,
20367 IX86_BUILTIN_PABSD,
20369 IX86_BUILTIN_PHADDW128,
20370 IX86_BUILTIN_PHADDD128,
20371 IX86_BUILTIN_PHADDSW128,
20372 IX86_BUILTIN_PHSUBW128,
20373 IX86_BUILTIN_PHSUBD128,
20374 IX86_BUILTIN_PHSUBSW128,
20375 IX86_BUILTIN_PMADDUBSW128,
20376 IX86_BUILTIN_PMULHRSW128,
20377 IX86_BUILTIN_PSHUFB128,
20378 IX86_BUILTIN_PSIGNB128,
20379 IX86_BUILTIN_PSIGNW128,
20380 IX86_BUILTIN_PSIGND128,
20381 IX86_BUILTIN_PALIGNR128,
20382 IX86_BUILTIN_PABSB128,
20383 IX86_BUILTIN_PABSW128,
20384 IX86_BUILTIN_PABSD128,
20386 /* AMDFAM10 - SSE4A New Instructions. */
20387 IX86_BUILTIN_MOVNTSD,
20388 IX86_BUILTIN_MOVNTSS,
20389 IX86_BUILTIN_EXTRQI,
20390 IX86_BUILTIN_EXTRQ,
20391 IX86_BUILTIN_INSERTQI,
20392 IX86_BUILTIN_INSERTQ,
20394 /* SSE4.1. */
20395 IX86_BUILTIN_BLENDPD,
20396 IX86_BUILTIN_BLENDPS,
20397 IX86_BUILTIN_BLENDVPD,
20398 IX86_BUILTIN_BLENDVPS,
20399 IX86_BUILTIN_PBLENDVB128,
20400 IX86_BUILTIN_PBLENDW128,
20402 IX86_BUILTIN_DPPD,
20403 IX86_BUILTIN_DPPS,
20405 IX86_BUILTIN_INSERTPS128,
20407 IX86_BUILTIN_MOVNTDQA,
20408 IX86_BUILTIN_MPSADBW128,
20409 IX86_BUILTIN_PACKUSDW128,
20410 IX86_BUILTIN_PCMPEQQ,
20411 IX86_BUILTIN_PHMINPOSUW128,
20413 IX86_BUILTIN_PMAXSB128,
20414 IX86_BUILTIN_PMAXSD128,
20415 IX86_BUILTIN_PMAXUD128,
20416 IX86_BUILTIN_PMAXUW128,
20418 IX86_BUILTIN_PMINSB128,
20419 IX86_BUILTIN_PMINSD128,
20420 IX86_BUILTIN_PMINUD128,
20421 IX86_BUILTIN_PMINUW128,
20423 IX86_BUILTIN_PMOVSXBW128,
20424 IX86_BUILTIN_PMOVSXBD128,
20425 IX86_BUILTIN_PMOVSXBQ128,
20426 IX86_BUILTIN_PMOVSXWD128,
20427 IX86_BUILTIN_PMOVSXWQ128,
20428 IX86_BUILTIN_PMOVSXDQ128,
20430 IX86_BUILTIN_PMOVZXBW128,
20431 IX86_BUILTIN_PMOVZXBD128,
20432 IX86_BUILTIN_PMOVZXBQ128,
20433 IX86_BUILTIN_PMOVZXWD128,
20434 IX86_BUILTIN_PMOVZXWQ128,
20435 IX86_BUILTIN_PMOVZXDQ128,
20437 IX86_BUILTIN_PMULDQ128,
20438 IX86_BUILTIN_PMULLD128,
20440 IX86_BUILTIN_ROUNDPD,
20441 IX86_BUILTIN_ROUNDPS,
20442 IX86_BUILTIN_ROUNDSD,
20443 IX86_BUILTIN_ROUNDSS,
20445 IX86_BUILTIN_PTESTZ,
20446 IX86_BUILTIN_PTESTC,
20447 IX86_BUILTIN_PTESTNZC,
20449 IX86_BUILTIN_VEC_INIT_V2SI,
20450 IX86_BUILTIN_VEC_INIT_V4HI,
20451 IX86_BUILTIN_VEC_INIT_V8QI,
20452 IX86_BUILTIN_VEC_EXT_V2DF,
20453 IX86_BUILTIN_VEC_EXT_V2DI,
20454 IX86_BUILTIN_VEC_EXT_V4SF,
20455 IX86_BUILTIN_VEC_EXT_V4SI,
20456 IX86_BUILTIN_VEC_EXT_V8HI,
20457 IX86_BUILTIN_VEC_EXT_V2SI,
20458 IX86_BUILTIN_VEC_EXT_V4HI,
20459 IX86_BUILTIN_VEC_EXT_V16QI,
20460 IX86_BUILTIN_VEC_SET_V2DI,
20461 IX86_BUILTIN_VEC_SET_V4SF,
20462 IX86_BUILTIN_VEC_SET_V4SI,
20463 IX86_BUILTIN_VEC_SET_V8HI,
20464 IX86_BUILTIN_VEC_SET_V4HI,
20465 IX86_BUILTIN_VEC_SET_V16QI,
20467 IX86_BUILTIN_VEC_PACK_SFIX,
20469 /* SSE4.2. */
20470 IX86_BUILTIN_CRC32QI,
20471 IX86_BUILTIN_CRC32HI,
20472 IX86_BUILTIN_CRC32SI,
20473 IX86_BUILTIN_CRC32DI,
20475 IX86_BUILTIN_PCMPESTRI128,
20476 IX86_BUILTIN_PCMPESTRM128,
20477 IX86_BUILTIN_PCMPESTRA128,
20478 IX86_BUILTIN_PCMPESTRC128,
20479 IX86_BUILTIN_PCMPESTRO128,
20480 IX86_BUILTIN_PCMPESTRS128,
20481 IX86_BUILTIN_PCMPESTRZ128,
20482 IX86_BUILTIN_PCMPISTRI128,
20483 IX86_BUILTIN_PCMPISTRM128,
20484 IX86_BUILTIN_PCMPISTRA128,
20485 IX86_BUILTIN_PCMPISTRC128,
20486 IX86_BUILTIN_PCMPISTRO128,
20487 IX86_BUILTIN_PCMPISTRS128,
20488 IX86_BUILTIN_PCMPISTRZ128,
20490 IX86_BUILTIN_PCMPGTQ,
20492 /* AES instructions */
20493 IX86_BUILTIN_AESENC128,
20494 IX86_BUILTIN_AESENCLAST128,
20495 IX86_BUILTIN_AESDEC128,
20496 IX86_BUILTIN_AESDECLAST128,
20497 IX86_BUILTIN_AESIMC128,
20498 IX86_BUILTIN_AESKEYGENASSIST128,
20500 /* PCLMUL instruction */
20501 IX86_BUILTIN_PCLMULQDQ128,
20503 /* AVX */
20504 IX86_BUILTIN_ADDPD256,
20505 IX86_BUILTIN_ADDPS256,
20506 IX86_BUILTIN_ADDSUBPD256,
20507 IX86_BUILTIN_ADDSUBPS256,
20508 IX86_BUILTIN_ANDPD256,
20509 IX86_BUILTIN_ANDPS256,
20510 IX86_BUILTIN_ANDNPD256,
20511 IX86_BUILTIN_ANDNPS256,
20512 IX86_BUILTIN_BLENDPD256,
20513 IX86_BUILTIN_BLENDPS256,
20514 IX86_BUILTIN_BLENDVPD256,
20515 IX86_BUILTIN_BLENDVPS256,
20516 IX86_BUILTIN_DIVPD256,
20517 IX86_BUILTIN_DIVPS256,
20518 IX86_BUILTIN_DPPS256,
20519 IX86_BUILTIN_HADDPD256,
20520 IX86_BUILTIN_HADDPS256,
20521 IX86_BUILTIN_HSUBPD256,
20522 IX86_BUILTIN_HSUBPS256,
20523 IX86_BUILTIN_MAXPD256,
20524 IX86_BUILTIN_MAXPS256,
20525 IX86_BUILTIN_MINPD256,
20526 IX86_BUILTIN_MINPS256,
20527 IX86_BUILTIN_MULPD256,
20528 IX86_BUILTIN_MULPS256,
20529 IX86_BUILTIN_ORPD256,
20530 IX86_BUILTIN_ORPS256,
20531 IX86_BUILTIN_SHUFPD256,
20532 IX86_BUILTIN_SHUFPS256,
20533 IX86_BUILTIN_SUBPD256,
20534 IX86_BUILTIN_SUBPS256,
20535 IX86_BUILTIN_XORPD256,
20536 IX86_BUILTIN_XORPS256,
20537 IX86_BUILTIN_CMPSD,
20538 IX86_BUILTIN_CMPSS,
20539 IX86_BUILTIN_CMPPD,
20540 IX86_BUILTIN_CMPPS,
20541 IX86_BUILTIN_CMPPD256,
20542 IX86_BUILTIN_CMPPS256,
20543 IX86_BUILTIN_CVTDQ2PD256,
20544 IX86_BUILTIN_CVTDQ2PS256,
20545 IX86_BUILTIN_CVTPD2PS256,
20546 IX86_BUILTIN_CVTPS2DQ256,
20547 IX86_BUILTIN_CVTPS2PD256,
20548 IX86_BUILTIN_CVTTPD2DQ256,
20549 IX86_BUILTIN_CVTPD2DQ256,
20550 IX86_BUILTIN_CVTTPS2DQ256,
20551 IX86_BUILTIN_EXTRACTF128PD256,
20552 IX86_BUILTIN_EXTRACTF128PS256,
20553 IX86_BUILTIN_EXTRACTF128SI256,
20554 IX86_BUILTIN_VZEROALL,
20555 IX86_BUILTIN_VZEROUPPER,
20556 IX86_BUILTIN_VZEROUPPER_REX64,
20557 IX86_BUILTIN_VPERMILVARPD,
20558 IX86_BUILTIN_VPERMILVARPS,
20559 IX86_BUILTIN_VPERMILVARPD256,
20560 IX86_BUILTIN_VPERMILVARPS256,
20561 IX86_BUILTIN_VPERMILPD,
20562 IX86_BUILTIN_VPERMILPS,
20563 IX86_BUILTIN_VPERMILPD256,
20564 IX86_BUILTIN_VPERMILPS256,
20565 IX86_BUILTIN_VPERM2F128PD256,
20566 IX86_BUILTIN_VPERM2F128PS256,
20567 IX86_BUILTIN_VPERM2F128SI256,
20568 IX86_BUILTIN_VBROADCASTSS,
20569 IX86_BUILTIN_VBROADCASTSD256,
20570 IX86_BUILTIN_VBROADCASTSS256,
20571 IX86_BUILTIN_VBROADCASTPD256,
20572 IX86_BUILTIN_VBROADCASTPS256,
20573 IX86_BUILTIN_VINSERTF128PD256,
20574 IX86_BUILTIN_VINSERTF128PS256,
20575 IX86_BUILTIN_VINSERTF128SI256,
20576 IX86_BUILTIN_LOADUPD256,
20577 IX86_BUILTIN_LOADUPS256,
20578 IX86_BUILTIN_STOREUPD256,
20579 IX86_BUILTIN_STOREUPS256,
20580 IX86_BUILTIN_LDDQU256,
20581 IX86_BUILTIN_MOVNTDQ256,
20582 IX86_BUILTIN_MOVNTPD256,
20583 IX86_BUILTIN_MOVNTPS256,
20584 IX86_BUILTIN_LOADDQU256,
20585 IX86_BUILTIN_STOREDQU256,
20586 IX86_BUILTIN_MASKLOADPD,
20587 IX86_BUILTIN_MASKLOADPS,
20588 IX86_BUILTIN_MASKSTOREPD,
20589 IX86_BUILTIN_MASKSTOREPS,
20590 IX86_BUILTIN_MASKLOADPD256,
20591 IX86_BUILTIN_MASKLOADPS256,
20592 IX86_BUILTIN_MASKSTOREPD256,
20593 IX86_BUILTIN_MASKSTOREPS256,
20594 IX86_BUILTIN_MOVSHDUP256,
20595 IX86_BUILTIN_MOVSLDUP256,
20596 IX86_BUILTIN_MOVDDUP256,
20598 IX86_BUILTIN_SQRTPD256,
20599 IX86_BUILTIN_SQRTPS256,
20600 IX86_BUILTIN_SQRTPS_NR256,
20601 IX86_BUILTIN_RSQRTPS256,
20602 IX86_BUILTIN_RSQRTPS_NR256,
20604 IX86_BUILTIN_RCPPS256,
20606 IX86_BUILTIN_ROUNDPD256,
20607 IX86_BUILTIN_ROUNDPS256,
20609 IX86_BUILTIN_UNPCKHPD256,
20610 IX86_BUILTIN_UNPCKLPD256,
20611 IX86_BUILTIN_UNPCKHPS256,
20612 IX86_BUILTIN_UNPCKLPS256,
20614 IX86_BUILTIN_SI256_SI,
20615 IX86_BUILTIN_PS256_PS,
20616 IX86_BUILTIN_PD256_PD,
20617 IX86_BUILTIN_SI_SI256,
20618 IX86_BUILTIN_PS_PS256,
20619 IX86_BUILTIN_PD_PD256,
20621 IX86_BUILTIN_VTESTZPD,
20622 IX86_BUILTIN_VTESTCPD,
20623 IX86_BUILTIN_VTESTNZCPD,
20624 IX86_BUILTIN_VTESTZPS,
20625 IX86_BUILTIN_VTESTCPS,
20626 IX86_BUILTIN_VTESTNZCPS,
20627 IX86_BUILTIN_VTESTZPD256,
20628 IX86_BUILTIN_VTESTCPD256,
20629 IX86_BUILTIN_VTESTNZCPD256,
20630 IX86_BUILTIN_VTESTZPS256,
20631 IX86_BUILTIN_VTESTCPS256,
20632 IX86_BUILTIN_VTESTNZCPS256,
20633 IX86_BUILTIN_PTESTZ256,
20634 IX86_BUILTIN_PTESTC256,
20635 IX86_BUILTIN_PTESTNZC256,
20637 IX86_BUILTIN_MOVMSKPD256,
20638 IX86_BUILTIN_MOVMSKPS256,
20640 /* TFmode support builtins. */
20641 IX86_BUILTIN_INFQ,
20642 IX86_BUILTIN_HUGE_VALQ,
20643 IX86_BUILTIN_FABSQ,
20644 IX86_BUILTIN_COPYSIGNQ,
20646 /* SSE5 instructions */
20647 IX86_BUILTIN_FMADDSS,
20648 IX86_BUILTIN_FMADDSD,
20649 IX86_BUILTIN_FMADDPS,
20650 IX86_BUILTIN_FMADDPD,
20651 IX86_BUILTIN_FMSUBSS,
20652 IX86_BUILTIN_FMSUBSD,
20653 IX86_BUILTIN_FMSUBPS,
20654 IX86_BUILTIN_FMSUBPD,
20655 IX86_BUILTIN_FNMADDSS,
20656 IX86_BUILTIN_FNMADDSD,
20657 IX86_BUILTIN_FNMADDPS,
20658 IX86_BUILTIN_FNMADDPD,
20659 IX86_BUILTIN_FNMSUBSS,
20660 IX86_BUILTIN_FNMSUBSD,
20661 IX86_BUILTIN_FNMSUBPS,
20662 IX86_BUILTIN_FNMSUBPD,
20663 IX86_BUILTIN_PCMOV,
20664 IX86_BUILTIN_PCMOV_V2DI,
20665 IX86_BUILTIN_PCMOV_V4SI,
20666 IX86_BUILTIN_PCMOV_V8HI,
20667 IX86_BUILTIN_PCMOV_V16QI,
20668 IX86_BUILTIN_PCMOV_V4SF,
20669 IX86_BUILTIN_PCMOV_V2DF,
20670 IX86_BUILTIN_PPERM,
20671 IX86_BUILTIN_PERMPS,
20672 IX86_BUILTIN_PERMPD,
20673 IX86_BUILTIN_PMACSSWW,
20674 IX86_BUILTIN_PMACSWW,
20675 IX86_BUILTIN_PMACSSWD,
20676 IX86_BUILTIN_PMACSWD,
20677 IX86_BUILTIN_PMACSSDD,
20678 IX86_BUILTIN_PMACSDD,
20679 IX86_BUILTIN_PMACSSDQL,
20680 IX86_BUILTIN_PMACSSDQH,
20681 IX86_BUILTIN_PMACSDQL,
20682 IX86_BUILTIN_PMACSDQH,
20683 IX86_BUILTIN_PMADCSSWD,
20684 IX86_BUILTIN_PMADCSWD,
20685 IX86_BUILTIN_PHADDBW,
20686 IX86_BUILTIN_PHADDBD,
20687 IX86_BUILTIN_PHADDBQ,
20688 IX86_BUILTIN_PHADDWD,
20689 IX86_BUILTIN_PHADDWQ,
20690 IX86_BUILTIN_PHADDDQ,
20691 IX86_BUILTIN_PHADDUBW,
20692 IX86_BUILTIN_PHADDUBD,
20693 IX86_BUILTIN_PHADDUBQ,
20694 IX86_BUILTIN_PHADDUWD,
20695 IX86_BUILTIN_PHADDUWQ,
20696 IX86_BUILTIN_PHADDUDQ,
20697 IX86_BUILTIN_PHSUBBW,
20698 IX86_BUILTIN_PHSUBWD,
20699 IX86_BUILTIN_PHSUBDQ,
20700 IX86_BUILTIN_PROTB,
20701 IX86_BUILTIN_PROTW,
20702 IX86_BUILTIN_PROTD,
20703 IX86_BUILTIN_PROTQ,
20704 IX86_BUILTIN_PROTB_IMM,
20705 IX86_BUILTIN_PROTW_IMM,
20706 IX86_BUILTIN_PROTD_IMM,
20707 IX86_BUILTIN_PROTQ_IMM,
20708 IX86_BUILTIN_PSHLB,
20709 IX86_BUILTIN_PSHLW,
20710 IX86_BUILTIN_PSHLD,
20711 IX86_BUILTIN_PSHLQ,
20712 IX86_BUILTIN_PSHAB,
20713 IX86_BUILTIN_PSHAW,
20714 IX86_BUILTIN_PSHAD,
20715 IX86_BUILTIN_PSHAQ,
20716 IX86_BUILTIN_FRCZSS,
20717 IX86_BUILTIN_FRCZSD,
20718 IX86_BUILTIN_FRCZPS,
20719 IX86_BUILTIN_FRCZPD,
20720 IX86_BUILTIN_CVTPH2PS,
20721 IX86_BUILTIN_CVTPS2PH,
20723 IX86_BUILTIN_COMEQSS,
20724 IX86_BUILTIN_COMNESS,
20725 IX86_BUILTIN_COMLTSS,
20726 IX86_BUILTIN_COMLESS,
20727 IX86_BUILTIN_COMGTSS,
20728 IX86_BUILTIN_COMGESS,
20729 IX86_BUILTIN_COMUEQSS,
20730 IX86_BUILTIN_COMUNESS,
20731 IX86_BUILTIN_COMULTSS,
20732 IX86_BUILTIN_COMULESS,
20733 IX86_BUILTIN_COMUGTSS,
20734 IX86_BUILTIN_COMUGESS,
20735 IX86_BUILTIN_COMORDSS,
20736 IX86_BUILTIN_COMUNORDSS,
20737 IX86_BUILTIN_COMFALSESS,
20738 IX86_BUILTIN_COMTRUESS,
20740 IX86_BUILTIN_COMEQSD,
20741 IX86_BUILTIN_COMNESD,
20742 IX86_BUILTIN_COMLTSD,
20743 IX86_BUILTIN_COMLESD,
20744 IX86_BUILTIN_COMGTSD,
20745 IX86_BUILTIN_COMGESD,
20746 IX86_BUILTIN_COMUEQSD,
20747 IX86_BUILTIN_COMUNESD,
20748 IX86_BUILTIN_COMULTSD,
20749 IX86_BUILTIN_COMULESD,
20750 IX86_BUILTIN_COMUGTSD,
20751 IX86_BUILTIN_COMUGESD,
20752 IX86_BUILTIN_COMORDSD,
20753 IX86_BUILTIN_COMUNORDSD,
20754 IX86_BUILTIN_COMFALSESD,
20755 IX86_BUILTIN_COMTRUESD,
20757 IX86_BUILTIN_COMEQPS,
20758 IX86_BUILTIN_COMNEPS,
20759 IX86_BUILTIN_COMLTPS,
20760 IX86_BUILTIN_COMLEPS,
20761 IX86_BUILTIN_COMGTPS,
20762 IX86_BUILTIN_COMGEPS,
20763 IX86_BUILTIN_COMUEQPS,
20764 IX86_BUILTIN_COMUNEPS,
20765 IX86_BUILTIN_COMULTPS,
20766 IX86_BUILTIN_COMULEPS,
20767 IX86_BUILTIN_COMUGTPS,
20768 IX86_BUILTIN_COMUGEPS,
20769 IX86_BUILTIN_COMORDPS,
20770 IX86_BUILTIN_COMUNORDPS,
20771 IX86_BUILTIN_COMFALSEPS,
20772 IX86_BUILTIN_COMTRUEPS,
20774 IX86_BUILTIN_COMEQPD,
20775 IX86_BUILTIN_COMNEPD,
20776 IX86_BUILTIN_COMLTPD,
20777 IX86_BUILTIN_COMLEPD,
20778 IX86_BUILTIN_COMGTPD,
20779 IX86_BUILTIN_COMGEPD,
20780 IX86_BUILTIN_COMUEQPD,
20781 IX86_BUILTIN_COMUNEPD,
20782 IX86_BUILTIN_COMULTPD,
20783 IX86_BUILTIN_COMULEPD,
20784 IX86_BUILTIN_COMUGTPD,
20785 IX86_BUILTIN_COMUGEPD,
20786 IX86_BUILTIN_COMORDPD,
20787 IX86_BUILTIN_COMUNORDPD,
20788 IX86_BUILTIN_COMFALSEPD,
20789 IX86_BUILTIN_COMTRUEPD,
20791 IX86_BUILTIN_PCOMEQUB,
20792 IX86_BUILTIN_PCOMNEUB,
20793 IX86_BUILTIN_PCOMLTUB,
20794 IX86_BUILTIN_PCOMLEUB,
20795 IX86_BUILTIN_PCOMGTUB,
20796 IX86_BUILTIN_PCOMGEUB,
20797 IX86_BUILTIN_PCOMFALSEUB,
20798 IX86_BUILTIN_PCOMTRUEUB,
20799 IX86_BUILTIN_PCOMEQUW,
20800 IX86_BUILTIN_PCOMNEUW,
20801 IX86_BUILTIN_PCOMLTUW,
20802 IX86_BUILTIN_PCOMLEUW,
20803 IX86_BUILTIN_PCOMGTUW,
20804 IX86_BUILTIN_PCOMGEUW,
20805 IX86_BUILTIN_PCOMFALSEUW,
20806 IX86_BUILTIN_PCOMTRUEUW,
20807 IX86_BUILTIN_PCOMEQUD,
20808 IX86_BUILTIN_PCOMNEUD,
20809 IX86_BUILTIN_PCOMLTUD,
20810 IX86_BUILTIN_PCOMLEUD,
20811 IX86_BUILTIN_PCOMGTUD,
20812 IX86_BUILTIN_PCOMGEUD,
20813 IX86_BUILTIN_PCOMFALSEUD,
20814 IX86_BUILTIN_PCOMTRUEUD,
20815 IX86_BUILTIN_PCOMEQUQ,
20816 IX86_BUILTIN_PCOMNEUQ,
20817 IX86_BUILTIN_PCOMLTUQ,
20818 IX86_BUILTIN_PCOMLEUQ,
20819 IX86_BUILTIN_PCOMGTUQ,
20820 IX86_BUILTIN_PCOMGEUQ,
20821 IX86_BUILTIN_PCOMFALSEUQ,
20822 IX86_BUILTIN_PCOMTRUEUQ,
20824 IX86_BUILTIN_PCOMEQB,
20825 IX86_BUILTIN_PCOMNEB,
20826 IX86_BUILTIN_PCOMLTB,
20827 IX86_BUILTIN_PCOMLEB,
20828 IX86_BUILTIN_PCOMGTB,
20829 IX86_BUILTIN_PCOMGEB,
20830 IX86_BUILTIN_PCOMFALSEB,
20831 IX86_BUILTIN_PCOMTRUEB,
20832 IX86_BUILTIN_PCOMEQW,
20833 IX86_BUILTIN_PCOMNEW,
20834 IX86_BUILTIN_PCOMLTW,
20835 IX86_BUILTIN_PCOMLEW,
20836 IX86_BUILTIN_PCOMGTW,
20837 IX86_BUILTIN_PCOMGEW,
20838 IX86_BUILTIN_PCOMFALSEW,
20839 IX86_BUILTIN_PCOMTRUEW,
20840 IX86_BUILTIN_PCOMEQD,
20841 IX86_BUILTIN_PCOMNED,
20842 IX86_BUILTIN_PCOMLTD,
20843 IX86_BUILTIN_PCOMLED,
20844 IX86_BUILTIN_PCOMGTD,
20845 IX86_BUILTIN_PCOMGED,
20846 IX86_BUILTIN_PCOMFALSED,
20847 IX86_BUILTIN_PCOMTRUED,
20848 IX86_BUILTIN_PCOMEQQ,
20849 IX86_BUILTIN_PCOMNEQ,
20850 IX86_BUILTIN_PCOMLTQ,
20851 IX86_BUILTIN_PCOMLEQ,
20852 IX86_BUILTIN_PCOMGTQ,
20853 IX86_BUILTIN_PCOMGEQ,
20854 IX86_BUILTIN_PCOMFALSEQ,
20855 IX86_BUILTIN_PCOMTRUEQ,
20857 IX86_BUILTIN_MAX
20860 /* Table for the ix86 builtin decls. */
20861 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20863 /* Table of all the builtin functions that are possible with different ISAs
20864 but are waiting to be built until a function is declared to use that
20865 ISA. */
20866 struct GTY(()) builtin_isa {
20867 tree type; /* builtin type to use in the declaration */
20868 const char *name; /* function name */
20869 int isa; /* isa_flags this builtin is defined for */
20870 bool const_p; /* true if the declaration is constant */
20873 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20876 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
20877 * of isa_flags this builtin requires in the ix86_builtins_isa array. Stores the
20878 * function decl in the ix86_builtins array. Returns the function decl or
20879 * NULL_TREE, if the builtin was not added.
20881 * If the front end has a special hook for builtin functions, delay adding
20882 * builtin functions that aren't in the current ISA until the ISA is changed
20883 * with function-specific optimization. Doing so can save about 300K for the
20884 * default compiler. When the builtin is expanded, check at that time whether
20885 * it is valid.
20887 * If the front end doesn't have a special hook, record all builtins, even if
20888 * they aren't in the current ISA, in case the user uses function-specific
20889 * options for a different ISA, so that we don't get scope errors if a
20890 * builtin is added in the middle of a function scope. */
20892 static inline tree
20893 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20895 tree decl = NULL_TREE;
20897 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20899 ix86_builtins_isa[(int) code].isa = mask;
20901 if ((mask & ix86_isa_flags) != 0
20902 || (lang_hooks.builtin_function
20903 == lang_hooks.builtin_function_ext_scope))
20906 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20907 NULL_TREE);
20908 ix86_builtins[(int) code] = decl;
20909 ix86_builtins_isa[(int) code].type = NULL_TREE;
20911 else
20913 ix86_builtins[(int) code] = NULL_TREE;
20914 ix86_builtins_isa[(int) code].const_p = false;
20915 ix86_builtins_isa[(int) code].type = type;
20916 ix86_builtins_isa[(int) code].name = name;
20920 return decl;
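/* Usage sketch (illustrative, not part of the original source; "ftype"
   stands for the appropriate function-type tree): a call such as
       def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                    ftype, IX86_BUILTIN_ADDPS);
   creates the decl immediately when the mask is in ix86_isa_flags (or the
   front end uses the extended-scope hook); otherwise the name, type, and
   mask are parked in ix86_builtins_isa and materialized later by
   ix86_add_new_builtins once the ISA is enabled.  */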
20923 /* Like def_builtin, but also marks the function decl "const". */
20925 static inline tree
20926 def_builtin_const (int mask, const char *name, tree type,
20927 enum ix86_builtins code)
20929 tree decl = def_builtin (mask, name, type, code);
20930 if (decl)
20931 TREE_READONLY (decl) = 1;
20932 else
20933 ix86_builtins_isa[(int) code].const_p = true;
20935 return decl;
20938 /* Add any new builtin functions for a given ISA that may not have been
20939 declared. This saves a bit of space compared to adding all of the
20940 declarations to the tree, even if we didn't use them. */
20942 static void
20943 ix86_add_new_builtins (int isa)
20945 int i;
20946 tree decl;
20948 for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
20950 if ((ix86_builtins_isa[i].isa & isa) != 0
20951 && ix86_builtins_isa[i].type != NULL_TREE)
20953 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20954 ix86_builtins_isa[i].type,
20955 i, BUILT_IN_MD, NULL,
20956 NULL_TREE);
20958 ix86_builtins[i] = decl;
20959 ix86_builtins_isa[i].type = NULL_TREE;
20960 if (ix86_builtins_isa[i].const_p)
20961 TREE_READONLY (decl) = 1;
20966 /* Bits for builtin_description.flag. */
20968 /* Set when we don't support the comparison natively, and should
20969 swap_comparison in order to support it. */
20970 #define BUILTIN_DESC_SWAP_OPERANDS 1
20972 struct builtin_description
20974 const unsigned int mask;
20975 const enum insn_code icode;
20976 const char *const name;
20977 const enum ix86_builtins code;
20978 const enum rtx_code comparison;
20979 const int flag;
20982 static const struct builtin_description bdesc_comi[] =
20984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20992 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
20996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
20998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
20999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21010 static const struct builtin_description bdesc_pcmpestr[] =
21012 /* SSE4.2 */
21013 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21014 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21015 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21016 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21017 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21018 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21019 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21022 static const struct builtin_description bdesc_pcmpistr[] =
21024 /* SSE4.2 */
21025 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21026 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21027 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21028 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21029 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21030 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21031 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21034 /* Special builtin types */
21035 enum ix86_special_builtin_type
21037 SPECIAL_FTYPE_UNKNOWN,
21038 VOID_FTYPE_VOID,
21039 V32QI_FTYPE_PCCHAR,
21040 V16QI_FTYPE_PCCHAR,
21041 V8SF_FTYPE_PCV4SF,
21042 V8SF_FTYPE_PCFLOAT,
21043 V4DF_FTYPE_PCV2DF,
21044 V4DF_FTYPE_PCDOUBLE,
21045 V4SF_FTYPE_PCFLOAT,
21046 V2DF_FTYPE_PCDOUBLE,
21047 V8SF_FTYPE_PCV8SF_V8SF,
21048 V4DF_FTYPE_PCV4DF_V4DF,
21049 V4SF_FTYPE_V4SF_PCV2SF,
21050 V4SF_FTYPE_PCV4SF_V4SF,
21051 V2DF_FTYPE_V2DF_PCDOUBLE,
21052 V2DF_FTYPE_PCV2DF_V2DF,
21053 V2DI_FTYPE_PV2DI,
21054 VOID_FTYPE_PV2SF_V4SF,
21055 VOID_FTYPE_PV4DI_V4DI,
21056 VOID_FTYPE_PV2DI_V2DI,
21057 VOID_FTYPE_PCHAR_V32QI,
21058 VOID_FTYPE_PCHAR_V16QI,
21059 VOID_FTYPE_PFLOAT_V8SF,
21060 VOID_FTYPE_PFLOAT_V4SF,
21061 VOID_FTYPE_PDOUBLE_V4DF,
21062 VOID_FTYPE_PDOUBLE_V2DF,
21063 VOID_FTYPE_PDI_DI,
21064 VOID_FTYPE_PINT_INT,
21065 VOID_FTYPE_PV8SF_V8SF_V8SF,
21066 VOID_FTYPE_PV4DF_V4DF_V4DF,
21067 VOID_FTYPE_PV4SF_V4SF_V4SF,
21068 VOID_FTYPE_PV2DF_V2DF_V2DF
21071 /* Builtin types */
21072 enum ix86_builtin_type
21074 FTYPE_UNKNOWN,
21075 FLOAT128_FTYPE_FLOAT128,
21076 FLOAT_FTYPE_FLOAT,
21077 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21078 INT_FTYPE_V8SF_V8SF_PTEST,
21079 INT_FTYPE_V4DI_V4DI_PTEST,
21080 INT_FTYPE_V4DF_V4DF_PTEST,
21081 INT_FTYPE_V4SF_V4SF_PTEST,
21082 INT_FTYPE_V2DI_V2DI_PTEST,
21083 INT_FTYPE_V2DF_V2DF_PTEST,
21084 INT64_FTYPE_V4SF,
21085 INT64_FTYPE_V2DF,
21086 INT_FTYPE_V16QI,
21087 INT_FTYPE_V8QI,
21088 INT_FTYPE_V8SF,
21089 INT_FTYPE_V4DF,
21090 INT_FTYPE_V4SF,
21091 INT_FTYPE_V2DF,
21092 V16QI_FTYPE_V16QI,
21093 V8SI_FTYPE_V8SF,
21094 V8SI_FTYPE_V4SI,
21095 V8HI_FTYPE_V8HI,
21096 V8HI_FTYPE_V16QI,
21097 V8QI_FTYPE_V8QI,
21098 V8SF_FTYPE_V8SF,
21099 V8SF_FTYPE_V8SI,
21100 V8SF_FTYPE_V4SF,
21101 V4SI_FTYPE_V4SI,
21102 V4SI_FTYPE_V16QI,
21103 V4SI_FTYPE_V8SI,
21104 V4SI_FTYPE_V8HI,
21105 V4SI_FTYPE_V4DF,
21106 V4SI_FTYPE_V4SF,
21107 V4SI_FTYPE_V2DF,
21108 V4HI_FTYPE_V4HI,
21109 V4DF_FTYPE_V4DF,
21110 V4DF_FTYPE_V4SI,
21111 V4DF_FTYPE_V4SF,
21112 V4DF_FTYPE_V2DF,
21113 V4SF_FTYPE_V4DF,
21114 V4SF_FTYPE_V4SF,
21115 V4SF_FTYPE_V4SF_VEC_MERGE,
21116 V4SF_FTYPE_V8SF,
21117 V4SF_FTYPE_V4SI,
21118 V4SF_FTYPE_V2DF,
21119 V2DI_FTYPE_V2DI,
21120 V2DI_FTYPE_V16QI,
21121 V2DI_FTYPE_V8HI,
21122 V2DI_FTYPE_V4SI,
21123 V2DF_FTYPE_V2DF,
21124 V2DF_FTYPE_V2DF_VEC_MERGE,
21125 V2DF_FTYPE_V4SI,
21126 V2DF_FTYPE_V4DF,
21127 V2DF_FTYPE_V4SF,
21128 V2DF_FTYPE_V2SI,
21129 V2SI_FTYPE_V2SI,
21130 V2SI_FTYPE_V4SF,
21131 V2SI_FTYPE_V2SF,
21132 V2SI_FTYPE_V2DF,
21133 V2SF_FTYPE_V2SF,
21134 V2SF_FTYPE_V2SI,
21135 V16QI_FTYPE_V16QI_V16QI,
21136 V16QI_FTYPE_V8HI_V8HI,
21137 V8QI_FTYPE_V8QI_V8QI,
21138 V8QI_FTYPE_V4HI_V4HI,
21139 V8HI_FTYPE_V8HI_V8HI,
21140 V8HI_FTYPE_V8HI_V8HI_COUNT,
21141 V8HI_FTYPE_V16QI_V16QI,
21142 V8HI_FTYPE_V4SI_V4SI,
21143 V8HI_FTYPE_V8HI_SI_COUNT,
21144 V8SF_FTYPE_V8SF_V8SF,
21145 V8SF_FTYPE_V8SF_V8SI,
21146 V4SI_FTYPE_V4SI_V4SI,
21147 V4SI_FTYPE_V4SI_V4SI_COUNT,
21148 V4SI_FTYPE_V8HI_V8HI,
21149 V4SI_FTYPE_V4SF_V4SF,
21150 V4SI_FTYPE_V2DF_V2DF,
21151 V4SI_FTYPE_V4SI_SI_COUNT,
21152 V4HI_FTYPE_V4HI_V4HI,
21153 V4HI_FTYPE_V4HI_V4HI_COUNT,
21154 V4HI_FTYPE_V8QI_V8QI,
21155 V4HI_FTYPE_V2SI_V2SI,
21156 V4HI_FTYPE_V4HI_SI_COUNT,
21157 V4DF_FTYPE_V4DF_V4DF,
21158 V4DF_FTYPE_V4DF_V4DI,
21159 V4SF_FTYPE_V4SF_V4SF,
21160 V4SF_FTYPE_V4SF_V4SF_SWAP,
21161 V4SF_FTYPE_V4SF_V4SI,
21162 V4SF_FTYPE_V4SF_V2SI,
21163 V4SF_FTYPE_V4SF_V2DF,
21164 V4SF_FTYPE_V4SF_DI,
21165 V4SF_FTYPE_V4SF_SI,
21166 V2DI_FTYPE_V2DI_V2DI,
21167 V2DI_FTYPE_V2DI_V2DI_COUNT,
21168 V2DI_FTYPE_V16QI_V16QI,
21169 V2DI_FTYPE_V4SI_V4SI,
21170 V2DI_FTYPE_V2DI_V16QI,
21171 V2DI_FTYPE_V2DF_V2DF,
21172 V2DI_FTYPE_V2DI_SI_COUNT,
21173 V2SI_FTYPE_V2SI_V2SI,
21174 V2SI_FTYPE_V2SI_V2SI_COUNT,
21175 V2SI_FTYPE_V4HI_V4HI,
21176 V2SI_FTYPE_V2SF_V2SF,
21177 V2SI_FTYPE_V2SI_SI_COUNT,
21178 V2DF_FTYPE_V2DF_V2DF,
21179 V2DF_FTYPE_V2DF_V2DF_SWAP,
21180 V2DF_FTYPE_V2DF_V4SF,
21181 V2DF_FTYPE_V2DF_V2DI,
21182 V2DF_FTYPE_V2DF_DI,
21183 V2DF_FTYPE_V2DF_SI,
21184 V2SF_FTYPE_V2SF_V2SF,
21185 V1DI_FTYPE_V1DI_V1DI,
21186 V1DI_FTYPE_V1DI_V1DI_COUNT,
21187 V1DI_FTYPE_V8QI_V8QI,
21188 V1DI_FTYPE_V2SI_V2SI,
21189 V1DI_FTYPE_V1DI_SI_COUNT,
21190 UINT64_FTYPE_UINT64_UINT64,
21191 UINT_FTYPE_UINT_UINT,
21192 UINT_FTYPE_UINT_USHORT,
21193 UINT_FTYPE_UINT_UCHAR,
21194 V8HI_FTYPE_V8HI_INT,
21195 V4SI_FTYPE_V4SI_INT,
21196 V4HI_FTYPE_V4HI_INT,
21197 V8SF_FTYPE_V8SF_INT,
21198 V4SI_FTYPE_V8SI_INT,
21199 V4SF_FTYPE_V8SF_INT,
21200 V2DF_FTYPE_V4DF_INT,
21201 V4DF_FTYPE_V4DF_INT,
21202 V4SF_FTYPE_V4SF_INT,
21203 V2DI_FTYPE_V2DI_INT,
21204 V2DI2TI_FTYPE_V2DI_INT,
21205 V2DF_FTYPE_V2DF_INT,
21206 V16QI_FTYPE_V16QI_V16QI_V16QI,
21207 V8SF_FTYPE_V8SF_V8SF_V8SF,
21208 V4DF_FTYPE_V4DF_V4DF_V4DF,
21209 V4SF_FTYPE_V4SF_V4SF_V4SF,
21210 V2DF_FTYPE_V2DF_V2DF_V2DF,
21211 V16QI_FTYPE_V16QI_V16QI_INT,
21212 V8SI_FTYPE_V8SI_V8SI_INT,
21213 V8SI_FTYPE_V8SI_V4SI_INT,
21214 V8HI_FTYPE_V8HI_V8HI_INT,
21215 V8SF_FTYPE_V8SF_V8SF_INT,
21216 V8SF_FTYPE_V8SF_V4SF_INT,
21217 V4SI_FTYPE_V4SI_V4SI_INT,
21218 V4DF_FTYPE_V4DF_V4DF_INT,
21219 V4DF_FTYPE_V4DF_V2DF_INT,
21220 V4SF_FTYPE_V4SF_V4SF_INT,
21221 V2DI_FTYPE_V2DI_V2DI_INT,
21222 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21223 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21224 V2DF_FTYPE_V2DF_V2DF_INT,
21225 V2DI_FTYPE_V2DI_UINT_UINT,
21226 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21229 /* Special builtins with variable number of arguments. */
21230 static const struct builtin_description bdesc_special_args[] =
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
};

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
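
  /* Editorial note (not in the original source): for the comparison
     entries above, the rtx comparison field selects the condition; the
     "greater" builtins reuse LT/LE (or UNGE/UNGT for the negated forms)
     together with a _SWAP function type, so no separate GT/GE insn
     patterns are needed.  */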

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
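
  /* Editorial note (not in the original source): pslldqi128/psrldqi128
     shift the whole 128-bit value through the TImode shift patterns, and
     their immediate is a bit count; the byte-oriented _mm_slli_si128 and
     _mm_srli_si128 intrinsics multiply their byte argument by 8 before
     calling these builtins.  */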

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  /* SSSE3. */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
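
  /* Editorial note (not in the original source): the palignr builtins
     likewise take a bit count, with _mm_alignr_epi8 scaling its byte
     offset by 8; the V2DI2TI/V1DI2DI types make the expander operate on
     the operands in TImode/DImode.  */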
21718 /* SSE4.1 */
21719 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21720 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21721 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21723 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21724 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21725 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21727 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21728 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21730 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21731 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21732 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21733 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
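/* Editorial note (not in the original file): the pmovsx/pmovzx built-ins
   above widen only the low elements of their source vector, which is why
   a V16QI argument can yield a V8HI, V4SI or V2DI result.  */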
21742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21757 /* SSE4.1 and SSE5 */
21758 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21759 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21760 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21761 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21763 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21764 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21765 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
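/* Editorial note (not in the original file): the three ptest built-ins
   above deliberately share one insn pattern, CODE_FOR_sse4_1_ptest; the
   comparison code in the fifth field selects which PTEST flag is read
   back: EQ reads ZF (ptestz), LTU reads CF (ptestc), and GTU reads
   "CF = 0 and ZF = 0" (ptestnzc).  The vtest/ptest256 entries further
   down use the same trick.  */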
21767 /* SSE4.2 */
21768 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21769 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21770 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21771 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21772 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21774 /* SSE4A */
21775 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21776 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21777 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21778 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21780 /* AES */
21781 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21782 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21784 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21785 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21786 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21787 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21789 /* PCLMUL */
21790 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21792 /* AVX */
21793 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21794 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21797 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21798 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21801 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21807 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21808 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21809 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21810 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21811 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21812 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21813 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21814 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21815 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21816 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21817 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21818 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
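/* Editorial note (not in the original file): for the cmp built-ins above,
   the trailing INT in the V..F_FTYPE_.._INT signatures is the immediate
   comparison-predicate operand of the AVX VCMPPS/VCMPPD encoding, not a
   data operand.  */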
21838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21864 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21866 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21868 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21905 };
21907 /* SSE5 */
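/* Editorial note (not in the original file): the enumerators below encode
   the operand shape of each SSE5 built-in as MULTI_ARG_<nargs>_<mode>,
   with suffixes for mixed input/output modes (e.g. _SI_DI), immediate
   operands (_IMM), comparisons (_CMP) and test-false/test-true forms
   (_TF); the built-in expander presumably keys argument and return types
   off this value.  */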
21908 enum multi_arg_type {
21909 MULTI_ARG_UNKNOWN,
21910 MULTI_ARG_3_SF,
21911 MULTI_ARG_3_DF,
21912 MULTI_ARG_3_DI,
21913 MULTI_ARG_3_SI,
21914 MULTI_ARG_3_SI_DI,
21915 MULTI_ARG_3_HI,
21916 MULTI_ARG_3_HI_SI,
21917 MULTI_ARG_3_QI,
21918 MULTI_ARG_3_PERMPS,
21919 MULTI_ARG_3_PERMPD,
21920 MULTI_ARG_2_SF,
21921 MULTI_ARG_2_DF,
21922 MULTI_ARG_2_DI,
21923 MULTI_ARG_2_SI,
21924 MULTI_ARG_2_HI,
21925 MULTI_ARG_2_QI,
21926 MULTI_ARG_2_DI_IMM,
21927 MULTI_ARG_2_SI_IMM,
21928 MULTI_ARG_2_HI_IMM,
21929 MULTI_ARG_2_QI_IMM,
21930 MULTI_ARG_2_SF_CMP,
21931 MULTI_ARG_2_DF_CMP,
21932 MULTI_ARG_2_DI_CMP,
21933 MULTI_ARG_2_SI_CMP,
21934 MULTI_ARG_2_HI_CMP,
21935 MULTI_ARG_2_QI_CMP,
21936 MULTI_ARG_2_DI_TF,
21937 MULTI_ARG_2_SI_TF,
21938 MULTI_ARG_2_HI_TF,
21939 MULTI_ARG_2_QI_TF,
21940 MULTI_ARG_2_SF_TF,
21941 MULTI_ARG_2_DF_TF,
21942 MULTI_ARG_1_SF,
21943 MULTI_ARG_1_DF,
21944 MULTI_ARG_1_DI,
21945 MULTI_ARG_1_SI,
21946 MULTI_ARG_1_HI,
21947 MULTI_ARG_1_QI,
21948 MULTI_ARG_1_SI_DI,
21949 MULTI_ARG_1_HI_DI,
21950 MULTI_ARG_1_HI_SI,
21951 MULTI_ARG_1_QI_DI,
21952 MULTI_ARG_1_QI_SI,
21953 MULTI_ARG_1_QI_HI,
21954 MULTI_ARG_1_PH2PS,
21955 MULTI_ARG_1_PS2PH
21956 };
21958 static const struct builtin_description bdesc_multi_arg[] =
21959 {
21960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
21976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
21981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
21983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
21985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
21986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
21987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
21988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
21990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
21991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
21992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
21998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
21999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
22000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
22001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
22007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
22016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
22017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
22018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
22019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
22020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
22021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
22022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
22023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
22024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
22025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
22026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
22027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
22028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
22029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
22030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
22031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
22032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
22033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
22034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
22036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22194 };
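/* Illustrative sketch (editorial addition, not part of the original file):
   how one row of bdesc_multi_arg surfaces to user code.  With -msse5 in
   effect, the "__builtin_ia32_fmaddps" entry above, typed MULTI_ARG_3_SF,
   becomes a built-in taking three V4SF operands and computing a * b + c.
   A user-level call would look like:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     example_fmaddps (__v4sf a, __v4sf b, __v4sf c)
     {
       return __builtin_ia32_fmaddps (a, b, c);
     }
*/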
22196 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
22197    not in the current target ISA, so that the user can compile particular
22198    modules with target-specific options that differ from the command-line
22199    options. */
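/* Illustrative sketch (editorial addition): the point of registering every
   built-in is that a translation unit compiled without, say, -msse4.2 on
   the command line can still opt in per function, assuming GCC-style
   per-function target attributes:

     __attribute__ ((target ("sse4.2")))
     unsigned int
     crc32_byte (unsigned int crc, unsigned char v)
     {
       return __builtin_ia32_crc32qi (crc, v);
     }
*/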
22200 static void
22201 ix86_init_mmx_sse_builtins (void)
22202 {
22203 const struct builtin_description * d;
22204 size_t i;
22206 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22207 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22208 tree V1DI_type_node
22209 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22210 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22211 tree V2DI_type_node
22212 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22213 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22214 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22215 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22216 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22217 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22218 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
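/* Editorial note (not in the original file): each node above is the
   internal form of a GCC vector type; V4SF_type_node, for instance,
   corresponds to the user-level type
     float __attribute__ ((__vector_size__ (16)))
   built from float_type_node and the 16-byte V4SFmode.  */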
22220 tree pchar_type_node = build_pointer_type (char_type_node);
22221 tree pcchar_type_node
22222 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22223 tree pfloat_type_node = build_pointer_type (float_type_node);
22224 tree pcfloat_type_node
22225 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22226 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22227 tree pcv2sf_type_node
22228 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22229 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22230 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22232 /* Comparisons. */
22233 tree int_ftype_v4sf_v4sf
22234 = build_function_type_list (integer_type_node,
22235 V4SF_type_node, V4SF_type_node, NULL_TREE);
22236 tree v4si_ftype_v4sf_v4sf
22237 = build_function_type_list (V4SI_type_node,
22238 V4SF_type_node, V4SF_type_node, NULL_TREE);
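/* Editorial note (not in the original file): the locals below follow a
   <ret>_ftype_<args> naming scheme; v4si_ftype_v4sf_v4sf, for example,
   is the type of a function taking two V4SF vectors and returning a
   V4SI vector, built with a NULL_TREE-terminated
   build_function_type_list call.  */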
22239 /* MMX/SSE/integer conversions. */
22240 tree int_ftype_v4sf
22241 = build_function_type_list (integer_type_node,
22242 V4SF_type_node, NULL_TREE);
22243 tree int64_ftype_v4sf
22244 = build_function_type_list (long_long_integer_type_node,
22245 V4SF_type_node, NULL_TREE);
22246 tree int_ftype_v8qi
22247 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22248 tree v4sf_ftype_v4sf_int
22249 = build_function_type_list (V4SF_type_node,
22250 V4SF_type_node, integer_type_node, NULL_TREE);
22251 tree v4sf_ftype_v4sf_int64
22252 = build_function_type_list (V4SF_type_node,
22253 V4SF_type_node, long_long_integer_type_node,
22254 NULL_TREE);
22255 tree v4sf_ftype_v4sf_v2si
22256 = build_function_type_list (V4SF_type_node,
22257 V4SF_type_node, V2SI_type_node, NULL_TREE);
22259 /* Miscellaneous. */
22260 tree v8qi_ftype_v4hi_v4hi
22261 = build_function_type_list (V8QI_type_node,
22262 V4HI_type_node, V4HI_type_node, NULL_TREE);
22263 tree v4hi_ftype_v2si_v2si
22264 = build_function_type_list (V4HI_type_node,
22265 V2SI_type_node, V2SI_type_node, NULL_TREE);
22266 tree v4sf_ftype_v4sf_v4sf_int
22267 = build_function_type_list (V4SF_type_node,
22268 V4SF_type_node, V4SF_type_node,
22269 integer_type_node, NULL_TREE);
22270 tree v2si_ftype_v4hi_v4hi
22271 = build_function_type_list (V2SI_type_node,
22272 V4HI_type_node, V4HI_type_node, NULL_TREE);
22273 tree v4hi_ftype_v4hi_int
22274 = build_function_type_list (V4HI_type_node,
22275 V4HI_type_node, integer_type_node, NULL_TREE);
22276 tree v2si_ftype_v2si_int
22277 = build_function_type_list (V2SI_type_node,
22278 V2SI_type_node, integer_type_node, NULL_TREE);
22279 tree v1di_ftype_v1di_int
22280 = build_function_type_list (V1DI_type_node,
22281 V1DI_type_node, integer_type_node, NULL_TREE);
22283 tree void_ftype_void
22284 = build_function_type (void_type_node, void_list_node);
22285 tree void_ftype_unsigned
22286 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22287 tree void_ftype_unsigned_unsigned
22288 = build_function_type_list (void_type_node, unsigned_type_node,
22289 unsigned_type_node, NULL_TREE);
22290 tree void_ftype_pcvoid_unsigned_unsigned
22291 = build_function_type_list (void_type_node, const_ptr_type_node,
22292 unsigned_type_node, unsigned_type_node,
22293 NULL_TREE);
22294 tree unsigned_ftype_void
22295 = build_function_type (unsigned_type_node, void_list_node);
22296 tree v2si_ftype_v4sf
22297 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22298 /* Loads/stores. */
22299 tree void_ftype_v8qi_v8qi_pchar
22300 = build_function_type_list (void_type_node,
22301 V8QI_type_node, V8QI_type_node,
22302 pchar_type_node, NULL_TREE);
22303 tree v4sf_ftype_pcfloat
22304 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22305 tree v4sf_ftype_v4sf_pcv2sf
22306 = build_function_type_list (V4SF_type_node,
22307 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22308 tree void_ftype_pv2sf_v4sf
22309 = build_function_type_list (void_type_node,
22310 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22311 tree void_ftype_pfloat_v4sf
22312 = build_function_type_list (void_type_node,
22313 pfloat_type_node, V4SF_type_node, NULL_TREE);
22314 tree void_ftype_pdi_di
22315 = build_function_type_list (void_type_node,
22316 pdi_type_node, long_long_unsigned_type_node,
22317 NULL_TREE);
22318 tree void_ftype_pv2di_v2di
22319 = build_function_type_list (void_type_node,
22320 pv2di_type_node, V2DI_type_node, NULL_TREE);
22321 /* Normal vector unops. */
22322 tree v4sf_ftype_v4sf
22323 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22324 tree v16qi_ftype_v16qi
22325 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22326 tree v8hi_ftype_v8hi
22327 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22328 tree v4si_ftype_v4si
22329 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22330 tree v8qi_ftype_v8qi
22331 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22332 tree v4hi_ftype_v4hi
22333 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22335 /* Normal vector binops. */
22336 tree v4sf_ftype_v4sf_v4sf
22337 = build_function_type_list (V4SF_type_node,
22338 V4SF_type_node, V4SF_type_node, NULL_TREE);
22339 tree v8qi_ftype_v8qi_v8qi
22340 = build_function_type_list (V8QI_type_node,
22341 V8QI_type_node, V8QI_type_node, NULL_TREE);
22342 tree v4hi_ftype_v4hi_v4hi
22343 = build_function_type_list (V4HI_type_node,
22344 V4HI_type_node, V4HI_type_node, NULL_TREE);
22345 tree v2si_ftype_v2si_v2si
22346 = build_function_type_list (V2SI_type_node,
22347 V2SI_type_node, V2SI_type_node, NULL_TREE);
22348 tree v1di_ftype_v1di_v1di
22349 = build_function_type_list (V1DI_type_node,
22350 V1DI_type_node, V1DI_type_node, NULL_TREE);
22351 tree v1di_ftype_v1di_v1di_int
22352 = build_function_type_list (V1DI_type_node,
22353 V1DI_type_node, V1DI_type_node,
22354 integer_type_node, NULL_TREE);
22355 tree v2si_ftype_v2sf
22356 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22357 tree v2sf_ftype_v2si
22358 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22359 tree v2si_ftype_v2si
22360 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22361 tree v2sf_ftype_v2sf
22362 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22363 tree v2sf_ftype_v2sf_v2sf
22364 = build_function_type_list (V2SF_type_node,
22365 V2SF_type_node, V2SF_type_node, NULL_TREE);
22366 tree v2si_ftype_v2sf_v2sf
22367 = build_function_type_list (V2SI_type_node,
22368 V2SF_type_node, V2SF_type_node, NULL_TREE);
22369 tree pint_type_node = build_pointer_type (integer_type_node);
22370 tree pdouble_type_node = build_pointer_type (double_type_node);
22371 tree pcdouble_type_node = build_pointer_type (
22372 build_type_variant (double_type_node, 1, 0));
22373 tree int_ftype_v2df_v2df
22374 = build_function_type_list (integer_type_node,
22375 V2DF_type_node, V2DF_type_node, NULL_TREE);
22377 tree void_ftype_pcvoid
22378 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22379 tree v4sf_ftype_v4si
22380 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22381 tree v4si_ftype_v4sf
22382 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22383 tree v2df_ftype_v4si
22384 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22385 tree v4si_ftype_v2df
22386 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22387 tree v4si_ftype_v2df_v2df
22388 = build_function_type_list (V4SI_type_node,
22389 V2DF_type_node, V2DF_type_node, NULL_TREE);
22390 tree v2si_ftype_v2df
22391 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22392 tree v4sf_ftype_v2df
22393 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22394 tree v2df_ftype_v2si
22395 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22396 tree v2df_ftype_v4sf
22397 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22398 tree int_ftype_v2df
22399 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22400 tree int64_ftype_v2df
22401 = build_function_type_list (long_long_integer_type_node,
22402 V2DF_type_node, NULL_TREE);
22403 tree v2df_ftype_v2df_int
22404 = build_function_type_list (V2DF_type_node,
22405 V2DF_type_node, integer_type_node, NULL_TREE);
22406 tree v2df_ftype_v2df_int64
22407 = build_function_type_list (V2DF_type_node,
22408 V2DF_type_node, long_long_integer_type_node,
22409 NULL_TREE);
22410 tree v4sf_ftype_v4sf_v2df
22411 = build_function_type_list (V4SF_type_node,
22412 V4SF_type_node, V2DF_type_node, NULL_TREE);
22413 tree v2df_ftype_v2df_v4sf
22414 = build_function_type_list (V2DF_type_node,
22415 V2DF_type_node, V4SF_type_node, NULL_TREE);
22416 tree v2df_ftype_v2df_v2df_int
22417 = build_function_type_list (V2DF_type_node,
22418 V2DF_type_node, V2DF_type_node,
22419 integer_type_node,
22420 NULL_TREE);
22421 tree v2df_ftype_v2df_pcdouble
22422 = build_function_type_list (V2DF_type_node,
22423 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22424 tree void_ftype_pdouble_v2df
22425 = build_function_type_list (void_type_node,
22426 pdouble_type_node, V2DF_type_node, NULL_TREE);
22427 tree void_ftype_pint_int
22428 = build_function_type_list (void_type_node,
22429 pint_type_node, integer_type_node, NULL_TREE);
22430 tree void_ftype_v16qi_v16qi_pchar
22431 = build_function_type_list (void_type_node,
22432 V16QI_type_node, V16QI_type_node,
22433 pchar_type_node, NULL_TREE);
22434 tree v2df_ftype_pcdouble
22435 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22436 tree v2df_ftype_v2df_v2df
22437 = build_function_type_list (V2DF_type_node,
22438 V2DF_type_node, V2DF_type_node, NULL_TREE);
22439 tree v16qi_ftype_v16qi_v16qi
22440 = build_function_type_list (V16QI_type_node,
22441 V16QI_type_node, V16QI_type_node, NULL_TREE);
22442 tree v8hi_ftype_v8hi_v8hi
22443 = build_function_type_list (V8HI_type_node,
22444 V8HI_type_node, V8HI_type_node, NULL_TREE);
22445 tree v4si_ftype_v4si_v4si
22446 = build_function_type_list (V4SI_type_node,
22447 V4SI_type_node, V4SI_type_node, NULL_TREE);
22448 tree v2di_ftype_v2di_v2di
22449 = build_function_type_list (V2DI_type_node,
22450 V2DI_type_node, V2DI_type_node, NULL_TREE);
22451 tree v2di_ftype_v2df_v2df
22452 = build_function_type_list (V2DI_type_node,
22453 V2DF_type_node, V2DF_type_node, NULL_TREE);
22454 tree v2df_ftype_v2df
22455 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22456 tree v2di_ftype_v2di_int
22457 = build_function_type_list (V2DI_type_node,
22458 V2DI_type_node, integer_type_node, NULL_TREE);
22459 tree v2di_ftype_v2di_v2di_int
22460 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22461 V2DI_type_node, integer_type_node, NULL_TREE);
22462 tree v4si_ftype_v4si_int
22463 = build_function_type_list (V4SI_type_node,
22464 V4SI_type_node, integer_type_node, NULL_TREE);
22465 tree v8hi_ftype_v8hi_int
22466 = build_function_type_list (V8HI_type_node,
22467 V8HI_type_node, integer_type_node, NULL_TREE);
22468 tree v4si_ftype_v8hi_v8hi
22469 = build_function_type_list (V4SI_type_node,
22470 V8HI_type_node, V8HI_type_node, NULL_TREE);
22471 tree v1di_ftype_v8qi_v8qi
22472 = build_function_type_list (V1DI_type_node,
22473 V8QI_type_node, V8QI_type_node, NULL_TREE);
22474 tree v1di_ftype_v2si_v2si
22475 = build_function_type_list (V1DI_type_node,
22476 V2SI_type_node, V2SI_type_node, NULL_TREE);
22477 tree v2di_ftype_v16qi_v16qi
22478 = build_function_type_list (V2DI_type_node,
22479 V16QI_type_node, V16QI_type_node, NULL_TREE);
22480 tree v2di_ftype_v4si_v4si
22481 = build_function_type_list (V2DI_type_node,
22482 V4SI_type_node, V4SI_type_node, NULL_TREE);
22483 tree int_ftype_v16qi
22484 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22485 tree v16qi_ftype_pcchar
22486 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22487 tree void_ftype_pchar_v16qi
22488 = build_function_type_list (void_type_node,
22489 pchar_type_node, V16QI_type_node, NULL_TREE);
22491 tree v2di_ftype_v2di_unsigned_unsigned
22492 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22493 unsigned_type_node, unsigned_type_node,
22494 NULL_TREE);
22495 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22496 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22497 unsigned_type_node, unsigned_type_node,
22498 NULL_TREE);
22499 tree v2di_ftype_v2di_v16qi
22500 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22501 NULL_TREE);
22502 tree v2df_ftype_v2df_v2df_v2df
22503 = build_function_type_list (V2DF_type_node,
22504 V2DF_type_node, V2DF_type_node,
22505 V2DF_type_node, NULL_TREE);
22506 tree v4sf_ftype_v4sf_v4sf_v4sf
22507 = build_function_type_list (V4SF_type_node,
22508 V4SF_type_node, V4SF_type_node,
22509 V4SF_type_node, NULL_TREE);
22510 tree v8hi_ftype_v16qi
22511 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22512 NULL_TREE);
22513 tree v4si_ftype_v16qi
22514 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22515 NULL_TREE);
22516 tree v2di_ftype_v16qi
22517 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22518 NULL_TREE);
22519 tree v4si_ftype_v8hi
22520 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22521 NULL_TREE);
22522 tree v2di_ftype_v8hi
22523 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22524 NULL_TREE);
22525 tree v2di_ftype_v4si
22526 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22527 NULL_TREE);
22528 tree v2di_ftype_pv2di
22529 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22530 NULL_TREE);
22531 tree v16qi_ftype_v16qi_v16qi_int
22532 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22533 V16QI_type_node, integer_type_node,
22534 NULL_TREE);
22535 tree v16qi_ftype_v16qi_v16qi_v16qi
22536 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22537 V16QI_type_node, V16QI_type_node,
22538 NULL_TREE);
22539 tree v8hi_ftype_v8hi_v8hi_int
22540 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22541 V8HI_type_node, integer_type_node,
22542 NULL_TREE);
22543 tree v4si_ftype_v4si_v4si_int
22544 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22545 V4SI_type_node, integer_type_node,
22546 NULL_TREE);
22547 tree int_ftype_v2di_v2di
22548 = build_function_type_list (integer_type_node,
22549 V2DI_type_node, V2DI_type_node,
22550 NULL_TREE);
22551 tree int_ftype_v16qi_int_v16qi_int_int
22552 = build_function_type_list (integer_type_node,
22553 V16QI_type_node,
22554 integer_type_node,
22555 V16QI_type_node,
22556 integer_type_node,
22557 integer_type_node,
22558 NULL_TREE);
22559 tree v16qi_ftype_v16qi_int_v16qi_int_int
22560 = build_function_type_list (V16QI_type_node,
22561 V16QI_type_node,
22562 integer_type_node,
22563 V16QI_type_node,
22564 integer_type_node,
22565 integer_type_node,
22566 NULL_TREE);
22567 tree int_ftype_v16qi_v16qi_int
22568 = build_function_type_list (integer_type_node,
22569 V16QI_type_node,
22570 V16QI_type_node,
22571 integer_type_node,
22572 NULL_TREE);
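/* Each *_ftype_* node above is a FUNCTION_TYPE built by
   build_function_type_list (return_type, arg_types..., NULL_TREE).
   For example, int_ftype_v16qi_int_v16qi_int_int describes the
   C-level signature

     int f (__v16qi, int, __v16qi, int, int);

   which is the shape the SSE4.2 pcmpestr builtins defined further
   down expect.  */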
22574 /* SSE5 instructions. */
22575 tree v2di_ftype_v2di_v2di_v2di
22576 = build_function_type_list (V2DI_type_node,
22577 V2DI_type_node,
22578 V2DI_type_node,
22579 V2DI_type_node,
22580 NULL_TREE);
22582 tree v4si_ftype_v4si_v4si_v4si
22583 = build_function_type_list (V4SI_type_node,
22584 V4SI_type_node,
22585 V4SI_type_node,
22586 V4SI_type_node,
22587 NULL_TREE);
22589 tree v4si_ftype_v4si_v4si_v2di
22590 = build_function_type_list (V4SI_type_node,
22591 V4SI_type_node,
22592 V4SI_type_node,
22593 V2DI_type_node,
22594 NULL_TREE);
22596 tree v8hi_ftype_v8hi_v8hi_v8hi
22597 = build_function_type_list (V8HI_type_node,
22598 V8HI_type_node,
22599 V8HI_type_node,
22600 V8HI_type_node,
22601 NULL_TREE);
22603 tree v8hi_ftype_v8hi_v8hi_v4si
22604 = build_function_type_list (V8HI_type_node,
22605 V8HI_type_node,
22606 V8HI_type_node,
22607 V4SI_type_node,
22608 NULL_TREE);
22610 tree v2df_ftype_v2df_v2df_v16qi
22611 = build_function_type_list (V2DF_type_node,
22612 V2DF_type_node,
22613 V2DF_type_node,
22614 V16QI_type_node,
22615 NULL_TREE);
22617 tree v4sf_ftype_v4sf_v4sf_v16qi
22618 = build_function_type_list (V4SF_type_node,
22619 V4SF_type_node,
22620 V4SF_type_node,
22621 V16QI_type_node,
22622 NULL_TREE);
22624 tree v2di_ftype_v2di_si
22625 = build_function_type_list (V2DI_type_node,
22626 V2DI_type_node,
22627 integer_type_node,
22628 NULL_TREE);
22630 tree v4si_ftype_v4si_si
22631 = build_function_type_list (V4SI_type_node,
22632 V4SI_type_node,
22633 integer_type_node,
22634 NULL_TREE);
22636 tree v8hi_ftype_v8hi_si
22637 = build_function_type_list (V8HI_type_node,
22638 V8HI_type_node,
22639 integer_type_node,
22640 NULL_TREE);
22642 tree v16qi_ftype_v16qi_si
22643 = build_function_type_list (V16QI_type_node,
22644 V16QI_type_node,
22645 integer_type_node,
22646 NULL_TREE);
22647 tree v4sf_ftype_v4hi
22648 = build_function_type_list (V4SF_type_node,
22649 V4HI_type_node,
22650 NULL_TREE);
22652 tree v4hi_ftype_v4sf
22653 = build_function_type_list (V4HI_type_node,
22654 V4SF_type_node,
22655 NULL_TREE);
22657 tree v2di_ftype_v2di
22658 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22660 tree v16qi_ftype_v8hi_v8hi
22661 = build_function_type_list (V16QI_type_node,
22662 V8HI_type_node, V8HI_type_node,
22663 NULL_TREE);
22664 tree v8hi_ftype_v4si_v4si
22665 = build_function_type_list (V8HI_type_node,
22666 V4SI_type_node, V4SI_type_node,
22667 NULL_TREE);
22668 tree v8hi_ftype_v16qi_v16qi
22669 = build_function_type_list (V8HI_type_node,
22670 V16QI_type_node, V16QI_type_node,
22671 NULL_TREE);
22672 tree v4hi_ftype_v8qi_v8qi
22673 = build_function_type_list (V4HI_type_node,
22674 V8QI_type_node, V8QI_type_node,
22675 NULL_TREE);
22676 tree unsigned_ftype_unsigned_uchar
22677 = build_function_type_list (unsigned_type_node,
22678 unsigned_type_node,
22679 unsigned_char_type_node,
22680 NULL_TREE);
22681 tree unsigned_ftype_unsigned_ushort
22682 = build_function_type_list (unsigned_type_node,
22683 unsigned_type_node,
22684 short_unsigned_type_node,
22685 NULL_TREE);
22686 tree unsigned_ftype_unsigned_unsigned
22687 = build_function_type_list (unsigned_type_node,
22688 unsigned_type_node,
22689 unsigned_type_node,
22690 NULL_TREE);
22691 tree uint64_ftype_uint64_uint64
22692 = build_function_type_list (long_long_unsigned_type_node,
22693 long_long_unsigned_type_node,
22694 long_long_unsigned_type_node,
22695 NULL_TREE);
22696 tree float_ftype_float
22697 = build_function_type_list (float_type_node,
22698 float_type_node,
22699 NULL_TREE);
22701 /* AVX builtins */
22702 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22703 V32QImode);
22704 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22705 V8SImode);
22706 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22707 V8SFmode);
22708 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22709 V4DImode);
22710 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22711 V4DFmode);
22712 tree v8sf_ftype_v8sf
22713 = build_function_type_list (V8SF_type_node,
22714 V8SF_type_node,
22715 NULL_TREE);
22716 tree v8si_ftype_v8sf
22717 = build_function_type_list (V8SI_type_node,
22718 V8SF_type_node,
22719 NULL_TREE);
22720 tree v8sf_ftype_v8si
22721 = build_function_type_list (V8SF_type_node,
22722 V8SI_type_node,
22723 NULL_TREE);
22724 tree v4si_ftype_v4df
22725 = build_function_type_list (V4SI_type_node,
22726 V4DF_type_node,
22727 NULL_TREE);
22728 tree v4df_ftype_v4df
22729 = build_function_type_list (V4DF_type_node,
22730 V4DF_type_node,
22731 NULL_TREE);
22732 tree v4df_ftype_v4si
22733 = build_function_type_list (V4DF_type_node,
22734 V4SI_type_node,
22735 NULL_TREE);
22736 tree v4df_ftype_v4sf
22737 = build_function_type_list (V4DF_type_node,
22738 V4SF_type_node,
22739 NULL_TREE);
22740 tree v4sf_ftype_v4df
22741 = build_function_type_list (V4SF_type_node,
22742 V4DF_type_node,
22743 NULL_TREE);
22744 tree v8sf_ftype_v8sf_v8sf
22745 = build_function_type_list (V8SF_type_node,
22746 V8SF_type_node, V8SF_type_node,
22747 NULL_TREE);
22748 tree v4df_ftype_v4df_v4df
22749 = build_function_type_list (V4DF_type_node,
22750 V4DF_type_node, V4DF_type_node,
22751 NULL_TREE);
22752 tree v8sf_ftype_v8sf_int
22753 = build_function_type_list (V8SF_type_node,
22754 V8SF_type_node, integer_type_node,
22755 NULL_TREE);
22756 tree v4si_ftype_v8si_int
22757 = build_function_type_list (V4SI_type_node,
22758 V8SI_type_node, integer_type_node,
22759 NULL_TREE);
22760 tree v4df_ftype_v4df_int
22761 = build_function_type_list (V4DF_type_node,
22762 V4DF_type_node, integer_type_node,
22763 NULL_TREE);
22764 tree v4sf_ftype_v8sf_int
22765 = build_function_type_list (V4SF_type_node,
22766 V8SF_type_node, integer_type_node,
22767 NULL_TREE);
22768 tree v2df_ftype_v4df_int
22769 = build_function_type_list (V2DF_type_node,
22770 V4DF_type_node, integer_type_node,
22771 NULL_TREE);
22772 tree v8sf_ftype_v8sf_v8sf_int
22773 = build_function_type_list (V8SF_type_node,
22774 V8SF_type_node, V8SF_type_node,
22775 integer_type_node,
22776 NULL_TREE);
22777 tree v8sf_ftype_v8sf_v8sf_v8sf
22778 = build_function_type_list (V8SF_type_node,
22779 V8SF_type_node, V8SF_type_node,
22780 V8SF_type_node,
22781 NULL_TREE);
22782 tree v4df_ftype_v4df_v4df_v4df
22783 = build_function_type_list (V4DF_type_node,
22784 V4DF_type_node, V4DF_type_node,
22785 V4DF_type_node,
22786 NULL_TREE);
22787 tree v8si_ftype_v8si_v8si_int
22788 = build_function_type_list (V8SI_type_node,
22789 V8SI_type_node, V8SI_type_node,
22790 integer_type_node,
22791 NULL_TREE);
22792 tree v4df_ftype_v4df_v4df_int
22793 = build_function_type_list (V4DF_type_node,
22794 V4DF_type_node, V4DF_type_node,
22795 integer_type_node,
22796 NULL_TREE);
22797 tree v8sf_ftype_pcfloat
22798 = build_function_type_list (V8SF_type_node,
22799 pcfloat_type_node,
22800 NULL_TREE);
22801 tree v4df_ftype_pcdouble
22802 = build_function_type_list (V4DF_type_node,
22803 pcdouble_type_node,
22804 NULL_TREE);
22805 tree pcv4sf_type_node
22806 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22807 tree pcv2df_type_node
22808 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22809 tree v8sf_ftype_pcv4sf
22810 = build_function_type_list (V8SF_type_node,
22811 pcv4sf_type_node,
22812 NULL_TREE);
22813 tree v4df_ftype_pcv2df
22814 = build_function_type_list (V4DF_type_node,
22815 pcv2df_type_node,
22816 NULL_TREE);
22817 tree v32qi_ftype_pcchar
22818 = build_function_type_list (V32QI_type_node,
22819 pcchar_type_node,
22820 NULL_TREE);
22821 tree void_ftype_pchar_v32qi
22822 = build_function_type_list (void_type_node,
22823 pchar_type_node, V32QI_type_node,
22824 NULL_TREE);
22825 tree v8si_ftype_v8si_v4si_int
22826 = build_function_type_list (V8SI_type_node,
22827 V8SI_type_node, V4SI_type_node,
22828 integer_type_node,
22829 NULL_TREE);
22830 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22831 tree void_ftype_pv4di_v4di
22832 = build_function_type_list (void_type_node,
22833 pv4di_type_node, V4DI_type_node,
22834 NULL_TREE);
22835 tree v8sf_ftype_v8sf_v4sf_int
22836 = build_function_type_list (V8SF_type_node,
22837 V8SF_type_node, V4SF_type_node,
22838 integer_type_node,
22839 NULL_TREE);
22840 tree v4df_ftype_v4df_v2df_int
22841 = build_function_type_list (V4DF_type_node,
22842 V4DF_type_node, V2DF_type_node,
22843 integer_type_node,
22844 NULL_TREE);
22845 tree void_ftype_pfloat_v8sf
22846 = build_function_type_list (void_type_node,
22847 pfloat_type_node, V8SF_type_node,
22848 NULL_TREE);
22849 tree void_ftype_pdouble_v4df
22850 = build_function_type_list (void_type_node,
22851 pdouble_type_node, V4DF_type_node,
22852 NULL_TREE);
22853 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22854 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22855 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22856 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22857 tree pcv8sf_type_node
22858 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22859 tree pcv4df_type_node
22860 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22861 tree v8sf_ftype_pcv8sf_v8sf
22862 = build_function_type_list (V8SF_type_node,
22863 pcv8sf_type_node, V8SF_type_node,
22864 NULL_TREE);
22865 tree v4df_ftype_pcv4df_v4df
22866 = build_function_type_list (V4DF_type_node,
22867 pcv4df_type_node, V4DF_type_node,
22868 NULL_TREE);
22869 tree v4sf_ftype_pcv4sf_v4sf
22870 = build_function_type_list (V4SF_type_node,
22871 pcv4sf_type_node, V4SF_type_node,
22872 NULL_TREE);
22873 tree v2df_ftype_pcv2df_v2df
22874 = build_function_type_list (V2DF_type_node,
22875 pcv2df_type_node, V2DF_type_node,
22876 NULL_TREE);
22877 tree void_ftype_pv8sf_v8sf_v8sf
22878 = build_function_type_list (void_type_node,
22879 pv8sf_type_node, V8SF_type_node,
22880 V8SF_type_node,
22881 NULL_TREE);
22882 tree void_ftype_pv4df_v4df_v4df
22883 = build_function_type_list (void_type_node,
22884 pv4df_type_node, V4DF_type_node,
22885 V4DF_type_node,
22886 NULL_TREE);
22887 tree void_ftype_pv4sf_v4sf_v4sf
22888 = build_function_type_list (void_type_node,
22889 pv4sf_type_node, V4SF_type_node,
22890 V4SF_type_node,
22891 NULL_TREE);
22892 tree void_ftype_pv2df_v2df_v2df
22893 = build_function_type_list (void_type_node,
22894 pv2df_type_node, V2DF_type_node,
22895 V2DF_type_node,
22896 NULL_TREE);
22897 tree v4df_ftype_v2df
22898 = build_function_type_list (V4DF_type_node,
22899 V2DF_type_node,
22900 NULL_TREE);
22901 tree v8sf_ftype_v4sf
22902 = build_function_type_list (V8SF_type_node,
22903 V4SF_type_node,
22904 NULL_TREE);
22905 tree v8si_ftype_v4si
22906 = build_function_type_list (V8SI_type_node,
22907 V4SI_type_node,
22908 NULL_TREE);
22909 tree v2df_ftype_v4df
22910 = build_function_type_list (V2DF_type_node,
22911 V4DF_type_node,
22912 NULL_TREE);
22913 tree v4sf_ftype_v8sf
22914 = build_function_type_list (V4SF_type_node,
22915 V8SF_type_node,
22916 NULL_TREE);
22917 tree v4si_ftype_v8si
22918 = build_function_type_list (V4SI_type_node,
22919 V8SI_type_node,
22920 NULL_TREE);
22921 tree int_ftype_v4df
22922 = build_function_type_list (integer_type_node,
22923 V4DF_type_node,
22924 NULL_TREE);
22925 tree int_ftype_v8sf
22926 = build_function_type_list (integer_type_node,
22927 V8SF_type_node,
22928 NULL_TREE);
22929 tree int_ftype_v8sf_v8sf
22930 = build_function_type_list (integer_type_node,
22931 V8SF_type_node, V8SF_type_node,
22932 NULL_TREE);
22933 tree int_ftype_v4di_v4di
22934 = build_function_type_list (integer_type_node,
22935 V4DI_type_node, V4DI_type_node,
22936 NULL_TREE);
22937 tree int_ftype_v4df_v4df
22938 = build_function_type_list (integer_type_node,
22939 V4DF_type_node, V4DF_type_node,
22940 NULL_TREE);
22941 tree v8sf_ftype_v8sf_v8si
22942 = build_function_type_list (V8SF_type_node,
22943 V8SF_type_node, V8SI_type_node,
22944 NULL_TREE);
22945 tree v4df_ftype_v4df_v4di
22946 = build_function_type_list (V4DF_type_node,
22947 V4DF_type_node, V4DI_type_node,
22948 NULL_TREE);
22949 tree v4sf_ftype_v4sf_v4si
22950 = build_function_type_list (V4SF_type_node,
22951 V4SF_type_node, V4SI_type_node, NULL_TREE);
22952 tree v2df_ftype_v2df_v2di
22953 = build_function_type_list (V2DF_type_node,
22954 V2DF_type_node, V2DI_type_node, NULL_TREE);
22956 tree ftype;
22958 /* Add all special builtins with a variable number of operands. */
22959 for (i = 0, d = bdesc_special_args;
22960 i < ARRAY_SIZE (bdesc_special_args);
22961 i++, d++)
22962 {
22963 tree type;
22965 if (d->name == 0)
22966 continue;
22968 switch ((enum ix86_special_builtin_type) d->flag)
22969 {
22970 case VOID_FTYPE_VOID:
22971 type = void_ftype_void;
22972 break;
22973 case V32QI_FTYPE_PCCHAR:
22974 type = v32qi_ftype_pcchar;
22975 break;
22976 case V16QI_FTYPE_PCCHAR:
22977 type = v16qi_ftype_pcchar;
22978 break;
22979 case V8SF_FTYPE_PCV4SF:
22980 type = v8sf_ftype_pcv4sf;
22981 break;
22982 case V8SF_FTYPE_PCFLOAT:
22983 type = v8sf_ftype_pcfloat;
22984 break;
22985 case V4DF_FTYPE_PCV2DF:
22986 type = v4df_ftype_pcv2df;
22987 break;
22988 case V4DF_FTYPE_PCDOUBLE:
22989 type = v4df_ftype_pcdouble;
22990 break;
22991 case V4SF_FTYPE_PCFLOAT:
22992 type = v4sf_ftype_pcfloat;
22993 break;
22994 case V2DI_FTYPE_PV2DI:
22995 type = v2di_ftype_pv2di;
22996 break;
22997 case V2DF_FTYPE_PCDOUBLE:
22998 type = v2df_ftype_pcdouble;
22999 break;
23000 case V8SF_FTYPE_PCV8SF_V8SF:
23001 type = v8sf_ftype_pcv8sf_v8sf;
23002 break;
23003 case V4DF_FTYPE_PCV4DF_V4DF:
23004 type = v4df_ftype_pcv4df_v4df;
23005 break;
23006 case V4SF_FTYPE_V4SF_PCV2SF:
23007 type = v4sf_ftype_v4sf_pcv2sf;
23008 break;
23009 case V4SF_FTYPE_PCV4SF_V4SF:
23010 type = v4sf_ftype_pcv4sf_v4sf;
23011 break;
23012 case V2DF_FTYPE_V2DF_PCDOUBLE:
23013 type = v2df_ftype_v2df_pcdouble;
23014 break;
23015 case V2DF_FTYPE_PCV2DF_V2DF:
23016 type = v2df_ftype_pcv2df_v2df;
23017 break;
23018 case VOID_FTYPE_PV2SF_V4SF:
23019 type = void_ftype_pv2sf_v4sf;
23020 break;
23021 case VOID_FTYPE_PV4DI_V4DI:
23022 type = void_ftype_pv4di_v4di;
23023 break;
23024 case VOID_FTYPE_PV2DI_V2DI:
23025 type = void_ftype_pv2di_v2di;
23026 break;
23027 case VOID_FTYPE_PCHAR_V32QI:
23028 type = void_ftype_pchar_v32qi;
23029 break;
23030 case VOID_FTYPE_PCHAR_V16QI:
23031 type = void_ftype_pchar_v16qi;
23032 break;
23033 case VOID_FTYPE_PFLOAT_V8SF:
23034 type = void_ftype_pfloat_v8sf;
23035 break;
23036 case VOID_FTYPE_PFLOAT_V4SF:
23037 type = void_ftype_pfloat_v4sf;
23038 break;
23039 case VOID_FTYPE_PDOUBLE_V4DF:
23040 type = void_ftype_pdouble_v4df;
23041 break;
23042 case VOID_FTYPE_PDOUBLE_V2DF:
23043 type = void_ftype_pdouble_v2df;
23044 break;
23045 case VOID_FTYPE_PDI_DI:
23046 type = void_ftype_pdi_di;
23047 break;
23048 case VOID_FTYPE_PINT_INT:
23049 type = void_ftype_pint_int;
23050 break;
23051 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23052 type = void_ftype_pv8sf_v8sf_v8sf;
23053 break;
23054 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23055 type = void_ftype_pv4df_v4df_v4df;
23056 break;
23057 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23058 type = void_ftype_pv4sf_v4sf_v4sf;
23059 break;
23060 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23061 type = void_ftype_pv2df_v2df_v2df;
23062 break;
23063 default:
23064 gcc_unreachable ();
23065 }
23067 def_builtin (d->mask, d->name, type, d->code);
23068 }
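/* As a sketch of what one pass through the loop above does: a
   bdesc_special_args entry of the form

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf,
       "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS,
       UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }

   selects void_ftype_pfloat_v4sf in the switch, so def_builtin
   registers __builtin_ia32_movntps with the signature
   void (float *, __v4sf).  The field layout shown is schematic; see
   the builtin_description table itself for the exact entry.  */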
23070 /* Add all builtins with a variable number of operands. */
23071 for (i = 0, d = bdesc_args;
23072 i < ARRAY_SIZE (bdesc_args);
23073 i++, d++)
23074 {
23075 tree type;
23077 if (d->name == 0)
23078 continue;
23080 switch ((enum ix86_builtin_type) d->flag)
23081 {
23082 case FLOAT_FTYPE_FLOAT:
23083 type = float_ftype_float;
23084 break;
23085 case INT_FTYPE_V8SF_V8SF_PTEST:
23086 type = int_ftype_v8sf_v8sf;
23087 break;
23088 case INT_FTYPE_V4DI_V4DI_PTEST:
23089 type = int_ftype_v4di_v4di;
23090 break;
23091 case INT_FTYPE_V4DF_V4DF_PTEST:
23092 type = int_ftype_v4df_v4df;
23093 break;
23094 case INT_FTYPE_V4SF_V4SF_PTEST:
23095 type = int_ftype_v4sf_v4sf;
23096 break;
23097 case INT_FTYPE_V2DI_V2DI_PTEST:
23098 type = int_ftype_v2di_v2di;
23099 break;
23100 case INT_FTYPE_V2DF_V2DF_PTEST:
23101 type = int_ftype_v2df_v2df;
23102 break;
23103 case INT64_FTYPE_V4SF:
23104 type = int64_ftype_v4sf;
23105 break;
23106 case INT64_FTYPE_V2DF:
23107 type = int64_ftype_v2df;
23108 break;
23109 case INT_FTYPE_V16QI:
23110 type = int_ftype_v16qi;
23111 break;
23112 case INT_FTYPE_V8QI:
23113 type = int_ftype_v8qi;
23114 break;
23115 case INT_FTYPE_V8SF:
23116 type = int_ftype_v8sf;
23117 break;
23118 case INT_FTYPE_V4DF:
23119 type = int_ftype_v4df;
23120 break;
23121 case INT_FTYPE_V4SF:
23122 type = int_ftype_v4sf;
23123 break;
23124 case INT_FTYPE_V2DF:
23125 type = int_ftype_v2df;
23126 break;
23127 case V16QI_FTYPE_V16QI:
23128 type = v16qi_ftype_v16qi;
23129 break;
23130 case V8SI_FTYPE_V8SF:
23131 type = v8si_ftype_v8sf;
23132 break;
23133 case V8SI_FTYPE_V4SI:
23134 type = v8si_ftype_v4si;
23135 break;
23136 case V8HI_FTYPE_V8HI:
23137 type = v8hi_ftype_v8hi;
23138 break;
23139 case V8HI_FTYPE_V16QI:
23140 type = v8hi_ftype_v16qi;
23141 break;
23142 case V8QI_FTYPE_V8QI:
23143 type = v8qi_ftype_v8qi;
23144 break;
23145 case V8SF_FTYPE_V8SF:
23146 type = v8sf_ftype_v8sf;
23147 break;
23148 case V8SF_FTYPE_V8SI:
23149 type = v8sf_ftype_v8si;
23150 break;
23151 case V8SF_FTYPE_V4SF:
23152 type = v8sf_ftype_v4sf;
23153 break;
23154 case V4SI_FTYPE_V4DF:
23155 type = v4si_ftype_v4df;
23156 break;
23157 case V4SI_FTYPE_V4SI:
23158 type = v4si_ftype_v4si;
23159 break;
23160 case V4SI_FTYPE_V16QI:
23161 type = v4si_ftype_v16qi;
23162 break;
23163 case V4SI_FTYPE_V8SI:
23164 type = v4si_ftype_v8si;
23165 break;
23166 case V4SI_FTYPE_V8HI:
23167 type = v4si_ftype_v8hi;
23168 break;
23169 case V4SI_FTYPE_V4SF:
23170 type = v4si_ftype_v4sf;
23171 break;
23172 case V4SI_FTYPE_V2DF:
23173 type = v4si_ftype_v2df;
23174 break;
23175 case V4HI_FTYPE_V4HI:
23176 type = v4hi_ftype_v4hi;
23177 break;
23178 case V4DF_FTYPE_V4DF:
23179 type = v4df_ftype_v4df;
23180 break;
23181 case V4DF_FTYPE_V4SI:
23182 type = v4df_ftype_v4si;
23183 break;
23184 case V4DF_FTYPE_V4SF:
23185 type = v4df_ftype_v4sf;
23186 break;
23187 case V4DF_FTYPE_V2DF:
23188 type = v4df_ftype_v2df;
23189 break;
23190 case V4SF_FTYPE_V4SF:
23191 case V4SF_FTYPE_V4SF_VEC_MERGE:
23192 type = v4sf_ftype_v4sf;
23193 break;
23194 case V4SF_FTYPE_V8SF:
23195 type = v4sf_ftype_v8sf;
23196 break;
23197 case V4SF_FTYPE_V4SI:
23198 type = v4sf_ftype_v4si;
23199 break;
23200 case V4SF_FTYPE_V4DF:
23201 type = v4sf_ftype_v4df;
23202 break;
23203 case V4SF_FTYPE_V2DF:
23204 type = v4sf_ftype_v2df;
23205 break;
23206 case V2DI_FTYPE_V2DI:
23207 type = v2di_ftype_v2di;
23208 break;
23209 case V2DI_FTYPE_V16QI:
23210 type = v2di_ftype_v16qi;
23211 break;
23212 case V2DI_FTYPE_V8HI:
23213 type = v2di_ftype_v8hi;
23214 break;
23215 case V2DI_FTYPE_V4SI:
23216 type = v2di_ftype_v4si;
23217 break;
23218 case V2SI_FTYPE_V2SI:
23219 type = v2si_ftype_v2si;
23220 break;
23221 case V2SI_FTYPE_V4SF:
23222 type = v2si_ftype_v4sf;
23223 break;
23224 case V2SI_FTYPE_V2DF:
23225 type = v2si_ftype_v2df;
23226 break;
23227 case V2SI_FTYPE_V2SF:
23228 type = v2si_ftype_v2sf;
23229 break;
23230 case V2DF_FTYPE_V4DF:
23231 type = v2df_ftype_v4df;
23232 break;
23233 case V2DF_FTYPE_V4SF:
23234 type = v2df_ftype_v4sf;
23235 break;
23236 case V2DF_FTYPE_V2DF:
23237 case V2DF_FTYPE_V2DF_VEC_MERGE:
23238 type = v2df_ftype_v2df;
23239 break;
23240 case V2DF_FTYPE_V2SI:
23241 type = v2df_ftype_v2si;
23242 break;
23243 case V2DF_FTYPE_V4SI:
23244 type = v2df_ftype_v4si;
23245 break;
23246 case V2SF_FTYPE_V2SF:
23247 type = v2sf_ftype_v2sf;
23248 break;
23249 case V2SF_FTYPE_V2SI:
23250 type = v2sf_ftype_v2si;
23251 break;
23252 case V16QI_FTYPE_V16QI_V16QI:
23253 type = v16qi_ftype_v16qi_v16qi;
23254 break;
23255 case V16QI_FTYPE_V8HI_V8HI:
23256 type = v16qi_ftype_v8hi_v8hi;
23257 break;
23258 case V8QI_FTYPE_V8QI_V8QI:
23259 type = v8qi_ftype_v8qi_v8qi;
23260 break;
23261 case V8QI_FTYPE_V4HI_V4HI:
23262 type = v8qi_ftype_v4hi_v4hi;
23263 break;
23264 case V8HI_FTYPE_V8HI_V8HI:
23265 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23266 type = v8hi_ftype_v8hi_v8hi;
23267 break;
23268 case V8HI_FTYPE_V16QI_V16QI:
23269 type = v8hi_ftype_v16qi_v16qi;
23270 break;
23271 case V8HI_FTYPE_V4SI_V4SI:
23272 type = v8hi_ftype_v4si_v4si;
23273 break;
23274 case V8HI_FTYPE_V8HI_SI_COUNT:
23275 type = v8hi_ftype_v8hi_int;
23276 break;
23277 case V8SF_FTYPE_V8SF_V8SF:
23278 type = v8sf_ftype_v8sf_v8sf;
23279 break;
23280 case V8SF_FTYPE_V8SF_V8SI:
23281 type = v8sf_ftype_v8sf_v8si;
23282 break;
23283 case V4SI_FTYPE_V4SI_V4SI:
23284 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23285 type = v4si_ftype_v4si_v4si;
23286 break;
23287 case V4SI_FTYPE_V8HI_V8HI:
23288 type = v4si_ftype_v8hi_v8hi;
23289 break;
23290 case V4SI_FTYPE_V4SF_V4SF:
23291 type = v4si_ftype_v4sf_v4sf;
23292 break;
23293 case V4SI_FTYPE_V2DF_V2DF:
23294 type = v4si_ftype_v2df_v2df;
23295 break;
23296 case V4SI_FTYPE_V4SI_SI_COUNT:
23297 type = v4si_ftype_v4si_int;
23298 break;
23299 case V4HI_FTYPE_V4HI_V4HI:
23300 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23301 type = v4hi_ftype_v4hi_v4hi;
23302 break;
23303 case V4HI_FTYPE_V8QI_V8QI:
23304 type = v4hi_ftype_v8qi_v8qi;
23305 break;
23306 case V4HI_FTYPE_V2SI_V2SI:
23307 type = v4hi_ftype_v2si_v2si;
23308 break;
23309 case V4HI_FTYPE_V4HI_SI_COUNT:
23310 type = v4hi_ftype_v4hi_int;
23311 break;
23312 case V4DF_FTYPE_V4DF_V4DF:
23313 type = v4df_ftype_v4df_v4df;
23314 break;
23315 case V4DF_FTYPE_V4DF_V4DI:
23316 type = v4df_ftype_v4df_v4di;
23317 break;
23318 case V4SF_FTYPE_V4SF_V4SF:
23319 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23320 type = v4sf_ftype_v4sf_v4sf;
23321 break;
23322 case V4SF_FTYPE_V4SF_V4SI:
23323 type = v4sf_ftype_v4sf_v4si;
23324 break;
23325 case V4SF_FTYPE_V4SF_V2SI:
23326 type = v4sf_ftype_v4sf_v2si;
23327 break;
23328 case V4SF_FTYPE_V4SF_V2DF:
23329 type = v4sf_ftype_v4sf_v2df;
23330 break;
23331 case V4SF_FTYPE_V4SF_DI:
23332 type = v4sf_ftype_v4sf_int64;
23333 break;
23334 case V4SF_FTYPE_V4SF_SI:
23335 type = v4sf_ftype_v4sf_int;
23336 break;
23337 case V2DI_FTYPE_V2DI_V2DI:
23338 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23339 type = v2di_ftype_v2di_v2di;
23340 break;
23341 case V2DI_FTYPE_V16QI_V16QI:
23342 type = v2di_ftype_v16qi_v16qi;
23343 break;
23344 case V2DI_FTYPE_V4SI_V4SI:
23345 type = v2di_ftype_v4si_v4si;
23346 break;
23347 case V2DI_FTYPE_V2DI_V16QI:
23348 type = v2di_ftype_v2di_v16qi;
23349 break;
23350 case V2DI_FTYPE_V2DF_V2DF:
23351 type = v2di_ftype_v2df_v2df;
23352 break;
23353 case V2DI_FTYPE_V2DI_SI_COUNT:
23354 type = v2di_ftype_v2di_int;
23355 break;
23356 case V2SI_FTYPE_V2SI_V2SI:
23357 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23358 type = v2si_ftype_v2si_v2si;
23359 break;
23360 case V2SI_FTYPE_V4HI_V4HI:
23361 type = v2si_ftype_v4hi_v4hi;
23362 break;
23363 case V2SI_FTYPE_V2SF_V2SF:
23364 type = v2si_ftype_v2sf_v2sf;
23365 break;
23366 case V2SI_FTYPE_V2SI_SI_COUNT:
23367 type = v2si_ftype_v2si_int;
23368 break;
23369 case V2DF_FTYPE_V2DF_V2DF:
23370 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23371 type = v2df_ftype_v2df_v2df;
23372 break;
23373 case V2DF_FTYPE_V2DF_V4SF:
23374 type = v2df_ftype_v2df_v4sf;
23375 break;
23376 case V2DF_FTYPE_V2DF_V2DI:
23377 type = v2df_ftype_v2df_v2di;
23378 break;
23379 case V2DF_FTYPE_V2DF_DI:
23380 type = v2df_ftype_v2df_int64;
23381 break;
23382 case V2DF_FTYPE_V2DF_SI:
23383 type = v2df_ftype_v2df_int;
23384 break;
23385 case V2SF_FTYPE_V2SF_V2SF:
23386 type = v2sf_ftype_v2sf_v2sf;
23387 break;
23388 case V1DI_FTYPE_V1DI_V1DI:
23389 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23390 type = v1di_ftype_v1di_v1di;
23391 break;
23392 case V1DI_FTYPE_V8QI_V8QI:
23393 type = v1di_ftype_v8qi_v8qi;
23394 break;
23395 case V1DI_FTYPE_V2SI_V2SI:
23396 type = v1di_ftype_v2si_v2si;
23397 break;
23398 case V1DI_FTYPE_V1DI_SI_COUNT:
23399 type = v1di_ftype_v1di_int;
23400 break;
23401 case UINT64_FTYPE_UINT64_UINT64:
23402 type = uint64_ftype_uint64_uint64;
23403 break;
23404 case UINT_FTYPE_UINT_UINT:
23405 type = unsigned_ftype_unsigned_unsigned;
23406 break;
23407 case UINT_FTYPE_UINT_USHORT:
23408 type = unsigned_ftype_unsigned_ushort;
23409 break;
23410 case UINT_FTYPE_UINT_UCHAR:
23411 type = unsigned_ftype_unsigned_uchar;
23412 break;
23413 case V8HI_FTYPE_V8HI_INT:
23414 type = v8hi_ftype_v8hi_int;
23415 break;
23416 case V8SF_FTYPE_V8SF_INT:
23417 type = v8sf_ftype_v8sf_int;
23418 break;
23419 case V4SI_FTYPE_V4SI_INT:
23420 type = v4si_ftype_v4si_int;
23421 break;
23422 case V4SI_FTYPE_V8SI_INT:
23423 type = v4si_ftype_v8si_int;
23424 break;
23425 case V4HI_FTYPE_V4HI_INT:
23426 type = v4hi_ftype_v4hi_int;
23427 break;
23428 case V4DF_FTYPE_V4DF_INT:
23429 type = v4df_ftype_v4df_int;
23430 break;
23431 case V4SF_FTYPE_V4SF_INT:
23432 type = v4sf_ftype_v4sf_int;
23433 break;
23434 case V4SF_FTYPE_V8SF_INT:
23435 type = v4sf_ftype_v8sf_int;
23436 break;
23437 case V2DI_FTYPE_V2DI_INT:
23438 case V2DI2TI_FTYPE_V2DI_INT:
23439 type = v2di_ftype_v2di_int;
23440 break;
23441 case V2DF_FTYPE_V2DF_INT:
23442 type = v2df_ftype_v2df_int;
23443 break;
23444 case V2DF_FTYPE_V4DF_INT:
23445 type = v2df_ftype_v4df_int;
23446 break;
23447 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23448 type = v16qi_ftype_v16qi_v16qi_v16qi;
23449 break;
23450 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23451 type = v8sf_ftype_v8sf_v8sf_v8sf;
23452 break;
23453 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23454 type = v4df_ftype_v4df_v4df_v4df;
23455 break;
23456 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23457 type = v4sf_ftype_v4sf_v4sf_v4sf;
23458 break;
23459 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23460 type = v2df_ftype_v2df_v2df_v2df;
23461 break;
23462 case V16QI_FTYPE_V16QI_V16QI_INT:
23463 type = v16qi_ftype_v16qi_v16qi_int;
23464 break;
23465 case V8SI_FTYPE_V8SI_V8SI_INT:
23466 type = v8si_ftype_v8si_v8si_int;
23467 break;
23468 case V8SI_FTYPE_V8SI_V4SI_INT:
23469 type = v8si_ftype_v8si_v4si_int;
23470 break;
23471 case V8HI_FTYPE_V8HI_V8HI_INT:
23472 type = v8hi_ftype_v8hi_v8hi_int;
23473 break;
23474 case V8SF_FTYPE_V8SF_V8SF_INT:
23475 type = v8sf_ftype_v8sf_v8sf_int;
23476 break;
23477 case V8SF_FTYPE_V8SF_V4SF_INT:
23478 type = v8sf_ftype_v8sf_v4sf_int;
23479 break;
23480 case V4SI_FTYPE_V4SI_V4SI_INT:
23481 type = v4si_ftype_v4si_v4si_int;
23482 break;
23483 case V4DF_FTYPE_V4DF_V4DF_INT:
23484 type = v4df_ftype_v4df_v4df_int;
23485 break;
23486 case V4DF_FTYPE_V4DF_V2DF_INT:
23487 type = v4df_ftype_v4df_v2df_int;
23488 break;
23489 case V4SF_FTYPE_V4SF_V4SF_INT:
23490 type = v4sf_ftype_v4sf_v4sf_int;
23491 break;
23492 case V2DI_FTYPE_V2DI_V2DI_INT:
23493 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23494 type = v2di_ftype_v2di_v2di_int;
23495 break;
23496 case V2DF_FTYPE_V2DF_V2DF_INT:
23497 type = v2df_ftype_v2df_v2df_int;
23498 break;
23499 case V2DI_FTYPE_V2DI_UINT_UINT:
23500 type = v2di_ftype_v2di_unsigned_unsigned;
23501 break;
23502 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23503 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23504 break;
23505 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23506 type = v1di_ftype_v1di_v1di_int;
23507 break;
23508 default:
23509 gcc_unreachable ();
23510 }
23512 def_builtin_const (d->mask, d->name, type, d->code);
23513 }
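/* Unlike the special builtins above, these go through
   def_builtin_const, which marks the resulting decl TREE_READONLY.
   That lets the optimizers treat repeated calls as common
   subexpressions, e.g.

     __v4sf a = __builtin_ia32_addps (x, y);
     __v4sf b = __builtin_ia32_addps (x, y);

   can be folded to a single addps, since the arithmetic builtins
   have no side effects.  (A usage sketch.)  */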
23515 /* pcmpestr[im] insns. */
23516 for (i = 0, d = bdesc_pcmpestr;
23517 i < ARRAY_SIZE (bdesc_pcmpestr);
23518 i++, d++)
23519 {
23520 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23521 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23522 else
23523 ftype = int_ftype_v16qi_int_v16qi_int_int;
23524 def_builtin_const (d->mask, d->name, ftype, d->code);
23525 }
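/* The pcmpestri forms take explicit string lengths plus a mode
   immediate and return an index, roughly

     int idx = __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x0c);

   while the pcmpestrm form returns a __v16qi mask instead; hence the
   two signatures chosen above.  (Illustrative use only; the mode
   value 0x0c is an arbitrary example.)  */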
23527 /* pcmpistr[im] insns. */
23528 for (i = 0, d = bdesc_pcmpistr;
23529 i < ARRAY_SIZE (bdesc_pcmpistr);
23530 i++, d++)
23531 {
23532 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23533 ftype = v16qi_ftype_v16qi_v16qi_int;
23534 else
23535 ftype = int_ftype_v16qi_v16qi_int;
23536 def_builtin_const (d->mask, d->name, ftype, d->code);
23537 }
23539 /* comi/ucomi insns. */
23540 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23541 if (d->mask == OPTION_MASK_ISA_SSE2)
23542 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23543 else
23544 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
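/* E.g. the SSE entry __builtin_ia32_comieq is registered as
   int (__v4sf, __v4sf) and its SSE2 counterpart
   __builtin_ia32_comisdeq as int (__v2df, __v2df); these back the
   _mm_comieq_ss/_mm_comieq_sd intrinsics (a sketch of two table
   entries, not an exhaustive list).  */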
23546 /* SSE */
23547 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23548 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
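/* These two expose the MXCSR control/status register to user code
   (xmmintrin.h wraps them as _mm_getcsr/_mm_setcsr).  A minimal
   sketch that turns on flush-to-zero:

     unsigned int csr = __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (csr | 0x8000);

   where 0x8000 is the MXCSR FTZ bit.  */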
23550 /* SSE or 3DNow!A */
23551 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23553 /* SSE2 */
23554 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23556 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23557 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23559 /* SSE3. */
23560 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23561 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23563 /* AES */
23564 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23565 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23566 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23567 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23568 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23569 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
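/* One AES encryption round, as user code reaches it through the
   wmmintrin.h wrapper _mm_aesenc_si128, boils down to

     state = __builtin_ia32_aesenc128 (state, round_key);

   with state and round_key of type __v2di (a usage sketch).  */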
23571 /* PCLMUL */
23572 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23574 /* AVX */
23575 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23576 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23578 /* Access to the vec_init patterns. */
23579 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23580 integer_type_node, NULL_TREE);
23581 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23583 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23584 short_integer_type_node,
23585 short_integer_type_node,
23586 short_integer_type_node, NULL_TREE);
23587 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23589 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23590 char_type_node, char_type_node,
23591 char_type_node, char_type_node,
23592 char_type_node, char_type_node,
23593 char_type_node, NULL_TREE);
23594 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
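/* The vec_init builtins assemble an MMX vector from scalars; e.g.
   mmintrin.h implements _mm_set_pi32 (hi, lo) as roughly

     __builtin_ia32_vec_init_v2si (lo, hi);

   with the low element passed first, matching the vec_init pattern.  */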
23596 /* Access to the vec_extract patterns. */
23597 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23598 integer_type_node, NULL_TREE);
23599 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23601 ftype = build_function_type_list (long_long_integer_type_node,
23602 V2DI_type_node, integer_type_node,
23603 NULL_TREE);
23604 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23606 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23607 integer_type_node, NULL_TREE);
23608 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23610 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23611 integer_type_node, NULL_TREE);
23612 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23614 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23615 integer_type_node, NULL_TREE);
23616 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23618 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23619 integer_type_node, NULL_TREE);
23620 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23622 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23623 integer_type_node, NULL_TREE);
23624 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23626 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23627 integer_type_node, NULL_TREE);
23628 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
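/* The vec_ext builtins pull one element out by constant index; e.g.
   _mm_extract_epi16 (x, 3) in emmintrin.h reduces to roughly

     (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) x, 3);

   where the index operand must be an integer constant.  */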
23630 /* Access to the vec_set patterns. */
23631 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23632 intDI_type_node,
23633 integer_type_node, NULL_TREE);
23634 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23636 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23637 float_type_node,
23638 integer_type_node, NULL_TREE);
23639 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23641 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23642 intSI_type_node,
23643 integer_type_node, NULL_TREE);
23644 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23646 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23647 intHI_type_node,
23648 integer_type_node, NULL_TREE);
23649 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23651 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23652 intHI_type_node,
23653 integer_type_node, NULL_TREE);
23654 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23656 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23657 intQI_type_node,
23658 integer_type_node, NULL_TREE);
23659 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
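/* Conversely, the vec_set builtins insert one element; e.g.
   _mm_insert_epi16 (x, v, 3) maps onto roughly

     __builtin_ia32_vec_set_v8hi ((__v8hi) x, v, 3);

   again with a constant index operand.  */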
23661 /* Add the SSE5 multi-arg instructions. */
23662 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23663 {
23664 tree mtype = NULL_TREE;
23666 if (d->name == 0)
23667 continue;
23669 switch ((enum multi_arg_type)d->flag)
23670 {
23671 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23672 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23673 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23674 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23675 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23676 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23677 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23678 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23679 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23680 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23681 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23682 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23683 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23684 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23685 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23686 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23687 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23688 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23689 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23690 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23691 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23692 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23693 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23694 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23695 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23696 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23697 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23698 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23699 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23700 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23701 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23702 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23703 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23704 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23705 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23706 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23707 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23708 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23709 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23710 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23711 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23712 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23713 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23714 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23715 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23716 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23717 case MULTI_ARG_UNKNOWN:
23718 default:
23719 gcc_unreachable ();
23720 }
23722 if (mtype)
23723 def_builtin_const (d->mask, d->name, mtype, d->code);
23724 }
23725 }
23727 /* Internal helper for ix86_init_builtins. */
23729 static void
23730 ix86_init_builtins_va_builtins_abi (void)
23731 {
23732 tree ms_va_ref, sysv_va_ref;
23733 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23734 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23735 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23736 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23738 if (!TARGET_64BIT)
23739 return;
23740 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23741 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23742 ms_va_ref = build_reference_type (ms_va_list_type_node);
23743 sysv_va_ref =
23744 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23746 fnvoid_va_end_ms =
23747 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23748 fnvoid_va_start_ms =
23749 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23750 fnvoid_va_end_sysv =
23751 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23752 fnvoid_va_start_sysv =
23753 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23754 NULL_TREE);
23755 fnvoid_va_copy_ms =
23756 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23757 NULL_TREE);
23758 fnvoid_va_copy_sysv =
23759 build_function_type_list (void_type_node, sysv_va_ref,
23760 sysv_va_ref, NULL_TREE);
23762 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23763 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23764 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23765 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23766 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23767 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23768 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23769 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23770 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23771 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23772 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23773 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23774 }
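/* With these registered, 64-bit code can spell out either calling
   convention explicitly; a sketch of the ms_abi side:

     void __attribute__ ((ms_abi)) f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
     }

   The sysv_abi builtins work the same way for the System V side.  */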
23776 static void
23777 ix86_init_builtins (void)
23778 {
23779 tree float128_type_node = make_node (REAL_TYPE);
23780 tree ftype, decl;
23782 /* The __float80 type. */
23783 if (TYPE_MODE (long_double_type_node) == XFmode)
23784 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23785 "__float80");
23786 else
23787 {
23788 /* The __float80 type. */
23789 tree float80_type_node = make_node (REAL_TYPE);
23791 TYPE_PRECISION (float80_type_node) = 80;
23792 layout_type (float80_type_node);
23793 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23794 "__float80");
23797 /* The __float128 type. */
23798 TYPE_PRECISION (float128_type_node) = 128;
23799 layout_type (float128_type_node);
23800 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23801 "__float128");
23803 /* TFmode support builtins. */
23804 ftype = build_function_type (float128_type_node, void_list_node);
23805 decl = add_builtin_function ("__builtin_infq", ftype,
23806 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23807 NULL, NULL_TREE);
23808 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23810 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23811 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23812 NULL, NULL_TREE);
23813 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23815 /* We will expand them to normal calls if SSE2 isn't available, since
23816 they are used by libgcc. */
23817 ftype = build_function_type_list (float128_type_node,
23818 float128_type_node,
23819 NULL_TREE);
23820 decl = add_builtin_function ("__builtin_fabsq", ftype,
23821 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23822 "__fabstf2", NULL_TREE);
23823 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23824 TREE_READONLY (decl) = 1;
23826 ftype = build_function_type_list (float128_type_node,
23827 float128_type_node,
23828 float128_type_node,
23829 NULL_TREE);
23830 decl = add_builtin_function ("__builtin_copysignq", ftype,
23831 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23832 "__copysigntf3", NULL_TREE);
23833 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23834 TREE_READONLY (decl) = 1;
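/* The net user-visible effect: __float128 gains special-value and
   sign-manipulation helpers, e.g.

     __float128 inf = __builtin_infq ();
     __float128 y   = __builtin_copysignq (x, -1.0);

   with fabsq/copysignq expanding inline where possible and otherwise
   calling the libgcc routines __fabstf2/__copysigntf3 named above.
   (A usage sketch.)  */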
23836 ix86_init_mmx_sse_builtins ();
23837 if (TARGET_64BIT)
23838 ix86_init_builtins_va_builtins_abi ();
23839 }
23841 /* Errors in the source file can cause expand_expr to return const0_rtx
23842 where we expect a vector. To avoid crashing, use one of the vector
23843 clear instructions. */
23844 static rtx
23845 safe_vector_operand (rtx x, enum machine_mode mode)
23846 {
23847 if (x == const0_rtx)
23848 x = CONST0_RTX (mode);
23849 return x;
23850 }
23852 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23854 static rtx
23855 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23856 {
23857 rtx pat;
23858 tree arg0 = CALL_EXPR_ARG (exp, 0);
23859 tree arg1 = CALL_EXPR_ARG (exp, 1);
23860 rtx op0 = expand_normal (arg0);
23861 rtx op1 = expand_normal (arg1);
23862 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23863 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23864 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23866 if (VECTOR_MODE_P (mode0))
23867 op0 = safe_vector_operand (op0, mode0);
23868 if (VECTOR_MODE_P (mode1))
23869 op1 = safe_vector_operand (op1, mode1);
23871 if (optimize || !target
23872 || GET_MODE (target) != tmode
23873 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23874 target = gen_reg_rtx (tmode);
23876 if (GET_MODE (op1) == SImode && mode1 == TImode)
23877 {
23878 rtx x = gen_reg_rtx (V4SImode);
23879 emit_insn (gen_sse2_loadd (x, op1));
23880 op1 = gen_lowpart (TImode, x);
23881 }
23883 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23884 op0 = copy_to_mode_reg (mode0, op0);
23885 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23886 op1 = copy_to_mode_reg (mode1, op1);
23888 pat = GEN_FCN (icode) (target, op0, op1);
23889 if (! pat)
23890 return 0;
23892 emit_insn (pat);
23894 return target;
23895 }
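/* Tracing __builtin_ia32_addps through the expander above as an
   example: icode is CODE_FOR_addv4sf3, so tmode, mode0 and mode1 are
   all V4SFmode, and the emitted insn is, schematically,

     (set (reg:V4SF target)
          (plus:V4SF (reg:V4SF op0) (reg:V4SF op1)))

   after any operand that fails its predicate has been copied into a
   fresh register.  */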
23897 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23899 static rtx
23900 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23901 enum multi_arg_type m_type,
23902 enum rtx_code sub_code)
23904 rtx pat;
23905 int i;
23906 int nargs;
23907 bool comparison_p = false;
23908 bool tf_p = false;
23909 bool last_arg_constant = false;
23910 int num_memory = 0;
23911 struct {
23912 rtx op;
23913 enum machine_mode mode;
23914 } args[4];
23916 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23918 switch (m_type)
23920 case MULTI_ARG_3_SF:
23921 case MULTI_ARG_3_DF:
23922 case MULTI_ARG_3_DI:
23923 case MULTI_ARG_3_SI:
23924 case MULTI_ARG_3_SI_DI:
23925 case MULTI_ARG_3_HI:
23926 case MULTI_ARG_3_HI_SI:
23927 case MULTI_ARG_3_QI:
23928 case MULTI_ARG_3_PERMPS:
23929 case MULTI_ARG_3_PERMPD:
23930 nargs = 3;
23931 break;
23933 case MULTI_ARG_2_SF:
23934 case MULTI_ARG_2_DF:
23935 case MULTI_ARG_2_DI:
23936 case MULTI_ARG_2_SI:
23937 case MULTI_ARG_2_HI:
23938 case MULTI_ARG_2_QI:
23939 nargs = 2;
23940 break;
23942 case MULTI_ARG_2_DI_IMM:
23943 case MULTI_ARG_2_SI_IMM:
23944 case MULTI_ARG_2_HI_IMM:
23945 case MULTI_ARG_2_QI_IMM:
23946 nargs = 2;
23947 last_arg_constant = true;
23948 break;
23950 case MULTI_ARG_1_SF:
23951 case MULTI_ARG_1_DF:
23952 case MULTI_ARG_1_DI:
23953 case MULTI_ARG_1_SI:
23954 case MULTI_ARG_1_HI:
23955 case MULTI_ARG_1_QI:
23956 case MULTI_ARG_1_SI_DI:
23957 case MULTI_ARG_1_HI_DI:
23958 case MULTI_ARG_1_HI_SI:
23959 case MULTI_ARG_1_QI_DI:
23960 case MULTI_ARG_1_QI_SI:
23961 case MULTI_ARG_1_QI_HI:
23962 case MULTI_ARG_1_PH2PS:
23963 case MULTI_ARG_1_PS2PH:
23964 nargs = 1;
23965 break;
23967 case MULTI_ARG_2_SF_CMP:
23968 case MULTI_ARG_2_DF_CMP:
23969 case MULTI_ARG_2_DI_CMP:
23970 case MULTI_ARG_2_SI_CMP:
23971 case MULTI_ARG_2_HI_CMP:
23972 case MULTI_ARG_2_QI_CMP:
23973 nargs = 2;
23974 comparison_p = true;
23975 break;
23977 case MULTI_ARG_2_SF_TF:
23978 case MULTI_ARG_2_DF_TF:
23979 case MULTI_ARG_2_DI_TF:
23980 case MULTI_ARG_2_SI_TF:
23981 case MULTI_ARG_2_HI_TF:
23982 case MULTI_ARG_2_QI_TF:
23983 nargs = 2;
23984 tf_p = true;
23985 break;
23987 case MULTI_ARG_UNKNOWN:
23988 default:
23989 gcc_unreachable ();
23992 if (optimize || !target
23993 || GET_MODE (target) != tmode
23994 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23995 target = gen_reg_rtx (tmode);
23997 gcc_assert (nargs <= 4);
23999 for (i = 0; i < nargs; i++)
24001 tree arg = CALL_EXPR_ARG (exp, i);
24002 rtx op = expand_normal (arg);
24003 int adjust = (comparison_p) ? 1 : 0;
24004 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24006 if (last_arg_constant && i == nargs-1)
24008 if (GET_CODE (op) != CONST_INT)
24010 error ("last argument must be an immediate");
24011 return gen_reg_rtx (tmode);
24014 else
24016 if (VECTOR_MODE_P (mode))
24017 op = safe_vector_operand (op, mode);
24019 /* If we aren't optimizing, only allow one memory operand to be
24020 generated. */
24021 if (memory_operand (op, mode))
24022 num_memory++;
24024 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24026 if (optimize
24027 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24028 || num_memory > 1)
24029 op = force_reg (mode, op);
24032 args[i].op = op;
24033 args[i].mode = mode;
24036 switch (nargs)
24038 case 1:
24039 pat = GEN_FCN (icode) (target, args[0].op);
24040 break;
24042 case 2:
24043 if (tf_p)
24044 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24045 GEN_INT ((int)sub_code));
24046 else if (! comparison_p)
24047 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24048 else
24050 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24051 args[0].op,
24052 args[1].op);
24054 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24056 break;
24058 case 3:
24059 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24060 break;
24062 default:
24063 gcc_unreachable ();
24066 if (! pat)
24067 return 0;
24069 emit_insn (pat);
24070 return target;
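/* For the multi-argument (SSE5-style) forms: a _CMP type folds the
   requested comparison code SUB_CODE into a comparison rtx passed as
   an extra operand, while a _TF type passes SUB_CODE itself as an
   immediate. */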
24073 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24074 insns with vec_merge. */
24076 static rtx
24077 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24078 rtx target)
24080 rtx pat;
24081 tree arg0 = CALL_EXPR_ARG (exp, 0);
24082 rtx op1, op0 = expand_normal (arg0);
24083 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24084 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24086 if (optimize || !target
24087 || GET_MODE (target) != tmode
24088 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24089 target = gen_reg_rtx (tmode);
24091 if (VECTOR_MODE_P (mode0))
24092 op0 = safe_vector_operand (op0, mode0);
24094 if ((optimize && !register_operand (op0, mode0))
24095 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24096 op0 = copy_to_mode_reg (mode0, op0);
24098 op1 = op0;
24099 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24100 op1 = copy_to_mode_reg (mode0, op1);
24102 pat = GEN_FCN (icode) (target, op0, op1);
24103 if (! pat)
24104 return 0;
24105 emit_insn (pat);
24106 return target;
24109 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24111 static rtx
24112 ix86_expand_sse_compare (const struct builtin_description *d,
24113 tree exp, rtx target, bool swap)
24115 rtx pat;
24116 tree arg0 = CALL_EXPR_ARG (exp, 0);
24117 tree arg1 = CALL_EXPR_ARG (exp, 1);
24118 rtx op0 = expand_normal (arg0);
24119 rtx op1 = expand_normal (arg1);
24120 rtx op2;
24121 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24122 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24123 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24124 enum rtx_code comparison = d->comparison;
24126 if (VECTOR_MODE_P (mode0))
24127 op0 = safe_vector_operand (op0, mode0);
24128 if (VECTOR_MODE_P (mode1))
24129 op1 = safe_vector_operand (op1, mode1);
24131 /* Swap operands if we have a comparison that isn't available in
24132 hardware. */
24133 if (swap)
24135 rtx tmp = gen_reg_rtx (mode1);
24136 emit_move_insn (tmp, op1);
24137 op1 = op0;
24138 op0 = tmp;
24141 if (optimize || !target
24142 || GET_MODE (target) != tmode
24143 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24144 target = gen_reg_rtx (tmode);
24146 if ((optimize && !register_operand (op0, mode0))
24147 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24148 op0 = copy_to_mode_reg (mode0, op0);
24149 if ((optimize && !register_operand (op1, mode1))
24150 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24151 op1 = copy_to_mode_reg (mode1, op1);
24153 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24154 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24155 if (! pat)
24156 return 0;
24157 emit_insn (pat);
24158 return target;
24161 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24163 static rtx
24164 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24165 rtx target)
24167 rtx pat;
24168 tree arg0 = CALL_EXPR_ARG (exp, 0);
24169 tree arg1 = CALL_EXPR_ARG (exp, 1);
24170 rtx op0 = expand_normal (arg0);
24171 rtx op1 = expand_normal (arg1);
24172 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24173 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24174 enum rtx_code comparison = d->comparison;
24176 if (VECTOR_MODE_P (mode0))
24177 op0 = safe_vector_operand (op0, mode0);
24178 if (VECTOR_MODE_P (mode1))
24179 op1 = safe_vector_operand (op1, mode1);
24181 /* Swap operands if we have a comparison that isn't available in
24182 hardware. */
24183 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24185 rtx tmp = op1;
24186 op1 = op0;
24187 op0 = tmp;
24190 target = gen_reg_rtx (SImode);
24191 emit_move_insn (target, const0_rtx);
24192 target = gen_rtx_SUBREG (QImode, target, 0);
24194 if ((optimize && !register_operand (op0, mode0))
24195 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24196 op0 = copy_to_mode_reg (mode0, op0);
24197 if ((optimize && !register_operand (op1, mode1))
24198 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24199 op1 = copy_to_mode_reg (mode1, op1);
24201 pat = GEN_FCN (d->icode) (op0, op1);
24202 if (! pat)
24203 return 0;
24204 emit_insn (pat);
24205 emit_insn (gen_rtx_SET (VOIDmode,
24206 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24207 gen_rtx_fmt_ee (comparison, QImode,
24208 SET_DEST (pat),
24209 const0_rtx)));
24211 return SUBREG_REG (target);
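/* The SImode/QImode dance above is a common flag-materializing idiom:
   the full SImode register is cleared first, then only its low QImode
   part is written from the comparison via STRICT_LOW_PART, so the
   returned SImode value is already a zero-extended 0 or 1. The ptest
   expander below uses the same trick. */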
24214 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24216 static rtx
24217 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24218 rtx target)
24220 rtx pat;
24221 tree arg0 = CALL_EXPR_ARG (exp, 0);
24222 tree arg1 = CALL_EXPR_ARG (exp, 1);
24223 rtx op0 = expand_normal (arg0);
24224 rtx op1 = expand_normal (arg1);
24225 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24226 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24227 enum rtx_code comparison = d->comparison;
24229 if (VECTOR_MODE_P (mode0))
24230 op0 = safe_vector_operand (op0, mode0);
24231 if (VECTOR_MODE_P (mode1))
24232 op1 = safe_vector_operand (op1, mode1);
24234 target = gen_reg_rtx (SImode);
24235 emit_move_insn (target, const0_rtx);
24236 target = gen_rtx_SUBREG (QImode, target, 0);
24238 if ((optimize && !register_operand (op0, mode0))
24239 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24240 op0 = copy_to_mode_reg (mode0, op0);
24241 if ((optimize && !register_operand (op1, mode1))
24242 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24243 op1 = copy_to_mode_reg (mode1, op1);
24245 pat = GEN_FCN (d->icode) (op0, op1);
24246 if (! pat)
24247 return 0;
24248 emit_insn (pat);
24249 emit_insn (gen_rtx_SET (VOIDmode,
24250 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24251 gen_rtx_fmt_ee (comparison, QImode,
24252 SET_DEST (pat),
24253 const0_rtx)));
24255 return SUBREG_REG (target);
24258 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24260 static rtx
24261 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24262 tree exp, rtx target)
24264 rtx pat;
24265 tree arg0 = CALL_EXPR_ARG (exp, 0);
24266 tree arg1 = CALL_EXPR_ARG (exp, 1);
24267 tree arg2 = CALL_EXPR_ARG (exp, 2);
24268 tree arg3 = CALL_EXPR_ARG (exp, 3);
24269 tree arg4 = CALL_EXPR_ARG (exp, 4);
24270 rtx scratch0, scratch1;
24271 rtx op0 = expand_normal (arg0);
24272 rtx op1 = expand_normal (arg1);
24273 rtx op2 = expand_normal (arg2);
24274 rtx op3 = expand_normal (arg3);
24275 rtx op4 = expand_normal (arg4);
24276 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24278 tmode0 = insn_data[d->icode].operand[0].mode;
24279 tmode1 = insn_data[d->icode].operand[1].mode;
24280 modev2 = insn_data[d->icode].operand[2].mode;
24281 modei3 = insn_data[d->icode].operand[3].mode;
24282 modev4 = insn_data[d->icode].operand[4].mode;
24283 modei5 = insn_data[d->icode].operand[5].mode;
24284 modeimm = insn_data[d->icode].operand[6].mode;
24286 if (VECTOR_MODE_P (modev2))
24287 op0 = safe_vector_operand (op0, modev2);
24288 if (VECTOR_MODE_P (modev4))
24289 op2 = safe_vector_operand (op2, modev4);
24291 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24292 op0 = copy_to_mode_reg (modev2, op0);
24293 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24294 op1 = copy_to_mode_reg (modei3, op1);
24295 if ((optimize && !register_operand (op2, modev4))
24296 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24297 op2 = copy_to_mode_reg (modev4, op2);
24298 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24299 op3 = copy_to_mode_reg (modei5, op3);
24301 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24303 error ("the fifth argument must be an 8-bit immediate");
24304 return const0_rtx;
24307 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24309 if (optimize || !target
24310 || GET_MODE (target) != tmode0
24311 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24312 target = gen_reg_rtx (tmode0);
24314 scratch1 = gen_reg_rtx (tmode1);
24316 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24318 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24320 if (optimize || !target
24321 || GET_MODE (target) != tmode1
24322 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24323 target = gen_reg_rtx (tmode1);
24325 scratch0 = gen_reg_rtx (tmode0);
24327 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24329 else
24331 gcc_assert (d->flag);
24333 scratch0 = gen_reg_rtx (tmode0);
24334 scratch1 = gen_reg_rtx (tmode1);
24336 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24339 if (! pat)
24340 return 0;
24342 emit_insn (pat);
24344 if (d->flag)
24346 target = gen_reg_rtx (SImode);
24347 emit_move_insn (target, const0_rtx);
24348 target = gen_rtx_SUBREG (QImode, target, 0);
24350 emit_insn
24351 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24352 gen_rtx_fmt_ee (EQ, QImode,
24353 gen_rtx_REG ((enum machine_mode) d->flag,
24354 FLAGS_REG),
24355 const0_rtx)));
24356 return SUBREG_REG (target);
24358 else
24359 return target;
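/* Which insn output is returned depends on the builtin: the ...I form
   returns the index result (tmode0), the ...M form returns the mask
   result (tmode1), and the remaining forms test a condition of
   FLAGS_REG (d->flag holds the CC mode) and return it as 0 or 1.
   ix86_expand_sse_pcmpistr below follows the same scheme. */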
24363 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24365 static rtx
24366 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24367 tree exp, rtx target)
24369 rtx pat;
24370 tree arg0 = CALL_EXPR_ARG (exp, 0);
24371 tree arg1 = CALL_EXPR_ARG (exp, 1);
24372 tree arg2 = CALL_EXPR_ARG (exp, 2);
24373 rtx scratch0, scratch1;
24374 rtx op0 = expand_normal (arg0);
24375 rtx op1 = expand_normal (arg1);
24376 rtx op2 = expand_normal (arg2);
24377 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24379 tmode0 = insn_data[d->icode].operand[0].mode;
24380 tmode1 = insn_data[d->icode].operand[1].mode;
24381 modev2 = insn_data[d->icode].operand[2].mode;
24382 modev3 = insn_data[d->icode].operand[3].mode;
24383 modeimm = insn_data[d->icode].operand[4].mode;
24385 if (VECTOR_MODE_P (modev2))
24386 op0 = safe_vector_operand (op0, modev2);
24387 if (VECTOR_MODE_P (modev3))
24388 op1 = safe_vector_operand (op1, modev3);
24390 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24391 op0 = copy_to_mode_reg (modev2, op0);
24392 if ((optimize && !register_operand (op1, modev3))
24393 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24394 op1 = copy_to_mode_reg (modev3, op1);
24396 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24398 error ("the third argument must be an 8-bit immediate");
24399 return const0_rtx;
24402 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24404 if (optimize || !target
24405 || GET_MODE (target) != tmode0
24406 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24407 target = gen_reg_rtx (tmode0);
24409 scratch1 = gen_reg_rtx (tmode1);
24411 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24413 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24415 if (optimize || !target
24416 || GET_MODE (target) != tmode1
24417 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24418 target = gen_reg_rtx (tmode1);
24420 scratch0 = gen_reg_rtx (tmode0);
24422 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24424 else
24426 gcc_assert (d->flag);
24428 scratch0 = gen_reg_rtx (tmode0);
24429 scratch1 = gen_reg_rtx (tmode1);
24431 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24434 if (! pat)
24435 return 0;
24437 emit_insn (pat);
24439 if (d->flag)
24441 target = gen_reg_rtx (SImode);
24442 emit_move_insn (target, const0_rtx);
24443 target = gen_rtx_SUBREG (QImode, target, 0);
24445 emit_insn
24446 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24447 gen_rtx_fmt_ee (EQ, QImode,
24448 gen_rtx_REG ((enum machine_mode) d->flag,
24449 FLAGS_REG),
24450 const0_rtx)));
24451 return SUBREG_REG (target);
24453 else
24454 return target;
24457 /* Subroutine of ix86_expand_builtin to take care of insns with
24458 a variable number of operands. */
24460 static rtx
24461 ix86_expand_args_builtin (const struct builtin_description *d,
24462 tree exp, rtx target)
24464 rtx pat, real_target;
24465 unsigned int i, nargs;
24466 unsigned int nargs_constant = 0;
24467 int num_memory = 0;
24468 struct
24470 rtx op;
24471 enum machine_mode mode;
24472 } args[4];
24473 bool last_arg_count = false;
24474 enum insn_code icode = d->icode;
24475 const struct insn_data *insn_p = &insn_data[icode];
24476 enum machine_mode tmode = insn_p->operand[0].mode;
24477 enum machine_mode rmode = VOIDmode;
24478 bool swap = false;
24479 enum rtx_code comparison = d->comparison;
24481 switch ((enum ix86_builtin_type) d->flag)
24483 case INT_FTYPE_V8SF_V8SF_PTEST:
24484 case INT_FTYPE_V4DI_V4DI_PTEST:
24485 case INT_FTYPE_V4DF_V4DF_PTEST:
24486 case INT_FTYPE_V4SF_V4SF_PTEST:
24487 case INT_FTYPE_V2DI_V2DI_PTEST:
24488 case INT_FTYPE_V2DF_V2DF_PTEST:
24489 return ix86_expand_sse_ptest (d, exp, target);
24490 case FLOAT128_FTYPE_FLOAT128:
24491 case FLOAT_FTYPE_FLOAT:
24492 case INT64_FTYPE_V4SF:
24493 case INT64_FTYPE_V2DF:
24494 case INT_FTYPE_V16QI:
24495 case INT_FTYPE_V8QI:
24496 case INT_FTYPE_V8SF:
24497 case INT_FTYPE_V4DF:
24498 case INT_FTYPE_V4SF:
24499 case INT_FTYPE_V2DF:
24500 case V16QI_FTYPE_V16QI:
24501 case V8SI_FTYPE_V8SF:
24502 case V8SI_FTYPE_V4SI:
24503 case V8HI_FTYPE_V8HI:
24504 case V8HI_FTYPE_V16QI:
24505 case V8QI_FTYPE_V8QI:
24506 case V8SF_FTYPE_V8SF:
24507 case V8SF_FTYPE_V8SI:
24508 case V8SF_FTYPE_V4SF:
24509 case V4SI_FTYPE_V4SI:
24510 case V4SI_FTYPE_V16QI:
24511 case V4SI_FTYPE_V4SF:
24512 case V4SI_FTYPE_V8SI:
24513 case V4SI_FTYPE_V8HI:
24514 case V4SI_FTYPE_V4DF:
24515 case V4SI_FTYPE_V2DF:
24516 case V4HI_FTYPE_V4HI:
24517 case V4DF_FTYPE_V4DF:
24518 case V4DF_FTYPE_V4SI:
24519 case V4DF_FTYPE_V4SF:
24520 case V4DF_FTYPE_V2DF:
24521 case V4SF_FTYPE_V4SF:
24522 case V4SF_FTYPE_V4SI:
24523 case V4SF_FTYPE_V8SF:
24524 case V4SF_FTYPE_V4DF:
24525 case V4SF_FTYPE_V2DF:
24526 case V2DI_FTYPE_V2DI:
24527 case V2DI_FTYPE_V16QI:
24528 case V2DI_FTYPE_V8HI:
24529 case V2DI_FTYPE_V4SI:
24530 case V2DF_FTYPE_V2DF:
24531 case V2DF_FTYPE_V4SI:
24532 case V2DF_FTYPE_V4DF:
24533 case V2DF_FTYPE_V4SF:
24534 case V2DF_FTYPE_V2SI:
24535 case V2SI_FTYPE_V2SI:
24536 case V2SI_FTYPE_V4SF:
24537 case V2SI_FTYPE_V2SF:
24538 case V2SI_FTYPE_V2DF:
24539 case V2SF_FTYPE_V2SF:
24540 case V2SF_FTYPE_V2SI:
24541 nargs = 1;
24542 break;
24543 case V4SF_FTYPE_V4SF_VEC_MERGE:
24544 case V2DF_FTYPE_V2DF_VEC_MERGE:
24545 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24546 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24547 case V16QI_FTYPE_V16QI_V16QI:
24548 case V16QI_FTYPE_V8HI_V8HI:
24549 case V8QI_FTYPE_V8QI_V8QI:
24550 case V8QI_FTYPE_V4HI_V4HI:
24551 case V8HI_FTYPE_V8HI_V8HI:
24552 case V8HI_FTYPE_V16QI_V16QI:
24553 case V8HI_FTYPE_V4SI_V4SI:
24554 case V8SF_FTYPE_V8SF_V8SF:
24555 case V8SF_FTYPE_V8SF_V8SI:
24556 case V4SI_FTYPE_V4SI_V4SI:
24557 case V4SI_FTYPE_V8HI_V8HI:
24558 case V4SI_FTYPE_V4SF_V4SF:
24559 case V4SI_FTYPE_V2DF_V2DF:
24560 case V4HI_FTYPE_V4HI_V4HI:
24561 case V4HI_FTYPE_V8QI_V8QI:
24562 case V4HI_FTYPE_V2SI_V2SI:
24563 case V4DF_FTYPE_V4DF_V4DF:
24564 case V4DF_FTYPE_V4DF_V4DI:
24565 case V4SF_FTYPE_V4SF_V4SF:
24566 case V4SF_FTYPE_V4SF_V4SI:
24567 case V4SF_FTYPE_V4SF_V2SI:
24568 case V4SF_FTYPE_V4SF_V2DF:
24569 case V4SF_FTYPE_V4SF_DI:
24570 case V4SF_FTYPE_V4SF_SI:
24571 case V2DI_FTYPE_V2DI_V2DI:
24572 case V2DI_FTYPE_V16QI_V16QI:
24573 case V2DI_FTYPE_V4SI_V4SI:
24574 case V2DI_FTYPE_V2DI_V16QI:
24575 case V2DI_FTYPE_V2DF_V2DF:
24576 case V2SI_FTYPE_V2SI_V2SI:
24577 case V2SI_FTYPE_V4HI_V4HI:
24578 case V2SI_FTYPE_V2SF_V2SF:
24579 case V2DF_FTYPE_V2DF_V2DF:
24580 case V2DF_FTYPE_V2DF_V4SF:
24581 case V2DF_FTYPE_V2DF_V2DI:
24582 case V2DF_FTYPE_V2DF_DI:
24583 case V2DF_FTYPE_V2DF_SI:
24584 case V2SF_FTYPE_V2SF_V2SF:
24585 case V1DI_FTYPE_V1DI_V1DI:
24586 case V1DI_FTYPE_V8QI_V8QI:
24587 case V1DI_FTYPE_V2SI_V2SI:
24588 if (comparison == UNKNOWN)
24589 return ix86_expand_binop_builtin (icode, exp, target);
24590 nargs = 2;
24591 break;
24592 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24593 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24594 gcc_assert (comparison != UNKNOWN);
24595 nargs = 2;
24596 swap = true;
24597 break;
24598 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24599 case V8HI_FTYPE_V8HI_SI_COUNT:
24600 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24601 case V4SI_FTYPE_V4SI_SI_COUNT:
24602 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24603 case V4HI_FTYPE_V4HI_SI_COUNT:
24604 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24605 case V2DI_FTYPE_V2DI_SI_COUNT:
24606 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24607 case V2SI_FTYPE_V2SI_SI_COUNT:
24608 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24609 case V1DI_FTYPE_V1DI_SI_COUNT:
24610 nargs = 2;
24611 last_arg_count = true;
24612 break;
24613 case UINT64_FTYPE_UINT64_UINT64:
24614 case UINT_FTYPE_UINT_UINT:
24615 case UINT_FTYPE_UINT_USHORT:
24616 case UINT_FTYPE_UINT_UCHAR:
24617 nargs = 2;
24618 break;
24619 case V2DI2TI_FTYPE_V2DI_INT:
24620 nargs = 2;
24621 rmode = V2DImode;
24622 nargs_constant = 1;
24623 break;
24624 case V8HI_FTYPE_V8HI_INT:
24625 case V8SF_FTYPE_V8SF_INT:
24626 case V4SI_FTYPE_V4SI_INT:
24627 case V4SI_FTYPE_V8SI_INT:
24628 case V4HI_FTYPE_V4HI_INT:
24629 case V4DF_FTYPE_V4DF_INT:
24630 case V4SF_FTYPE_V4SF_INT:
24631 case V4SF_FTYPE_V8SF_INT:
24632 case V2DI_FTYPE_V2DI_INT:
24633 case V2DF_FTYPE_V2DF_INT:
24634 case V2DF_FTYPE_V4DF_INT:
24635 nargs = 2;
24636 nargs_constant = 1;
24637 break;
24638 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24639 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24640 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24641 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24642 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24643 nargs = 3;
24644 break;
24645 case V16QI_FTYPE_V16QI_V16QI_INT:
24646 case V8HI_FTYPE_V8HI_V8HI_INT:
24647 case V8SI_FTYPE_V8SI_V8SI_INT:
24648 case V8SI_FTYPE_V8SI_V4SI_INT:
24649 case V8SF_FTYPE_V8SF_V8SF_INT:
24650 case V8SF_FTYPE_V8SF_V4SF_INT:
24651 case V4SI_FTYPE_V4SI_V4SI_INT:
24652 case V4DF_FTYPE_V4DF_V4DF_INT:
24653 case V4DF_FTYPE_V4DF_V2DF_INT:
24654 case V4SF_FTYPE_V4SF_V4SF_INT:
24655 case V2DI_FTYPE_V2DI_V2DI_INT:
24656 case V2DF_FTYPE_V2DF_V2DF_INT:
24657 nargs = 3;
24658 nargs_constant = 1;
24659 break;
24660 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24661 nargs = 3;
24662 rmode = V2DImode;
24663 nargs_constant = 1;
24664 break;
24665 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24666 nargs = 3;
24667 rmode = DImode;
24668 nargs_constant = 1;
24669 break;
24670 case V2DI_FTYPE_V2DI_UINT_UINT:
24671 nargs = 3;
24672 nargs_constant = 2;
24673 break;
24674 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24675 nargs = 4;
24676 nargs_constant = 2;
24677 break;
24678 default:
24679 gcc_unreachable ();
24682 gcc_assert (nargs <= ARRAY_SIZE (args));
24684 if (comparison != UNKNOWN)
24686 gcc_assert (nargs == 2);
24687 return ix86_expand_sse_compare (d, exp, target, swap);
24690 if (rmode == VOIDmode || rmode == tmode)
24692 if (optimize
24693 || target == 0
24694 || GET_MODE (target) != tmode
24695 || ! (*insn_p->operand[0].predicate) (target, tmode))
24696 target = gen_reg_rtx (tmode);
24697 real_target = target;
24699 else
24701 target = gen_reg_rtx (rmode);
24702 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24705 for (i = 0; i < nargs; i++)
24707 tree arg = CALL_EXPR_ARG (exp, i);
24708 rtx op = expand_normal (arg);
24709 enum machine_mode mode = insn_p->operand[i + 1].mode;
24710 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24712 if (last_arg_count && (i + 1) == nargs)
24714 /* SIMD shift insns take either an 8-bit immediate or a
24715 register as the count. But builtin functions take int as the
24716 count. If the count doesn't match, we put it in a register. */
24717 if (!match)
24719 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24720 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24721 op = copy_to_reg (op);
24724 else if ((nargs - i) <= nargs_constant)
24726 if (!match)
24727 switch (icode)
24729 case CODE_FOR_sse4_1_roundpd:
24730 case CODE_FOR_sse4_1_roundps:
24731 case CODE_FOR_sse4_1_roundsd:
24732 case CODE_FOR_sse4_1_roundss:
24733 case CODE_FOR_sse4_1_blendps:
24734 case CODE_FOR_avx_blendpd256:
24735 case CODE_FOR_avx_vpermilv4df:
24736 case CODE_FOR_avx_roundpd256:
24737 case CODE_FOR_avx_roundps256:
24738 error ("the last argument must be a 4-bit immediate");
24739 return const0_rtx;
24741 case CODE_FOR_sse4_1_blendpd:
24742 case CODE_FOR_avx_vpermilv2df:
24743 error ("the last argument must be a 2-bit immediate");
24744 return const0_rtx;
24746 case CODE_FOR_avx_vextractf128v4df:
24747 case CODE_FOR_avx_vextractf128v8sf:
24748 case CODE_FOR_avx_vextractf128v8si:
24749 case CODE_FOR_avx_vinsertf128v4df:
24750 case CODE_FOR_avx_vinsertf128v8sf:
24751 case CODE_FOR_avx_vinsertf128v8si:
24752 error ("the last argument must be a 1-bit immediate");
24753 return const0_rtx;
24755 case CODE_FOR_avx_cmpsdv2df3:
24756 case CODE_FOR_avx_cmpssv4sf3:
24757 case CODE_FOR_avx_cmppdv2df3:
24758 case CODE_FOR_avx_cmppsv4sf3:
24759 case CODE_FOR_avx_cmppdv4df3:
24760 case CODE_FOR_avx_cmppsv8sf3:
24761 error ("the last argument must be a 5-bit immediate");
24762 return const0_rtx;
24764 default:
24765 switch (nargs_constant)
24767 case 2:
24768 if ((nargs - i) == nargs_constant)
24770 error ("the next to last argument must be an 8-bit immediate");
24771 break;
24773 case 1:
24774 error ("the last argument must be an 8-bit immediate");
24775 break;
24776 default:
24777 gcc_unreachable ();
24779 return const0_rtx;
24782 else
24784 if (VECTOR_MODE_P (mode))
24785 op = safe_vector_operand (op, mode);
24787 /* If we aren't optimizing, only allow one memory operand to
24788 be generated. */
24789 if (memory_operand (op, mode))
24790 num_memory++;
24792 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24794 if (optimize || !match || num_memory > 1)
24795 op = copy_to_mode_reg (mode, op);
24797 else
24799 op = copy_to_reg (op);
24800 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24804 args[i].op = op;
24805 args[i].mode = mode;
24808 switch (nargs)
24810 case 1:
24811 pat = GEN_FCN (icode) (real_target, args[0].op);
24812 break;
24813 case 2:
24814 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24815 break;
24816 case 3:
24817 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24818 args[2].op);
24819 break;
24820 case 4:
24821 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24822 args[2].op, args[3].op);
24823 break;
24824 default:
24825 gcc_unreachable ();
24828 if (! pat)
24829 return 0;
24831 emit_insn (pat);
24832 return target;
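/* For illustration: the immediate checks above mean that passing an
   out-of-range constant to e.g. a rounding builtin yields "the last
   argument must be a 4-bit immediate" rather than a wrong pattern,
   and the expander returns const0_rtx so compilation can continue
   after the error. */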
24835 /* Subroutine of ix86_expand_builtin to take care of special insns
24836 with a variable number of operands. */
24838 static rtx
24839 ix86_expand_special_args_builtin (const struct builtin_description *d,
24840 tree exp, rtx target)
24842 tree arg;
24843 rtx pat, op;
24844 unsigned int i, nargs, arg_adjust, memory;
24845 struct
24847 rtx op;
24848 enum machine_mode mode;
24849 } args[2];
24850 enum insn_code icode = d->icode;
24851 bool last_arg_constant = false;
24852 const struct insn_data *insn_p = &insn_data[icode];
24853 enum machine_mode tmode = insn_p->operand[0].mode;
24854 enum { load, store } klass;
24856 switch ((enum ix86_special_builtin_type) d->flag)
24858 case VOID_FTYPE_VOID:
24859 emit_insn (GEN_FCN (icode) (target));
24860 return 0;
24861 case V2DI_FTYPE_PV2DI:
24862 case V32QI_FTYPE_PCCHAR:
24863 case V16QI_FTYPE_PCCHAR:
24864 case V8SF_FTYPE_PCV4SF:
24865 case V8SF_FTYPE_PCFLOAT:
24866 case V4SF_FTYPE_PCFLOAT:
24867 case V4DF_FTYPE_PCV2DF:
24868 case V4DF_FTYPE_PCDOUBLE:
24869 case V2DF_FTYPE_PCDOUBLE:
24870 nargs = 1;
24871 klass = load;
24872 memory = 0;
24873 break;
24874 case VOID_FTYPE_PV2SF_V4SF:
24875 case VOID_FTYPE_PV4DI_V4DI:
24876 case VOID_FTYPE_PV2DI_V2DI:
24877 case VOID_FTYPE_PCHAR_V32QI:
24878 case VOID_FTYPE_PCHAR_V16QI:
24879 case VOID_FTYPE_PFLOAT_V8SF:
24880 case VOID_FTYPE_PFLOAT_V4SF:
24881 case VOID_FTYPE_PDOUBLE_V4DF:
24882 case VOID_FTYPE_PDOUBLE_V2DF:
24883 case VOID_FTYPE_PDI_DI:
24884 case VOID_FTYPE_PINT_INT:
24885 nargs = 1;
24886 klass = store;
24887 /* Reserve memory operand for target. */
24888 memory = ARRAY_SIZE (args);
24889 break;
24890 case V4SF_FTYPE_V4SF_PCV2SF:
24891 case V2DF_FTYPE_V2DF_PCDOUBLE:
24892 nargs = 2;
24893 klass = load;
24894 memory = 1;
24895 break;
24896 case V8SF_FTYPE_PCV8SF_V8SF:
24897 case V4DF_FTYPE_PCV4DF_V4DF:
24898 case V4SF_FTYPE_PCV4SF_V4SF:
24899 case V2DF_FTYPE_PCV2DF_V2DF:
24900 nargs = 2;
24901 klass = load;
24902 memory = 0;
24903 break;
24904 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24905 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24906 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24907 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24908 nargs = 2;
24909 klass = store;
24910 /* Reserve memory operand for target. */
24911 memory = ARRAY_SIZE (args);
24912 break;
24913 default:
24914 gcc_unreachable ();
24917 gcc_assert (nargs <= ARRAY_SIZE (args));
24919 if (klass == store)
24921 arg = CALL_EXPR_ARG (exp, 0);
24922 op = expand_normal (arg);
24923 gcc_assert (target == 0);
24924 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24925 arg_adjust = 1;
24927 else
24929 arg_adjust = 0;
24930 if (optimize
24931 || target == 0
24932 || GET_MODE (target) != tmode
24933 || ! (*insn_p->operand[0].predicate) (target, tmode))
24934 target = gen_reg_rtx (tmode);
24937 for (i = 0; i < nargs; i++)
24939 enum machine_mode mode = insn_p->operand[i + 1].mode;
24940 bool match;
24942 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24943 op = expand_normal (arg);
24944 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24946 if (last_arg_constant && (i + 1) == nargs)
24948 if (!match)
24949 switch (icode)
24951 default:
24952 error ("the last argument must be an 8-bit immediate");
24953 return const0_rtx;
24956 else
24958 if (i == memory)
24960 /* This must be the memory operand. */
24961 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24962 gcc_assert (GET_MODE (op) == mode
24963 || GET_MODE (op) == VOIDmode);
24965 else
24967 /* This must be a register. */
24968 if (VECTOR_MODE_P (mode))
24969 op = safe_vector_operand (op, mode);
24971 gcc_assert (GET_MODE (op) == mode
24972 || GET_MODE (op) == VOIDmode);
24973 op = copy_to_mode_reg (mode, op);
24977 args[i].op = op;
24978 args[i].mode = mode;
24981 switch (nargs)
24983 case 1:
24984 pat = GEN_FCN (icode) (target, args[0].op);
24985 break;
24986 case 2:
24987 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24988 break;
24989 default:
24990 gcc_unreachable ();
24993 if (! pat)
24994 return 0;
24995 emit_insn (pat);
24996 return klass == store ? 0 : target;
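/* In the store case above, the first call argument supplies the
   address: it is wrapped in a MEM of the insn's output mode and used
   as the "target", and 0 is returned since such builtins produce no
   value. */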
24999 /* Return the integer constant in ARG. Constrain it to be in the range
25000 of the subparts of VEC_TYPE; issue an error if not. */
25002 static int
25003 get_element_number (tree vec_type, tree arg)
25005 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25007 if (!host_integerp (arg, 1)
25008 || (elt = tree_low_cst (arg, 1), elt > max))
25010 error ("selector must be an integer constant in the range 0..%wi", max);
25011 return 0;
25014 return elt;
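/* E.g. for a V4SF vector TYPE_VECTOR_SUBPARTS is 4, so a call such as
   __builtin_ia32_vec_ext_v4sf (x, 7) is diagnosed and element 0 is
   used as a safe fallback. */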
25017 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25018 ix86_expand_vector_init. We DO have language-level syntax for this, in
25019 the form of (type){ init-list }. Except that since we can't place emms
25020 instructions from inside the compiler, we can't allow the use of MMX
25021 registers unless the user explicitly asks for it. So we do *not* define
25022 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25023 we have builtins invoked by mmintrin.h that give us license to emit
25024 these sorts of instructions. */
25026 static rtx
25027 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25029 enum machine_mode tmode = TYPE_MODE (type);
25030 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25031 int i, n_elt = GET_MODE_NUNITS (tmode);
25032 rtvec v = rtvec_alloc (n_elt);
25034 gcc_assert (VECTOR_MODE_P (tmode));
25035 gcc_assert (call_expr_nargs (exp) == n_elt);
25037 for (i = 0; i < n_elt; ++i)
25039 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25040 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25043 if (!target || !register_operand (target, tmode))
25044 target = gen_reg_rtx (tmode);
25046 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25047 return target;
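/* E.g. mmintrin.h builds _mm_set_pi32 and friends on top of
   __builtin_ia32_vec_init_v2si, which arrives here and is lowered by
   ix86_expand_vector_init through a (parallel [...]) of the scalar
   arguments. */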
25050 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25051 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25052 had a language-level syntax for referencing vector elements. */
25054 static rtx
25055 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25057 enum machine_mode tmode, mode0;
25058 tree arg0, arg1;
25059 int elt;
25060 rtx op0;
25062 arg0 = CALL_EXPR_ARG (exp, 0);
25063 arg1 = CALL_EXPR_ARG (exp, 1);
25065 op0 = expand_normal (arg0);
25066 elt = get_element_number (TREE_TYPE (arg0), arg1);
25068 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25069 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25070 gcc_assert (VECTOR_MODE_P (mode0));
25072 op0 = force_reg (mode0, op0);
25074 if (optimize || !target || !register_operand (target, tmode))
25075 target = gen_reg_rtx (tmode);
25077 ix86_expand_vector_extract (true, target, op0, elt);
25079 return target;
25082 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25083 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25084 a language-level syntax for referencing vector elements. */
25086 static rtx
25087 ix86_expand_vec_set_builtin (tree exp)
25089 enum machine_mode tmode, mode1;
25090 tree arg0, arg1, arg2;
25091 int elt;
25092 rtx op0, op1, target;
25094 arg0 = CALL_EXPR_ARG (exp, 0);
25095 arg1 = CALL_EXPR_ARG (exp, 1);
25096 arg2 = CALL_EXPR_ARG (exp, 2);
25098 tmode = TYPE_MODE (TREE_TYPE (arg0));
25099 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25100 gcc_assert (VECTOR_MODE_P (tmode));
25102 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25103 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25104 elt = get_element_number (TREE_TYPE (arg0), arg2);
25106 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25107 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25109 op0 = force_reg (tmode, op0);
25110 op1 = force_reg (mode1, op1);
25112 /* OP0 is the source of these builtin functions and shouldn't be
25113 modified. Create a copy, use it, and return it as the target. */
25114 target = gen_reg_rtx (tmode);
25115 emit_move_insn (target, op0);
25116 ix86_expand_vector_set (true, target, op1, elt);
25118 return target;
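/* E.g. _mm_insert_pi16 reaches this expander through
   __builtin_ia32_vec_set_v4hi; the caller-visible effect is purely
   functional, matching the copy-and-return scheme noted above. */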
25121 /* Expand an expression EXP that calls a built-in function,
25122 with result going to TARGET if that's convenient
25123 (and in mode MODE if that's convenient).
25124 SUBTARGET may be used as the target for computing one of EXP's operands.
25125 IGNORE is nonzero if the value is to be ignored. */
25127 static rtx
25128 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25129 enum machine_mode mode ATTRIBUTE_UNUSED,
25130 int ignore ATTRIBUTE_UNUSED)
25132 const struct builtin_description *d;
25133 size_t i;
25134 enum insn_code icode;
25135 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25136 tree arg0, arg1, arg2;
25137 rtx op0, op1, op2, pat;
25138 enum machine_mode mode0, mode1, mode2;
25139 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25141 /* Determine whether the builtin function is available under the current ISA.
25142 Originally the builtin was not created if it wasn't applicable to the
25143 current ISA based on the command line switches. With function specific
25144 options, we need to check in the context of the function making the call
25145 whether it is supported. */
25146 if (ix86_builtins_isa[fcode].isa
25147 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25149 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25150 NULL, NULL, false);
25152 if (!opts)
25153 error ("%qE needs an unknown isa option", fndecl);
25154 else
25156 gcc_assert (opts != NULL);
25157 error ("%qE needs isa option %s", fndecl, opts);
25158 free (opts);
25160 return const0_rtx;
25163 switch (fcode)
25165 case IX86_BUILTIN_MASKMOVQ:
25166 case IX86_BUILTIN_MASKMOVDQU:
25167 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25168 ? CODE_FOR_mmx_maskmovq
25169 : CODE_FOR_sse2_maskmovdqu);
25170 /* Note the arg order is different from the operand order. */
25171 arg1 = CALL_EXPR_ARG (exp, 0);
25172 arg2 = CALL_EXPR_ARG (exp, 1);
25173 arg0 = CALL_EXPR_ARG (exp, 2);
25174 op0 = expand_normal (arg0);
25175 op1 = expand_normal (arg1);
25176 op2 = expand_normal (arg2);
25177 mode0 = insn_data[icode].operand[0].mode;
25178 mode1 = insn_data[icode].operand[1].mode;
25179 mode2 = insn_data[icode].operand[2].mode;
25181 op0 = force_reg (Pmode, op0);
25182 op0 = gen_rtx_MEM (mode1, op0);
25184 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25185 op0 = copy_to_mode_reg (mode0, op0);
25186 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25187 op1 = copy_to_mode_reg (mode1, op1);
25188 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25189 op2 = copy_to_mode_reg (mode2, op2);
25190 pat = GEN_FCN (icode) (op0, op1, op2);
25191 if (! pat)
25192 return 0;
25193 emit_insn (pat);
25194 return 0;
25196 case IX86_BUILTIN_LDMXCSR:
25197 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25198 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25199 emit_move_insn (target, op0);
25200 emit_insn (gen_sse_ldmxcsr (target));
25201 return 0;
25203 case IX86_BUILTIN_STMXCSR:
25204 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25205 emit_insn (gen_sse_stmxcsr (target));
25206 return copy_to_mode_reg (SImode, target);
25208 case IX86_BUILTIN_CLFLUSH:
25209 arg0 = CALL_EXPR_ARG (exp, 0);
25210 op0 = expand_normal (arg0);
25211 icode = CODE_FOR_sse2_clflush;
25212 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25213 op0 = copy_to_mode_reg (Pmode, op0);
25215 emit_insn (gen_sse2_clflush (op0));
25216 return 0;
25218 case IX86_BUILTIN_MONITOR:
25219 arg0 = CALL_EXPR_ARG (exp, 0);
25220 arg1 = CALL_EXPR_ARG (exp, 1);
25221 arg2 = CALL_EXPR_ARG (exp, 2);
25222 op0 = expand_normal (arg0);
25223 op1 = expand_normal (arg1);
25224 op2 = expand_normal (arg2);
25225 if (!REG_P (op0))
25226 op0 = copy_to_mode_reg (Pmode, op0);
25227 if (!REG_P (op1))
25228 op1 = copy_to_mode_reg (SImode, op1);
25229 if (!REG_P (op2))
25230 op2 = copy_to_mode_reg (SImode, op2);
25231 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25232 return 0;
25234 case IX86_BUILTIN_MWAIT:
25235 arg0 = CALL_EXPR_ARG (exp, 0);
25236 arg1 = CALL_EXPR_ARG (exp, 1);
25237 op0 = expand_normal (arg0);
25238 op1 = expand_normal (arg1);
25239 if (!REG_P (op0))
25240 op0 = copy_to_mode_reg (SImode, op0);
25241 if (!REG_P (op1))
25242 op1 = copy_to_mode_reg (SImode, op1);
25243 emit_insn (gen_sse3_mwait (op0, op1));
25244 return 0;
25246 case IX86_BUILTIN_VEC_INIT_V2SI:
25247 case IX86_BUILTIN_VEC_INIT_V4HI:
25248 case IX86_BUILTIN_VEC_INIT_V8QI:
25249 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25251 case IX86_BUILTIN_VEC_EXT_V2DF:
25252 case IX86_BUILTIN_VEC_EXT_V2DI:
25253 case IX86_BUILTIN_VEC_EXT_V4SF:
25254 case IX86_BUILTIN_VEC_EXT_V4SI:
25255 case IX86_BUILTIN_VEC_EXT_V8HI:
25256 case IX86_BUILTIN_VEC_EXT_V2SI:
25257 case IX86_BUILTIN_VEC_EXT_V4HI:
25258 case IX86_BUILTIN_VEC_EXT_V16QI:
25259 return ix86_expand_vec_ext_builtin (exp, target);
25261 case IX86_BUILTIN_VEC_SET_V2DI:
25262 case IX86_BUILTIN_VEC_SET_V4SF:
25263 case IX86_BUILTIN_VEC_SET_V4SI:
25264 case IX86_BUILTIN_VEC_SET_V8HI:
25265 case IX86_BUILTIN_VEC_SET_V4HI:
25266 case IX86_BUILTIN_VEC_SET_V16QI:
25267 return ix86_expand_vec_set_builtin (exp);
25269 case IX86_BUILTIN_INFQ:
25270 case IX86_BUILTIN_HUGE_VALQ:
25272 REAL_VALUE_TYPE inf;
25273 rtx tmp;
25275 real_inf (&inf);
25276 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25278 tmp = validize_mem (force_const_mem (mode, tmp));
25280 if (target == 0)
25281 target = gen_reg_rtx (mode);
25283 emit_move_insn (target, tmp);
25284 return target;
25287 default:
25288 break;
25291 for (i = 0, d = bdesc_special_args;
25292 i < ARRAY_SIZE (bdesc_special_args);
25293 i++, d++)
25294 if (d->code == fcode)
25295 return ix86_expand_special_args_builtin (d, exp, target);
25297 for (i = 0, d = bdesc_args;
25298 i < ARRAY_SIZE (bdesc_args);
25299 i++, d++)
25300 if (d->code == fcode)
25301 switch (fcode)
25303 case IX86_BUILTIN_FABSQ:
25304 case IX86_BUILTIN_COPYSIGNQ:
25305 if (!TARGET_SSE2)
25306 /* Emit a normal call if SSE2 isn't available. */
25307 return expand_call (exp, target, ignore);
25308 default:
25309 return ix86_expand_args_builtin (d, exp, target);
25312 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25313 if (d->code == fcode)
25314 return ix86_expand_sse_comi (d, exp, target);
25316 for (i = 0, d = bdesc_pcmpestr;
25317 i < ARRAY_SIZE (bdesc_pcmpestr);
25318 i++, d++)
25319 if (d->code == fcode)
25320 return ix86_expand_sse_pcmpestr (d, exp, target);
25322 for (i = 0, d = bdesc_pcmpistr;
25323 i < ARRAY_SIZE (bdesc_pcmpistr);
25324 i++, d++)
25325 if (d->code == fcode)
25326 return ix86_expand_sse_pcmpistr (d, exp, target);
25328 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25329 if (d->code == fcode)
25330 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25331 (enum multi_arg_type)d->flag,
25332 d->comparison);
25334 gcc_unreachable ();
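/* Any builtin not handled by the explicit cases above is looked up in
   the descriptor tables in order: bdesc_special_args, bdesc_args,
   bdesc_comi, bdesc_pcmpestr, bdesc_pcmpistr, bdesc_multi_arg; an
   unknown code indicates a compiler bug. */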
25337 /* Returns a function decl for a vectorized version of the builtin function
25338 with builtin function code FN and result vector type TYPE_OUT, or NULL_TREE
25339 if it is not available. */
25341 static tree
25342 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25343 tree type_in)
25345 enum machine_mode in_mode, out_mode;
25346 int in_n, out_n;
25348 if (TREE_CODE (type_out) != VECTOR_TYPE
25349 || TREE_CODE (type_in) != VECTOR_TYPE)
25350 return NULL_TREE;
25352 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25353 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25354 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25355 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25357 switch (fn)
25359 case BUILT_IN_SQRT:
25360 if (out_mode == DFmode && out_n == 2
25361 && in_mode == DFmode && in_n == 2)
25362 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25363 break;
25365 case BUILT_IN_SQRTF:
25366 if (out_mode == SFmode && out_n == 4
25367 && in_mode == SFmode && in_n == 4)
25368 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25369 break;
25371 case BUILT_IN_LRINT:
25372 if (out_mode == SImode && out_n == 4
25373 && in_mode == DFmode && in_n == 2)
25374 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25375 break;
25377 case BUILT_IN_LRINTF:
25378 if (out_mode == SImode && out_n == 4
25379 && in_mode == SFmode && in_n == 4)
25380 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25381 break;
25383 default:
25384 ;
25387 /* Dispatch to a handler for a vectorization library. */
25388 if (ix86_veclib_handler)
25389 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25390 type_in);
25392 return NULL_TREE;
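/* For example, when the vectorizer processes sqrt () over a loop of
   doubles using V2DF vectors, the BUILT_IN_SQRT case above hands back
   IX86_BUILTIN_SQRTPD so each pair of calls becomes one sqrtpd. */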
25395 /* Handler for an SVML-style interface to
25396 a library with vectorized intrinsics. */
25398 static tree
25399 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25401 char name[20];
25402 tree fntype, new_fndecl, args;
25403 unsigned arity;
25404 const char *bname;
25405 enum machine_mode el_mode, in_mode;
25406 int n, in_n;
25408 /* The SVML library is suitable for unsafe math only. */
25409 if (!flag_unsafe_math_optimizations)
25410 return NULL_TREE;
25412 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25413 n = TYPE_VECTOR_SUBPARTS (type_out);
25414 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25415 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25416 if (el_mode != in_mode
25417 || n != in_n)
25418 return NULL_TREE;
25420 switch (fn)
25422 case BUILT_IN_EXP:
25423 case BUILT_IN_LOG:
25424 case BUILT_IN_LOG10:
25425 case BUILT_IN_POW:
25426 case BUILT_IN_TANH:
25427 case BUILT_IN_TAN:
25428 case BUILT_IN_ATAN:
25429 case BUILT_IN_ATAN2:
25430 case BUILT_IN_ATANH:
25431 case BUILT_IN_CBRT:
25432 case BUILT_IN_SINH:
25433 case BUILT_IN_SIN:
25434 case BUILT_IN_ASINH:
25435 case BUILT_IN_ASIN:
25436 case BUILT_IN_COSH:
25437 case BUILT_IN_COS:
25438 case BUILT_IN_ACOSH:
25439 case BUILT_IN_ACOS:
25440 if (el_mode != DFmode || n != 2)
25441 return NULL_TREE;
25442 break;
25444 case BUILT_IN_EXPF:
25445 case BUILT_IN_LOGF:
25446 case BUILT_IN_LOG10F:
25447 case BUILT_IN_POWF:
25448 case BUILT_IN_TANHF:
25449 case BUILT_IN_TANF:
25450 case BUILT_IN_ATANF:
25451 case BUILT_IN_ATAN2F:
25452 case BUILT_IN_ATANHF:
25453 case BUILT_IN_CBRTF:
25454 case BUILT_IN_SINHF:
25455 case BUILT_IN_SINF:
25456 case BUILT_IN_ASINHF:
25457 case BUILT_IN_ASINF:
25458 case BUILT_IN_COSHF:
25459 case BUILT_IN_COSF:
25460 case BUILT_IN_ACOSHF:
25461 case BUILT_IN_ACOSF:
25462 if (el_mode != SFmode || n != 4)
25463 return NULL_TREE;
25464 break;
25466 default:
25467 return NULL_TREE;
25470 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25472 if (fn == BUILT_IN_LOGF)
25473 strcpy (name, "vmlsLn4");
25474 else if (fn == BUILT_IN_LOG)
25475 strcpy (name, "vmldLn2");
25476 else if (n == 4)
25478 sprintf (name, "vmls%s", bname+10);
25479 name[strlen (name)-1] = '4';
25481 else
25482 sprintf (name, "vmld%s2", bname+10);
25484 /* Uppercase the first letter of the function name. */
25485 name[4] &= ~0x20;
25487 arity = 0;
25488 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25489 args = TREE_CHAIN (args))
25490 arity++;
25492 if (arity == 1)
25493 fntype = build_function_type_list (type_out, type_in, NULL);
25494 else
25495 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25497 /* Build a function declaration for the vectorized function. */
25498 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25499 TREE_PUBLIC (new_fndecl) = 1;
25500 DECL_EXTERNAL (new_fndecl) = 1;
25501 DECL_IS_NOVOPS (new_fndecl) = 1;
25502 TREE_READONLY (new_fndecl) = 1;
25504 return new_fndecl;
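/* A worked example of the mangling above, for BUILT_IN_SINF with
   4-way SFmode vectors: bname is "__builtin_sinf", so bname+10 is
   "sinf"; sprintf yields "vmlssinf", the trailing character is
   overwritten to give "vmlssin4", and clearing bit 0x20 of name[4]
   uppercases it to the final "vmlsSin4". */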
25507 /* Handler for an ACML-style interface to
25508 a library with vectorized intrinsics. */
25510 static tree
25511 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25513 char name[20] = "__vr.._";
25514 tree fntype, new_fndecl, args;
25515 unsigned arity;
25516 const char *bname;
25517 enum machine_mode el_mode, in_mode;
25518 int n, in_n;
25520 /* The ACML library is 64-bit only and suitable for unsafe math only, as
25521 it does not correctly support parts of IEEE arithmetic with the required
25522 precision, such as denormals. */
25523 if (!TARGET_64BIT
25524 || !flag_unsafe_math_optimizations)
25525 return NULL_TREE;
25527 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25528 n = TYPE_VECTOR_SUBPARTS (type_out);
25529 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25530 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25531 if (el_mode != in_mode
25532 || n != in_n)
25533 return NULL_TREE;
25535 switch (fn)
25537 case BUILT_IN_SIN:
25538 case BUILT_IN_COS:
25539 case BUILT_IN_EXP:
25540 case BUILT_IN_LOG:
25541 case BUILT_IN_LOG2:
25542 case BUILT_IN_LOG10:
25543 name[4] = 'd';
25544 name[5] = '2';
25545 if (el_mode != DFmode
25546 || n != 2)
25547 return NULL_TREE;
25548 break;
25550 case BUILT_IN_SINF:
25551 case BUILT_IN_COSF:
25552 case BUILT_IN_EXPF:
25553 case BUILT_IN_POWF:
25554 case BUILT_IN_LOGF:
25555 case BUILT_IN_LOG2F:
25556 case BUILT_IN_LOG10F:
25557 name[4] = 's';
25558 name[5] = '4';
25559 if (el_mode != SFmode
25560 || n != 4)
25561 return NULL_TREE;
25562 break;
25564 default:
25565 return NULL_TREE;
25568 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25569 sprintf (name + 7, "%s", bname+10);
25571 arity = 0;
25572 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25573 args = TREE_CHAIN (args))
25574 arity++;
25576 if (arity == 1)
25577 fntype = build_function_type_list (type_out, type_in, NULL);
25578 else
25579 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25581 /* Build a function declaration for the vectorized function. */
25582 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25583 TREE_PUBLIC (new_fndecl) = 1;
25584 DECL_EXTERNAL (new_fndecl) = 1;
25585 DECL_IS_NOVOPS (new_fndecl) = 1;
25586 TREE_READONLY (new_fndecl) = 1;
25588 return new_fndecl;
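/* A worked example: for BUILT_IN_SIN the template "__vr.._" becomes
   "__vrd2_" (double, 2 lanes), and appending bname+10 ("sin") gives
   the ACML routine name "__vrd2_sin"; the SFmode cases likewise
   produce names such as "__vrs4_sinf". */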
25592 /* Returns a decl of a function that implements conversion of an integer vector
25593 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25594 side of the conversion.
25595 Return NULL_TREE if it is not available. */
25597 static tree
25598 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25600 if (TREE_CODE (type) != VECTOR_TYPE)
25601 return NULL_TREE;
25603 switch (code)
25605 case FLOAT_EXPR:
25606 switch (TYPE_MODE (type))
25608 case V4SImode:
25609 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25610 default:
25611 return NULL_TREE;
25614 case FIX_TRUNC_EXPR:
25615 switch (TYPE_MODE (type))
25617 case V4SImode:
25618 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25619 default:
25620 return NULL_TREE;
25622 default:
25623 return NULL_TREE;
25628 /* Returns a decl of a target-specific builtin that implements the
25629 reciprocal of the function FN, or NULL_TREE if not available. */
25631 static tree
25632 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25633 bool sqrt ATTRIBUTE_UNUSED)
25635 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25636 && flag_finite_math_only && !flag_trapping_math
25637 && flag_unsafe_math_optimizations))
25638 return NULL_TREE;
25640 if (md_fn)
25641 /* Machine dependent builtins. */
25642 switch (fn)
25644 /* Vectorized version of sqrt to rsqrt conversion. */
25645 case IX86_BUILTIN_SQRTPS_NR:
25646 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25648 default:
25649 return NULL_TREE;
25651 else
25652 /* Normal builtins. */
25653 switch (fn)
25655 /* Sqrt to rsqrt conversion. */
25656 case BUILT_IN_SQRTF:
25657 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25659 default:
25660 return NULL_TREE;
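/* The builtins returned here compute a reciprocal square root via the
   hardware approximation instruction (the _NR suffix marks a
   Newton-Raphson refinement step); since the result is not correctly
   rounded, the transformation is gated on TARGET_RECIP and the
   unsafe-math conditions checked above. */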
25664 /* Store OPERAND to memory after reload is completed. This means
25665 that we can't easily use assign_stack_local. */
25666 rtx
25667 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25669 rtx result;
25671 gcc_assert (reload_completed);
25672 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25674 result = gen_rtx_MEM (mode,
25675 gen_rtx_PLUS (Pmode,
25676 stack_pointer_rtx,
25677 GEN_INT (-RED_ZONE_SIZE)));
25678 emit_move_insn (result, operand);
25680 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25682 switch (mode)
25684 case HImode:
25685 case SImode:
25686 operand = gen_lowpart (DImode, operand);
25687 /* FALLTHRU */
25688 case DImode:
25689 emit_insn (
25690 gen_rtx_SET (VOIDmode,
25691 gen_rtx_MEM (DImode,
25692 gen_rtx_PRE_DEC (DImode,
25693 stack_pointer_rtx)),
25694 operand));
25695 break;
25696 default:
25697 gcc_unreachable ();
25699 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25701 else
25703 switch (mode)
25705 case DImode:
25707 rtx operands[2];
25708 split_di (&operand, 1, operands, operands + 1);
25709 emit_insn (
25710 gen_rtx_SET (VOIDmode,
25711 gen_rtx_MEM (SImode,
25712 gen_rtx_PRE_DEC (Pmode,
25713 stack_pointer_rtx)),
25714 operands[1]));
25715 emit_insn (
25716 gen_rtx_SET (VOIDmode,
25717 gen_rtx_MEM (SImode,
25718 gen_rtx_PRE_DEC (Pmode,
25719 stack_pointer_rtx)),
25720 operands[0]));
25722 break;
25723 case HImode:
25724 /* Store HImodes as SImodes. */
25725 operand = gen_lowpart (SImode, operand);
25726 /* FALLTHRU */
25727 case SImode:
25728 emit_insn (
25729 gen_rtx_SET (VOIDmode,
25730 gen_rtx_MEM (GET_MODE (operand),
25731 gen_rtx_PRE_DEC (SImode,
25732 stack_pointer_rtx)),
25733 operand));
25734 break;
25735 default:
25736 gcc_unreachable ();
25738 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25740 return result;
25743 /* Free the operand from memory. */
25744 void
25745 ix86_free_from_memory (enum machine_mode mode)
25747 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25749 int size;
25751 if (mode == DImode || TARGET_64BIT)
25752 size = 8;
25753 else
25754 size = 4;
25755 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25756 to a pop or add instruction if registers are available. */
25757 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25758 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25759 GEN_INT (size))));
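/* Note the pairing with ix86_force_to_memory above: when the red zone
   was used no stack adjustment was made there, so none is emitted
   here either; otherwise the PRE_DEC pushes are undone with a single
   sp = sp + size adjustment. */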
25763 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25764 QImode must go into class Q_REGS.
25765 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25766 movdf to do mem-to-mem moves through integer regs. */
25767 enum reg_class
25768 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25770 enum machine_mode mode = GET_MODE (x);
25772 /* We're only allowed to return a subclass of CLASS. Many of the
25773 following checks fail for NO_REGS, so eliminate that early. */
25774 if (regclass == NO_REGS)
25775 return NO_REGS;
25777 /* All classes can load zeros. */
25778 if (x == CONST0_RTX (mode))
25779 return regclass;
25781 /* Force constants into memory if we are loading a (nonzero) constant into
25782 an MMX or SSE register. This is because there are no MMX/SSE instructions
25783 to load from a constant. */
25784 if (CONSTANT_P (x)
25785 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25786 return NO_REGS;
25788 /* Prefer SSE regs only, if we can use them for math. */
25789 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25790 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25792 /* Floating-point constants need more complex checks. */
25793 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25795 /* General regs can load everything. */
25796 if (reg_class_subset_p (regclass, GENERAL_REGS))
25797 return regclass;
25799 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25800 zero above. We only want to wind up preferring 80387 registers if
25801 we plan on doing computation with them. */
25802 if (TARGET_80387
25803 && standard_80387_constant_p (x))
25805 /* Limit the class to non-SSE registers. */
25806 if (regclass == FLOAT_SSE_REGS)
25807 return FLOAT_REGS;
25808 if (regclass == FP_TOP_SSE_REGS)
25809 return FP_TOP_REG;
25810 if (regclass == FP_SECOND_SSE_REGS)
25811 return FP_SECOND_REG;
25812 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25813 return regclass;
25816 return NO_REGS;
25819 /* Generally when we see PLUS here, it's the function invariant
25820 (plus soft-fp const_int), which can only be computed into general
25821 regs. */
25822 if (GET_CODE (x) == PLUS)
25823 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25825 /* QImode constants are easy to load, but non-constant QImode data
25826 must go into Q_REGS. */
25827 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25829 if (reg_class_subset_p (regclass, Q_REGS))
25830 return regclass;
25831 if (reg_class_subset_p (Q_REGS, regclass))
25832 return Q_REGS;
25833 return NO_REGS;
25836 return regclass;
25839 /* Discourage putting floating-point values in SSE registers unless
25840 SSE math is being used, and likewise for the 387 registers. */
25841 enum reg_class
25842 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25844 enum machine_mode mode = GET_MODE (x);
25846 /* Restrict the output reload class to the register bank that we are doing
25847 math on. If we would like not to return a subset of CLASS, reject this
25848 alternative: if reload cannot do this, it will still use its choice. */
25849 mode = GET_MODE (x);
25850 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25851 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25853 if (X87_FLOAT_MODE_P (mode))
25855 if (regclass == FP_TOP_SSE_REGS)
25856 return FP_TOP_REG;
25857 else if (regclass == FP_SECOND_SSE_REGS)
25858 return FP_SECOND_REG;
25859 else
25860 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25863 return regclass;
25866 static enum reg_class
25867 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25868 enum machine_mode mode,
25869 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25871 /* QImode spills from non-QI registers require
25872 an intermediate register on 32-bit targets. */
25873 if (!in_p && mode == QImode && !TARGET_64BIT
25874 && (rclass == GENERAL_REGS
25875 || rclass == LEGACY_REGS
25876 || rclass == INDEX_REGS))
25878 int regno;
25880 if (REG_P (x))
25881 regno = REGNO (x);
25882 else
25883 regno = -1;
25885 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25886 regno = true_regnum (x);
25888 /* Return Q_REGS if the operand is in memory. */
25889 if (regno == -1)
25890 return Q_REGS;
25893 return NO_REGS;
25896 /* If we are copying between general and FP registers, we need a memory
25897 location. The same is true for SSE and MMX registers.
25899 To optimize register_move_cost performance, allow an inline variant.
25901 The macro can't work reliably when one of the CLASSES is class containing
25902 registers from multiple units (SSE, MMX, integer). We avoid this by never
25903 combining those units in single alternative in the machine description.
25904 Ensure that this constraint holds to avoid unexpected surprises.
25906 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25907 enforce these sanity checks. */
25909 static inline int
25910 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25911 enum machine_mode mode, int strict)
25913 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25914 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25915 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25916 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25917 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25918 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25920 gcc_assert (!strict);
25921 return true;
25924 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25925 return true;
25927 /* ??? This is a lie. We do have moves between mmx/general, and for
25928 mmx/sse2. But by saying we need secondary memory we discourage the
25929 register allocator from using the mmx registers unless needed. */
25930 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25931 return true;
25933 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25935 /* SSE1 doesn't have any direct moves from other classes. */
25936 if (!TARGET_SSE2)
25937 return true;
25939 /* If the target says that inter-unit moves are more expensive
25940 than moving through memory, then don't generate them. */
25941 if (!TARGET_INTER_UNIT_MOVES)
25942 return true;
25944 /* Between SSE and general, we have moves no larger than word size. */
25945 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25946 return true;
25949 return false;
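/* Annotation (illustrative, not in the upstream source): as an example of
   the checks above, copying a DImode value between SSE_REGS and
   GENERAL_REGS on a 32-bit target needs secondary memory even on SSE2:
   GET_MODE_SIZE (DImode) == 8 exceeds UNITS_PER_WORD == 4, so the value
   must round-trip through a stack slot instead of using a direct move.  */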
25952 int
25953 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25954 enum machine_mode mode, int strict)
25956 return inline_secondary_memory_needed (class1, class2, mode, strict);
25959 /* Return true if the registers in CLASS cannot represent the change from
25960 modes FROM to TO. */
25962 bool
25963 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25964 enum reg_class regclass)
25966 if (from == to)
25967 return false;
25969 /* x87 registers can't do subreg at all, as all values are reformatted
25970 to extended precision. */
25971 if (MAYBE_FLOAT_CLASS_P (regclass))
25972 return true;
25974 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25976 /* Vector registers do not support QI or HImode loads. If we don't
25977 disallow a change to these modes, reload will assume it's ok to
25978 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25979 the vec_dupv4hi pattern. */
25980 if (GET_MODE_SIZE (from) < 4)
25981 return true;
25983 /* Vector registers do not support subreg with nonzero offsets, which
25984 are otherwise valid for integer registers. Since we can't see
25985 whether we have a nonzero offset from here, prohibit all
25986 nonparadoxical subregs changing size. */
25987 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25988 return true;
25991 return false;
25994 /* Return the cost of moving data of mode M between a
25995 register and memory. A value of 2 is the default; this cost is
25996 relative to those in `REGISTER_MOVE_COST'.
25998 This function is used extensively by register_move_cost, which is used to
25999 build tables at startup. Make it inline in this case.
26000 When IN is 2, return the maximum of the in and out move costs.
26002 If moving between registers and memory is more expensive than
26003 between two registers, you should define this macro to express the
26004 relative cost.
26006 Also model the increased cost of moving QImode registers in
26007 non-Q_REGS classes. */
26009 static inline int
26010 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26011 int in)
26013 int cost;
26014 if (FLOAT_CLASS_P (regclass))
26016 int index;
26017 switch (mode)
26019 case SFmode:
26020 index = 0;
26021 break;
26022 case DFmode:
26023 index = 1;
26024 break;
26025 case XFmode:
26026 index = 2;
26027 break;
26028 default:
26029 return 100;
26031 if (in == 2)
26032 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26033 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26035 if (SSE_CLASS_P (regclass))
26037 int index;
26038 switch (GET_MODE_SIZE (mode))
26040 case 4:
26041 index = 0;
26042 break;
26043 case 8:
26044 index = 1;
26045 break;
26046 case 16:
26047 index = 2;
26048 break;
26049 default:
26050 return 100;
26052 if (in == 2)
26053 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26054 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26056 if (MMX_CLASS_P (regclass))
26058 int index;
26059 switch (GET_MODE_SIZE (mode))
26061 case 4:
26062 index = 0;
26063 break;
26064 case 8:
26065 index = 1;
26066 break;
26067 default:
26068 return 100;
26070 if (in == 2)
26071 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26072 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26074 switch (GET_MODE_SIZE (mode))
26076 case 1:
26077 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26079 if (!in)
26080 return ix86_cost->int_store[0];
26081 if (TARGET_PARTIAL_REG_DEPENDENCY
26082 && optimize_function_for_speed_p (cfun))
26083 cost = ix86_cost->movzbl_load;
26084 else
26085 cost = ix86_cost->int_load[0];
26086 if (in == 2)
26087 return MAX (cost, ix86_cost->int_store[0]);
26088 return cost;
26090 else
26092 if (in == 2)
26093 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26094 if (in)
26095 return ix86_cost->movzbl_load;
26096 else
26097 return ix86_cost->int_store[0] + 4;
26099 break;
26100 case 2:
26101 if (in == 2)
26102 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26103 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26104 default:
26105 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26106 if (mode == TFmode)
26107 mode = XFmode;
26108 if (in == 2)
26109 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26110 else if (in)
26111 cost = ix86_cost->int_load[2];
26112 else
26113 cost = ix86_cost->int_store[2];
26114 return (cost * (((int) GET_MODE_SIZE (mode)
26115 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26119 int
26120 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26122 return inline_memory_move_cost (mode, regclass, in);
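/* Annotation (illustrative): for an SFmode value in FLOAT_REGS the switch
   above selects index 0, so a load costs ix86_cost->fp_load[0] and a store
   ix86_cost->fp_store[0]; with IN == 2 the function returns the MAX of the
   two, i.e. the cost of the worse direction.  */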
26126 /* Return the cost of moving data from a register in class CLASS1 to
26127 one in class CLASS2.
26129 It is not required that the cost always equal 2 when FROM is the same as TO;
26130 on some machines it is expensive to move between registers if they are not
26131 general registers. */
26133 int
26134 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26135 enum reg_class class2)
26137 /* In case we require secondary memory, compute cost of the store followed
26138 by load. In order to avoid bad register allocation choices, we need
26139 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26141 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26143 int cost = 1;
26145 cost += inline_memory_move_cost (mode, class1, 2);
26146 cost += inline_memory_move_cost (mode, class2, 2);
26148 /* In the case of copying from a general purpose register we may emit
26149 multiple stores followed by a single load, causing a memory size
26150 mismatch stall. Count this as an arbitrarily high cost of 20. */
26151 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26152 cost += 20;
26154 /* In the case of FP/MMX moves, the registers actually overlap, and we
26155 have to switch modes in order to treat them differently. */
26156 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26157 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26158 cost += 20;
26160 return cost;
26163 /* Moves between SSE/MMX and integer unit are expensive. */
26164 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26165 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26167 /* ??? By keeping the returned value relatively high, we limit the number
26168 of moves between integer and MMX/SSE registers for all targets.
26169 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
26170 where integer modes in MMX/SSE registers are not tieable
26171 because of missing QImode and HImode moves to, from or between
26172 MMX/SSE registers. */
26173 return MAX (8, ix86_cost->mmxsse_to_integer);
26175 if (MAYBE_FLOAT_CLASS_P (class1))
26176 return ix86_cost->fp_move;
26177 if (MAYBE_SSE_CLASS_P (class1))
26178 return ix86_cost->sse_move;
26179 if (MAYBE_MMX_CLASS_P (class1))
26180 return ix86_cost->mmx_move;
26181 return 2;
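/* Annotation (illustrative): an SImode copy between GENERAL_REGS and
   SSE_REGS on an SSE2 target with inter-unit moves enabled needs no
   secondary memory, so it is charged MAX (8, ix86_cost->mmxsse_to_integer);
   a DFmode copy between the same classes on a 32-bit target does need
   memory and is charged the store cost plus the load cost computed above.  */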
26184 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26186 bool
26187 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26189 /* Flags and only flags can hold CCmode values. */
26190 if (CC_REGNO_P (regno))
26191 return GET_MODE_CLASS (mode) == MODE_CC;
26192 if (GET_MODE_CLASS (mode) == MODE_CC
26193 || GET_MODE_CLASS (mode) == MODE_RANDOM
26194 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26195 return 0;
26196 if (FP_REGNO_P (regno))
26197 return VALID_FP_MODE_P (mode);
26198 if (SSE_REGNO_P (regno))
26200 /* We implement the move patterns for all vector modes into and
26201 out of SSE registers, even when no operation instructions
26202 are available. OImode move is available only when AVX is
26203 enabled. */
26204 return ((TARGET_AVX && mode == OImode)
26205 || VALID_AVX256_REG_MODE (mode)
26206 || VALID_SSE_REG_MODE (mode)
26207 || VALID_SSE2_REG_MODE (mode)
26208 || VALID_MMX_REG_MODE (mode)
26209 || VALID_MMX_REG_MODE_3DNOW (mode));
26211 if (MMX_REGNO_P (regno))
26213 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26214 so if the register is available at all, then we can move data of
26215 the given mode into or out of it. */
26216 return (VALID_MMX_REG_MODE (mode)
26217 || VALID_MMX_REG_MODE_3DNOW (mode));
26220 if (mode == QImode)
26222 /* Take care with QImode values - they can be in non-QI regs,
26223 but then they do cause partial register stalls. */
26224 if (regno <= BX_REG || TARGET_64BIT)
26225 return 1;
26226 if (!TARGET_PARTIAL_REG_STALL)
26227 return 1;
26228 return reload_in_progress || reload_completed;
26230 /* We handle both integer and floats in the general purpose registers. */
26231 else if (VALID_INT_MODE_P (mode))
26232 return 1;
26233 else if (VALID_FP_MODE_P (mode))
26234 return 1;
26235 else if (VALID_DFP_MODE_P (mode))
26236 return 1;
26237 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26238 on to use that value in smaller contexts, this can easily force a
26239 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26240 supporting DImode, allow it. */
26241 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26242 return 1;
26244 return 0;
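/* Annotation (illustrative): on 32-bit targets QImode is always accepted
   in %eax, %edx, %ecx and %ebx; other integer registers accept it only
   when partial register stalls are not being avoided, or once reload is
   in progress or complete.  On 64-bit targets every integer register has
   a byte form.  */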
26247 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26248 tieable integer mode. */
26250 static bool
26251 ix86_tieable_integer_mode_p (enum machine_mode mode)
26253 switch (mode)
26255 case HImode:
26256 case SImode:
26257 return true;
26259 case QImode:
26260 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26262 case DImode:
26263 return TARGET_64BIT;
26265 default:
26266 return false;
26270 /* Return true if MODE1 is accessible in a register that can hold MODE2
26271 without copying. That is, all register classes that can hold MODE2
26272 can also hold MODE1. */
26274 bool
26275 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26277 if (mode1 == mode2)
26278 return true;
26280 if (ix86_tieable_integer_mode_p (mode1)
26281 && ix86_tieable_integer_mode_p (mode2))
26282 return true;
26284 /* MODE2 being XFmode implies fp stack or general regs, which means we
26285 can tie any smaller floating point modes to it. Note that we do not
26286 tie this with TFmode. */
26287 if (mode2 == XFmode)
26288 return mode1 == SFmode || mode1 == DFmode;
26290 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26291 that we can tie it with SFmode. */
26292 if (mode2 == DFmode)
26293 return mode1 == SFmode;
26295 /* If MODE2 is only appropriate for an SSE register, then tie with
26296 any other mode acceptable to SSE registers. */
26297 if (GET_MODE_SIZE (mode2) == 16
26298 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26299 return (GET_MODE_SIZE (mode1) == 16
26300 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26302 /* If MODE2 is appropriate for an MMX register, then tie
26303 with any other mode acceptable to MMX registers. */
26304 if (GET_MODE_SIZE (mode2) == 8
26305 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26306 return (GET_MODE_SIZE (mode1) == 8
26307 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26309 return false;
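/* Annotation (illustrative): HImode and SImode tie with each other through
   ix86_tieable_integer_mode_p; SFmode ties with XFmode because the x87
   stack or general registers can hold both; V4SFmode and V2DImode tie
   only because both are 16 bytes wide and valid in SSE registers.  */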
26312 /* Compute a (partial) cost for rtx X. Return true if the complete
26313 cost has been computed, and false if subexpressions should be
26314 scanned. In either case, *TOTAL contains the cost result. */
26316 static bool
26317 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26319 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26320 enum machine_mode mode = GET_MODE (x);
26321 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26323 switch (code)
26325 case CONST_INT:
26326 case CONST:
26327 case LABEL_REF:
26328 case SYMBOL_REF:
26329 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26330 *total = 3;
26331 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26332 *total = 2;
26333 else if (flag_pic && SYMBOLIC_CONST (x)
26334 && (!TARGET_64BIT
26335 || (GET_CODE (x) != LABEL_REF
26336 && (GET_CODE (x) != SYMBOL_REF
26337 || !SYMBOL_REF_LOCAL_P (x)))))
26338 *total = 1;
26339 else
26340 *total = 0;
26341 return true;
26343 case CONST_DOUBLE:
26344 if (mode == VOIDmode)
26345 *total = 0;
26346 else
26347 switch (standard_80387_constant_p (x))
26349 case 1: /* 0.0 */
26350 *total = 1;
26351 break;
26352 default: /* Other constants */
26353 *total = 2;
26354 break;
26355 case 0:
26356 case -1:
26357 /* Start with (MEM (SYMBOL_REF)), since that's where
26358 it'll probably end up. Add a penalty for size. */
26359 *total = (COSTS_N_INSNS (1)
26360 + (flag_pic != 0 && !TARGET_64BIT)
26361 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26362 break;
26364 return true;
26366 case ZERO_EXTEND:
26367 /* Zero extension is often completely free on x86_64, so make
26368 it as cheap as possible. */
26369 if (TARGET_64BIT && mode == DImode
26370 && GET_MODE (XEXP (x, 0)) == SImode)
26371 *total = 1;
26372 else if (TARGET_ZERO_EXTEND_WITH_AND)
26373 *total = cost->add;
26374 else
26375 *total = cost->movzx;
26376 return false;
26378 case SIGN_EXTEND:
26379 *total = cost->movsx;
26380 return false;
26382 case ASHIFT:
26383 if (CONST_INT_P (XEXP (x, 1))
26384 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26386 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26387 if (value == 1)
26389 *total = cost->add;
26390 return false;
26392 if ((value == 2 || value == 3)
26393 && cost->lea <= cost->shift_const)
26395 *total = cost->lea;
26396 return false;
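	  /* Annotation: shifts left by 2 or 3 can be synthesized with
	     lea (,%reg,4) and lea (,%reg,8) respectively, which is why
	     the lea cost is charged above whenever it does not exceed the
	     constant-shift cost.  */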
26399 /* FALLTHRU */
26401 case ROTATE:
26402 case ASHIFTRT:
26403 case LSHIFTRT:
26404 case ROTATERT:
26405 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26407 if (CONST_INT_P (XEXP (x, 1)))
26409 if (INTVAL (XEXP (x, 1)) > 32)
26410 *total = cost->shift_const + COSTS_N_INSNS (2);
26411 else
26412 *total = cost->shift_const * 2;
26414 else
26416 if (GET_CODE (XEXP (x, 1)) == AND)
26417 *total = cost->shift_var * 2;
26418 else
26419 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26422 else
26424 if (CONST_INT_P (XEXP (x, 1)))
26425 *total = cost->shift_const;
26426 else
26427 *total = cost->shift_var;
26429 return false;
26431 case MULT:
26432 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26434 /* ??? SSE scalar cost should be used here. */
26435 *total = cost->fmul;
26436 return false;
26438 else if (X87_FLOAT_MODE_P (mode))
26440 *total = cost->fmul;
26441 return false;
26443 else if (FLOAT_MODE_P (mode))
26445 /* ??? SSE vector cost should be used here. */
26446 *total = cost->fmul;
26447 return false;
26449 else
26451 rtx op0 = XEXP (x, 0);
26452 rtx op1 = XEXP (x, 1);
26453 int nbits;
26454 if (CONST_INT_P (XEXP (x, 1)))
26456 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26457 for (nbits = 0; value != 0; value &= value - 1)
26458 nbits++;
26460 else
26461 /* This is arbitrary. */
26462 nbits = 7;
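	    /* Annotation: the loop above clears the lowest set bit of VALUE
	       on each iteration, so NBITS ends up as the population count;
	       e.g. a multiply by 10 (binary 1010) yields nbits == 2 and is
	       charged two mult_bit steps on top of mult_init below.  */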
26464 /* Compute costs correctly for widening multiplication. */
26465 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26466 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26467 == GET_MODE_SIZE (mode))
26469 int is_mulwiden = 0;
26470 enum machine_mode inner_mode = GET_MODE (op0);
26472 if (GET_CODE (op0) == GET_CODE (op1))
26473 is_mulwiden = 1, op1 = XEXP (op1, 0);
26474 else if (CONST_INT_P (op1))
26476 if (GET_CODE (op0) == SIGN_EXTEND)
26477 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26478 == INTVAL (op1);
26479 else
26480 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26483 if (is_mulwiden)
26484 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26487 *total = (cost->mult_init[MODE_INDEX (mode)]
26488 + nbits * cost->mult_bit
26489 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26491 return true;
26494 case DIV:
26495 case UDIV:
26496 case MOD:
26497 case UMOD:
26498 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26499 /* ??? SSE cost should be used here. */
26500 *total = cost->fdiv;
26501 else if (X87_FLOAT_MODE_P (mode))
26502 *total = cost->fdiv;
26503 else if (FLOAT_MODE_P (mode))
26504 /* ??? SSE vector cost should be used here. */
26505 *total = cost->fdiv;
26506 else
26507 *total = cost->divide[MODE_INDEX (mode)];
26508 return false;
26510 case PLUS:
26511 if (GET_MODE_CLASS (mode) == MODE_INT
26512 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26514 if (GET_CODE (XEXP (x, 0)) == PLUS
26515 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26516 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26517 && CONSTANT_P (XEXP (x, 1)))
26519 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26520 if (val == 2 || val == 4 || val == 8)
26522 *total = cost->lea;
26523 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26524 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26525 outer_code, speed);
26526 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26527 return true;
26530 else if (GET_CODE (XEXP (x, 0)) == MULT
26531 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26533 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26534 if (val == 2 || val == 4 || val == 8)
26536 *total = cost->lea;
26537 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26538 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26539 return true;
26542 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26544 *total = cost->lea;
26545 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26546 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26547 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26548 return true;
26551 /* FALLTHRU */
26553 case MINUS:
26554 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26556 /* ??? SSE cost should be used here. */
26557 *total = cost->fadd;
26558 return false;
26560 else if (X87_FLOAT_MODE_P (mode))
26562 *total = cost->fadd;
26563 return false;
26565 else if (FLOAT_MODE_P (mode))
26567 /* ??? SSE vector cost should be used here. */
26568 *total = cost->fadd;
26569 return false;
26571 /* FALLTHRU */
26573 case AND:
26574 case IOR:
26575 case XOR:
26576 if (!TARGET_64BIT && mode == DImode)
26578 *total = (cost->add * 2
26579 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26580 << (GET_MODE (XEXP (x, 0)) != DImode))
26581 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26582 << (GET_MODE (XEXP (x, 1)) != DImode)));
26583 return true;
26585 /* FALLTHRU */
26587 case NEG:
26588 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26590 /* ??? SSE cost should be used here. */
26591 *total = cost->fchs;
26592 return false;
26594 else if (X87_FLOAT_MODE_P (mode))
26596 *total = cost->fchs;
26597 return false;
26599 else if (FLOAT_MODE_P (mode))
26601 /* ??? SSE vector cost should be used here. */
26602 *total = cost->fchs;
26603 return false;
26605 /* FALLTHRU */
26607 case NOT:
26608 if (!TARGET_64BIT && mode == DImode)
26609 *total = cost->add * 2;
26610 else
26611 *total = cost->add;
26612 return false;
26614 case COMPARE:
26615 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26616 && XEXP (XEXP (x, 0), 1) == const1_rtx
26617 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26618 && XEXP (x, 1) == const0_rtx)
26620 /* This kind of construct is implemented using test[bwl].
26621 Treat it as if we had an AND. */
26622 *total = (cost->add
26623 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26624 + rtx_cost (const1_rtx, outer_code, speed));
26625 return true;
26627 return false;
26629 case FLOAT_EXTEND:
26630 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26631 *total = 0;
26632 return false;
26634 case ABS:
26635 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26636 /* ??? SSE cost should be used here. */
26637 *total = cost->fabs;
26638 else if (X87_FLOAT_MODE_P (mode))
26639 *total = cost->fabs;
26640 else if (FLOAT_MODE_P (mode))
26641 /* ??? SSE vector cost should be used here. */
26642 *total = cost->fabs;
26643 return false;
26645 case SQRT:
26646 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26647 /* ??? SSE cost should be used here. */
26648 *total = cost->fsqrt;
26649 else if (X87_FLOAT_MODE_P (mode))
26650 *total = cost->fsqrt;
26651 else if (FLOAT_MODE_P (mode))
26652 /* ??? SSE vector cost should be used here. */
26653 *total = cost->fsqrt;
26654 return false;
26656 case UNSPEC:
26657 if (XINT (x, 1) == UNSPEC_TP)
26658 *total = 0;
26659 return false;
26661 default:
26662 return false;
26666 #if TARGET_MACHO
26668 static int current_machopic_label_num;
26670 /* Given a symbol name and its associated stub, write out the
26671 definition of the stub. */
26673 void
26674 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26676 unsigned int length;
26677 char *binder_name, *symbol_name, lazy_ptr_name[32];
26678 int label = ++current_machopic_label_num;
26680 /* For 64-bit we shouldn't get here. */
26681 gcc_assert (!TARGET_64BIT);
26683 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26684 symb = (*targetm.strip_name_encoding) (symb);
26686 length = strlen (stub);
26687 binder_name = XALLOCAVEC (char, length + 32);
26688 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26690 length = strlen (symb);
26691 symbol_name = XALLOCAVEC (char, length + 32);
26692 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26694 sprintf (lazy_ptr_name, "L%d$lz", label);
26696 if (MACHOPIC_PURE)
26697 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26698 else
26699 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26701 fprintf (file, "%s:\n", stub);
26702 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26704 if (MACHOPIC_PURE)
26706 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26707 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26708 fprintf (file, "\tjmp\t*%%edx\n");
26710 else
26711 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26713 fprintf (file, "%s:\n", binder_name);
26715 if (MACHOPIC_PURE)
26717 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26718 fprintf (file, "\tpushl\t%%eax\n");
26720 else
26721 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26723 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
26725 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26726 fprintf (file, "%s:\n", lazy_ptr_name);
26727 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26728 fprintf (file, "\t.long %s\n", binder_name);
26731 void
26732 darwin_x86_file_end (void)
26734 darwin_file_end ();
26735 ix86_file_end ();
26737 #endif /* TARGET_MACHO */
26739 /* Order the registers for register allocator. */
26741 void
26742 x86_order_regs_for_local_alloc (void)
26744 int pos = 0;
26745 int i;
26747 /* First allocate the local general purpose registers. */
26748 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26749 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26750 reg_alloc_order [pos++] = i;
26752 /* Global general purpose registers. */
26753 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26754 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26755 reg_alloc_order [pos++] = i;
26757 /* x87 registers come first in case we are doing FP math
26758 using them. */
26759 if (!TARGET_SSE_MATH)
26760 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26761 reg_alloc_order [pos++] = i;
26763 /* SSE registers. */
26764 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26765 reg_alloc_order [pos++] = i;
26766 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26767 reg_alloc_order [pos++] = i;
26769 /* x87 registers. */
26770 if (TARGET_SSE_MATH)
26771 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26772 reg_alloc_order [pos++] = i;
26774 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26775 reg_alloc_order [pos++] = i;
26777 /* Initialize the rest of the array, as we do not allocate some
26778 registers at all. */
26779 while (pos < FIRST_PSEUDO_REGISTER)
26780 reg_alloc_order [pos++] = 0;
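/* Annotation: the net effect of the ordering above is that call-clobbered
   integer registers are tried first, then call-saved ones, with the x87
   stack placed before the SSE registers when it is used for FP math and
   after them otherwise.  */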
26783 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26784 struct attribute_spec.handler. */
26785 static tree
26786 ix86_handle_abi_attribute (tree *node, tree name,
26787 tree args ATTRIBUTE_UNUSED,
26788 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26790 if (TREE_CODE (*node) != FUNCTION_TYPE
26791 && TREE_CODE (*node) != METHOD_TYPE
26792 && TREE_CODE (*node) != FIELD_DECL
26793 && TREE_CODE (*node) != TYPE_DECL)
26795 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26796 IDENTIFIER_POINTER (name));
26797 *no_add_attrs = true;
26798 return NULL_TREE;
26800 if (!TARGET_64BIT)
26802 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26803 IDENTIFIER_POINTER (name));
26804 *no_add_attrs = true;
26805 return NULL_TREE;
26808 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
26809 if (is_attribute_p ("ms_abi", name))
26811 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26813 error ("ms_abi and sysv_abi attributes are not compatible");
26816 return NULL_TREE;
26818 else if (is_attribute_p ("sysv_abi", name))
26820 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26822 error ("ms_abi and sysv_abi attributes are not compatible");
26825 return NULL_TREE;
26828 return NULL_TREE;
26831 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26832 struct attribute_spec.handler. */
26833 static tree
26834 ix86_handle_struct_attribute (tree *node, tree name,
26835 tree args ATTRIBUTE_UNUSED,
26836 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26838 tree *type = NULL;
26839 if (DECL_P (*node))
26841 if (TREE_CODE (*node) == TYPE_DECL)
26842 type = &TREE_TYPE (*node);
26844 else
26845 type = node;
26847 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26848 || TREE_CODE (*type) == UNION_TYPE)))
26850 warning (OPT_Wattributes, "%qs attribute ignored",
26851 IDENTIFIER_POINTER (name));
26852 *no_add_attrs = true;
26855 else if ((is_attribute_p ("ms_struct", name)
26856 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26857 || ((is_attribute_p ("gcc_struct", name)
26858 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26860 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26861 IDENTIFIER_POINTER (name));
26862 *no_add_attrs = true;
26865 return NULL_TREE;
26868 static bool
26869 ix86_ms_bitfield_layout_p (const_tree record_type)
26871 return ((TARGET_MS_BITFIELD_LAYOUT
26872 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26873 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
26876 /* Returns an expression indicating where the this parameter is
26877 located on entry to the FUNCTION. */
26879 static rtx
26880 x86_this_parameter (tree function)
26882 tree type = TREE_TYPE (function);
26883 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26884 int nregs;
26886 if (TARGET_64BIT)
26888 const int *parm_regs;
26890 if (ix86_function_type_abi (type) == MS_ABI)
26891 parm_regs = x86_64_ms_abi_int_parameter_registers;
26892 else
26893 parm_regs = x86_64_int_parameter_registers;
26894 return gen_rtx_REG (DImode, parm_regs[aggr]);
26897 nregs = ix86_function_regparm (type, function);
26899 if (nregs > 0 && !stdarg_p (type))
26901 int regno;
26903 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26904 regno = aggr ? DX_REG : CX_REG;
26905 else
26907 regno = AX_REG;
26908 if (aggr)
26910 regno = DX_REG;
26911 if (nregs == 1)
26912 return gen_rtx_MEM (SImode,
26913 plus_constant (stack_pointer_rtx, 4));
26916 return gen_rtx_REG (SImode, regno);
26919 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26922 /* Determine whether x86_output_mi_thunk can succeed. */
26924 static bool
26925 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26926 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26927 HOST_WIDE_INT vcall_offset, const_tree function)
26929 /* 64-bit can handle anything. */
26930 if (TARGET_64BIT)
26931 return true;
26933 /* For 32-bit, everything's fine if we have one free register. */
26934 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26935 return true;
26937 /* Need a free register for vcall_offset. */
26938 if (vcall_offset)
26939 return false;
26941 /* Need a free register for GOT references. */
26942 if (flag_pic && !(*targetm.binds_local_p) (function))
26943 return false;
26945 /* Otherwise ok. */
26946 return true;
26949 /* Output the assembler code for a thunk function. THUNK_DECL is the
26950 declaration for the thunk function itself, FUNCTION is the decl for
26951 the target function. DELTA is an immediate constant offset to be
26952 added to THIS. If VCALL_OFFSET is nonzero, the word at
26953 *(*this + vcall_offset) should be added to THIS. */
26955 static void
26956 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26957 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26958 HOST_WIDE_INT vcall_offset, tree function)
26960 rtx xops[3];
26961 rtx this_param = x86_this_parameter (function);
26962 rtx this_reg, tmp;
26964 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26965 pull it in now and let DELTA benefit. */
26966 if (REG_P (this_param))
26967 this_reg = this_param;
26968 else if (vcall_offset)
26970 /* Put the this parameter into %eax. */
26971 xops[0] = this_param;
26972 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26973 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26975 else
26976 this_reg = NULL_RTX;
26978 /* Adjust the this parameter by a fixed constant. */
26979 if (delta)
26981 xops[0] = GEN_INT (delta);
26982 xops[1] = this_reg ? this_reg : this_param;
26983 if (TARGET_64BIT)
26985 if (!x86_64_general_operand (xops[0], DImode))
26987 tmp = gen_rtx_REG (DImode, R10_REG);
26988 xops[1] = tmp;
26989 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26990 xops[0] = tmp;
26991 xops[1] = this_param;
26993 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26995 else
26996 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26999 /* Adjust the this parameter by a value stored in the vtable. */
27000 if (vcall_offset)
27002 if (TARGET_64BIT)
27003 tmp = gen_rtx_REG (DImode, R10_REG);
27004 else
27006 int tmp_regno = CX_REG;
27007 if (lookup_attribute ("fastcall",
27008 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27009 tmp_regno = AX_REG;
27010 tmp = gen_rtx_REG (SImode, tmp_regno);
27013 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27014 xops[1] = tmp;
27015 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27017 /* Adjust the this parameter. */
27018 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27019 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27021 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27022 xops[0] = GEN_INT (vcall_offset);
27023 xops[1] = tmp2;
27024 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27025 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27027 xops[1] = this_reg;
27028 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27031 /* If necessary, drop THIS back to its stack slot. */
27032 if (this_reg && this_reg != this_param)
27034 xops[0] = this_reg;
27035 xops[1] = this_param;
27036 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27039 xops[0] = XEXP (DECL_RTL (function), 0);
27040 if (TARGET_64BIT)
27042 if (!flag_pic || (*targetm.binds_local_p) (function))
27043 output_asm_insn ("jmp\t%P0", xops);
27044 /* All thunks should be in the same object as their target,
27045 and thus binds_local_p should be true. */
27046 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27047 gcc_unreachable ();
27048 else
27050 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27051 tmp = gen_rtx_CONST (Pmode, tmp);
27052 tmp = gen_rtx_MEM (QImode, tmp);
27053 xops[0] = tmp;
27054 output_asm_insn ("jmp\t%A0", xops);
27057 else
27059 if (!flag_pic || (*targetm.binds_local_p) (function))
27060 output_asm_insn ("jmp\t%P0", xops);
27061 else
27062 #if TARGET_MACHO
27063 if (TARGET_MACHO)
27065 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27066 tmp = (gen_rtx_SYMBOL_REF
27067 (Pmode,
27068 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27069 tmp = gen_rtx_MEM (QImode, tmp);
27070 xops[0] = tmp;
27071 output_asm_insn ("jmp\t%0", xops);
27073 else
27074 #endif /* TARGET_MACHO */
27076 tmp = gen_rtx_REG (SImode, CX_REG);
27077 output_set_got (tmp, NULL_RTX);
27079 xops[1] = tmp;
27080 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27081 output_asm_insn ("jmp\t{*}%1", xops);
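/* Annotation (illustrative, AT&T syntax): a 32-bit thunk whose THIS lives
   on the stack, with DELTA == -4 and no vcall offset, reduces to roughly

	addl	$-4, 4(%esp)
	jmp	target_function

   emitted by the delta adjustment and the final jump above
   (target_function standing in for the real symbol).  */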
27086 static void
27087 x86_file_start (void)
27089 default_file_start ();
27090 #if TARGET_MACHO
27091 darwin_file_start ();
27092 #endif
27093 if (X86_FILE_START_VERSION_DIRECTIVE)
27094 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27095 if (X86_FILE_START_FLTUSED)
27096 fputs ("\t.global\t__fltused\n", asm_out_file);
27097 if (ix86_asm_dialect == ASM_INTEL)
27098 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27101 int
27102 x86_field_alignment (tree field, int computed)
27104 enum machine_mode mode;
27105 tree type = TREE_TYPE (field);
27107 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27108 return computed;
27109 mode = TYPE_MODE (strip_array_types (type));
27110 if (mode == DFmode || mode == DCmode
27111 || GET_MODE_CLASS (mode) == MODE_INT
27112 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27113 return MIN (32, computed);
27114 return computed;
27117 /* Output assembler code to FILE to increment profiler label # LABELNO
27118 for profiling a function entry. */
27119 void
27120 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27122 if (TARGET_64BIT)
27124 #ifndef NO_PROFILE_COUNTERS
27125 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27126 #endif
27128 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27129 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27130 else
27131 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27133 else if (flag_pic)
27135 #ifndef NO_PROFILE_COUNTERS
27136 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27137 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27138 #endif
27139 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27141 else
27143 #ifndef NO_PROFILE_COUNTERS
27144 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27145 PROFILE_COUNT_REGISTER);
27146 #endif
27147 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27151 /* We don't have exact information about insn sizes, but we may quite
27152 safely assume that we are informed about all 1 byte insns and memory
27153 address sizes. This is enough to eliminate unnecessary padding in
27154 99% of cases. */
27156 static int
27157 min_insn_size (rtx insn)
27159 int l = 0;
27161 if (!INSN_P (insn) || !active_insn_p (insn))
27162 return 0;
27164 /* Discard alignments we've emitted, and jump instructions. */
27165 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27166 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27167 return 0;
27168 if (JUMP_P (insn)
27169 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
27170 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
27171 return 0;
27173 /* Important case - calls are always 5 bytes.
27174 It is common to have many calls in a row. */
27175 if (CALL_P (insn)
27176 && symbolic_reference_mentioned_p (PATTERN (insn))
27177 && !SIBLING_CALL_P (insn))
27178 return 5;
27179 if (get_attr_length (insn) <= 1)
27180 return 1;
27182 /* For normal instructions we may rely on the sizes of addresses
27183 and the presence of a symbol to require 4 bytes of encoding.
27184 This is not the case for jumps, where references are PC relative. */
27185 if (!JUMP_P (insn))
27187 l = get_attr_length_address (insn);
27188 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27189 l = 4;
27191 if (l)
27192 return 1+l;
27193 else
27194 return 2;
27197 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27198 16 byte window. */
27200 static void
27201 ix86_avoid_jump_misspredicts (void)
27203 rtx insn, start = get_insns ();
27204 int nbytes = 0, njumps = 0;
27205 int isjump = 0;
27207 /* Look for all minimal intervals of instructions containing 4 jumps.
27208 The intervals are bounded by START and INSN. NBYTES is the total
27209 size of instructions in the interval including INSN and not including
27210 START. When NBYTES is smaller than 16 bytes, it is possible
27211 that the end of START and INSN ends up in the same 16byte page.
27213 The smallest offset in the page INSN can start is the case where START
27214 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27215 We add p2align to the 16byte window with maxskip 17 - NBYTES + sizeof (INSN). */
27217 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27220 nbytes += min_insn_size (insn);
27221 if (dump_file)
27222 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27223 INSN_UID (insn), min_insn_size (insn));
27224 if ((JUMP_P (insn)
27225 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27226 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27227 || CALL_P (insn))
27228 njumps++;
27229 else
27230 continue;
27232 while (njumps > 3)
27234 start = NEXT_INSN (start);
27235 if ((JUMP_P (start)
27236 && GET_CODE (PATTERN (start)) != ADDR_VEC
27237 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27238 || CALL_P (start))
27239 njumps--, isjump = 1;
27240 else
27241 isjump = 0;
27242 nbytes -= min_insn_size (start);
27244 gcc_assert (njumps >= 0);
27245 if (dump_file)
27246 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27247 INSN_UID (start), INSN_UID (insn), nbytes);
27249 if (njumps == 3 && isjump && nbytes < 16)
27251 int padsize = 15 - nbytes + min_insn_size (insn);
27253 if (dump_file)
27254 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27255 INSN_UID (insn), padsize);
27256 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
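      /* Annotation (illustrative): if the fourth jump closes an interval
	 with nbytes == 12 and the jump itself is 2 bytes long, padsize is
	 15 - 12 + 2 = 5, which pushes the jump out of the 16 byte window
	 shared with the three preceding jumps.  */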
27261 /* AMD Athlon works faster
27262 when RET is not the destination of a conditional jump or directly preceded
27263 by another jump instruction. We avoid the penalty by inserting a NOP just
27264 before the RET instructions in such cases. */
27265 static void
27266 ix86_pad_returns (void)
27268 edge e;
27269 edge_iterator ei;
27271 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27273 basic_block bb = e->src;
27274 rtx ret = BB_END (bb);
27275 rtx prev;
27276 bool replace = false;
27278 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27279 || optimize_bb_for_size_p (bb))
27280 continue;
27281 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27282 if (active_insn_p (prev) || LABEL_P (prev))
27283 break;
27284 if (prev && LABEL_P (prev))
27286 edge e;
27287 edge_iterator ei;
27289 FOR_EACH_EDGE (e, ei, bb->preds)
27290 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27291 && !(e->flags & EDGE_FALLTHRU))
27292 replace = true;
27294 if (!replace)
27296 prev = prev_active_insn (ret);
27297 if (prev
27298 && ((JUMP_P (prev) && any_condjump_p (prev))
27299 || CALL_P (prev)))
27300 replace = true;
27301 /* Empty functions get a branch mispredict even when the jump destination
27302 is not visible to us. */
27303 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27304 replace = true;
27306 if (replace)
27308 emit_insn_before (gen_return_internal_long (), ret);
27309 delete_insn (ret);
27314 /* Implement machine specific optimizations. We implement padding of returns
27315 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
27316 static void
27317 ix86_reorg (void)
27319 if (TARGET_PAD_RETURNS && optimize
27320 && optimize_function_for_speed_p (cfun))
27321 ix86_pad_returns ();
27322 if (TARGET_FOUR_JUMP_LIMIT && optimize
27323 && optimize_function_for_speed_p (cfun))
27324 ix86_avoid_jump_misspredicts ();
27327 /* Return nonzero when a QImode register that must be represented via a REX
27328 prefix is used. */
27329 bool
27330 x86_extended_QIreg_mentioned_p (rtx insn)
27332 int i;
27333 extract_insn_cached (insn);
27334 for (i = 0; i < recog_data.n_operands; i++)
27335 if (REG_P (recog_data.operand[i])
27336 && REGNO (recog_data.operand[i]) > BX_REG)
27337 return true;
27338 return false;
27341 /* Return nonzero when P points to a register encoded via a REX prefix.
27342 Called via for_each_rtx. */
27343 static int
27344 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27346 unsigned int regno;
27347 if (!REG_P (*p))
27348 return 0;
27349 regno = REGNO (*p);
27350 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27353 /* Return true when INSN mentions register that must be encoded using REX
27354 prefix. */
27355 bool
27356 x86_extended_reg_mentioned_p (rtx insn)
27358 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27359 extended_reg_mentioned_1, NULL);
27362 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27363 optabs would emit if we didn't have TFmode patterns. */
27365 void
27366 x86_emit_floatuns (rtx operands[2])
27368 rtx neglab, donelab, i0, i1, f0, in, out;
27369 enum machine_mode mode, inmode;
27371 inmode = GET_MODE (operands[1]);
27372 gcc_assert (inmode == SImode || inmode == DImode);
27374 out = operands[0];
27375 in = force_reg (inmode, operands[1]);
27376 mode = GET_MODE (out);
27377 neglab = gen_label_rtx ();
27378 donelab = gen_label_rtx ();
27379 f0 = gen_reg_rtx (mode);
27381 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27383 expand_float (out, in, 0);
27385 emit_jump_insn (gen_jump (donelab));
27386 emit_barrier ();
27388 emit_label (neglab);
27390 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27391 1, OPTAB_DIRECT);
27392 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27393 1, OPTAB_DIRECT);
27394 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27396 expand_float (f0, i0, 0);
27398 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27400 emit_label (donelab);
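/* Annotation (illustrative): for an input with the sign bit set, say the
   unsigned 64-bit value 2^63 + 3, the slow path computes
   i0 = (in >> 1) | (in & 1), converts that as a signed value, and then
   doubles it; ORing in the low bit keeps the final rounding correct
   (round-to-odd), and f0 + f0 restores the halved magnitude.  */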
27403 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27404 with all elements equal to VAR. Return true if successful. */
27406 static bool
27407 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27408 rtx target, rtx val)
27410 enum machine_mode hmode, smode, wsmode, wvmode;
27411 rtx x;
27413 switch (mode)
27415 case V2SImode:
27416 case V2SFmode:
27417 if (!mmx_ok)
27418 return false;
27419 /* FALLTHRU */
27421 case V2DFmode:
27422 case V2DImode:
27423 case V4SFmode:
27424 case V4SImode:
27425 val = force_reg (GET_MODE_INNER (mode), val);
27426 x = gen_rtx_VEC_DUPLICATE (mode, val);
27427 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27428 return true;
27430 case V4HImode:
27431 if (!mmx_ok)
27432 return false;
27433 if (TARGET_SSE || TARGET_3DNOW_A)
27435 val = gen_lowpart (SImode, val);
27436 x = gen_rtx_TRUNCATE (HImode, val);
27437 x = gen_rtx_VEC_DUPLICATE (mode, x);
27438 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27439 return true;
27441 else
27443 smode = HImode;
27444 wsmode = SImode;
27445 wvmode = V2SImode;
27446 goto widen;
27449 case V8QImode:
27450 if (!mmx_ok)
27451 return false;
27452 smode = QImode;
27453 wsmode = HImode;
27454 wvmode = V4HImode;
27455 goto widen;
27456 case V8HImode:
27457 if (TARGET_SSE2)
27459 rtx tmp1, tmp2;
27460 /* Extend HImode to SImode using a paradoxical SUBREG. */
27461 tmp1 = gen_reg_rtx (SImode);
27462 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27463 /* Insert the SImode value as low element of V4SImode vector. */
27464 tmp2 = gen_reg_rtx (V4SImode);
27465 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27466 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27467 CONST0_RTX (V4SImode),
27468 const1_rtx);
27469 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27470 /* Cast the V4SImode vector back to a V8HImode vector. */
27471 tmp1 = gen_reg_rtx (V8HImode);
27472 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27473 /* Duplicate the low short through the whole low SImode word. */
27474 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27475 /* Cast the V8HImode vector back to a V4SImode vector. */
27476 tmp2 = gen_reg_rtx (V4SImode);
27477 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27478 /* Replicate the low element of the V4SImode vector. */
27479 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27480 /* Cast the V4SImode vector back to V8HImode, and store in target. */
27481 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27482 return true;
27484 smode = HImode;
27485 wsmode = SImode;
27486 wvmode = V4SImode;
27487 goto widen;
27488 case V16QImode:
27489 if (TARGET_SSE2)
27491 rtx tmp1, tmp2;
27492 /* Extend QImode to SImode using a paradoxical SUBREG. */
27493 tmp1 = gen_reg_rtx (SImode);
27494 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27495 /* Insert the SImode value as low element of V4SImode vector. */
27496 tmp2 = gen_reg_rtx (V4SImode);
27497 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27498 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27499 CONST0_RTX (V4SImode),
27500 const1_rtx);
27501 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27502 /* Cast the V4SImode vector back to a V16QImode vector. */
27503 tmp1 = gen_reg_rtx (V16QImode);
27504 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27505 /* Duplicate the low byte through the whole low SImode word. */
27506 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27507 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27508 /* Cast the V16QImode vector back to a V4SImode vector. */
27509 tmp2 = gen_reg_rtx (V4SImode);
27510 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27511 /* Replicate the low element of the V4SImode vector. */
27512 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27513 /* Cast the V4SImode vector back to V16QImode, and store in target. */
27514 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27515 return true;
27517 smode = QImode;
27518 wsmode = HImode;
27519 wvmode = V8HImode;
27520 goto widen;
27521 widen:
27522 /* Replicate the value once into the next wider mode and recurse. */
27523 val = convert_modes (wsmode, smode, val, true);
27524 x = expand_simple_binop (wsmode, ASHIFT, val,
27525 GEN_INT (GET_MODE_BITSIZE (smode)),
27526 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27527 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27529 x = gen_reg_rtx (wvmode);
27530 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27531 gcc_unreachable ();
27532 emit_move_insn (target, gen_lowpart (mode, x));
27533 return true;
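      /* Annotation (illustrative): replicating the QImode value 0xab first
	 widens it to the HImode value 0xabab via the shift-and-IOR above,
	 then recurses so the V4HImode (or wider) duplicate path can finish
	 the job.  */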
27535 case V4DFmode:
27536 hmode = V2DFmode;
27537 goto half;
27538 case V4DImode:
27539 hmode = V2DImode;
27540 goto half;
27541 case V8SFmode:
27542 hmode = V4SFmode;
27543 goto half;
27544 case V8SImode:
27545 hmode = V4SImode;
27546 goto half;
27547 case V16HImode:
27548 hmode = V8HImode;
27549 goto half;
27550 case V32QImode:
27551 hmode = V16QImode;
27552 goto half;
27553 half:
27555 rtx tmp = gen_reg_rtx (hmode);
27556 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27557 emit_insn (gen_rtx_SET (VOIDmode, target,
27558 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27560 return true;
27562 default:
27563 return false;
27567 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27568 whose ONE_VAR element is VAR, and other elements are zero. Return true
27569 if successful. */
27571 static bool
27572 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27573 rtx target, rtx var, int one_var)
27575 enum machine_mode vsimode;
27576 rtx new_target;
27577 rtx x, tmp;
27578 bool use_vector_set = false;
27580 switch (mode)
27582 case V2DImode:
27583 /* For SSE4.1, we normally use vector set. But if the second
27584 element is zero and inter-unit moves are OK, we use movq
27585 instead. */
27586 use_vector_set = (TARGET_64BIT
27587 && TARGET_SSE4_1
27588 && !(TARGET_INTER_UNIT_MOVES
27589 && one_var == 0));
27590 break;
27591 case V16QImode:
27592 case V4SImode:
27593 case V4SFmode:
27594 use_vector_set = TARGET_SSE4_1;
27595 break;
27596 case V8HImode:
27597 use_vector_set = TARGET_SSE2;
27598 break;
27599 case V4HImode:
27600 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27601 break;
27602 case V32QImode:
27603 case V16HImode:
27604 case V8SImode:
27605 case V8SFmode:
27606 case V4DFmode:
27607 use_vector_set = TARGET_AVX;
27608 break;
27609 case V4DImode:
27610 /* Use ix86_expand_vector_set in 64bit mode only. */
27611 use_vector_set = TARGET_AVX && TARGET_64BIT;
27612 break;
27613 default:
27614 break;
27617 if (use_vector_set)
27619 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27620 var = force_reg (GET_MODE_INNER (mode), var);
27621 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27622 return true;
27625 switch (mode)
27627 case V2SFmode:
27628 case V2SImode:
27629 if (!mmx_ok)
27630 return false;
27631 /* FALLTHRU */
27633 case V2DFmode:
27634 case V2DImode:
27635 if (one_var != 0)
27636 return false;
27637 var = force_reg (GET_MODE_INNER (mode), var);
27638 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27639 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27640 return true;
27642 case V4SFmode:
27643 case V4SImode:
27644 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27645 new_target = gen_reg_rtx (mode);
27646 else
27647 new_target = target;
27648 var = force_reg (GET_MODE_INNER (mode), var);
27649 x = gen_rtx_VEC_DUPLICATE (mode, var);
27650 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27651 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27652 if (one_var != 0)
27654 /* We need to shuffle the value to the correct position, so
27655 create a new pseudo to store the intermediate result. */
27657 /* With SSE2, we can use the integer shuffle insns. */
27658 if (mode != V4SFmode && TARGET_SSE2)
27660 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27661 GEN_INT (1),
27662 GEN_INT (one_var == 1 ? 0 : 1),
27663 GEN_INT (one_var == 2 ? 0 : 1),
27664 GEN_INT (one_var == 3 ? 0 : 1)));
27665 if (target != new_target)
27666 emit_move_insn (target, new_target);
27667 return true;
27670 /* Otherwise convert the intermediate result to V4SFmode and
27671 use the SSE1 shuffle instructions. */
27672 if (mode != V4SFmode)
27674 tmp = gen_reg_rtx (V4SFmode);
27675 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27677 else
27678 tmp = new_target;
27680 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27681 GEN_INT (1),
27682 GEN_INT (one_var == 1 ? 0 : 1),
27683 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27684 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27686 if (mode != V4SFmode)
27687 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27688 else if (tmp != target)
27689 emit_move_insn (target, tmp);
27691 else if (target != new_target)
27692 emit_move_insn (target, new_target);
27693 return true;
27695 case V8HImode:
27696 case V16QImode:
27697 vsimode = V4SImode;
27698 goto widen;
27699 case V4HImode:
27700 case V8QImode:
27701 if (!mmx_ok)
27702 return false;
27703 vsimode = V2SImode;
27704 goto widen;
27705 widen:
27706 if (one_var != 0)
27707 return false;
27709 /* Zero extend the variable element to SImode and recurse. */
27710 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27712 x = gen_reg_rtx (vsimode);
27713 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27714 var, one_var))
27715 gcc_unreachable ();
27717 emit_move_insn (target, gen_lowpart (mode, x));
27718 return true;
27720 default:
27721 return false;
27725 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27726 consisting of the values in VALS. It is known that all elements
27727 except ONE_VAR are constants. Return true if successful. */
27729 static bool
27730 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27731 rtx target, rtx vals, int one_var)
27733 rtx var = XVECEXP (vals, 0, one_var);
27734 enum machine_mode wmode;
27735 rtx const_vec, x;
27737 const_vec = copy_rtx (vals);
27738 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27739 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27741 switch (mode)
27743 case V2DFmode:
27744 case V2DImode:
27745 case V2SFmode:
27746 case V2SImode:
27747 /* For the two element vectors, it's just as easy to use
27748 the general case. */
27749 return false;
27751 case V4DImode:
27752 /* Use ix86_expand_vector_set in 64bit mode only. */
27753 if (!TARGET_64BIT)
27754 return false;
27755 case V4DFmode:
27756 case V8SFmode:
27757 case V8SImode:
27758 case V16HImode:
27759 case V32QImode:
27760 case V4SFmode:
27761 case V4SImode:
27762 case V8HImode:
27763 case V4HImode:
27764 break;
27766 case V16QImode:
27767 if (TARGET_SSE4_1)
27768 break;
27769 wmode = V8HImode;
27770 goto widen;
27771 case V8QImode:
27772 wmode = V4HImode;
27773 goto widen;
27774 widen:
27775 /* There's no way to set one QImode entry easily. Combine
27776 the variable value with its adjacent constant value, and
27777 promote to an HImode set. */
27778 x = XVECEXP (vals, 0, one_var ^ 1);
27779 if (one_var & 1)
27781 var = convert_modes (HImode, QImode, var, true);
27782 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27783 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27784 x = GEN_INT (INTVAL (x) & 0xff);
27786 else
27788 var = convert_modes (HImode, QImode, var, true);
27789 x = gen_int_mode (INTVAL (x) << 8, HImode);
27791 if (x != const0_rtx)
27792 var = expand_simple_binop (HImode, IOR, var, x, var,
27793 1, OPTAB_LIB_WIDEN);
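      /* Annotation (illustrative): with ONE_VAR == 2 in a V8QImode vector,
	 the variable byte is paired with the constant at index 3
	 (one_var ^ 1); the pair becomes a single HImode value, variable in
	 the low byte and constant shifted into the high byte, which the
	 V4HImode vector-set below stores at element 1 (one_var >> 1).  */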
27795 x = gen_reg_rtx (wmode);
27796 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27797 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27799 emit_move_insn (target, gen_lowpart (mode, x));
27800 return true;
27802 default:
27803 return false;
27806 emit_move_insn (target, const_vec);
27807 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27808 return true;
27811 /* A subroutine of ix86_expand_vector_init_general. Use vector
27812 concatenate to handle the most general case: all values variable,
27813 and none identical. */
27815 static void
27816 ix86_expand_vector_init_concat (enum machine_mode mode,
27817 rtx target, rtx *ops, int n)
27819 enum machine_mode cmode, hmode = VOIDmode;
27820 rtx first[8], second[4];
27821 rtvec v;
27822 int i, j;
27824 switch (n)
27826 case 2:
27827 switch (mode)
27829 case V8SImode:
27830 cmode = V4SImode;
27831 break;
27832 case V8SFmode:
27833 cmode = V4SFmode;
27834 break;
27835 case V4DImode:
27836 cmode = V2DImode;
27837 break;
27838 case V4DFmode:
27839 cmode = V2DFmode;
27840 break;
27841 case V4SImode:
27842 cmode = V2SImode;
27843 break;
27844 case V4SFmode:
27845 cmode = V2SFmode;
27846 break;
27847 case V2DImode:
27848 cmode = DImode;
27849 break;
27850 case V2SImode:
27851 cmode = SImode;
27852 break;
27853 case V2DFmode:
27854 cmode = DFmode;
27855 break;
27856 case V2SFmode:
27857 cmode = SFmode;
27858 break;
27859 default:
27860 gcc_unreachable ();
27863 if (!register_operand (ops[1], cmode))
27864 ops[1] = force_reg (cmode, ops[1]);
27865 if (!register_operand (ops[0], cmode))
27866 ops[0] = force_reg (cmode, ops[0]);
27867 emit_insn (gen_rtx_SET (VOIDmode, target,
27868 gen_rtx_VEC_CONCAT (mode, ops[0],
27869 ops[1])));
27870 break;
27872 case 4:
27873 switch (mode)
27875 case V4DImode:
27876 cmode = V2DImode;
27877 break;
27878 case V4DFmode:
27879 cmode = V2DFmode;
27880 break;
27881 case V4SImode:
27882 cmode = V2SImode;
27883 break;
27884 case V4SFmode:
27885 cmode = V2SFmode;
27886 break;
27887 default:
27888 gcc_unreachable ();
27890 goto half;
27892 case 8:
27893 switch (mode)
27895 case V8SImode:
27896 cmode = V2SImode;
27897 hmode = V4SImode;
27898 break;
27899 case V8SFmode:
27900 cmode = V2SFmode;
27901 hmode = V4SFmode;
27902 break;
27903 default:
27904 gcc_unreachable ();
27906 goto half;
27908 half:
27909 /* FIXME: We process inputs backward to help RA. PR 36222. */
27910 i = n - 1;
27911 j = (n >> 1) - 1;
27912 for (; i > 0; i -= 2, j--)
27914 first[j] = gen_reg_rtx (cmode);
27915 v = gen_rtvec (2, ops[i - 1], ops[i]);
27916 ix86_expand_vector_init (false, first[j],
27917 gen_rtx_PARALLEL (cmode, v));
27920 n >>= 1;
27921 if (n > 2)
27923 gcc_assert (hmode != VOIDmode);
27924 for (i = j = 0; i < n; i += 2, j++)
27926 second[j] = gen_reg_rtx (hmode);
27927 ix86_expand_vector_init_concat (hmode, second [j],
27928 &first [i], 2);
27930 n >>= 1;
27931 ix86_expand_vector_init_concat (mode, target, second, n);
27933 else
27934 ix86_expand_vector_init_concat (mode, target, first, n);
27935 break;
27937 default:
27938 gcc_unreachable ();
27942 /* A subroutine of ix86_expand_vector_init_general. Use vector
27943 interleave to handle the most general case: all values variable,
27944 and none identical. */
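/* The strategy below is to load each adjacent pair of scalars into
   the two low elements of a vector of its own, and then repeatedly
   interleave the low parts of those vectors in progressively wider
   element modes until everything is merged into a single register.  */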
27946 static void
27947 ix86_expand_vector_init_interleave (enum machine_mode mode,
27948 rtx target, rtx *ops, int n)
27950 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27951 int i, j;
27952 rtx op0, op1;
27953 rtx (*gen_load_even) (rtx, rtx, rtx);
27954 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27955 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27957 switch (mode)
27959 case V8HImode:
27960 gen_load_even = gen_vec_setv8hi;
27961 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27962 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27963 inner_mode = HImode;
27964 first_imode = V4SImode;
27965 second_imode = V2DImode;
27966 third_imode = VOIDmode;
27967 break;
27968 case V16QImode:
27969 gen_load_even = gen_vec_setv16qi;
27970 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27971 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27972 inner_mode = QImode;
27973 first_imode = V8HImode;
27974 second_imode = V4SImode;
27975 third_imode = V2DImode;
27976 break;
27977 default:
27978 gcc_unreachable ();
27981 for (i = 0; i < n; i++)
27983 /* Extend the odd element to SImode using a paradoxical SUBREG. */
27984 op0 = gen_reg_rtx (SImode);
27985 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27987 /* Insert the SImode value as low element of V4SImode vector. */
27988 op1 = gen_reg_rtx (V4SImode);
27989 op0 = gen_rtx_VEC_MERGE (V4SImode,
27990 gen_rtx_VEC_DUPLICATE (V4SImode,
27991 op0),
27992 CONST0_RTX (V4SImode),
27993 const1_rtx);
27994 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27996 /* Cast the V4SImode vector back to a vector in the original mode. */
27997 op0 = gen_reg_rtx (mode);
27998 emit_move_insn (op0, gen_lowpart (mode, op1));
28000 /* Load even elements into the second position. */
28001 emit_insn ((*gen_load_even) (op0,
28002 force_reg (inner_mode,
28003 ops [i + i + 1]),
28004 const1_rtx));
28006 /* Cast vector to FIRST_IMODE vector. */
28007 ops[i] = gen_reg_rtx (first_imode);
28008 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28011 /* Interleave low FIRST_IMODE vectors. */
28012 for (i = j = 0; i < n; i += 2, j++)
28014 op0 = gen_reg_rtx (first_imode);
28015 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28017 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28018 ops[j] = gen_reg_rtx (second_imode);
28019 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28022 /* Interleave low SECOND_IMODE vectors. */
28023 switch (second_imode)
28025 case V4SImode:
28026 for (i = j = 0; i < n / 2; i += 2, j++)
28028 op0 = gen_reg_rtx (second_imode);
28029 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28030 ops[i + 1]));
28032 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28033 vector. */
28034 ops[j] = gen_reg_rtx (third_imode);
28035 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28037 second_imode = V2DImode;
28038 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28039 /* FALLTHRU */
28041 case V2DImode:
28042 op0 = gen_reg_rtx (second_imode);
28043 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28044 ops[1]));
28046 /* Cast the SECOND_IMODE vector back to a vector in the original
28047 mode. */
28048 emit_insn (gen_rtx_SET (VOIDmode, target,
28049 gen_lowpart (mode, op0)));
28050 break;
28052 default:
28053 gcc_unreachable ();
28057 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28058 all values variable, and none identical. */
28060 static void
28061 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28062 rtx target, rtx vals)
28064 rtx ops[32], op0, op1;
28065 enum machine_mode half_mode = VOIDmode;
28066 int n, i;
28068 switch (mode)
28070 case V2SFmode:
28071 case V2SImode:
28072 if (!mmx_ok && !TARGET_SSE)
28073 break;
28074 /* FALLTHRU */
28076 case V8SFmode:
28077 case V8SImode:
28078 case V4DFmode:
28079 case V4DImode:
28080 case V4SFmode:
28081 case V4SImode:
28082 case V2DFmode:
28083 case V2DImode:
28084 n = GET_MODE_NUNITS (mode);
28085 for (i = 0; i < n; i++)
28086 ops[i] = XVECEXP (vals, 0, i);
28087 ix86_expand_vector_init_concat (mode, target, ops, n);
28088 return;
28090 case V32QImode:
28091 half_mode = V16QImode;
28092 goto half;
28094 case V16HImode:
28095 half_mode = V8HImode;
28096 goto half;
28098 half:
28099 n = GET_MODE_NUNITS (mode);
28100 for (i = 0; i < n; i++)
28101 ops[i] = XVECEXP (vals, 0, i);
28102 op0 = gen_reg_rtx (half_mode);
28103 op1 = gen_reg_rtx (half_mode);
28104 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28105 n >> 2);
28106 ix86_expand_vector_init_interleave (half_mode, op1,
28107 &ops [n >> 1], n >> 2);
28108 emit_insn (gen_rtx_SET (VOIDmode, target,
28109 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28110 return;
28112 case V16QImode:
28113 if (!TARGET_SSE4_1)
28114 break;
28115 /* FALLTHRU */
28117 case V8HImode:
28118 if (!TARGET_SSE2)
28119 break;
28121 /* Don't use ix86_expand_vector_init_interleave if we can't
28122 move from GPR to SSE register directly. */
28123 if (!TARGET_INTER_UNIT_MOVES)
28124 break;
28126 n = GET_MODE_NUNITS (mode);
28127 for (i = 0; i < n; i++)
28128 ops[i] = XVECEXP (vals, 0, i);
28129 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28130 return;
28132 case V4HImode:
28133 case V8QImode:
28134 break;
28136 default:
28137 gcc_unreachable ();
28141 int i, j, n_elts, n_words, n_elt_per_word;
28142 enum machine_mode inner_mode;
28143 rtx words[4], shift;
28145 inner_mode = GET_MODE_INNER (mode);
28146 n_elts = GET_MODE_NUNITS (mode);
28147 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28148 n_elt_per_word = n_elts / n_words;
28149 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28151 for (i = 0; i < n_words; ++i)
28153 rtx word = NULL_RTX;
28155 for (j = 0; j < n_elt_per_word; ++j)
28157 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28158 elt = convert_modes (word_mode, inner_mode, elt, true);
28160 if (j == 0)
28161 word = elt;
28162 else
28164 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28165 word, 1, OPTAB_LIB_WIDEN);
28166 word = expand_simple_binop (word_mode, IOR, word, elt,
28167 word, 1, OPTAB_LIB_WIDEN);
28171 words[i] = word;
28174 if (n_words == 1)
28175 emit_move_insn (target, gen_lowpart (mode, words[0]));
28176 else if (n_words == 2)
28178 rtx tmp = gen_reg_rtx (mode);
28179 emit_clobber (tmp);
28180 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28181 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28182 emit_move_insn (target, tmp);
28184 else if (n_words == 4)
28186 rtx tmp = gen_reg_rtx (V4SImode);
28187 gcc_assert (word_mode == SImode);
28188 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28189 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28190 emit_move_insn (target, gen_lowpart (mode, tmp));
28192 else
28193 gcc_unreachable ();
28197 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28198 instructions unless MMX_OK is true. */
28200 void
28201 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28203 enum machine_mode mode = GET_MODE (target);
28204 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28205 int n_elts = GET_MODE_NUNITS (mode);
28206 int n_var = 0, one_var = -1;
28207 bool all_same = true, all_const_zero = true;
28208 int i;
28209 rtx x;
28211 for (i = 0; i < n_elts; ++i)
28213 x = XVECEXP (vals, 0, i);
28214 if (!(CONST_INT_P (x)
28215 || GET_CODE (x) == CONST_DOUBLE
28216 || GET_CODE (x) == CONST_FIXED))
28217 n_var++, one_var = i;
28218 else if (x != CONST0_RTX (inner_mode))
28219 all_const_zero = false;
28220 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28221 all_same = false;
28224 /* Constants are best loaded from the constant pool. */
28225 if (n_var == 0)
28227 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28228 return;
28231 /* If all values are identical, broadcast the value. */
28232 if (all_same
28233 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28234 XVECEXP (vals, 0, 0)))
28235 return;
28237 /* Values where only one field is non-constant are best loaded from
28238 the pool and overwritten via move later. */
28239 if (n_var == 1)
28241 if (all_const_zero
28242 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28243 XVECEXP (vals, 0, one_var),
28244 one_var))
28245 return;
28247 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28248 return;
28251 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28254 void
28255 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28257 enum machine_mode mode = GET_MODE (target);
28258 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28259 enum machine_mode half_mode;
28260 bool use_vec_merge = false;
28261 rtx tmp;
28262 static rtx (*gen_extract[6][2]) (rtx, rtx)
28264 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28265 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28266 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28267 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28268 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28269 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28271 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28273 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28274 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28275 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28276 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28277 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28278 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28280 int i, j, n;
28282 switch (mode)
28284 case V2SFmode:
28285 case V2SImode:
28286 if (mmx_ok)
28288 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28289 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28290 if (elt == 0)
28291 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28292 else
28293 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28294 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28295 return;
28297 break;
28299 case V2DImode:
28300 use_vec_merge = TARGET_SSE4_1;
28301 if (use_vec_merge)
28302 break;
28304 case V2DFmode:
28306 rtx op0, op1;
28308 /* For the two element vectors, we implement a VEC_CONCAT with
28309 the extraction of the other element. */
28311 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28312 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28314 if (elt == 0)
28315 op0 = val, op1 = tmp;
28316 else
28317 op0 = tmp, op1 = val;
28319 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28320 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28322 return;
28324 case V4SFmode:
28325 use_vec_merge = TARGET_SSE4_1;
28326 if (use_vec_merge)
28327 break;
28329 switch (elt)
28331 case 0:
28332 use_vec_merge = true;
28333 break;
28335 case 1:
28336 /* tmp = target = A B C D */
28337 tmp = copy_to_reg (target);
28338 /* target = A A B B */
28339 emit_insn (gen_sse_unpcklps (target, target, target));
28340 /* target = X A B B */
28341 ix86_expand_vector_set (false, target, val, 0);
28342 /* target = A X C D */
28343 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28344 GEN_INT (1), GEN_INT (0),
28345 GEN_INT (2+4), GEN_INT (3+4)));
28346 return;
28348 case 2:
28349 /* tmp = target = A B C D */
28350 tmp = copy_to_reg (target);
28351 /* tmp = X B C D */
28352 ix86_expand_vector_set (false, tmp, val, 0);
28353 /* target = A B X D */
28354 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28355 GEN_INT (0), GEN_INT (1),
28356 GEN_INT (0+4), GEN_INT (3+4)));
28357 return;
28359 case 3:
28360 /* tmp = target = A B C D */
28361 tmp = copy_to_reg (target);
28362 /* tmp = X B C D */
28363 ix86_expand_vector_set (false, tmp, val, 0);
28364 /* target = A B C X */
28365 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28366 GEN_INT (0), GEN_INT (1),
28367 GEN_INT (2+4), GEN_INT (0+4)));
28368 return;
28370 default:
28371 gcc_unreachable ();
28373 break;
28375 case V4SImode:
28376 use_vec_merge = TARGET_SSE4_1;
28377 if (use_vec_merge)
28378 break;
28380 /* Element 0 handled by vec_merge below. */
28381 if (elt == 0)
28383 use_vec_merge = true;
28384 break;
28387 if (TARGET_SSE2)
28389 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28390 store into element 0, then shuffle them back. */
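/* E.g. for ELT == 2 the order becomes {2, 1, 0, 3}, which exchanges
   elements 0 and 2.  The permutation is its own inverse, so the same
   pshufd is reused to restore the order after the store into
   element 0.  */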
28392 rtx order[4];
28394 order[0] = GEN_INT (elt);
28395 order[1] = const1_rtx;
28396 order[2] = const2_rtx;
28397 order[3] = GEN_INT (3);
28398 order[elt] = const0_rtx;
28400 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28401 order[1], order[2], order[3]));
28403 ix86_expand_vector_set (false, target, val, 0);
28405 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28406 order[1], order[2], order[3]));
28408 else
28410 /* For SSE1, we have to reuse the V4SF code. */
28411 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28412 gen_lowpart (SFmode, val), elt);
28414 return;
28416 case V8HImode:
28417 use_vec_merge = TARGET_SSE2;
28418 break;
28419 case V4HImode:
28420 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28421 break;
28423 case V16QImode:
28424 use_vec_merge = TARGET_SSE4_1;
28425 break;
28427 case V8QImode:
28428 break;
28430 case V32QImode:
28431 half_mode = V16QImode;
28432 j = 0;
28433 n = 16;
28434 goto half;
28436 case V16HImode:
28437 half_mode = V8HImode;
28438 j = 1;
28439 n = 8;
28440 goto half;
28442 case V8SImode:
28443 half_mode = V4SImode;
28444 j = 2;
28445 n = 4;
28446 goto half;
28448 case V4DImode:
28449 half_mode = V2DImode;
28450 j = 3;
28451 n = 2;
28452 goto half;
28454 case V8SFmode:
28455 half_mode = V4SFmode;
28456 j = 4;
28457 n = 4;
28458 goto half;
28460 case V4DFmode:
28461 half_mode = V2DFmode;
28462 j = 5;
28463 n = 2;
28464 goto half;
28466 half:
28467 /* Compute offset. */
28468 i = elt / n;
28469 elt %= n;
28471 gcc_assert (i <= 1);
28473 /* Extract the half. */
28474 tmp = gen_reg_rtx (half_mode);
28475 emit_insn ((*gen_extract[j][i]) (tmp, target));
28477 /* Put val in tmp at elt. */
28478 ix86_expand_vector_set (false, tmp, val, elt);
28480 /* Put it back. */
28481 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28482 return;
28484 default:
28485 break;
28488 if (use_vec_merge)
28490 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28491 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28492 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28494 else
28496 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28498 emit_move_insn (mem, target);
28500 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28501 emit_move_insn (tmp, val);
28503 emit_move_insn (target, mem);
28507 void
28508 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28510 enum machine_mode mode = GET_MODE (vec);
28511 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28512 bool use_vec_extr = false;
28513 rtx tmp;
28515 switch (mode)
28517 case V2SImode:
28518 case V2SFmode:
28519 if (!mmx_ok)
28520 break;
28521 /* FALLTHRU */
28523 case V2DFmode:
28524 case V2DImode:
28525 use_vec_extr = true;
28526 break;
28528 case V4SFmode:
28529 use_vec_extr = TARGET_SSE4_1;
28530 if (use_vec_extr)
28531 break;
28533 switch (elt)
28535 case 0:
28536 tmp = vec;
28537 break;
28539 case 1:
28540 case 3:
28541 tmp = gen_reg_rtx (mode);
28542 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28543 GEN_INT (elt), GEN_INT (elt),
28544 GEN_INT (elt+4), GEN_INT (elt+4)));
28545 break;
28547 case 2:
28548 tmp = gen_reg_rtx (mode);
28549 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28550 break;
28552 default:
28553 gcc_unreachable ();
28555 vec = tmp;
28556 use_vec_extr = true;
28557 elt = 0;
28558 break;
28560 case V4SImode:
28561 use_vec_extr = TARGET_SSE4_1;
28562 if (use_vec_extr)
28563 break;
28565 if (TARGET_SSE2)
28567 switch (elt)
28569 case 0:
28570 tmp = vec;
28571 break;
28573 case 1:
28574 case 3:
28575 tmp = gen_reg_rtx (mode);
28576 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28577 GEN_INT (elt), GEN_INT (elt),
28578 GEN_INT (elt), GEN_INT (elt)));
28579 break;
28581 case 2:
28582 tmp = gen_reg_rtx (mode);
28583 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28584 break;
28586 default:
28587 gcc_unreachable ();
28589 vec = tmp;
28590 use_vec_extr = true;
28591 elt = 0;
28593 else
28595 /* For SSE1, we have to reuse the V4SF code. */
28596 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28597 gen_lowpart (V4SFmode, vec), elt);
28598 return;
28600 break;
28602 case V8HImode:
28603 use_vec_extr = TARGET_SSE2;
28604 break;
28605 case V4HImode:
28606 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28607 break;
28609 case V16QImode:
28610 use_vec_extr = TARGET_SSE4_1;
28611 break;
28613 case V8QImode:
28614 /* ??? Could extract the appropriate HImode element and shift. */
28615 default:
28616 break;
28619 if (use_vec_extr)
28621 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28622 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28624 /* Let the rtl optimizers know about the zero extension performed. */
28625 if (inner_mode == QImode || inner_mode == HImode)
28627 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28628 target = gen_lowpart (SImode, target);
28631 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28633 else
28635 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28637 emit_move_insn (mem, vec);
28639 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28640 emit_move_insn (target, tmp);
28644 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28645 pattern to reduce; DEST is the destination; IN is the input vector. */
28647 void
28648 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28650 rtx tmp1, tmp2, tmp3;
28652 tmp1 = gen_reg_rtx (V4SFmode);
28653 tmp2 = gen_reg_rtx (V4SFmode);
28654 tmp3 = gen_reg_rtx (V4SFmode);
28656 emit_insn (gen_sse_movhlps (tmp1, in, in));
28657 emit_insn (fn (tmp2, tmp1, in));
28659 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28660 GEN_INT (1), GEN_INT (1),
28661 GEN_INT (1+4), GEN_INT (1+4)));
28662 emit_insn (fn (dest, tmp2, tmp3));
28665 /* Target hook for scalar_mode_supported_p. */
28666 static bool
28667 ix86_scalar_mode_supported_p (enum machine_mode mode)
28669 if (DECIMAL_FLOAT_MODE_P (mode))
28670 return true;
28671 else if (mode == TFmode)
28672 return true;
28673 else
28674 return default_scalar_mode_supported_p (mode);
28677 /* Implements target hook vector_mode_supported_p. */
28678 static bool
28679 ix86_vector_mode_supported_p (enum machine_mode mode)
28681 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28682 return true;
28683 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28684 return true;
28685 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28686 return true;
28687 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28688 return true;
28689 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28690 return true;
28691 return false;
28694 /* Target hook for c_mode_for_suffix. */
28695 static enum machine_mode
28696 ix86_c_mode_for_suffix (char suffix)
28698 if (suffix == 'q')
28699 return TFmode;
28700 if (suffix == 'w')
28701 return XFmode;
28703 return VOIDmode;
28706 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28708 We do this in the new i386 backend to maintain source compatibility
28709 with the old cc0-based compiler. */
28711 static tree
28712 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28713 tree inputs ATTRIBUTE_UNUSED,
28714 tree clobbers)
28716 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28717 clobbers);
28718 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28719 clobbers);
28720 return clobbers;
28723 /* Implements target vector targetm.asm.encode_section_info. This
28724 is not used by NetWare. */
28726 static void ATTRIBUTE_UNUSED
28727 ix86_encode_section_info (tree decl, rtx rtl, int first)
28729 default_encode_section_info (decl, rtl, first);
28731 if (TREE_CODE (decl) == VAR_DECL
28732 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28733 && ix86_in_large_data_p (decl))
28734 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28737 /* Worker function for REVERSE_CONDITION. */
28739 enum rtx_code
28740 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28742 return (mode != CCFPmode && mode != CCFPUmode
28743 ? reverse_condition (code)
28744 : reverse_condition_maybe_unordered (code));
28747 /* Output code to perform an x87 FP register move, from OPERANDS[1]
28748 to OPERANDS[0]. */
28750 const char *
28751 output_387_reg_move (rtx insn, rtx *operands)
28753 if (REG_P (operands[0]))
28755 if (REG_P (operands[1])
28756 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28758 if (REGNO (operands[0]) == FIRST_STACK_REG)
28759 return output_387_ffreep (operands, 0);
28760 return "fstp\t%y0";
28762 if (STACK_TOP_P (operands[0]))
28763 return "fld%z1\t%y1";
28764 return "fst\t%y0";
28766 else if (MEM_P (operands[0]))
28768 gcc_assert (REG_P (operands[1]));
28769 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28770 return "fstp%z0\t%y0";
28771 else
28773 /* There is no non-popping store to memory for XFmode.
28774 So if we need one, follow the store with a load. */
28775 if (GET_MODE (operands[0]) == XFmode)
28776 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28777 else
28778 return "fst%z0\t%y0";
28781 else
28782 gcc_unreachable ();
28785 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28786 FP status register is set. */
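/* FNSTSW places C2 in bit 2 of AH.  When SAHF is usable, that byte is
   copied into EFLAGS, where C2 lands in PF and is caught by the
   UNORDERED test; otherwise the 0x04 bit of the high byte is tested
   directly.  */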
28788 void
28789 ix86_emit_fp_unordered_jump (rtx label)
28791 rtx reg = gen_reg_rtx (HImode);
28792 rtx temp;
28794 emit_insn (gen_x86_fnstsw_1 (reg));
28796 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28798 emit_insn (gen_x86_sahf_1 (reg));
28800 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28801 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28803 else
28805 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28807 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28808 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28811 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28812 gen_rtx_LABEL_REF (VOIDmode, label),
28813 pc_rtx);
28814 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28816 emit_jump_insn (temp);
28817 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28820 /* Output code to perform a log1p XFmode calculation. */
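/* fyl2xp1 computes y * log2 (x + 1), but is only specified for
   |x| < 1 - sqrt(2)/2, which is the 0.29289321881345... constant
   tested below; larger inputs fall back to computing log (1 + x)
   via fyl2x on the explicitly formed sum.  */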
28822 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28824 rtx label1 = gen_label_rtx ();
28825 rtx label2 = gen_label_rtx ();
28827 rtx tmp = gen_reg_rtx (XFmode);
28828 rtx tmp2 = gen_reg_rtx (XFmode);
28830 emit_insn (gen_absxf2 (tmp, op1));
28831 emit_insn (gen_cmpxf (tmp,
28832 CONST_DOUBLE_FROM_REAL_VALUE (
28833 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28834 XFmode)));
28835 emit_jump_insn (gen_bge (label1));
28837 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28838 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28839 emit_jump (label2);
28841 emit_label (label1);
28842 emit_move_insn (tmp, CONST1_RTX (XFmode));
28843 emit_insn (gen_addxf3 (tmp, op1, tmp));
28844 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28845 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28847 emit_label (label2);
28850 /* Output code to perform a Newton-Raphson approximation of a single precision
28851 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28853 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28855 rtx x0, x1, e0, e1, two;
28857 x0 = gen_reg_rtx (mode);
28858 e0 = gen_reg_rtx (mode);
28859 e1 = gen_reg_rtx (mode);
28860 x1 = gen_reg_rtx (mode);
28862 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28864 if (VECTOR_MODE_P (mode))
28865 two = ix86_build_const_vector (SFmode, true, two);
28867 two = force_reg (mode, two);
28869 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
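/* This is a single Newton-Raphson step for f (x) = 1/x - b, i.e.
   x1 = x0 * (2 - b * x0).  The rcpss/rcpps estimate is accurate to
   about 12 bits and one step roughly doubles that, giving nearly
   full single-precision accuracy.  */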
28871 /* x0 = rcp(b) estimate */
28872 emit_insn (gen_rtx_SET (VOIDmode, x0,
28873 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28874 UNSPEC_RCP)));
28875 /* e0 = x0 * b */
28876 emit_insn (gen_rtx_SET (VOIDmode, e0,
28877 gen_rtx_MULT (mode, x0, b)));
28878 /* e1 = 2. - e0 */
28879 emit_insn (gen_rtx_SET (VOIDmode, e1,
28880 gen_rtx_MINUS (mode, two, e0)));
28881 /* x1 = x0 * e1 */
28882 emit_insn (gen_rtx_SET (VOIDmode, x1,
28883 gen_rtx_MULT (mode, x0, e1)));
28884 /* res = a * x1 */
28885 emit_insn (gen_rtx_SET (VOIDmode, res,
28886 gen_rtx_MULT (mode, a, x1)));
28889 /* Output code to perform a Newton-Raphson approximation of a
28890 single precision floating point [reciprocal] square root. */
28892 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28893 bool recip)
28895 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28896 REAL_VALUE_TYPE r;
28898 x0 = gen_reg_rtx (mode);
28899 e0 = gen_reg_rtx (mode);
28900 e1 = gen_reg_rtx (mode);
28901 e2 = gen_reg_rtx (mode);
28902 e3 = gen_reg_rtx (mode);
28904 real_from_integer (&r, VOIDmode, -3, -1, 0);
28905 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28907 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28908 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28910 if (VECTOR_MODE_P (mode))
28912 mthree = ix86_build_const_vector (SFmode, true, mthree);
28913 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28916 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28917 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
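/* Both forms are a single Newton-Raphson step for f (x) = 1/x**2 - a,
   i.e. x1 = x0 * (3 - a * x0**2) / 2, factored so that the common
   term e2 = a * x0**2 - 3 is multiplied by -0.5 * x0 (rsqrt) or by
   -0.5 * a * x0 == -0.5 * e0 (sqrt).  */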
28919 /* x0 = rsqrt(a) estimate */
28920 emit_insn (gen_rtx_SET (VOIDmode, x0,
28921 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28922 UNSPEC_RSQRT)));
28924 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
28925 if (!recip)
28927 rtx zero, mask;
28929 zero = gen_reg_rtx (mode);
28930 mask = gen_reg_rtx (mode);
28932 zero = force_reg (mode, CONST0_RTX (mode));
28933 emit_insn (gen_rtx_SET (VOIDmode, mask,
28934 gen_rtx_NE (mode, zero, a)));
28936 emit_insn (gen_rtx_SET (VOIDmode, x0,
28937 gen_rtx_AND (mode, x0, mask)));
28940 /* e0 = x0 * a */
28941 emit_insn (gen_rtx_SET (VOIDmode, e0,
28942 gen_rtx_MULT (mode, x0, a)));
28943 /* e1 = e0 * x0 */
28944 emit_insn (gen_rtx_SET (VOIDmode, e1,
28945 gen_rtx_MULT (mode, e0, x0)));
28947 /* e2 = e1 - 3. */
28948 mthree = force_reg (mode, mthree);
28949 emit_insn (gen_rtx_SET (VOIDmode, e2,
28950 gen_rtx_PLUS (mode, e1, mthree)));
28952 mhalf = force_reg (mode, mhalf);
28953 if (recip)
28954 /* e3 = -.5 * x0 */
28955 emit_insn (gen_rtx_SET (VOIDmode, e3,
28956 gen_rtx_MULT (mode, x0, mhalf)));
28957 else
28958 /* e3 = -.5 * e0 */
28959 emit_insn (gen_rtx_SET (VOIDmode, e3,
28960 gen_rtx_MULT (mode, e0, mhalf)));
28961 /* ret = e2 * e3 */
28962 emit_insn (gen_rtx_SET (VOIDmode, res,
28963 gen_rtx_MULT (mode, e2, e3)));
28966 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
28968 static void ATTRIBUTE_UNUSED
28969 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28970 tree decl)
28972 /* With Binutils 2.15, the "@unwind" marker must be specified on
28973 every occurrence of the ".eh_frame" section, not just the first
28974 one. */
28975 if (TARGET_64BIT
28976 && strcmp (name, ".eh_frame") == 0)
28978 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28979 flags & SECTION_WRITE ? "aw" : "a");
28980 return;
28982 default_elf_asm_named_section (name, flags, decl);
28985 /* Return the mangling of TYPE if it is an extended fundamental type. */
28987 static const char *
28988 ix86_mangle_type (const_tree type)
28990 type = TYPE_MAIN_VARIANT (type);
28992 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28993 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28994 return NULL;
28996 switch (TYPE_MODE (type))
28998 case TFmode:
28999 /* __float128 is "g". */
29000 return "g";
29001 case XFmode:
29002 /* "long double" or __float80 is "e". */
29003 return "e";
29004 default:
29005 return NULL;
29009 /* For 32-bit code we can save PIC register setup by using
29010 __stack_chk_fail_local hidden function instead of calling
29011 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
29012 register, so it is better to call __stack_chk_fail directly. */
29014 static tree
29015 ix86_stack_protect_fail (void)
29017 return TARGET_64BIT
29018 ? default_external_stack_protect_fail ()
29019 : default_hidden_stack_protect_fail ();
29022 /* Select a format to encode pointers in exception handling data. CODE
29023 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29024 true if the symbol may be affected by dynamic relocations.
29026 ??? All x86 object file formats are capable of representing this.
29027 After all, the relocation needed is the same as for the call insn.
29028 Whether or not a particular assembler allows us to enter such, I
29029 guess we'll have to see. */
29030 int
29031 asm_preferred_eh_data_format (int code, int global)
29033 if (flag_pic)
29035 int type = DW_EH_PE_sdata8;
29036 if (!TARGET_64BIT
29037 || ix86_cmodel == CM_SMALL_PIC
29038 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29039 type = DW_EH_PE_sdata4;
29040 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29042 if (ix86_cmodel == CM_SMALL
29043 || (ix86_cmodel == CM_MEDIUM && code))
29044 return DW_EH_PE_udata4;
29045 return DW_EH_PE_absptr;
29048 /* Expand copysign from SIGN to the positive value ABS_VALUE
29049 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29050 the sign-bit. */
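/* In other words, RESULT = ABS_VALUE | (SIGN & sign-bit-mask).  A
   caller-supplied MASK is an abs mask with the sign bit clear, so it
   is inverted before being applied to SIGN.  */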
29051 static void
29052 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29054 enum machine_mode mode = GET_MODE (sign);
29055 rtx sgn = gen_reg_rtx (mode);
29056 if (mask == NULL_RTX)
29058 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29059 if (!VECTOR_MODE_P (mode))
29061 /* We need to generate a scalar mode mask in this case. */
29062 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29063 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29064 mask = gen_reg_rtx (mode);
29065 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29068 else
29069 mask = gen_rtx_NOT (mode, mask);
29070 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29071 gen_rtx_AND (mode, mask, sign)));
29072 emit_insn (gen_rtx_SET (VOIDmode, result,
29073 gen_rtx_IOR (mode, abs_value, sgn)));
29076 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29077 mask for masking out the sign-bit is stored in *SMASK, if that is
29078 non-null. */
29079 static rtx
29080 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29082 enum machine_mode mode = GET_MODE (op0);
29083 rtx xa, mask;
29085 xa = gen_reg_rtx (mode);
29086 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29087 if (!VECTOR_MODE_P (mode))
29089 /* We need to generate a scalar mode mask in this case. */
29090 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29091 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29092 mask = gen_reg_rtx (mode);
29093 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29095 emit_insn (gen_rtx_SET (VOIDmode, xa,
29096 gen_rtx_AND (mode, op0, mask)));
29098 if (smask)
29099 *smask = mask;
29101 return xa;
29104 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29105 swapping the operands if SWAP_OPERANDS is true. The expanded
29106 code is a forward jump to a newly created label in case the
29107 comparison is true. The generated label rtx is returned. */
29108 static rtx
29109 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29110 bool swap_operands)
29112 rtx label, tmp;
29114 if (swap_operands)
29116 tmp = op0;
29117 op0 = op1;
29118 op1 = tmp;
29121 label = gen_label_rtx ();
29122 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29123 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29124 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29125 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29126 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29127 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29128 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29129 JUMP_LABEL (tmp) = label;
29131 return label;
29134 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29135 using comparison code CODE. Operands are swapped for the comparison if
29136 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29137 static rtx
29138 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29139 bool swap_operands)
29141 enum machine_mode mode = GET_MODE (op0);
29142 rtx mask = gen_reg_rtx (mode);
29144 if (swap_operands)
29146 rtx tmp = op0;
29147 op0 = op1;
29148 op1 = tmp;
29151 if (mode == DFmode)
29152 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29153 gen_rtx_fmt_ee (code, mode, op0, op1)));
29154 else
29155 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29156 gen_rtx_fmt_ee (code, mode, op0, op1)));
29158 return mask;
29161 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29162 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29163 static rtx
29164 ix86_gen_TWO52 (enum machine_mode mode)
29166 REAL_VALUE_TYPE TWO52r;
29167 rtx TWO52;
29169 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29170 TWO52 = const_double_from_real_value (TWO52r, mode);
29171 TWO52 = force_reg (mode, TWO52);
29173 return TWO52;
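/* The expanders below all rely on the same trick: for |x| < 2**52
   (2**23 for SFmode), computing (x + TWO52) - TWO52 in the default
   round-to-nearest mode pushes the fraction bits out of the
   significand and yields x rounded to an integer, while larger
   inputs, which are already integral, take the early exit.  */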
29176 /* Expand SSE sequence for computing lround from OP1 storing
29177 into OP0. */
29178 void
29179 ix86_expand_lround (rtx op0, rtx op1)
29181 /* C code for the stuff we're doing below:
29182 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29183 return (long)tmp;
29184 */
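/* nextafter (0.5, 0.0) is used instead of 0.5 so that the largest
   representable value below 0.5 does not round up: adding exactly 0.5
   to it would already round to 1.0 in the addition itself.  */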
29185 enum machine_mode mode = GET_MODE (op1);
29186 const struct real_format *fmt;
29187 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29188 rtx adj;
29190 /* load nextafter (0.5, 0.0) */
29191 fmt = REAL_MODE_FORMAT (mode);
29192 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29193 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29195 /* adj = copysign (0.5, op1) */
29196 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29197 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29199 /* adj = op1 + adj */
29200 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29202 /* op0 = (imode)adj */
29203 expand_fix (op0, adj, 0);
29206 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
29207 into OPERAND0. */
29208 void
29209 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29211 /* C code for the stuff we're doing below (for do_floor):
29212 xi = (long)op1;
29213 xi -= (double)xi > op1 ? 1 : 0;
29214 return xi;
29216 enum machine_mode fmode = GET_MODE (op1);
29217 enum machine_mode imode = GET_MODE (op0);
29218 rtx ireg, freg, label, tmp;
29220 /* reg = (long)op1 */
29221 ireg = gen_reg_rtx (imode);
29222 expand_fix (ireg, op1, 0);
29224 /* freg = (double)reg */
29225 freg = gen_reg_rtx (fmode);
29226 expand_float (freg, ireg, 0);
29228 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29229 label = ix86_expand_sse_compare_and_jump (UNLE,
29230 freg, op1, !do_floor);
29231 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29232 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29233 emit_move_insn (ireg, tmp);
29235 emit_label (label);
29236 LABEL_NUSES (label) = 1;
29238 emit_move_insn (op0, ireg);
29241 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29242 result in OPERAND0. */
29243 void
29244 ix86_expand_rint (rtx operand0, rtx operand1)
29246 /* C code for the stuff we're doing below:
29247 xa = fabs (operand1);
29248 if (!isless (xa, 2**52))
29249 return operand1;
29250 xa = xa + 2**52 - 2**52;
29251 return copysign (xa, operand1);
29253 enum machine_mode mode = GET_MODE (operand0);
29254 rtx res, xa, label, TWO52, mask;
29256 res = gen_reg_rtx (mode);
29257 emit_move_insn (res, operand1);
29259 /* xa = abs (operand1) */
29260 xa = ix86_expand_sse_fabs (res, &mask);
29262 /* if (!isless (xa, TWO52)) goto label; */
29263 TWO52 = ix86_gen_TWO52 (mode);
29264 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29266 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29267 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29269 ix86_sse_copysign_to_positive (res, xa, res, mask);
29271 emit_label (label);
29272 LABEL_NUSES (label) = 1;
29274 emit_move_insn (operand0, res);
29277 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29278 into OPERAND0. */
29279 void
29280 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29282 /* C code for the stuff we expand below.
29283 double xa = fabs (x), x2;
29284 if (!isless (xa, TWO52))
29285 return x;
29286 xa = xa + TWO52 - TWO52;
29287 x2 = copysign (xa, x);
29288 Compensate. Floor:
29289 if (x2 > x)
29290 x2 -= 1;
29291 Compensate. Ceil:
29292 if (x2 < x)
29293 x2 -= -1;
29294 return x2;
29296 enum machine_mode mode = GET_MODE (operand0);
29297 rtx xa, TWO52, tmp, label, one, res, mask;
29299 TWO52 = ix86_gen_TWO52 (mode);
29301 /* Temporary for holding the result, initialized to the input
29302 operand to ease control flow. */
29303 res = gen_reg_rtx (mode);
29304 emit_move_insn (res, operand1);
29306 /* xa = abs (operand1) */
29307 xa = ix86_expand_sse_fabs (res, &mask);
29309 /* if (!isless (xa, TWO52)) goto label; */
29310 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29312 /* xa = xa + TWO52 - TWO52; */
29313 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29314 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29316 /* xa = copysign (xa, operand1) */
29317 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29319 /* generate 1.0 or -1.0 */
29320 one = force_reg (mode,
29321 const_double_from_real_value (do_floor
29322 ? dconst1 : dconstm1, mode));
29324 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29325 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29326 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29327 gen_rtx_AND (mode, one, tmp)));
29328 /* We always need to subtract here to preserve signed zero. */
29329 tmp = expand_simple_binop (mode, MINUS,
29330 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29331 emit_move_insn (res, tmp);
29333 emit_label (label);
29334 LABEL_NUSES (label) = 1;
29336 emit_move_insn (operand0, res);
29339 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29340 into OPERAND0. */
29341 void
29342 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29344 /* C code for the stuff we expand below.
29345 double xa = fabs (x), x2;
29346 if (!isless (xa, TWO52))
29347 return x;
29348 x2 = (double)(long)x;
29349 Compensate. Floor:
29350 if (x2 > x)
29351 x2 -= 1;
29352 Compensate. Ceil:
29353 if (x2 < x)
29354 x2 += 1;
29355 if (HONOR_SIGNED_ZEROS (mode))
29356 return copysign (x2, x);
29357 return x2;
29359 enum machine_mode mode = GET_MODE (operand0);
29360 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29362 TWO52 = ix86_gen_TWO52 (mode);
29364 /* Temporary for holding the result, initialized to the input
29365 operand to ease control flow. */
29366 res = gen_reg_rtx (mode);
29367 emit_move_insn (res, operand1);
29369 /* xa = abs (operand1) */
29370 xa = ix86_expand_sse_fabs (res, &mask);
29372 /* if (!isless (xa, TWO52)) goto label; */
29373 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29375 /* xa = (double)(long)x */
29376 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29377 expand_fix (xi, res, 0);
29378 expand_float (xa, xi, 0);
29380 /* generate 1.0 */
29381 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29383 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29384 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29385 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29386 gen_rtx_AND (mode, one, tmp)));
29387 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29388 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29389 emit_move_insn (res, tmp);
29391 if (HONOR_SIGNED_ZEROS (mode))
29392 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29394 emit_label (label);
29395 LABEL_NUSES (label) = 1;
29397 emit_move_insn (operand0, res);
29400 /* Expand SSE sequence for computing round from OPERAND1 storing
29401 into OPERAND0. Sequence that works without relying on DImode truncation
29402 via cvttsd2siq that is only available on 64-bit targets. */
29403 void
29404 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29406 /* C code for the stuff we expand below.
29407 double xa = fabs (x), xa2, x2;
29408 if (!isless (xa, TWO52))
29409 return x;
29410 Using the absolute value and copying the sign back afterwards makes
29411 -0.0 -> -0.0 come out correct.
29412 xa2 = xa + TWO52 - TWO52;
29413 Compensate.
29414 dxa = xa2 - xa;
29415 if (dxa <= -0.5)
29416 xa2 += 1;
29417 else if (dxa > 0.5)
29418 xa2 -= 1;
29419 x2 = copysign (xa2, x);
29420 return x2;
29421 */
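/* E.g. for x == 2.5: xa2 == 2.0 (2.5 + 2**52 rounds to even), so
   dxa == -0.5 and the dxa <= -0.5 compensation bumps xa2 to 3.0,
   giving the expected round-half-away-from-zero result.  */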
29422 enum machine_mode mode = GET_MODE (operand0);
29423 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29425 TWO52 = ix86_gen_TWO52 (mode);
29427 /* Temporary for holding the result, initialized to the input
29428 operand to ease control flow. */
29429 res = gen_reg_rtx (mode);
29430 emit_move_insn (res, operand1);
29432 /* xa = abs (operand1) */
29433 xa = ix86_expand_sse_fabs (res, &mask);
29435 /* if (!isless (xa, TWO52)) goto label; */
29436 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29438 /* xa2 = xa + TWO52 - TWO52; */
29439 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29440 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29442 /* dxa = xa2 - xa; */
29443 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29445 /* generate 0.5, 1.0 and -0.5 */
29446 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29447 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29448 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29449 0, OPTAB_DIRECT);
29451 /* Compensate. */
29452 tmp = gen_reg_rtx (mode);
29453 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29454 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29455 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29456 gen_rtx_AND (mode, one, tmp)));
29457 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29458 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29459 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29460 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29461 gen_rtx_AND (mode, one, tmp)));
29462 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29464 /* res = copysign (xa2, operand1) */
29465 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29467 emit_label (label);
29468 LABEL_NUSES (label) = 1;
29470 emit_move_insn (operand0, res);
29473 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29474 into OPERAND0. */
29475 void
29476 ix86_expand_trunc (rtx operand0, rtx operand1)
29478 /* C code for SSE variant we expand below.
29479 double xa = fabs (x), x2;
29480 if (!isless (xa, TWO52))
29481 return x;
29482 x2 = (double)(long)x;
29483 if (HONOR_SIGNED_ZEROS (mode))
29484 return copysign (x2, x);
29485 return x2;
29487 enum machine_mode mode = GET_MODE (operand0);
29488 rtx xa, xi, TWO52, label, res, mask;
29490 TWO52 = ix86_gen_TWO52 (mode);
29492 /* Temporary for holding the result, initialized to the input
29493 operand to ease control flow. */
29494 res = gen_reg_rtx (mode);
29495 emit_move_insn (res, operand1);
29497 /* xa = abs (operand1) */
29498 xa = ix86_expand_sse_fabs (res, &mask);
29500 /* if (!isless (xa, TWO52)) goto label; */
29501 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29503 /* x = (double)(long)x */
29504 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29505 expand_fix (xi, res, 0);
29506 expand_float (res, xi, 0);
29508 if (HONOR_SIGNED_ZEROS (mode))
29509 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29511 emit_label (label);
29512 LABEL_NUSES (label) = 1;
29514 emit_move_insn (operand0, res);
29517 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29518 into OPERAND0. */
29519 void
29520 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29522 enum machine_mode mode = GET_MODE (operand0);
29523 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29525 /* C code for SSE variant we expand below.
29526 double xa = fabs (x), xa2, x2;
29527 if (!isless (xa, TWO52))
29528 return x;
29529 xa2 = xa + TWO52 - TWO52;
29530 Compensate:
29531 if (xa2 > xa)
29532 xa2 -= 1.0;
29533 x2 = copysign (xa2, x);
29534 return x2;
29537 TWO52 = ix86_gen_TWO52 (mode);
29539 /* Temporary for holding the result, initialized to the input
29540 operand to ease control flow. */
29541 res = gen_reg_rtx (mode);
29542 emit_move_insn (res, operand1);
29544 /* xa = abs (operand1) */
29545 xa = ix86_expand_sse_fabs (res, &smask);
29547 /* if (!isless (xa, TWO52)) goto label; */
29548 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29550 /* res = xa + TWO52 - TWO52; */
29551 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29552 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29553 emit_move_insn (res, tmp);
29555 /* generate 1.0 */
29556 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29558 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29559 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29560 emit_insn (gen_rtx_SET (VOIDmode, mask,
29561 gen_rtx_AND (mode, mask, one)));
29562 tmp = expand_simple_binop (mode, MINUS,
29563 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29564 emit_move_insn (res, tmp);
29566 /* res = copysign (res, operand1) */
29567 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29569 emit_label (label);
29570 LABEL_NUSES (label) = 1;
29572 emit_move_insn (operand0, res);
29575 /* Expand SSE sequence for computing round from OPERAND1 storing
29576 into OPERAND0. */
29577 void
29578 ix86_expand_round (rtx operand0, rtx operand1)
29580 /* C code for the stuff we're doing below:
29581 double xa = fabs (x);
29582 if (!isless (xa, TWO52))
29583 return x;
29584 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29585 return copysign (xa, x);
29587 enum machine_mode mode = GET_MODE (operand0);
29588 rtx res, TWO52, xa, label, xi, half, mask;
29589 const struct real_format *fmt;
29590 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29592 /* Temporary for holding the result, initialized to the input
29593 operand to ease control flow. */
29594 res = gen_reg_rtx (mode);
29595 emit_move_insn (res, operand1);
29597 TWO52 = ix86_gen_TWO52 (mode);
29598 xa = ix86_expand_sse_fabs (res, &mask);
29599 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29601 /* load nextafter (0.5, 0.0) */
29602 fmt = REAL_MODE_FORMAT (mode);
29603 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29604 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29606 /* xa = xa + 0.5 */
29607 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29608 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29610 /* xa = (double)(int64_t)xa */
29611 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29612 expand_fix (xi, xa, 0);
29613 expand_float (xa, xi, 0);
29615 /* res = copysign (xa, operand1) */
29616 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29618 emit_label (label);
29619 LABEL_NUSES (label) = 1;
29621 emit_move_insn (operand0, res);
29625 /* Check whether an SSE5 instruction is valid or not.
29626 OPERANDS is the array of operands.
29627 NUM is the number of operands.
29628 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29629 NUM_MEMORY is the maximum number of memory operands to accept.
29630 When COMMUTATIVE is set, operands 1 and 2 can be swapped. */
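/* The checks below work from MEM_MASK, which gets bit I set when
   operands[I] is a memory operand (operand 0 being the destination),
   and MEM_COUNT, the total number of memory operands; each format is
   then matched against the bit patterns its encodings permit.  */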
29632 bool
29633 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29634 bool uses_oc0, int num_memory, bool commutative)
29636 int mem_mask;
29637 int mem_count;
29638 int i;
29640 /* Count the number of memory arguments */
29641 mem_mask = 0;
29642 mem_count = 0;
29643 for (i = 0; i < num; i++)
29645 enum machine_mode mode = GET_MODE (operands[i]);
29646 if (register_operand (operands[i], mode))
29649 else if (memory_operand (operands[i], mode))
29651 mem_mask |= (1 << i);
29652 mem_count++;
29655 else
29657 rtx pattern = PATTERN (insn);
29659 /* allow 0 for pcmov */
29660 if (GET_CODE (pattern) != SET
29661 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29662 || i < 2
29663 || operands[i] != CONST0_RTX (mode))
29664 return false;
29668 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29669 a memory operation. */
29670 if (num_memory < 0)
29672 num_memory = -num_memory;
29673 if ((mem_mask & (1 << (num-1))) != 0)
29675 mem_mask &= ~(1 << (num-1));
29676 mem_count--;
29680 /* If there were no memory operations, allow the insn */
29681 if (mem_mask == 0)
29682 return true;
29684 /* Do not allow the destination register to be a memory operand. */
29685 else if (mem_mask & (1 << 0))
29686 return false;
29688 /* If there are too many memory operations, disallow the instruction. While
29689 the hardware only allows 1 memory reference, before register allocation we
29690 sometimes allow two memory operations for some insns, in order to allow
29691 code like the following to be optimized:
29693 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29695 or similar cases that are vectorized into using the fmaddss
29696 instruction. */
29697 else if (mem_count > num_memory)
29698 return false;
29700 /* Don't allow more than one memory operation if not optimizing. */
29701 else if (mem_count > 1 && !optimize)
29702 return false;
29704 else if (num == 4 && mem_count == 1)
29706 /* formats (destination is the first argument), example fmaddss:
29707 xmm1, xmm1, xmm2, xmm3/mem
29708 xmm1, xmm1, xmm2/mem, xmm3
29709 xmm1, xmm2, xmm3/mem, xmm1
29710 xmm1, xmm2/mem, xmm3, xmm1 */
29711 if (uses_oc0)
29712 return ((mem_mask == (1 << 1))
29713 || (mem_mask == (1 << 2))
29714 || (mem_mask == (1 << 3)));
29716 /* format, example pmacsdd:
29717 xmm1, xmm2, xmm3/mem, xmm1 */
29718 if (commutative)
29719 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29720 else
29721 return (mem_mask == (1 << 2));
29724 else if (num == 4 && num_memory == 2)
29726 /* If there are two memory operations, we can load one of the memory ops
29727 into the destination register. This is for optimizing the
29728 multiply/add ops, for which the combiner has optimized both the multiply
29729 and the add insns to have a memory operand. We have to be careful
29730 that the destination doesn't overlap with the inputs. */
29731 rtx op0 = operands[0];
29733 if (reg_mentioned_p (op0, operands[1])
29734 || reg_mentioned_p (op0, operands[2])
29735 || reg_mentioned_p (op0, operands[3]))
29736 return false;
29738 /* formats (destination is the first argument), example fmaddss:
29739 xmm1, xmm1, xmm2, xmm3/mem
29740 xmm1, xmm1, xmm2/mem, xmm3
29741 xmm1, xmm2, xmm3/mem, xmm1
29742 xmm1, xmm2/mem, xmm3, xmm1
29744 For the oc0 case, we will load either operands[1] or operands[3] into
29745 operands[0], so any combination of 2 memory operands is ok. */
29746 if (uses_oc0)
29747 return true;
29749 /* format, example pmacsdd:
29750 xmm1, xmm2, xmm3/mem, xmm1
29752 For the integer multiply/add instructions be more restrictive and
29753 require operands[2] and operands[3] to be the memory operands. */
29754 if (commutative)
29755 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29756 else
29757 return (mem_mask == ((1 << 2) | (1 << 3)));
29760 else if (num == 3 && num_memory == 1)
29762 /* formats, example protb:
29763 xmm1, xmm2, xmm3/mem
29764 xmm1, xmm2/mem, xmm3 */
29765 if (uses_oc0)
29766 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29768 /* format, example comeq:
29769 xmm1, xmm2, xmm3/mem */
29770 else
29771 return (mem_mask == (1 << 2));
29774 else
29775 gcc_unreachable ();
29777 return false;
29781 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29782 hardware will allow by using the destination register to load one of the
29783 memory operations. Presently this is used by the multiply/add routines to
29784 allow 2 memory references. */
29786 void
29787 ix86_expand_sse5_multiple_memory (rtx operands[],
29788 int num,
29789 enum machine_mode mode)
29791 rtx op0 = operands[0];
29792 if (num != 4
29793 || memory_operand (op0, mode)
29794 || reg_mentioned_p (op0, operands[1])
29795 || reg_mentioned_p (op0, operands[2])
29796 || reg_mentioned_p (op0, operands[3]))
29797 gcc_unreachable ();
29799 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29800 the destination register. */
29801 if (memory_operand (operands[1], mode))
29803 emit_move_insn (op0, operands[1]);
29804 operands[1] = op0;
29806 else if (memory_operand (operands[3], mode))
29808 emit_move_insn (op0, operands[3]);
29809 operands[3] = op0;
29811 else
29812 gcc_unreachable ();
29814 return;
29818 /* Table of valid machine attributes. */
29819 static const struct attribute_spec ix86_attribute_table[] =
29821 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29822 /* Stdcall attribute says callee is responsible for popping arguments
29823 if they are not variable. */
29824 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29825 /* Fastcall attribute says callee is responsible for popping arguments
29826 if they are not variable. */
29827 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29828 /* Cdecl attribute says the callee is a normal C declaration */
29829 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29830 /* Regparm attribute specifies how many integer arguments are to be
29831 passed in registers. */
29832 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29833 /* Sseregparm attribute says we are using x86_64 calling conventions
29834 for FP arguments. */
29835 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29836 /* force_align_arg_pointer says this function realigns the stack at entry. */
29837 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29838 false, true, true, ix86_handle_cconv_attribute },
29839 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29840 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29841 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29842 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29843 #endif
29844 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29845 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29846 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29847 SUBTARGET_ATTRIBUTE_TABLE,
29848 #endif
29849 /* ms_abi and sysv_abi calling convention function attributes. */
29850 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29851 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29852 /* End element. */
29853 { NULL, 0, 0, false, false, false, NULL }
29856 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29857 static int
29858 x86_builtin_vectorization_cost (bool runtime_test)
29860 /* If the branch of the runtime test is taken - i.e., the vectorized
29861 version is skipped - this incurs a misprediction cost (because the
29862 vectorized version is expected to be the fall-through). So we subtract
29863 the latency of a mispredicted branch from the costs that are incurred
29864 when the vectorized version is executed.
29866 TODO: The values in the individual target tables have to be tuned or new
29867 fields may be needed. E.g., on K8, the default branch path is the
29868 not-taken path. If the taken path is predicted correctly, the minimum
29869 penalty of going down the taken-path is 1 cycle. If the taken-path is
29870 not predicted correctly, then the minimum penalty is 10 cycles. */
29872 if (runtime_test)
29874 return (-(ix86_cost->cond_taken_branch_cost));
29876 else
29877 return 0;
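/* Worked example (added commentary, not in the original source): with a
   hypothetical cost table where cond_taken_branch_cost is
   COSTS_N_INSNS (3), the hook returns -COSTS_N_INSNS (3); the vectorizer
   thus credits one taken-branch misprediction penalty against the runtime
   test's overhead when weighing the vectorized version's profitability.  */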
29880 /* Return the calling-ABI-specific va_list type node for FNDECL;
29881 on 32-bit targets this is always the common va_list type. */
29883 tree
29884 ix86_fn_abi_va_list (tree fndecl)
29885 {
29886 if (!TARGET_64BIT)
29887 return va_list_type_node;
29888 gcc_assert (fndecl != NULL_TREE);
29890 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29891 return ms_va_list_type_node;
29892 else
29893 return sysv_va_list_type_node;
29894 }
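/* Illustrative sketch (added commentary, not in the original source): on
   x86_64 the result tracks the function's ABI attribute, e.g.

       void f_ms (int n, ...) __attribute__ ((ms_abi));
       void f_sysv (int n, ...) __attribute__ ((sysv_abi));

   ix86_fn_abi_va_list yields ms_va_list_type_node (a plain character
   pointer) for f_ms and sysv_va_list_type_node (the one-element array of
   __va_list_tag records) for f_sysv.  The function names are
   hypothetical.  */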
29896 /* Returns the canonical va_list type specified by TYPE. If there
29897 is no valid TYPE provided, it returns NULL_TREE. */
29899 tree
29900 ix86_canonical_va_list_type (tree type)
29901 {
29902 tree wtype, htype;
29904 /* Resolve references and pointers to va_list type. */
29905 if (INDIRECT_REF_P (type))
29906 type = TREE_TYPE (type);
29907 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
29908 type = TREE_TYPE (type);
29910 if (TARGET_64BIT)
29911 {
29912 wtype = va_list_type_node;
29913 gcc_assert (wtype != NULL_TREE);
29914 htype = type;
29915 if (TREE_CODE (wtype) == ARRAY_TYPE)
29916 {
29917 /* If va_list is an array type, the argument may have decayed
29918 to a pointer type, e.g. by being passed to another function.
29919 In that case, unwrap both types so that we can compare the
29920 underlying records. */
29921 if (TREE_CODE (htype) == ARRAY_TYPE
29922 || POINTER_TYPE_P (htype))
29923 {
29924 wtype = TREE_TYPE (wtype);
29925 htype = TREE_TYPE (htype);
29926 }
29927 }
29928 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29929 return va_list_type_node;
29930 wtype = sysv_va_list_type_node;
29931 gcc_assert (wtype != NULL_TREE);
29932 htype = type;
29933 if (TREE_CODE (wtype) == ARRAY_TYPE)
29934 {
29935 /* If va_list is an array type, the argument may have decayed
29936 to a pointer type, e.g. by being passed to another function.
29937 In that case, unwrap both types so that we can compare the
29938 underlying records. */
29939 if (TREE_CODE (htype) == ARRAY_TYPE
29940 || POINTER_TYPE_P (htype))
29941 {
29942 wtype = TREE_TYPE (wtype);
29943 htype = TREE_TYPE (htype);
29944 }
29945 }
29946 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29947 return sysv_va_list_type_node;
29948 wtype = ms_va_list_type_node;
29949 gcc_assert (wtype != NULL_TREE);
29950 htype = type;
29951 if (TREE_CODE (wtype) == ARRAY_TYPE)
29952 {
29953 /* If va_list is an array type, the argument may have decayed
29954 to a pointer type, e.g. by being passed to another function.
29955 In that case, unwrap both types so that we can compare the
29956 underlying records. */
29957 if (TREE_CODE (htype) == ARRAY_TYPE
29958 || POINTER_TYPE_P (htype))
29959 {
29960 wtype = TREE_TYPE (wtype);
29961 htype = TREE_TYPE (htype);
29962 }
29963 }
29964 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29965 return ms_va_list_type_node;
29966 return NULL_TREE;
29967 }
29968 return std_canonical_va_list_type (type);
29969 }
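/* Illustrative sketch (added commentary, not in the original source) of
   the decay case handled above: the SYSV va_list is an array type, so a
   va_list forwarded to a helper, e.g.

       void vlog (const char *fmt, __builtin_sysv_va_list ap);   (hypothetical)

   arrives as a pointer to __va_list_tag.  Unwrapping both the wanted type
   (wtype) and the handed-in type (htype) with TREE_TYPE lets the
   TYPE_MAIN_VARIANT comparison match the underlying record whether the
   argument kept its array form or decayed to a pointer.  */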
29971 /* Iterate through the target-specific builtin types for va_list.
29972 IDX denotes the iterator, *PTREE is set to the result type of
29973 the va_list builtin, and *PNAME to its internal name.
29974 Returns zero if there is no element for this index, otherwise
29975 IDX should be increased upon the next call.
29976 Note that the base builtin's name, __builtin_va_list, is not
29977 iterated here. Used from c_common_nodes_and_builtins. */
29979 int
29980 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
29981 {
29982 if (!TARGET_64BIT)
29983 return 0;
29984 switch (idx) {
29985 case 0:
29986 *ptree = ms_va_list_type_node;
29987 *pname = "__builtin_ms_va_list";
29988 break;
29989 case 1:
29990 *ptree = sysv_va_list_type_node;
29991 *pname = "__builtin_sysv_va_list";
29992 break;
29993 default:
29994 return 0;
29995 }
29996 return 1;
29997 }
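/* Illustrative sketch (added commentary, not in the original source): the
   names registered above are usable directly in 64-bit user code, e.g.

       __attribute__ ((ms_abi)) void
       ms_callback (int n, ...)
       {
         __builtin_ms_va_list ap;
         __builtin_ms_va_start (ap, n);
         int v = __builtin_va_arg (ap, int);
         __builtin_ms_va_end (ap);
         (void) v;
       }

   The __builtin_ms_va_start/__builtin_ms_va_end spellings are the MS-ABI
   companions GCC provides alongside the type; treat the exact companion
   set as an assumption for this tree.  */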
29999 /* Initialize the GCC target structure. */
30000 #undef TARGET_RETURN_IN_MEMORY
30001 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30003 #undef TARGET_ATTRIBUTE_TABLE
30004 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30005 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30006 # undef TARGET_MERGE_DECL_ATTRIBUTES
30007 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30008 #endif
30010 #undef TARGET_COMP_TYPE_ATTRIBUTES
30011 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30013 #undef TARGET_INIT_BUILTINS
30014 #define TARGET_INIT_BUILTINS ix86_init_builtins
30015 #undef TARGET_EXPAND_BUILTIN
30016 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30018 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30019 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30020 ix86_builtin_vectorized_function
30022 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30023 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30025 #undef TARGET_BUILTIN_RECIPROCAL
30026 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30028 #undef TARGET_ASM_FUNCTION_EPILOGUE
30029 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30031 #undef TARGET_ENCODE_SECTION_INFO
30032 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30033 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30034 #else
30035 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30036 #endif
30038 #undef TARGET_ASM_OPEN_PAREN
30039 #define TARGET_ASM_OPEN_PAREN ""
30040 #undef TARGET_ASM_CLOSE_PAREN
30041 #define TARGET_ASM_CLOSE_PAREN ""
30043 #undef TARGET_ASM_ALIGNED_HI_OP
30044 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30045 #undef TARGET_ASM_ALIGNED_SI_OP
30046 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30047 #ifdef ASM_QUAD
30048 #undef TARGET_ASM_ALIGNED_DI_OP
30049 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30050 #endif
30052 #undef TARGET_ASM_UNALIGNED_HI_OP
30053 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30054 #undef TARGET_ASM_UNALIGNED_SI_OP
30055 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30056 #undef TARGET_ASM_UNALIGNED_DI_OP
30057 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30059 #undef TARGET_SCHED_ADJUST_COST
30060 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30061 #undef TARGET_SCHED_ISSUE_RATE
30062 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30063 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30064 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30065 ia32_multipass_dfa_lookahead
30067 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30068 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30070 #ifdef HAVE_AS_TLS
30071 #undef TARGET_HAVE_TLS
30072 #define TARGET_HAVE_TLS true
30073 #endif
30074 #undef TARGET_CANNOT_FORCE_CONST_MEM
30075 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30076 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30077 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30079 #undef TARGET_DELEGITIMIZE_ADDRESS
30080 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30082 #undef TARGET_MS_BITFIELD_LAYOUT_P
30083 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30085 #if TARGET_MACHO
30086 #undef TARGET_BINDS_LOCAL_P
30087 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30088 #endif
30089 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30090 #undef TARGET_BINDS_LOCAL_P
30091 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30092 #endif
30094 #undef TARGET_ASM_OUTPUT_MI_THUNK
30095 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30096 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30097 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30099 #undef TARGET_ASM_FILE_START
30100 #define TARGET_ASM_FILE_START x86_file_start
30102 #undef TARGET_DEFAULT_TARGET_FLAGS
30103 #define TARGET_DEFAULT_TARGET_FLAGS \
30104 (TARGET_DEFAULT \
30105 | TARGET_SUBTARGET_DEFAULT \
30106 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30108 #undef TARGET_HANDLE_OPTION
30109 #define TARGET_HANDLE_OPTION ix86_handle_option
30111 #undef TARGET_RTX_COSTS
30112 #define TARGET_RTX_COSTS ix86_rtx_costs
30113 #undef TARGET_ADDRESS_COST
30114 #define TARGET_ADDRESS_COST ix86_address_cost
30116 #undef TARGET_FIXED_CONDITION_CODE_REGS
30117 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30118 #undef TARGET_CC_MODES_COMPATIBLE
30119 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30121 #undef TARGET_MACHINE_DEPENDENT_REORG
30122 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30124 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30125 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30127 #undef TARGET_BUILD_BUILTIN_VA_LIST
30128 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30130 #undef TARGET_FN_ABI_VA_LIST
30131 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30133 #undef TARGET_CANONICAL_VA_LIST_TYPE
30134 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30136 #undef TARGET_EXPAND_BUILTIN_VA_START
30137 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30139 #undef TARGET_MD_ASM_CLOBBERS
30140 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30142 #undef TARGET_PROMOTE_PROTOTYPES
30143 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30144 #undef TARGET_STRUCT_VALUE_RTX
30145 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30146 #undef TARGET_SETUP_INCOMING_VARARGS
30147 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30148 #undef TARGET_MUST_PASS_IN_STACK
30149 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30150 #undef TARGET_PASS_BY_REFERENCE
30151 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30152 #undef TARGET_INTERNAL_ARG_POINTER
30153 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30154 #undef TARGET_UPDATE_STACK_BOUNDARY
30155 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30156 #undef TARGET_GET_DRAP_RTX
30157 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30158 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
30159 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
30160 #undef TARGET_STRICT_ARGUMENT_NAMING
30161 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30163 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30164 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30166 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30167 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30169 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30170 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30172 #undef TARGET_C_MODE_FOR_SUFFIX
30173 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30175 #ifdef HAVE_AS_TLS
30176 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30177 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30178 #endif
30180 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30181 #undef TARGET_INSERT_ATTRIBUTES
30182 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30183 #endif
30185 #undef TARGET_MANGLE_TYPE
30186 #define TARGET_MANGLE_TYPE ix86_mangle_type
30188 #undef TARGET_STACK_PROTECT_FAIL
30189 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30191 #undef TARGET_FUNCTION_VALUE
30192 #define TARGET_FUNCTION_VALUE ix86_function_value
30194 #undef TARGET_SECONDARY_RELOAD
30195 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30197 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30198 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30200 #undef TARGET_SET_CURRENT_FUNCTION
30201 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30203 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30204 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30206 #undef TARGET_OPTION_SAVE
30207 #define TARGET_OPTION_SAVE ix86_function_specific_save
30209 #undef TARGET_OPTION_RESTORE
30210 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30212 #undef TARGET_OPTION_PRINT
30213 #define TARGET_OPTION_PRINT ix86_function_specific_print
30215 #undef TARGET_OPTION_CAN_INLINE_P
30216 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30218 #undef TARGET_EXPAND_TO_RTL_HOOK
30219 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30221 struct gcc_target targetm = TARGET_INITIALIZER;
30223 #include "gt-i386.h"