1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "c-common.h"
39 #include "except.h"
40 #include "function.h"
41 #include "recog.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "basic-block.h"
46 #include "ggc.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "tree-gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
62 #endif
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
70 : 4)
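/* Illustrative sketch, not part of the original file: MODE_INDEX collapses
   QImode/HImode/SImode/DImode (and everything else) onto 0..4, which is how
   the five-element multiply and divide cost arrays in the tables below are
   indexed.  The table and helper here are hypothetical stand-ins; the real
   consumers are the rtx cost computations later in this file.  */

static const int example_divide_cost[5] = { 23, 23, 23, 23, 23 };

static int
example_divide_cost_for_mode (enum machine_mode mode)
{
  /* QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4.  */
  return example_divide_cost[MODE_INDEX (mode)];
}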
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes, so COSTS_N_BYTES (2) == COSTS_N_INSNS (1) and the two cost scales agree on the cost of an add. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
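/* Illustrative sketch, not part of the original file: each memcpy/memset
   entry in the cost tables below is a list of {maximum block size, algorithm}
   pairs terminated by a -1 bound, preceded by the algorithm to use when the
   block size is unknown at compile time (DUMMY_STRINGOP_ALGS simply says
   "always libcall").  The simplified type and helper below are hypothetical
   stand-ins for the real declarations in i386.h; they only show how such a
   table is scanned.  For instance, with pentium_cost's memcpy entry
   ({256, rep_prefix_4_byte}, {-1, libcall}) a known 100-byte copy resolves
   to rep_prefix_4_byte and a 1 MB copy to libcall.  */

struct example_stringop_entry
{
  int max;                      /* Upper size bound in bytes, -1 = unbounded.  */
  enum stringop_alg alg;        /* Algorithm used up to that bound.  */
};

static enum stringop_alg
example_pick_stringop_alg (const struct example_stringop_entry *table, int size)
{
  int i;

  /* Take the first bound that covers SIZE; the terminating -1 entry
     catches everything larger.  */
  for (i = 0; table[i].max != -1 && size > table[i].max; i++)
    ;
  return table[i].alg;
}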
78 static const
79 struct processor_costs size_cost = { /* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
97 0, /* "large" insn */
98 2, /* MOVE_RATIO */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 2, /* Branch cost */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
146 };
148 /* Processor costs (relative to an add) */
149 static const
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
169 3, /* MOVE_RATIO */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
195 1, /* Branch cost */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
217 };
219 static const
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
239 3, /* MOVE_RATIO */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
267 1, /* Branch cost */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
289 };
291 static const
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
311 6, /* MOVE_RATIO */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
337 2, /* Branch cost */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
359 };
361 static const
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
381 6, /* MOVE_RATIO */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we ensure
415 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
416 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
417 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
418 */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
436 };
438 static const
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
458 4, /* MOVE_RATIO */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
485 1, /* Branch cost */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
507 };
509 static const
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
529 4, /* MOVE_RATIO */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
558 1, /* Branch cost */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
580 };
582 static const
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
602 9, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
628 5, /* Branch cost */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
653 };
655 static const
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
675 9, /* MOVE_RATIO */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
704 time). */
705 100, /* number of parallel prefetches */
706 3, /* Branch cost */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
714 blocks it is better to use a loop. For large blocks, a libcall can do
715 nontemporal accesses and beat inline code considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
732 };
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
753 9, /* MOVE_RATIO */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
775 /* On K8:
776      MOVD reg64, xmmreg    Double    FSTORE 4
777      MOVD reg32, xmmreg    Double    FSTORE 4
778    On AMDFAM10:
779      MOVD reg64, xmmreg    Double    FADD 3
780                                      1/1  1/1
781      MOVD reg32, xmmreg    Double    FADD 3
782                                      1/1  1/1 */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
790 time). */
791 100, /* number of parallel prefetches */
792 2, /* Branch cost */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
801 very small blocks it is better to use a loop. For large blocks, a libcall can
802 do nontemporal accesses and beat inline code considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
819 };
821 static const
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
841 6, /* MOVE_RATIO */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
867 2, /* Branch cost */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
877 {-1, libcall}}},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
890 };
892 static const
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
912 17, /* MOVE_RATIO */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
938 1, /* Branch cost */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
949 {-1, libcall}}},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
963 };
965 static const
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
985 16, /* MOVE_RATIO */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1035 };
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1038 static const
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration, lea is 2 cycles or more. With
1042 this cost, however, our current implementation of synth_mult results in
1043 the use of unnecessary temporary registers, causing regressions on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
1089 is increased to the perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1112 };
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1115 static const
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1183 };
1185 const struct processor_costs *ix86_cost = &pentium_cost;
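/* Illustrative sketch, not part of the original file: the optimizers read
   all of the tables above through the ix86_cost pointer, so retuning for a
   different -mtune target is a single pointer assignment during option
   processing.  The helper below is hypothetical and the field names (add,
   lea, shift_const) are assumed from struct processor_costs in i386.h.  */

static int
example_lea_is_cheap_p (void)
{
  /* Prefer lea when the active tuning says one lea costs no more than
     an add plus a constant shift.  */
  return ix86_cost->lea <= ix86_cost->add + ix86_cost->shift_const;
}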
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be a common subset of the supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
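/* Illustrative sketch, not part of the original file: each entry of the
   tuning table that follows is a mask built from the m_* bits above, and a
   feature is active when the bit of the processor selected by -mtune is set
   in it.  The helper and its arguments are hypothetical; the real code
   tests these masks against the equivalent bit derived from ix86_tune.  */

static int
example_tune_feature_enabled_p (unsigned int feature_mask,
                                unsigned int tune_processor)
{
  /* TUNE_PROCESSOR is one of the PROCESSOR_* enumerators; the m_* macros
     above are just (1 << PROCESSOR_*).  For example, X86_TUNE_USE_BIT_TEST
     below is only m_386, so it tests true only when tuning for the 386.  */
  return (feature_mask & (1U << tune_processor)) != 0;
}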
1212 /* Feature tests against the various tunings. */
1213 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling it for Generic64 seems like a good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro based chips. */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1220 /* X86_TUNE_PUSH_MEMORY */
1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1225 m_486 | m_PENT,
1227 /* X86_TUNE_USE_BIT_TEST */
1228 m_386,
1230 /* X86_TUNE_UNROLL_STRLEN */
1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation results. But after P4 was made, no performance benefit
1238 was observed with branch hints. They also increase the code size.
1239 As a result, icc never generates branch hints. */
1240 0,
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1243 ~m_386,
1245 /* X86_TUNE_USE_SAHF */
1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1247 | m_NOCONA | m_CORE2 | m_GENERIC,
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1250 partial dependencies. */
1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on the Generic32 compilation setting as well. However,
1256 in the current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and conflicts with the partial register
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1262 m_PPRO,
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1270 /* X86_TUNE_USE_SIMODE_FIOP */
1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1273 /* X86_TUNE_USE_MOV0 */
1274 m_K6,
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1280 m_PENT4,
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1283 m_PPRO,
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1286 ~m_PENT,
1288 /* X86_TUNE_READ_MODIFY */
1289 ~(m_PENT | m_PPRO),
1291 /* X86_TUNE_PROMOTE_QIMODE */
1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1293 | m_GENERIC /* | m_PENT4 ? */,
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
1301 /* X86_TUNE_QIMODE_MATH */
1302 ~0,
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1308 ~m_PPRO,
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1311 0,
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1314 m_PPRO,
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319 /* X86_TUNE_ADD_ESP_8 */
1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_4 */
1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_8 */
1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1333 | m_GENERIC | m_GEODE),
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here between PPro/Pentium4 based chips that treat 128bit
1340 SSE registers as single units and K8 based chips that divide SSE
1341 registers into two 64bit halves. This knob promotes all store destinations
1342 to be 128bit to allow register renaming on 128bit SSE units, but usually
1343 results in one extra micro-op on 64bit SSE units. Experimental results
1344 show that disabling this option on P4 brings over a 20% SPECfp regression,
1345 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1346 masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1350 m_AMDFAM10,
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just the lower part of scalar values in the proper format, leaving the
1355 upper part undefined. */
1356 m_ATHLON_K8,
1358 /* X86_TUNE_SSE_TYPELESS_STORES */
1359 m_AMD_MULTIPLE,
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_SHIFT1 */
1374 ~m_486,
1376 /* X86_TUNE_USE_FFREEP */
1377 m_AMD_MULTIPLE,
1379 /* X86_TUNE_INTER_UNIT_MOVES */
1380 ~(m_AMD_MULTIPLE | m_GENERIC),
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1383 ~(m_AMDFAM10),
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in the 16 byte window. */
1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_SCHEDULE */
1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_USE_BT */
1393 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1395 /* X86_TUNE_USE_INCDEC */
1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1398 /* X86_TUNE_PAD_RETURNS */
1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1405 ~m_K8,
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1408 m_K8 | m_GENERIC64,
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1412 ~(m_386 | m_486),
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415 vector path on AMD machines. */
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1419 machines. */
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1423 than a MOV. */
1424 m_PENT,
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427 but one byte longer. */
1428 m_PENT,
1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1431 operand that cannot be represented using a modRM byte. The XOR
1432 replacement is long decoded, so this split helps here as well. */
1433 m_K6,
1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1436 from integer to FP. */
1437 m_AMDFAM10,
1439 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1440 with a subsequent conditional jump instruction into a single
1441 compare-and-branch uop. */
1442 m_CORE2,
1443 };
1445 /* Feature tests against the various architecture variations. */
1446 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1447 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1448 ~(m_386 | m_486 | m_PENT | m_K6),
1450 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1451 ~m_386,
1453 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1454 ~(m_386 | m_486),
1456 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1457 ~m_386,
1459 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1460 ~m_386,
1463 static const unsigned int x86_accumulate_outgoing_args
1464 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1466 static const unsigned int x86_arch_always_fancy_math_387
1467 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1468 | m_NOCONA | m_CORE2 | m_GENERIC;
1470 static enum stringop_alg stringop_alg = no_stringop;
1472 /* In case the average insn count for a single function invocation is
1473 lower than this constant, emit fast (but longer) prologue and
1474 epilogue code. */
1475 #define FAST_PROLOGUE_INSN_COUNT 20
1477 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1478 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1479 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1480 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1482 /* Array of the smallest class containing reg number REGNO, indexed by
1483 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1485 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1487 /* ax, dx, cx, bx */
1488 AREG, DREG, CREG, BREG,
1489 /* si, di, bp, sp */
1490 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1491 /* FP registers */
1492 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1493 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1494 /* arg pointer */
1495 NON_Q_REGS,
1496 /* flags, fpsr, fpcr, frame */
1497 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1498 /* SSE registers */
1499 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1500 SSE_REGS, SSE_REGS,
1501 /* MMX registers */
1502 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1503 MMX_REGS, MMX_REGS,
1504 /* REX registers */
1505 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1506 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1507 /* SSE REX registers */
1508 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1509 SSE_REGS, SSE_REGS,
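/* Illustrative use (not part of the original source): REGNO_REG_CLASS in
   i386.h simply indexes the table above, so for example

     REGNO_REG_CLASS (0) == AREG        (%eax)
     REGNO_REG_CLASS (7) == NON_Q_REGS  (%esp)

   with no further computation.  */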
1512 /* The "default" register map used in 32bit mode. */
1514 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1516 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1517 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1519 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1520 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1521 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1522 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1525 static int const x86_64_int_parameter_registers[6] =
1527 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1528 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1531 static int const x86_64_ms_abi_int_parameter_registers[4] =
1533 2 /*RCX*/, 1 /*RDX*/,
1534 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1537 static int const x86_64_int_return_registers[4] =
1539 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1542 /* The "default" register map used in 64bit mode. */
1543 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1545 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1546 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1547 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1548 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1549 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1550 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1551 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1554 /* Define the register numbers to be used in Dwarf debugging information.
1555 The SVR4 reference port C compiler uses the following register numbers
1556 in its Dwarf output code:
1557 0 for %eax (gcc regno = 0)
1558 1 for %ecx (gcc regno = 2)
1559 2 for %edx (gcc regno = 1)
1560 3 for %ebx (gcc regno = 3)
1561 4 for %esp (gcc regno = 7)
1562 5 for %ebp (gcc regno = 6)
1563 6 for %esi (gcc regno = 4)
1564 7 for %edi (gcc regno = 5)
1565 The following three DWARF register numbers are never generated by
1566 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1567 believes these numbers have these meanings.
1568 8 for %eip (no gcc equivalent)
1569 9 for %eflags (gcc regno = 17)
1570 10 for %trapno (no gcc equivalent)
1571 It is not at all clear how we should number the FP stack registers
1572 for the x86 architecture. If the version of SDB on x86/svr4 were
1573 a bit less brain dead with respect to floating-point then we would
1574 have a precedent to follow with respect to DWARF register numbers
1575 for x86 FP registers, but the SDB on x86/svr4 is so completely
1576 broken with respect to FP registers that it is hardly worth thinking
1577 of it as something to strive for compatibility with.
1578 The version of x86/svr4 SDB I have at the moment does (partially)
1579 seem to believe that DWARF register number 11 is associated with
1580 the x86 register %st(0), but that's about all. Higher DWARF
1581 register numbers don't seem to be associated with anything in
1582 particular, and even for DWARF regno 11, SDB only seems to under-
1583 stand that it should say that a variable lives in %st(0) (when
1584 asked via an `=' command) if we said it was in DWARF regno 11,
1585 but SDB still prints garbage when asked for the value of the
1586 variable in question (via a `/' command).
1587 (Also note that the labels SDB prints for various FP stack regs
1588 when doing an `x' command are all wrong.)
1589 Note that these problems generally don't affect the native SVR4
1590 C compiler because it doesn't allow the use of -O with -g and
1591 because when it is *not* optimizing, it allocates a memory
1592 location for each floating-point variable, and the memory
1593 location is what gets described in the DWARF AT_location
1594 attribute for the variable in question.
1595 Regardless of the severe mental illness of the x86/svr4 SDB, we
1596 do something sensible here and we use the following DWARF
1597 register numbers. Note that these are all stack-top-relative
1598 numbers.
1599 11 for %st(0) (gcc regno = 8)
1600 12 for %st(1) (gcc regno = 9)
1601 13 for %st(2) (gcc regno = 10)
1602 14 for %st(3) (gcc regno = 11)
1603 15 for %st(4) (gcc regno = 12)
1604 16 for %st(5) (gcc regno = 13)
1605 17 for %st(6) (gcc regno = 14)
1606 18 for %st(7) (gcc regno = 15)
1608 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1610 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1611 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1612 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1613 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1614 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1615 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1616 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
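/* Worked example (illustrative, not part of the original source): with the
   SVR4 numbering described above, gcc regno 1 (%edx) maps to DWARF regno 2,
   gcc regno 2 (%ecx) maps to DWARF regno 1, and the stack-top-relative FP
   registers start at DWARF regno 11, exactly as listed in the comment.  */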
1619 /* Test and compare insns in i386.md store the information needed to
1620 generate branch and scc insns here. */
1622 rtx ix86_compare_op0 = NULL_RTX;
1623 rtx ix86_compare_op1 = NULL_RTX;
1624 rtx ix86_compare_emitted = NULL_RTX;
1626 /* Size of the register save area. */
1627 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
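/* Worked example (illustrative): with the usual limits of 6 integer
   registers (X86_64_REGPARM_MAX) of UNITS_PER_WORD = 8 bytes each and
   8 SSE registers (X86_64_SSE_REGPARM_MAX) of 16 bytes each, this comes
   to 6 * 8 + 8 * 16 = 176 bytes for the register save area.  */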
1629 /* Define the structure for the machine field in struct function. */
1631 struct stack_local_entry GTY(())
1633 unsigned short mode;
1634 unsigned short n;
1635 rtx rtl;
1636 struct stack_local_entry *next;
1639 /* Structure describing stack frame layout.
1640 Stack grows downward:
1642    [arguments]
1643                                   <- ARG_POINTER
1644    saved pc
1646    saved frame pointer if frame_pointer_needed
1647                                   <- HARD_FRAME_POINTER
1648    [saved regs]
1650    [padding1]           \
1652    [va_arg registers]   (
1653                          > to_allocate   <- FRAME_POINTER
1654    [frame]              (
1656    [padding2]           /
1658 struct ix86_frame
1660 int nregs;
1661 int padding1;
1662 int va_arg_size;
1663 HOST_WIDE_INT frame;
1664 int padding2;
1665 int outgoing_arguments_size;
1666 int red_zone_size;
1668 HOST_WIDE_INT to_allocate;
1669 /* The offsets relative to ARG_POINTER. */
1670 HOST_WIDE_INT frame_pointer_offset;
1671 HOST_WIDE_INT hard_frame_pointer_offset;
1672 HOST_WIDE_INT stack_pointer_offset;
1674 /* When save_regs_using_mov is set, emit prologue using
1675 move instead of push instructions. */
1676 bool save_regs_using_mov;
1679 /* Code model option. */
1680 enum cmodel ix86_cmodel;
1681 /* Asm dialect. */
1682 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1683 /* TLS dialects. */
1684 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1686 /* Which unit we are generating floating point math for. */
1687 enum fpmath_unit ix86_fpmath;
1689 /* Which cpu are we scheduling for. */
1690 enum processor_type ix86_tune;
1692 /* Which instruction set architecture to use. */
1693 enum processor_type ix86_arch;
1695 /* True if the SSE prefetch instruction is not a NOP. */
1696 int x86_prefetch_sse;
1698 /* ix86_regparm_string as a number */
1699 static int ix86_regparm;
1701 /* -mstackrealign option */
1702 extern int ix86_force_align_arg_pointer;
1703 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1705 static rtx (*ix86_gen_leave) (void);
1706 static rtx (*ix86_gen_pop1) (rtx);
1707 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1708 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1709 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1710 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1711 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1713 /* Preferred alignment for stack boundary in bits. */
1714 unsigned int ix86_preferred_stack_boundary;
1716 /* Values 1-5: see jump.c */
1717 int ix86_branch_cost;
1719 /* Variables which are this size or smaller are put in the data/bss
1720 or ldata/lbss sections. */
1722 int ix86_section_threshold = 65536;
1724 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1725 char internal_label_prefix[16];
1726 int internal_label_prefix_len;
1728 /* Fence to use after loop using movnt. */
1729 tree x86_mfence;
1731 /* Register class used for passing a given 64-bit part of the argument.
1732 These represent classes as documented by the PS ABI, with the exception
1733 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1734 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1736 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1737 whenever possible (upper half does contain padding). */
1738 enum x86_64_reg_class
1740 X86_64_NO_CLASS,
1741 X86_64_INTEGER_CLASS,
1742 X86_64_INTEGERSI_CLASS,
1743 X86_64_SSE_CLASS,
1744 X86_64_SSESF_CLASS,
1745 X86_64_SSEDF_CLASS,
1746 X86_64_SSEUP_CLASS,
1747 X86_64_X87_CLASS,
1748 X86_64_X87UP_CLASS,
1749 X86_64_COMPLEX_X87_CLASS,
1750 X86_64_MEMORY_CLASS
1752 static const char * const x86_64_reg_class_name[] =
1754 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1755 "sseup", "x87", "x87up", "cplx87", "no"
1758 #define MAX_CLASSES 4
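/* Worked example (illustrative, not part of the original source): under the
   psABI classification described above, a type such as

     struct example { double d; int i; };

   spans two eightbytes; the first (the double) classifies as
   X86_64_SSEDF_CLASS and the second (the int plus padding) as
   X86_64_INTEGERSI_CLASS, so the aggregate is passed in one SSE register
   and one general-purpose register.  */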
1760 /* Table of constants used by fldpi, fldln2, etc.... */
1761 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1762 static bool ext_80387_constants_init = 0;
1765 static struct machine_function * ix86_init_machine_status (void);
1766 static rtx ix86_function_value (const_tree, const_tree, bool);
1767 static int ix86_function_regparm (const_tree, const_tree);
1768 static void ix86_compute_frame_layout (struct ix86_frame *);
1769 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1770 rtx, rtx, int);
1773 /* The svr4 ABI for the i386 says that records and unions are returned
1774 in memory. */
1775 #ifndef DEFAULT_PCC_STRUCT_RETURN
1776 #define DEFAULT_PCC_STRUCT_RETURN 1
1777 #endif
1779 /* Bit flags that specify the ISA we are compiling for. */
1780 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1782 /* A mask of ix86_isa_flags that includes bit X if X
1783 was set or cleared on the command line. */
1784 static int ix86_isa_flags_explicit;
1786 /* Define a set of ISAs which are available when a given ISA is
1787 enabled. MMX and SSE ISAs are handled separately. */
1789 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1790 #define OPTION_MASK_ISA_3DNOW_SET \
1791 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1793 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1794 #define OPTION_MASK_ISA_SSE2_SET \
1795 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1796 #define OPTION_MASK_ISA_SSE3_SET \
1797 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1798 #define OPTION_MASK_ISA_SSSE3_SET \
1799 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1800 #define OPTION_MASK_ISA_SSE4_1_SET \
1801 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1802 #define OPTION_MASK_ISA_SSE4_2_SET \
1803 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1805 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1806 as -msse4.2. */
1807 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1809 #define OPTION_MASK_ISA_SSE4A_SET \
1810 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1811 #define OPTION_MASK_ISA_SSE5_SET \
1812 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
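/* Illustrative expansion (not part of the original source): each _SET
   macro pulls in the _SET mask of the ISA it depends on, so for instance

     OPTION_MASK_ISA_SSE4_1_SET
       == OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3
        | OPTION_MASK_ISA_SSE3   | OPTION_MASK_ISA_SSE2
        | OPTION_MASK_ISA_SSE

   and -msse4.1 therefore implicitly enables every earlier SSE level.  */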
1814 /* Define a set of ISAs which aren't available when a given ISA is
1815 disabled. MMX and SSE ISAs are handled separately. */
1817 #define OPTION_MASK_ISA_MMX_UNSET \
1818 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1819 #define OPTION_MASK_ISA_3DNOW_UNSET \
1820 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1821 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1823 #define OPTION_MASK_ISA_SSE_UNSET \
1824 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1825 #define OPTION_MASK_ISA_SSE2_UNSET \
1826 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1827 #define OPTION_MASK_ISA_SSE3_UNSET \
1828 (OPTION_MASK_ISA_SSE3 \
1829 | OPTION_MASK_ISA_SSSE3_UNSET \
1830 | OPTION_MASK_ISA_SSE4A_UNSET )
1831 #define OPTION_MASK_ISA_SSSE3_UNSET \
1832 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1833 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1834 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1835 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1837 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1838 as -mno-sse4.1. */
1839 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1841 #define OPTION_MASK_ISA_SSE4A_UNSET \
1842 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1844 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
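/* Illustrative expansion (not part of the original source): the _UNSET
   masks chain downward through everything that depends on the ISA being
   disabled, e.g.

     OPTION_MASK_ISA_SSE2_UNSET
       == OPTION_MASK_ISA_SSE2   | OPTION_MASK_ISA_SSE3
        | OPTION_MASK_ISA_SSSE3  | OPTION_MASK_ISA_SSE4_1
        | OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4A
        | OPTION_MASK_ISA_SSE5

   so -mno-sse2 also turns off every later SSE level plus SSE4A and SSE5.  */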
1846 /* Vectorization library interface and handlers. */
1847 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1848 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1849 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1851 /* Implement TARGET_HANDLE_OPTION. */
1853 static bool
1854 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1856 switch (code)
1858 case OPT_mmmx:
1859 if (value)
1861 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1864 else
1866 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1869 return true;
1871 case OPT_m3dnow:
1872 if (value)
1874 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1875 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1877 else
1879 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1880 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1882 return true;
1884 case OPT_m3dnowa:
1885 return false;
1887 case OPT_msse:
1888 if (value)
1890 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1891 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1893 else
1895 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1896 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1898 return true;
1900 case OPT_msse2:
1901 if (value)
1903 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1904 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1906 else
1908 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1909 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1911 return true;
1913 case OPT_msse3:
1914 if (value)
1916 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1917 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1919 else
1921 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1922 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1924 return true;
1926 case OPT_mssse3:
1927 if (value)
1929 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1930 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1932 else
1934 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1935 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1937 return true;
1939 case OPT_msse4_1:
1940 if (value)
1942 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1943 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1945 else
1947 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1948 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1950 return true;
1952 case OPT_msse4_2:
1953 if (value)
1955 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1956 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1958 else
1960 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1961 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1963 return true;
1965 case OPT_msse4:
1966 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1967 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1968 return true;
1970 case OPT_mno_sse4:
1971 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1972 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1973 return true;
1975 case OPT_msse4a:
1976 if (value)
1978 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1979 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1981 else
1983 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1984 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1986 return true;
1988 case OPT_msse5:
1989 if (value)
1991 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1992 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1994 else
1996 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1997 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1999 return true;
2001 default:
2002 return true;
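/* Worked example (illustrative, not part of the original source): for
   "-mssse3 -mno-sse3" this handler first ORs OPTION_MASK_ISA_SSSE3_SET
   (SSSE3 | SSE3 | SSE2 | SSE) into ix86_isa_flags, then clears
   OPTION_MASK_ISA_SSE3_UNSET (SSE3, SSSE3 and everything above them),
   leaving only SSE and SSE2 enabled.  Both decisions are recorded in
   ix86_isa_flags_explicit so that -march defaults applied later in
   override_options cannot silently re-enable them.  */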
2006 /* Sometimes certain combinations of command options do not make
2007 sense on a particular target machine. You can define a macro
2008 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2009 defined, is executed once just after all the command options have
2010 been parsed.
2012 Don't use this macro to turn on various extra optimizations for
2013 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2015 void
2016 override_options (void)
2018 int i;
2019 int ix86_tune_defaulted = 0;
2020 int ix86_arch_specified = 0;
2021 unsigned int ix86_arch_mask, ix86_tune_mask;
2023 /* Comes from final.c -- no real reason to change it. */
2024 #define MAX_CODE_ALIGN 16
2026 static struct ptt
2028 const struct processor_costs *cost; /* Processor costs */
2029 const int align_loop; /* Default alignments. */
2030 const int align_loop_max_skip;
2031 const int align_jump;
2032 const int align_jump_max_skip;
2033 const int align_func;
2035 const processor_target_table[PROCESSOR_max] =
2037 {&i386_cost, 4, 3, 4, 3, 4},
2038 {&i486_cost, 16, 15, 16, 15, 16},
2039 {&pentium_cost, 16, 7, 16, 7, 16},
2040 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2041 {&geode_cost, 0, 0, 0, 0, 0},
2042 {&k6_cost, 32, 7, 32, 7, 32},
2043 {&athlon_cost, 16, 7, 16, 7, 16},
2044 {&pentium4_cost, 0, 0, 0, 0, 0},
2045 {&k8_cost, 16, 7, 16, 7, 16},
2046 {&nocona_cost, 0, 0, 0, 0, 0},
2047 {&core2_cost, 16, 10, 16, 10, 16},
2048 {&generic32_cost, 16, 7, 16, 7, 16},
2049 {&generic64_cost, 16, 10, 16, 10, 16},
2050 {&amdfam10_cost, 32, 24, 32, 7, 32}
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2055 "generic",
2056 "i386",
2057 "i486",
2058 "pentium",
2059 "pentium-mmx",
2060 "pentiumpro",
2061 "pentium2",
2062 "pentium3",
2063 "pentium4",
2064 "pentium-m",
2065 "prescott",
2066 "nocona",
2067 "core2",
2068 "geode",
2069 "k6",
2070 "k6-2",
2071 "k6-3",
2072 "athlon",
2073 "athlon-4",
2074 "k8",
2075 "amdfam10"
2078 enum pta_flags
2080 PTA_SSE = 1 << 0,
2081 PTA_SSE2 = 1 << 1,
2082 PTA_SSE3 = 1 << 2,
2083 PTA_MMX = 1 << 3,
2084 PTA_PREFETCH_SSE = 1 << 4,
2085 PTA_3DNOW = 1 << 5,
2086 PTA_3DNOW_A = 1 << 6,
2087 PTA_64BIT = 1 << 7,
2088 PTA_SSSE3 = 1 << 8,
2089 PTA_CX16 = 1 << 9,
2090 PTA_POPCNT = 1 << 10,
2091 PTA_ABM = 1 << 11,
2092 PTA_SSE4A = 1 << 12,
2093 PTA_NO_SAHF = 1 << 13,
2094 PTA_SSE4_1 = 1 << 14,
2095 PTA_SSE4_2 = 1 << 15,
2096 PTA_SSE5 = 1 << 16,
2097 PTA_AES = 1 << 17,
2098 PTA_PCLMUL = 1 << 18
2101 static struct pta
2103 const char *const name; /* processor name or nickname. */
2104 const enum processor_type processor;
2105 const unsigned /*enum pta_flags*/ flags;
2107 const processor_alias_table[] =
2109 {"i386", PROCESSOR_I386, 0},
2110 {"i486", PROCESSOR_I486, 0},
2111 {"i586", PROCESSOR_PENTIUM, 0},
2112 {"pentium", PROCESSOR_PENTIUM, 0},
2113 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2114 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2115 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2116 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2117 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2118 {"i686", PROCESSOR_PENTIUMPRO, 0},
2119 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2120 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2121 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2122 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2123 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2124 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2125 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2126 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2127 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2128 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2129 | PTA_CX16 | PTA_NO_SAHF)},
2130 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2131 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2132 | PTA_SSSE3
2133 | PTA_CX16)},
2134 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2135 |PTA_PREFETCH_SSE)},
2136 {"k6", PROCESSOR_K6, PTA_MMX},
2137 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2138 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2139 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2140 | PTA_PREFETCH_SSE)},
2141 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2142 | PTA_PREFETCH_SSE)},
2143 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2144 | PTA_SSE)},
2145 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2146 | PTA_SSE)},
2147 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2148 | PTA_SSE)},
2149 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2150 | PTA_MMX | PTA_SSE | PTA_SSE2
2151 | PTA_NO_SAHF)},
2152 {"k8", PROCESSOR_K8, (PTA_64BIT
2153 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2154 | PTA_SSE | PTA_SSE2
2155 | PTA_NO_SAHF)},
2156 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2157 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2158 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2159 | PTA_NO_SAHF)},
2160 {"opteron", PROCESSOR_K8, (PTA_64BIT
2161 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2162 | PTA_SSE | PTA_SSE2
2163 | PTA_NO_SAHF)},
2164 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2165 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2166 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2167 | PTA_NO_SAHF)},
2168 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2169 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2170 | PTA_SSE | PTA_SSE2
2171 | PTA_NO_SAHF)},
2172 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2175 | PTA_NO_SAHF)},
2176 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2177 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2178 | PTA_SSE | PTA_SSE2
2179 | PTA_NO_SAHF)},
2180 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2181 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2182 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2183 | PTA_SSE4A
2184 | PTA_CX16 | PTA_ABM)},
2185 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2186 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2187 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2188 | PTA_SSE4A
2189 | PTA_CX16 | PTA_ABM)},
2190 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2191 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2194 int const pta_size = ARRAY_SIZE (processor_alias_table);
2196 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2197 SUBTARGET_OVERRIDE_OPTIONS;
2198 #endif
2200 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2201 SUBSUBTARGET_OVERRIDE_OPTIONS;
2202 #endif
2204 /* -fPIC is the default for x86_64. */
2205 if (TARGET_MACHO && TARGET_64BIT)
2206 flag_pic = 2;
2208 /* Set the default values for switches whose default depends on TARGET_64BIT
2209 in case they weren't overwritten by command line options. */
2210 if (TARGET_64BIT)
2212 /* Mach-O doesn't support omitting the frame pointer for now. */
2213 if (flag_omit_frame_pointer == 2)
2214 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2215 if (flag_asynchronous_unwind_tables == 2)
2216 flag_asynchronous_unwind_tables = 1;
2217 if (flag_pcc_struct_return == 2)
2218 flag_pcc_struct_return = 0;
2220 else
2222 if (flag_omit_frame_pointer == 2)
2223 flag_omit_frame_pointer = 0;
2224 if (flag_asynchronous_unwind_tables == 2)
2225 flag_asynchronous_unwind_tables = 0;
2226 if (flag_pcc_struct_return == 2)
2227 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2230 /* Need to check -mtune=generic first. */
2231 if (ix86_tune_string)
2233 if (!strcmp (ix86_tune_string, "generic")
2234 || !strcmp (ix86_tune_string, "i686")
2235 /* As special support for cross compilers we read -mtune=native
2236 as -mtune=generic. With native compilers we won't see the
2237 -mtune=native, as it was changed by the driver. */
2238 || !strcmp (ix86_tune_string, "native"))
2240 if (TARGET_64BIT)
2241 ix86_tune_string = "generic64";
2242 else
2243 ix86_tune_string = "generic32";
2245 else if (!strncmp (ix86_tune_string, "generic", 7))
2246 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2248 else
2250 if (ix86_arch_string)
2251 ix86_tune_string = ix86_arch_string;
2252 if (!ix86_tune_string)
2254 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2255 ix86_tune_defaulted = 1;
2258 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2259 need to use a sensible tune option. */
2260 if (!strcmp (ix86_tune_string, "generic")
2261 || !strcmp (ix86_tune_string, "x86-64")
2262 || !strcmp (ix86_tune_string, "i686"))
2264 if (TARGET_64BIT)
2265 ix86_tune_string = "generic64";
2266 else
2267 ix86_tune_string = "generic32";
2270 if (ix86_stringop_string)
2272 if (!strcmp (ix86_stringop_string, "rep_byte"))
2273 stringop_alg = rep_prefix_1_byte;
2274 else if (!strcmp (ix86_stringop_string, "libcall"))
2275 stringop_alg = libcall;
2276 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2277 stringop_alg = rep_prefix_4_byte;
2278 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2279 stringop_alg = rep_prefix_8_byte;
2280 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2281 stringop_alg = loop_1_byte;
2282 else if (!strcmp (ix86_stringop_string, "loop"))
2283 stringop_alg = loop;
2284 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2285 stringop_alg = unrolled_loop;
2286 else
2287 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2289 if (!strcmp (ix86_tune_string, "x86-64"))
2290 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2291 "-mtune=generic instead as appropriate.");
2293 if (!ix86_arch_string)
2294 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2295 else
2296 ix86_arch_specified = 1;
2298 if (!strcmp (ix86_arch_string, "generic"))
2299 error ("generic CPU can be used only for -mtune= switch");
2300 if (!strncmp (ix86_arch_string, "generic", 7))
2301 error ("bad value (%s) for -march= switch", ix86_arch_string);
2303 if (ix86_cmodel_string != 0)
2305 if (!strcmp (ix86_cmodel_string, "small"))
2306 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2307 else if (!strcmp (ix86_cmodel_string, "medium"))
2308 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2309 else if (!strcmp (ix86_cmodel_string, "large"))
2310 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2311 else if (flag_pic)
2312 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2313 else if (!strcmp (ix86_cmodel_string, "32"))
2314 ix86_cmodel = CM_32;
2315 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2316 ix86_cmodel = CM_KERNEL;
2317 else
2318 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2320 else
2322 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2323 use of rip-relative addressing. This eliminates fixups that
2324 would otherwise be needed if this object is to be placed in a
2325 DLL, and is essentially just as efficient as direct addressing. */
2326 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2327 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2328 else if (TARGET_64BIT)
2329 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2330 else
2331 ix86_cmodel = CM_32;
2333 if (ix86_asm_string != 0)
2335 if (! TARGET_MACHO
2336 && !strcmp (ix86_asm_string, "intel"))
2337 ix86_asm_dialect = ASM_INTEL;
2338 else if (!strcmp (ix86_asm_string, "att"))
2339 ix86_asm_dialect = ASM_ATT;
2340 else
2341 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2343 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2344 error ("code model %qs not supported in the %s bit mode",
2345 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2346 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2347 sorry ("%i-bit mode not compiled in",
2348 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2350 for (i = 0; i < pta_size; i++)
2351 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2353 ix86_arch = processor_alias_table[i].processor;
2354 /* Default cpu tuning to the architecture. */
2355 ix86_tune = ix86_arch;
2357 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2358 error ("CPU you selected does not support x86-64 "
2359 "instruction set");
2361 if (processor_alias_table[i].flags & PTA_MMX
2362 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2363 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2364 if (processor_alias_table[i].flags & PTA_3DNOW
2365 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2366 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2367 if (processor_alias_table[i].flags & PTA_3DNOW_A
2368 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2369 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2370 if (processor_alias_table[i].flags & PTA_SSE
2371 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2372 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2373 if (processor_alias_table[i].flags & PTA_SSE2
2374 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2375 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2376 if (processor_alias_table[i].flags & PTA_SSE3
2377 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2378 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2379 if (processor_alias_table[i].flags & PTA_SSSE3
2380 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2381 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2382 if (processor_alias_table[i].flags & PTA_SSE4_1
2383 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2384 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2385 if (processor_alias_table[i].flags & PTA_SSE4_2
2386 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2387 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2388 if (processor_alias_table[i].flags & PTA_SSE4A
2389 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2390 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2391 if (processor_alias_table[i].flags & PTA_SSE5
2392 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2393 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2395 if (processor_alias_table[i].flags & PTA_ABM)
2396 x86_abm = true;
2397 if (processor_alias_table[i].flags & PTA_CX16)
2398 x86_cmpxchg16b = true;
2399 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2400 x86_popcnt = true;
2401 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2402 x86_prefetch_sse = true;
2403 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2404 x86_sahf = true;
2405 if (processor_alias_table[i].flags & PTA_AES)
2406 x86_aes = true;
2407 if (processor_alias_table[i].flags & PTA_PCLMUL)
2408 x86_pclmul = true;
2410 break;
2413 if (i == pta_size)
2414 error ("bad value (%s) for -march= switch", ix86_arch_string);
2416 ix86_arch_mask = 1u << ix86_arch;
2417 for (i = 0; i < X86_ARCH_LAST; ++i)
2418 ix86_arch_features[i] &= ix86_arch_mask;
2420 for (i = 0; i < pta_size; i++)
2421 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2423 ix86_tune = processor_alias_table[i].processor;
2424 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2426 if (ix86_tune_defaulted)
2428 ix86_tune_string = "x86-64";
2429 for (i = 0; i < pta_size; i++)
2430 if (! strcmp (ix86_tune_string,
2431 processor_alias_table[i].name))
2432 break;
2433 ix86_tune = processor_alias_table[i].processor;
2435 else
2436 error ("CPU you selected does not support x86-64 "
2437 "instruction set");
2439 /* Intel CPUs have always interpreted SSE prefetch instructions as
2440 NOPs; so, we can enable SSE prefetch instructions even when
2441 -mtune (rather than -march) points us to a processor that has them.
2442 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2443 higher processors. */
2444 if (TARGET_CMOVE
2445 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2446 x86_prefetch_sse = true;
2447 break;
2449 if (i == pta_size)
2450 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2452 /* Enable SSE2 if AES or PCLMUL is enabled. */
2453 if ((x86_aes || x86_pclmul)
2454 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2456 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2457 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2460 ix86_tune_mask = 1u << ix86_tune;
2461 for (i = 0; i < X86_TUNE_LAST; ++i)
2462 ix86_tune_features[i] &= ix86_tune_mask;
2464 if (optimize_size)
2465 ix86_cost = &size_cost;
2466 else
2467 ix86_cost = processor_target_table[ix86_tune].cost;
2469 /* Arrange to set up i386_stack_locals for all functions. */
2470 init_machine_status = ix86_init_machine_status;
2472 /* Validate -mregparm= value. */
2473 if (ix86_regparm_string)
2475 if (TARGET_64BIT)
2476 warning (0, "-mregparm is ignored in 64-bit mode");
2477 i = atoi (ix86_regparm_string);
2478 if (i < 0 || i > REGPARM_MAX)
2479 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2480 else
2481 ix86_regparm = i;
2483 if (TARGET_64BIT)
2484 ix86_regparm = REGPARM_MAX;
2486 /* If the user has provided any of the -malign-* options,
2487 warn and use that value only if -falign-* is not set.
2488 Remove this code in GCC 3.2 or later. */
2489 if (ix86_align_loops_string)
2491 warning (0, "-malign-loops is obsolete, use -falign-loops");
2492 if (align_loops == 0)
2494 i = atoi (ix86_align_loops_string);
2495 if (i < 0 || i > MAX_CODE_ALIGN)
2496 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2497 else
2498 align_loops = 1 << i;
2502 if (ix86_align_jumps_string)
2504 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2505 if (align_jumps == 0)
2507 i = atoi (ix86_align_jumps_string);
2508 if (i < 0 || i > MAX_CODE_ALIGN)
2509 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2510 else
2511 align_jumps = 1 << i;
2515 if (ix86_align_funcs_string)
2517 warning (0, "-malign-functions is obsolete, use -falign-functions");
2518 if (align_functions == 0)
2520 i = atoi (ix86_align_funcs_string);
2521 if (i < 0 || i > MAX_CODE_ALIGN)
2522 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2523 else
2524 align_functions = 1 << i;
2528 /* Default align_* from the processor table. */
2529 if (align_loops == 0)
2531 align_loops = processor_target_table[ix86_tune].align_loop;
2532 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2534 if (align_jumps == 0)
2536 align_jumps = processor_target_table[ix86_tune].align_jump;
2537 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2539 if (align_functions == 0)
2541 align_functions = processor_target_table[ix86_tune].align_func;
2544 /* Validate -mbranch-cost= value, or provide default. */
2545 ix86_branch_cost = ix86_cost->branch_cost;
2546 if (ix86_branch_cost_string)
2548 i = atoi (ix86_branch_cost_string);
2549 if (i < 0 || i > 5)
2550 error ("-mbranch-cost=%d is not between 0 and 5", i);
2551 else
2552 ix86_branch_cost = i;
2554 if (ix86_section_threshold_string)
2556 i = atoi (ix86_section_threshold_string);
2557 if (i < 0)
2558 error ("-mlarge-data-threshold=%d is negative", i);
2559 else
2560 ix86_section_threshold = i;
2563 if (ix86_tls_dialect_string)
2565 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2566 ix86_tls_dialect = TLS_DIALECT_GNU;
2567 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2568 ix86_tls_dialect = TLS_DIALECT_GNU2;
2569 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2570 ix86_tls_dialect = TLS_DIALECT_SUN;
2571 else
2572 error ("bad value (%s) for -mtls-dialect= switch",
2573 ix86_tls_dialect_string);
2576 if (ix87_precision_string)
2578 i = atoi (ix87_precision_string);
2579 if (i != 32 && i != 64 && i != 80)
2580 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2583 if (TARGET_64BIT)
2585 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2587 /* Enable by default the SSE and MMX builtins. Do allow the user to
2588 explicitly disable any of these. In particular, disabling SSE and
2589 MMX for kernel code is extremely useful. */
2590 if (!ix86_arch_specified)
2591 ix86_isa_flags
2592 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2593 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2595 if (TARGET_RTD)
2596 warning (0, "-mrtd is ignored in 64bit mode");
2598 else
2600 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2602 if (!ix86_arch_specified)
2603 ix86_isa_flags
2604 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2606 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2607 when the programmer takes care to keep the stack from being destroyed. */
2608 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2609 target_flags |= MASK_NO_RED_ZONE;
2612 /* Keep nonleaf frame pointers. */
2613 if (flag_omit_frame_pointer)
2614 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2615 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2616 flag_omit_frame_pointer = 1;
2618 /* If we're doing fast math, we don't care about comparison order
2619 wrt NaNs. This lets us use a shorter comparison sequence. */
2620 if (flag_finite_math_only)
2621 target_flags &= ~MASK_IEEE_FP;
2623 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2624 since the insns won't need emulation. */
2625 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2626 target_flags &= ~MASK_NO_FANCY_MATH_387;
2628 /* Likewise, if the target doesn't have a 387, or we've specified
2629 software floating point, don't use 387 inline intrinsics. */
2630 if (!TARGET_80387)
2631 target_flags |= MASK_NO_FANCY_MATH_387;
2633 /* Turn on MMX builtins for -msse. */
2634 if (TARGET_SSE)
2636 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2637 x86_prefetch_sse = true;
2640 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2641 if (TARGET_SSE4_2 || TARGET_ABM)
2642 x86_popcnt = true;
2644 /* Validate -mpreferred-stack-boundary= value, or provide default.
2645 The default of 128 bits is for Pentium III's SSE __m128. We can't
2646 change it because of optimize_size. Otherwise, we can't mix object
2647 files compiled with -Os and -On. */
2648 ix86_preferred_stack_boundary = 128;
2649 if (ix86_preferred_stack_boundary_string)
2651 i = atoi (ix86_preferred_stack_boundary_string);
2652 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2653 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2654 TARGET_64BIT ? 4 : 2);
2655 else
2656 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
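/* Worked example (illustrative): -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
   alignment that the 128-bit default above already provides.  */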
2659 /* Accept -msseregparm only if at least SSE support is enabled. */
2660 if (TARGET_SSEREGPARM
2661 && ! TARGET_SSE)
2662 error ("-msseregparm used without SSE enabled");
2664 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2665 if (ix86_fpmath_string != 0)
2667 if (! strcmp (ix86_fpmath_string, "387"))
2668 ix86_fpmath = FPMATH_387;
2669 else if (! strcmp (ix86_fpmath_string, "sse"))
2671 if (!TARGET_SSE)
2673 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2674 ix86_fpmath = FPMATH_387;
2676 else
2677 ix86_fpmath = FPMATH_SSE;
2679 else if (! strcmp (ix86_fpmath_string, "387,sse")
2680 || ! strcmp (ix86_fpmath_string, "sse,387"))
2682 if (!TARGET_SSE)
2684 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2685 ix86_fpmath = FPMATH_387;
2687 else if (!TARGET_80387)
2689 warning (0, "387 instruction set disabled, using SSE arithmetics");
2690 ix86_fpmath = FPMATH_SSE;
2692 else
2693 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2695 else
2696 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2699 /* If the i387 is disabled, then do not return values in it. */
2700 if (!TARGET_80387)
2701 target_flags &= ~MASK_FLOAT_RETURNS;
2703 /* Use external vectorized library in vectorizing intrinsics. */
2704 if (ix86_veclibabi_string)
2706 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2707 ix86_veclib_handler = ix86_veclibabi_svml;
2708 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2709 ix86_veclib_handler = ix86_veclibabi_acml;
2710 else
2711 error ("unknown vectorization library ABI type (%s) for "
2712 "-mveclibabi= switch", ix86_veclibabi_string);
2715 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2716 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2717 && !optimize_size)
2718 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2720 /* ??? Unwind info is not correct around the CFG unless either a frame
2721 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2722 unwind info generation to be aware of the CFG and propagating states
2723 around edges. */
2724 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2725 || flag_exceptions || flag_non_call_exceptions)
2726 && flag_omit_frame_pointer
2727 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2729 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2730 warning (0, "unwind tables currently require either a frame pointer "
2731 "or -maccumulate-outgoing-args for correctness");
2732 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2735 /* If stack probes are required, the space used for large function
2736 arguments on the stack must also be probed, so enable
2737 -maccumulate-outgoing-args so this happens in the prologue. */
2738 if (TARGET_STACK_PROBE
2739 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2741 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2742 warning (0, "stack probing requires -maccumulate-outgoing-args "
2743 "for correctness");
2744 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2747 /* For sane SSE instruction set generation we need fcomi instruction.
2748 It is safe to enable all CMOVE instructions. */
2749 if (TARGET_SSE)
2750 TARGET_CMOVE = 1;
2752 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2754 char *p;
2755 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2756 p = strchr (internal_label_prefix, 'X');
2757 internal_label_prefix_len = p - internal_label_prefix;
2758 *p = '\0';
2761 /* When the scheduling description is not available, disable the scheduler pass
2762 so it won't slow down the compilation and make x87 code slower. */
2763 if (!TARGET_SCHEDULE)
2764 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2766 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2767 set_param_value ("simultaneous-prefetches",
2768 ix86_cost->simultaneous_prefetches);
2769 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2770 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2771 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2772 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2773 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2774 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2776 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2777 can be optimized to ap = __builtin_next_arg (0).
2778 For ABI switching it should be corrected.
2779 if (!TARGET_64BIT || DEFAULT_ABI == MS_ABI)
2780 targetm.expand_builtin_va_start = NULL;
2782 if (TARGET_64BIT)
2784 ix86_gen_leave = gen_leave_rex64;
2785 ix86_gen_pop1 = gen_popdi1;
2786 ix86_gen_add3 = gen_adddi3;
2787 ix86_gen_sub3 = gen_subdi3;
2788 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
2789 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
2790 ix86_gen_monitor = gen_sse3_monitor64;
2792 else
2794 ix86_gen_leave = gen_leave;
2795 ix86_gen_pop1 = gen_popsi1;
2796 ix86_gen_add3 = gen_addsi3;
2797 ix86_gen_sub3 = gen_subsi3;
2798 ix86_gen_sub3_carry = gen_subsi3_carry;
2799 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
2800 ix86_gen_monitor = gen_sse3_monitor;
2803 #ifdef USE_IX86_CLD
2804 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
2805 if (!TARGET_64BIT)
2806 target_flags |= MASK_CLD & ~target_flags_explicit;
2807 #endif
2810 /* Return true if this goes in large data/bss. */
2812 static bool
2813 ix86_in_large_data_p (tree exp)
2815 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2816 return false;
2818 /* Functions are never large data. */
2819 if (TREE_CODE (exp) == FUNCTION_DECL)
2820 return false;
2822 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2824 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2825 if (strcmp (section, ".ldata") == 0
2826 || strcmp (section, ".lbss") == 0)
2827 return true;
2828 return false;
2830 else
2832 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2834 /* If this is an incomplete type with size 0, then we can't put it
2835 in data because it might be too big when completed. */
2836 if (!size || size > ix86_section_threshold)
2837 return true;
2840 return false;
2843 /* Switch to the appropriate section for output of DECL.
2844 DECL is either a `VAR_DECL' node or a constant of some sort.
2845 RELOC indicates whether forming the initial value of DECL requires
2846 link-time relocations. */
2848 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2849 ATTRIBUTE_UNUSED;
2851 static section *
2852 x86_64_elf_select_section (tree decl, int reloc,
2853 unsigned HOST_WIDE_INT align)
2855 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2856 && ix86_in_large_data_p (decl))
2858 const char *sname = NULL;
2859 unsigned int flags = SECTION_WRITE;
2860 switch (categorize_decl_for_section (decl, reloc))
2862 case SECCAT_DATA:
2863 sname = ".ldata";
2864 break;
2865 case SECCAT_DATA_REL:
2866 sname = ".ldata.rel";
2867 break;
2868 case SECCAT_DATA_REL_LOCAL:
2869 sname = ".ldata.rel.local";
2870 break;
2871 case SECCAT_DATA_REL_RO:
2872 sname = ".ldata.rel.ro";
2873 break;
2874 case SECCAT_DATA_REL_RO_LOCAL:
2875 sname = ".ldata.rel.ro.local";
2876 break;
2877 case SECCAT_BSS:
2878 sname = ".lbss";
2879 flags |= SECTION_BSS;
2880 break;
2881 case SECCAT_RODATA:
2882 case SECCAT_RODATA_MERGE_STR:
2883 case SECCAT_RODATA_MERGE_STR_INIT:
2884 case SECCAT_RODATA_MERGE_CONST:
2885 sname = ".lrodata";
2886 flags = 0;
2887 break;
2888 case SECCAT_SRODATA:
2889 case SECCAT_SDATA:
2890 case SECCAT_SBSS:
2891 gcc_unreachable ();
2892 case SECCAT_TEXT:
2893 case SECCAT_TDATA:
2894 case SECCAT_TBSS:
2895 /* We don't split these for the medium model. Place them into
2896 default sections and hope for the best. */
2897 break;
2898 case SECCAT_EMUTLS_VAR:
2899 case SECCAT_EMUTLS_TMPL:
2900 gcc_unreachable ();
2902 if (sname)
2904 /* We might get called with string constants, but get_named_section
2905 doesn't like them as they are not DECLs. Also, we need to set
2906 flags in that case. */
2907 if (!DECL_P (decl))
2908 return get_section (sname, flags, NULL);
2909 return get_named_section (decl, sname, reloc);
2912 return default_elf_select_section (decl, reloc, align);
2915 /* Build up a unique section name, expressed as a
2916 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2917 RELOC indicates whether the initial value of EXP requires
2918 link-time relocations. */
2920 static void ATTRIBUTE_UNUSED
2921 x86_64_elf_unique_section (tree decl, int reloc)
2923 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2924 && ix86_in_large_data_p (decl))
2926 const char *prefix = NULL;
2927 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2928 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2930 switch (categorize_decl_for_section (decl, reloc))
2932 case SECCAT_DATA:
2933 case SECCAT_DATA_REL:
2934 case SECCAT_DATA_REL_LOCAL:
2935 case SECCAT_DATA_REL_RO:
2936 case SECCAT_DATA_REL_RO_LOCAL:
2937 prefix = one_only ? ".ld" : ".ldata";
2938 break;
2939 case SECCAT_BSS:
2940 prefix = one_only ? ".lb" : ".lbss";
2941 break;
2942 case SECCAT_RODATA:
2943 case SECCAT_RODATA_MERGE_STR:
2944 case SECCAT_RODATA_MERGE_STR_INIT:
2945 case SECCAT_RODATA_MERGE_CONST:
2946 prefix = one_only ? ".lr" : ".lrodata";
2947 break;
2948 case SECCAT_SRODATA:
2949 case SECCAT_SDATA:
2950 case SECCAT_SBSS:
2951 gcc_unreachable ();
2952 case SECCAT_TEXT:
2953 case SECCAT_TDATA:
2954 case SECCAT_TBSS:
2955 /* We don't split these for the medium model. Place them into
2956 default sections and hope for the best. */
2957 break;
2958 case SECCAT_EMUTLS_VAR:
2959 prefix = targetm.emutls.var_section;
2960 break;
2961 case SECCAT_EMUTLS_TMPL:
2962 prefix = targetm.emutls.tmpl_section;
2963 break;
2965 if (prefix)
2967 const char *name, *linkonce;
2968 char *string;
2970 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2971 name = targetm.strip_name_encoding (name);
2973 /* If we're using one_only, then there needs to be a .gnu.linkonce
2974 prefix to the section name. */
2975 linkonce = one_only ? ".gnu.linkonce" : "";
2977 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2979 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
2980 return;
2983 default_unique_section (decl, reloc);
2986 #ifdef COMMON_ASM_OP
2987 /* This says how to output assembler code to declare an
2988 uninitialized external linkage data object.
2990 For the medium model on x86-64 we need to use the .largecomm directive for
2991 large objects. */
2992 void
2993 x86_elf_aligned_common (FILE *file,
2994 const char *name, unsigned HOST_WIDE_INT size,
2995 int align)
2997 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2998 && size > (unsigned int)ix86_section_threshold)
2999 fprintf (file, ".largecomm\t");
3000 else
3001 fprintf (file, "%s", COMMON_ASM_OP);
3002 assemble_name (file, name);
3003 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3004 size, align / BITS_PER_UNIT);
3006 #endif
3008 /* Utility function for targets to use in implementing
3009 ASM_OUTPUT_ALIGNED_BSS. */
3011 void
3012 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3013 const char *name, unsigned HOST_WIDE_INT size,
3014 int align)
3016 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3017 && size > (unsigned int)ix86_section_threshold)
3018 switch_to_section (get_named_section (decl, ".lbss", 0));
3019 else
3020 switch_to_section (bss_section);
3021 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3022 #ifdef ASM_DECLARE_OBJECT_NAME
3023 last_assemble_variable_decl = decl;
3024 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3025 #else
3026 /* The standard thing is just to output a label for the object. */
3027 ASM_OUTPUT_LABEL (file, name);
3028 #endif /* ASM_DECLARE_OBJECT_NAME */
3029 ASM_OUTPUT_SKIP (file, size ? size : 1);
3032 void
3033 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3035 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3036 make the problem with not enough registers even worse. */
3037 #ifdef INSN_SCHEDULING
3038 if (level > 1)
3039 flag_schedule_insns = 0;
3040 #endif
3042 if (TARGET_MACHO)
3043 /* The Darwin libraries never set errno, so we might as well
3044 avoid calling them when that's the only reason we would. */
3045 flag_errno_math = 0;
3047 /* The default values of these switches depend on TARGET_64BIT, which
3048 is not known at this moment. Mark these values with 2 and
3049 let the user override them. In case there is no command line option
3050 specifying them, we will set the defaults in override_options. */
3051 if (optimize >= 1)
3052 flag_omit_frame_pointer = 2;
3053 flag_pcc_struct_return = 2;
3054 flag_asynchronous_unwind_tables = 2;
3055 flag_vect_cost_model = 1;
3056 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3057 SUBTARGET_OPTIMIZATION_OPTIONS;
3058 #endif
3061 /* Decide whether we can make a sibling call to a function. DECL is the
3062 declaration of the function being targeted by the call and EXP is the
3063 CALL_EXPR representing the call. */
3065 static bool
3066 ix86_function_ok_for_sibcall (tree decl, tree exp)
3068 tree func;
3069 rtx a, b;
3071 /* If we are generating position-independent code, we cannot sibcall
3072 optimize any indirect call, or a direct call to a global function,
3073 as the PLT requires %ebx be live. */
3074 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3075 return false;
3077 if (decl)
3078 func = decl;
3079 else
3081 func = TREE_TYPE (CALL_EXPR_FN (exp));
3082 if (POINTER_TYPE_P (func))
3083 func = TREE_TYPE (func);
3086 /* Check that the return value locations are the same. Like
3087 if we are returning floats on the 80387 register stack, we cannot
3088 make a sibcall from a function that doesn't return a float to a
3089 function that does or, conversely, from a function that does return
3090 a float to a function that doesn't; the necessary stack adjustment
3091 would not be executed. This is also the place we notice
3092 differences in the return value ABI. Note that it is ok for one
3093 of the functions to have void return type as long as the return
3094 value of the other is passed in a register. */
3095 a = ix86_function_value (TREE_TYPE (exp), func, false);
3096 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3097 cfun->decl, false);
3098 if (STACK_REG_P (a) || STACK_REG_P (b))
3100 if (!rtx_equal_p (a, b))
3101 return false;
3103 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3105 else if (!rtx_equal_p (a, b))
3106 return false;
3108 /* If this call is indirect, we'll need to be able to use a call-clobbered
3109 register for the address of the target function. Make sure that all
3110 such registers are not used for passing parameters. */
3111 if (!decl && !TARGET_64BIT)
3113 tree type;
3115 /* We're looking at the CALL_EXPR, we need the type of the function. */
3116 type = CALL_EXPR_FN (exp); /* pointer expression */
3117 type = TREE_TYPE (type); /* pointer type */
3118 type = TREE_TYPE (type); /* function type */
3120 if (ix86_function_regparm (type, NULL) >= 3)
3122 /* ??? Need to count the actual number of registers to be used,
3123 not the possible number of registers. Fix later. */
3124 return false;
3128 /* Dllimport'd functions are also called indirectly. */
3129 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3130 && decl && DECL_DLLIMPORT_P (decl)
3131 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3132 return false;
3134 /* If we force-aligned the stack, then sibcalling would unalign the
3135 stack, which may break the called function. */
3136 if (cfun->machine->force_align_arg_pointer)
3137 return false;
3139 /* Otherwise okay. That also includes certain types of indirect calls. */
3140 return true;
3143 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3144 calling convention attributes;
3145 arguments as in struct attribute_spec.handler. */
3147 static tree
3148 ix86_handle_cconv_attribute (tree *node, tree name,
3149 tree args,
3150 int flags ATTRIBUTE_UNUSED,
3151 bool *no_add_attrs)
3153 if (TREE_CODE (*node) != FUNCTION_TYPE
3154 && TREE_CODE (*node) != METHOD_TYPE
3155 && TREE_CODE (*node) != FIELD_DECL
3156 && TREE_CODE (*node) != TYPE_DECL)
3158 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3159 IDENTIFIER_POINTER (name));
3160 *no_add_attrs = true;
3161 return NULL_TREE;
3164 /* Can combine regparm with all attributes but fastcall. */
3165 if (is_attribute_p ("regparm", name))
3167 tree cst;
3169 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3171 error ("fastcall and regparm attributes are not compatible");
3174 cst = TREE_VALUE (args);
3175 if (TREE_CODE (cst) != INTEGER_CST)
3177 warning (OPT_Wattributes,
3178 "%qs attribute requires an integer constant argument",
3179 IDENTIFIER_POINTER (name));
3180 *no_add_attrs = true;
3182 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3184 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3185 IDENTIFIER_POINTER (name), REGPARM_MAX);
3186 *no_add_attrs = true;
3189 if (!TARGET_64BIT
3190 && lookup_attribute (ix86_force_align_arg_pointer_string,
3191 TYPE_ATTRIBUTES (*node))
3192 && compare_tree_int (cst, REGPARM_MAX-1))
3194 error ("%s functions limited to %d register parameters",
3195 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3198 return NULL_TREE;
3201 if (TARGET_64BIT)
3203 /* Do not warn when emulating the MS ABI. */
3204 if (TREE_CODE (*node) != FUNCTION_TYPE || !ix86_function_type_abi (*node))
3205 warning (OPT_Wattributes, "%qs attribute ignored",
3206 IDENTIFIER_POINTER (name));
3207 *no_add_attrs = true;
3208 return NULL_TREE;
3211 /* Fastcall can be combined only with sseregparm; cdecl, stdcall and regparm conflict with it. */
3212 if (is_attribute_p ("fastcall", name))
3214 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3216 error ("fastcall and cdecl attributes are not compatible");
3218 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3220 error ("fastcall and stdcall attributes are not compatible");
3222 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3224 error ("fastcall and regparm attributes are not compatible");
3228 /* Stdcall can be combined with regparm and sseregparm; cdecl and
3229 fastcall conflict with it. */
3230 else if (is_attribute_p ("stdcall", name))
3232 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3234 error ("stdcall and cdecl attributes are not compatible");
3236 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3238 error ("stdcall and fastcall attributes are not compatible");
3242 /* Can combine cdecl with regparm and sseregparm. */
3243 else if (is_attribute_p ("cdecl", name))
3245 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3247 error ("stdcall and cdecl attributes are not compatible");
3249 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3251 error ("fastcall and cdecl attributes are not compatible");
3255 /* Can combine sseregparm with all attributes. */
3257 return NULL_TREE;
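
/* Illustrative sketch (not part of GCC): how the calling convention
   attributes validated above look in user code.  The declarations are
   hypothetical; the outcomes follow the checks in the handler.

       int __attribute__ ((regparm (3)))           f1 (int, int, int);   OK
       int __attribute__ ((fastcall))              f2 (int, int);        OK
       int __attribute__ ((stdcall, sseregparm))   f3 (double);          OK
       int __attribute__ ((fastcall, regparm (2))) f4 (int);             error:
           fastcall and regparm attributes are not compatible
       int __attribute__ ((regparm (9)))           f5 (int);             warning:
           argument larger than REGPARM_MAX (3 on ia32)

   On 64-bit targets these attributes are ignored with a warning unless the
   function type uses the MS ABI.  */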
3260 /* Return 0 if the attributes for two types are incompatible, 1 if they
3261 are compatible, and 2 if they are nearly compatible (which causes a
3262 warning to be generated). */
3264 static int
3265 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3267 /* Check for mismatch of non-default calling convention. */
3268 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3270 if (TREE_CODE (type1) != FUNCTION_TYPE
3271 && TREE_CODE (type1) != METHOD_TYPE)
3272 return 1;
3274 /* Check for mismatched fastcall/regparm types. */
3275 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3276 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3277 || (ix86_function_regparm (type1, NULL)
3278 != ix86_function_regparm (type2, NULL)))
3279 return 0;
3281 /* Check for mismatched sseregparm types. */
3282 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3283 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3284 return 0;
3286 /* Check for mismatched return types (cdecl vs stdcall). */
3287 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3288 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3289 return 0;
3291 return 1;
3294 /* Return the regparm value for a function with the indicated TYPE and DECL.
3295 DECL may be NULL when calling function indirectly
3296 or considering a libcall. */
3298 static int
3299 ix86_function_regparm (const_tree type, const_tree decl)
3301 tree attr;
3302 int regparm = ix86_regparm;
3304 static bool error_issued;
3306 if (TARGET_64BIT)
3308 if (ix86_function_type_abi (type) == DEFAULT_ABI)
3309 return regparm;
3310 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
3313 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3314 if (attr)
3316 regparm
3317 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3319 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3321 /* We can't use regparm(3) for nested functions because
3322 these pass the static chain pointer in the %ecx register. */
3323 if (!error_issued && regparm == 3
3324 && decl_function_context (decl)
3325 && !DECL_NO_STATIC_CHAIN (decl))
3327 error ("nested functions are limited to 2 register parameters");
3328 error_issued = true;
3329 return 0;
3333 return regparm;
3336 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3337 return 2;
3339 /* Use register calling convention for local functions when possible. */
3340 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3341 && flag_unit_at_a_time && !profile_flag)
3343 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3344 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3345 if (i && i->local)
3347 int local_regparm, globals = 0, regno;
3348 struct function *f;
3350 /* Make sure no regparm register is taken by a
3351 fixed register variable. */
3352 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3353 if (fixed_regs[local_regparm])
3354 break;
3356 /* We can't use regparm(3) for nested functions as these use
3357 the static chain pointer in the third argument register. */
3358 if (local_regparm == 3
3359 && (decl_function_context (decl)
3360 || ix86_force_align_arg_pointer)
3361 && !DECL_NO_STATIC_CHAIN (decl))
3362 local_regparm = 2;
3364 /* If the function realigns its stack pointer, the prologue will
3365 clobber %ecx. If we've already generated code for the callee,
3366 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3367 scanning the attributes for the self-realigning property. */
3368 f = DECL_STRUCT_FUNCTION (decl);
3369 if (local_regparm == 3
3370 && (f ? !!f->machine->force_align_arg_pointer
3371 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3372 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3373 local_regparm = 2;
3375 /* Each fixed register usage increases register pressure,
3376 so fewer registers should be used for argument passing.
3377 This functionality can be overridden by an explicit
3378 regparm value. */
3379 for (regno = 0; regno <= DI_REG; regno++)
3380 if (fixed_regs[regno])
3381 globals++;
3383 local_regparm
3384 = globals < local_regparm ? local_regparm - globals : 0;
3386 if (local_regparm > regparm)
3387 regparm = local_regparm;
3391 return regparm;
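
/* Illustrative sketch (not part of GCC): the nested-function restriction
   enforced above.  The code is hypothetical.

       void outer (void)
       {
         int x = 1;
         __attribute__ ((regparm (3))) int inner (int a, int b, int c)
         {
           return a + b + c + x;
         }
         inner (1, 2, 3);
       }

   inner needs %ecx for the static chain, so an explicit regparm (3) draws the
   "nested functions are limited to 2 register parameters" error.  Separately,
   a file-local function whose address does not escape may be promoted to a
   register calling convention automatically, minus any registers claimed by
   fixed/global register variables.  */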
3394 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3395 DFmode (2) arguments in SSE registers for a function with the
3396 indicated TYPE and DECL. DECL may be NULL when calling function
3397 indirectly or considering a libcall. Otherwise return 0. */
3399 static int
3400 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3402 gcc_assert (!TARGET_64BIT);
3404 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3405 by the sseregparm attribute. */
3406 if (TARGET_SSEREGPARM
3407 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3409 if (!TARGET_SSE)
3411 if (warn)
3413 if (decl)
3414 error ("Calling %qD with attribute sseregparm without "
3415 "SSE/SSE2 enabled", decl);
3416 else
3417 error ("Calling %qT with attribute sseregparm without "
3418 "SSE/SSE2 enabled", type);
3420 return 0;
3423 return 2;
3426 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3427 (and DFmode for SSE2) arguments in SSE registers. */
3428 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3430 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3431 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3432 if (i && i->local)
3433 return TARGET_SSE2 ? 2 : 1;
3436 return 0;
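
/* Illustrative sketch (not part of GCC): the sseregparm attribute checked
   above on a hypothetical 32-bit function.

       double __attribute__ ((sseregparm)) scale (double x, double y)
       {
         return x * y;
       }

   With SSE2 enabled, x and y are passed in %xmm0/%xmm1 (and the result is
   returned in %xmm0, see function_value_32 below) instead of on the stack;
   without SSE the error reported above is issued.  Local functions compiled
   with SSE math get the same treatment implicitly.  */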
3439 /* Return true if EAX is live at the start of the function. Used by
3440 ix86_expand_prologue to determine if we need special help before
3441 calling allocate_stack_worker. */
3443 static bool
3444 ix86_eax_live_at_start_p (void)
3446 /* Cheat. Don't bother working forward from ix86_function_regparm
3447 to the function type to whether an actual argument is located in
3448 eax. Instead just look at cfg info, which is still close enough
3449 to correct at this point. This gives false positives for broken
3450 functions that might use uninitialized data that happens to be
3451 allocated in eax, but who cares? */
3452 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3455 /* Value is the number of bytes of arguments automatically
3456 popped when returning from a subroutine call.
3457 FUNDECL is the declaration node of the function (as a tree),
3458 FUNTYPE is the data type of the function (as a tree),
3459 or for a library call it is an identifier node for the subroutine name.
3460 SIZE is the number of bytes of arguments passed on the stack.
3462 On the 80386, the RTD insn may be used to pop them if the number
3463 of args is fixed, but if the number is variable then the caller
3464 must pop them all. RTD can't be used for library calls now
3465 because the library is compiled with the Unix compiler.
3466 Use of RTD is a selectable option, since it is incompatible with
3467 standard Unix calling sequences. If the option is not selected,
3468 the caller must always pop the args.
3470 The attribute stdcall is equivalent to RTD on a per module basis. */
3473 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3475 int rtd;
3477 /* None of the 64-bit ABIs pop arguments. */
3478 if (TARGET_64BIT)
3479 return 0;
3481 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3483 /* Cdecl functions override -mrtd, and never pop the stack. */
3484 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3486 /* Stdcall and fastcall functions will pop the stack if not
3487 variable args. */
3488 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3489 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3490 rtd = 1;
3492 if (rtd && ! stdarg_p (funtype))
3493 return size;
3496 /* Lose any fake structure return argument if it is passed on the stack. */
3497 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3498 && !KEEP_AGGREGATE_RETURN_POINTER)
3500 int nregs = ix86_function_regparm (funtype, fundecl);
3501 if (nregs == 0)
3502 return GET_MODE_SIZE (Pmode);
3505 return 0;
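
/* Illustrative sketch (not part of GCC): what the value computed above means
   for generated code.  The function is hypothetical.

       int __attribute__ ((stdcall)) sum2 (int a, int b) { return a + b; }

   has a fixed argument list, so ix86_return_pops_args returns 8 and the
   callee exits with "ret $8"; a plain cdecl function exits with "ret" and the
   caller adjusts %esp.  A stdcall or fastcall prototype ending in an ellipsis
   is treated as caller-pops, since the callee cannot know the size.  */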
3508 /* Argument support functions. */
3510 /* Return true when register may be used to pass function parameters. */
3511 bool
3512 ix86_function_arg_regno_p (int regno)
3514 int i;
3515 const int *parm_regs;
3517 if (!TARGET_64BIT)
3519 if (TARGET_MACHO)
3520 return (regno < REGPARM_MAX
3521 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3522 else
3523 return (regno < REGPARM_MAX
3524 || (TARGET_MMX && MMX_REGNO_P (regno)
3525 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3526 || (TARGET_SSE && SSE_REGNO_P (regno)
3527 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3530 if (TARGET_MACHO)
3532 if (SSE_REGNO_P (regno) && TARGET_SSE)
3533 return true;
3535 else
3537 if (TARGET_SSE && SSE_REGNO_P (regno)
3538 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3539 return true;
3542 /* TODO: The function should depend on current function ABI but
3543 builtins.c would need updating then. Therefore we use the
3544 default ABI. */
3546 /* RAX is used as hidden argument to va_arg functions. */
3547 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
3548 return true;
3550 if (DEFAULT_ABI == MS_ABI)
3551 parm_regs = x86_64_ms_abi_int_parameter_registers;
3552 else
3553 parm_regs = x86_64_int_parameter_registers;
3554 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
3555 : X86_64_REGPARM_MAX); i++)
3556 if (regno == parm_regs[i])
3557 return true;
3558 return false;
3561 /* Return true if we do not know how to pass TYPE solely in registers. */
3563 static bool
3564 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3566 if (must_pass_in_stack_var_size_or_pad (mode, type))
3567 return true;
3569 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3570 The layout_type routine is crafty and tries to trick us into passing
3571 currently unsupported vector types on the stack by using TImode. */
3572 return (!TARGET_64BIT && mode == TImode
3573 && type && TREE_CODE (type) != VECTOR_TYPE);
3576 /* Return the size, in bytes, of the area reserved for arguments passed
3577 in registers for the function represented by FNDECL, which depends on the
3578 ABI in use. */
3580 ix86_reg_parm_stack_space (const_tree fndecl)
3582 int call_abi = 0;
3583 /* For libcalls it is possible that there is no fndecl at hand.
3584 In that case assume the default ABI of the target. */
3585 if (!fndecl)
3586 call_abi = DEFAULT_ABI;
3587 else
3588 call_abi = ix86_function_abi (fndecl);
3589 if (call_abi == 1)
3590 return 32;
3591 return 0;
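
/* Illustrative sketch (not part of GCC): the 32 bytes returned above for an
   MS-ABI function are the register parameter area (the "shadow space").  For
   a hypothetical call

       long long f (long long a, long long b);
       f (1, 2);

   the caller still reserves 4 * 8 bytes of stack that the callee may use to
   spill %rcx, %rdx, %r8 and %r9, even though both arguments travel in
   registers; SysV-ABI calls reserve no such area, hence the return of 0.  */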
3594 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
3595 call ABI used. */
3597 ix86_function_type_abi (const_tree fntype)
3599 if (TARGET_64BIT && fntype != NULL)
3601 int abi;
3602 if (DEFAULT_ABI == SYSV_ABI)
3603 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
3604 else
3605 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
3607 if (DEFAULT_ABI == MS_ABI && abi == SYSV_ABI)
3608 sorry ("using sysv calling convention on target w64 is not supported");
3610 return abi;
3612 return DEFAULT_ABI;
3616 ix86_function_abi (const_tree fndecl)
3618 if (! fndecl)
3619 return DEFAULT_ABI;
3620 return ix86_function_type_abi (TREE_TYPE (fndecl));
3623 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
3624 call ABI used. */
3626 ix86_cfun_abi (void)
3628 if (! cfun || ! TARGET_64BIT)
3629 return DEFAULT_ABI;
3630 return cfun->machine->call_abi;
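
/* Illustrative sketch (not part of GCC): the per-function ABI selection
   above is driven by the ms_abi/sysv_abi type attributes.  Hypothetical
   declarations on an x86-64 SysV target:

       int __attribute__ ((ms_abi)) win_func (int a, int b);     MS_ABI
       int                          unix_func (int a, int b);    SYSV_ABI

   A call to win_func passes a in %ecx and b in %edx and reserves the 32-byte
   shadow space, while unix_func receives a in %edi and b in %esi.  On a w64
   target the opposite attribute, sysv_abi, is rejected with the sorry ()
   above.  */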
3633 /* regclass.c */
3634 extern void init_regs (void);
3636 /* Implementation of the call ABI switching target hook. The call
3637 register sets appropriate for FNDECL are installed. See also
3638 CONDITIONAL_REGISTER_USAGE for more details.
3639 To prevent redundant calls of the costly function init_regs (), register
3640 usage is not reset for the default ABI. */
3641 void
3642 ix86_call_abi_override (const_tree fndecl)
3644 if (fndecl == NULL_TREE)
3645 cfun->machine->call_abi = DEFAULT_ABI;
3646 else
3647 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
3648 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
3650 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
3652 call_used_regs[4 /*RSI*/] = 0;
3653 call_used_regs[5 /*RDI*/] = 0;
3654 init_regs ();
3657 else if (TARGET_64BIT)
3659 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
3661 call_used_regs[4 /*RSI*/] = 1;
3662 call_used_regs[5 /*RDI*/] = 1;
3663 init_regs ();
3668 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3669 for a call to a function whose data type is FNTYPE.
3670 For a library call, FNTYPE is 0. */
3672 void
3673 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3674 tree fntype, /* tree ptr for function decl */
3675 rtx libname, /* SYMBOL_REF of library name or 0 */
3676 tree fndecl)
3678 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3679 memset (cum, 0, sizeof (*cum));
3681 cum->call_abi = ix86_function_type_abi (fntype);
3682 /* Set up the number of registers to use for passing arguments. */
3683 cum->nregs = ix86_regparm;
3684 if (TARGET_64BIT)
3686 if (cum->call_abi != DEFAULT_ABI)
3687 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
3688 : X64_REGPARM_MAX;
3690 if (TARGET_SSE)
3692 cum->sse_nregs = SSE_REGPARM_MAX;
3693 if (TARGET_64BIT)
3695 if (cum->call_abi != DEFAULT_ABI)
3696 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
3697 : X64_SSE_REGPARM_MAX;
3700 if (TARGET_MMX)
3701 cum->mmx_nregs = MMX_REGPARM_MAX;
3702 cum->warn_sse = true;
3703 cum->warn_mmx = true;
3705 /* Because the type might mismatch between caller and callee, we need to
3706 use the actual type of the function for local calls.
3707 FIXME: cgraph_analyze can be told to actually record whether a function uses
3708 va_start, so for local functions maybe_vaarg can be made more aggressive,
3709 helping K&R code.
3710 FIXME: once the type system is fixed, we won't need this code anymore. */
3711 if (i && i->local)
3712 fntype = TREE_TYPE (fndecl);
3713 cum->maybe_vaarg = (fntype
3714 ? (!prototype_p (fntype) || stdarg_p (fntype))
3715 : !libname);
3717 if (!TARGET_64BIT)
3719 /* If there are variable arguments, then we won't pass anything
3720 in registers in 32-bit mode. */
3721 if (stdarg_p (fntype))
3723 cum->nregs = 0;
3724 cum->sse_nregs = 0;
3725 cum->mmx_nregs = 0;
3726 cum->warn_sse = 0;
3727 cum->warn_mmx = 0;
3728 return;
3731 /* Use ecx and edx registers if function has fastcall attribute,
3732 else look for regparm information. */
3733 if (fntype)
3735 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3737 cum->nregs = 2;
3738 cum->fastcall = 1;
3740 else
3741 cum->nregs = ix86_function_regparm (fntype, fndecl);
3744 /* Set up the number of SSE registers used for passing SFmode
3745 and DFmode arguments. Warn for mismatching ABI. */
3746 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
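
/* Illustrative sketch (not part of GCC): with the setup above, a hypothetical
   32-bit fastcall function

       int __attribute__ ((fastcall)) add (int a, int b) { return a + b; }

   starts out with cum->nregs == 2 and cum->fastcall set, so a arrives in %ecx
   and b in %edx (function_arg_32 below swaps the first register from %eax to
   %ecx for fastcall).  A prototype with an ellipsis instead zeroes all the
   register counts, pushing every argument onto the stack.  */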
3750 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3751 But in the case of vector types, it is some vector mode.
3753 When we have only some of our vector isa extensions enabled, then there
3754 are some modes for which vector_mode_supported_p is false. For these
3755 modes, the generic vector support in gcc will choose some non-vector mode
3756 in order to implement the type. By computing the natural mode, we'll
3757 select the proper ABI location for the operand and not depend on whatever
3758 the middle-end decides to do with these vector types. */
3760 static enum machine_mode
3761 type_natural_mode (const_tree type)
3763 enum machine_mode mode = TYPE_MODE (type);
3765 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3767 HOST_WIDE_INT size = int_size_in_bytes (type);
3768 if ((size == 8 || size == 16)
3769 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3770 && TYPE_VECTOR_SUBPARTS (type) > 1)
3772 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3774 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3775 mode = MIN_MODE_VECTOR_FLOAT;
3776 else
3777 mode = MIN_MODE_VECTOR_INT;
3779 /* Get the mode which has this inner mode and number of units. */
3780 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3781 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3782 && GET_MODE_INNER (mode) == innermode)
3783 return mode;
3785 gcc_unreachable ();
3789 return mode;
3792 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3793 this may not agree with the mode that the type system has chosen for the
3794 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3795 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3797 static rtx
3798 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3799 unsigned int regno)
3801 rtx tmp;
3803 if (orig_mode != BLKmode)
3804 tmp = gen_rtx_REG (orig_mode, regno);
3805 else
3807 tmp = gen_rtx_REG (mode, regno);
3808 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3809 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3812 return tmp;
3815 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3816 of this code is to classify each 8bytes of incoming argument by the register
3817 class and assign registers accordingly. */
3819 /* Return the union class of CLASS1 and CLASS2.
3820 See the x86-64 PS ABI for details. */
3822 static enum x86_64_reg_class
3823 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3825 /* Rule #1: If both classes are equal, this is the resulting class. */
3826 if (class1 == class2)
3827 return class1;
3829 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3830 the other class. */
3831 if (class1 == X86_64_NO_CLASS)
3832 return class2;
3833 if (class2 == X86_64_NO_CLASS)
3834 return class1;
3836 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3837 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3838 return X86_64_MEMORY_CLASS;
3840 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3841 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3842 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3843 return X86_64_INTEGERSI_CLASS;
3844 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3845 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3846 return X86_64_INTEGER_CLASS;
3848 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3849 MEMORY is used. */
3850 if (class1 == X86_64_X87_CLASS
3851 || class1 == X86_64_X87UP_CLASS
3852 || class1 == X86_64_COMPLEX_X87_CLASS
3853 || class2 == X86_64_X87_CLASS
3854 || class2 == X86_64_X87UP_CLASS
3855 || class2 == X86_64_COMPLEX_X87_CLASS)
3856 return X86_64_MEMORY_CLASS;
3858 /* Rule #6: Otherwise class SSE is used. */
3859 return X86_64_SSE_CLASS;
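
/* Worked example (illustrative, not part of GCC): for the single eightbyte of
   a hypothetical

       struct s { float f; int i; };

   the float contributes X86_64_SSESF_CLASS and the int X86_64_INTEGERSI_CLASS;
   rule #4 above merges them to X86_64_INTEGERSI_CLASS, so the whole struct is
   passed in one general purpose register.  A struct of two doubles, by
   contrast, occupies two eightbytes that each keep SSE class and go in %xmm
   registers.  */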
3862 /* Classify the argument of type TYPE and mode MODE.
3863 CLASSES will be filled by the register class used to pass each word
3864 of the operand. The number of words is returned. In case the parameter
3865 should be passed in memory, 0 is returned. As a special case for zero
3866 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3868 BIT_OFFSET is used internally for handling records; it specifies the
3869 offset of the value within the record, in bits modulo 256, to avoid overflow cases.
3871 See the x86-64 PS ABI for details.
3874 static int
3875 classify_argument (enum machine_mode mode, const_tree type,
3876 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3878 HOST_WIDE_INT bytes =
3879 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3880 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3882 /* Variable sized entities are always passed/returned in memory. */
3883 if (bytes < 0)
3884 return 0;
3886 if (mode != VOIDmode
3887 && targetm.calls.must_pass_in_stack (mode, type))
3888 return 0;
3890 if (type && AGGREGATE_TYPE_P (type))
3892 int i;
3893 tree field;
3894 enum x86_64_reg_class subclasses[MAX_CLASSES];
3896 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3897 if (bytes > 16)
3898 return 0;
3900 for (i = 0; i < words; i++)
3901 classes[i] = X86_64_NO_CLASS;
3903 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3904 signal the memory class, so handle this as a special case. */
3905 if (!words)
3907 classes[0] = X86_64_NO_CLASS;
3908 return 1;
3911 /* Classify each field of record and merge classes. */
3912 switch (TREE_CODE (type))
3914 case RECORD_TYPE:
3915 /* And now merge the fields of structure. */
3916 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3918 if (TREE_CODE (field) == FIELD_DECL)
3920 int num;
3922 if (TREE_TYPE (field) == error_mark_node)
3923 continue;
3925 /* Bitfields are always classified as integer. Handle them
3926 early, since later code would consider them to be
3927 misaligned integers. */
3928 if (DECL_BIT_FIELD (field))
3930 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3931 i < ((int_bit_position (field) + (bit_offset % 64))
3932 + tree_low_cst (DECL_SIZE (field), 0)
3933 + 63) / 8 / 8; i++)
3934 classes[i] =
3935 merge_classes (X86_64_INTEGER_CLASS,
3936 classes[i]);
3938 else
3940 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3941 TREE_TYPE (field), subclasses,
3942 (int_bit_position (field)
3943 + bit_offset) % 256);
3944 if (!num)
3945 return 0;
3946 for (i = 0; i < num; i++)
3948 int pos =
3949 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3950 classes[i + pos] =
3951 merge_classes (subclasses[i], classes[i + pos]);
3956 break;
3958 case ARRAY_TYPE:
3959 /* Arrays are handled as small records. */
3961 int num;
3962 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3963 TREE_TYPE (type), subclasses, bit_offset);
3964 if (!num)
3965 return 0;
3967 /* The partial classes are now full classes. */
3968 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3969 subclasses[0] = X86_64_SSE_CLASS;
3970 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3971 subclasses[0] = X86_64_INTEGER_CLASS;
3973 for (i = 0; i < words; i++)
3974 classes[i] = subclasses[i % num];
3976 break;
3978 case UNION_TYPE:
3979 case QUAL_UNION_TYPE:
3980 /* Unions are similar to RECORD_TYPE but offset is always 0. */
3982 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3984 if (TREE_CODE (field) == FIELD_DECL)
3986 int num;
3988 if (TREE_TYPE (field) == error_mark_node)
3989 continue;
3991 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3992 TREE_TYPE (field), subclasses,
3993 bit_offset);
3994 if (!num)
3995 return 0;
3996 for (i = 0; i < num; i++)
3997 classes[i] = merge_classes (subclasses[i], classes[i]);
4000 break;
4002 default:
4003 gcc_unreachable ();
4006 /* Final merger cleanup. */
4007 for (i = 0; i < words; i++)
4009 /* If one class is MEMORY, everything should be passed in
4010 memory. */
4011 if (classes[i] == X86_64_MEMORY_CLASS)
4012 return 0;
4014 /* The X86_64_SSEUP_CLASS should be always preceded by
4015 X86_64_SSE_CLASS. */
4016 if (classes[i] == X86_64_SSEUP_CLASS
4017 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4018 classes[i] = X86_64_SSE_CLASS;
4020 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4021 if (classes[i] == X86_64_X87UP_CLASS
4022 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4023 classes[i] = X86_64_SSE_CLASS;
4025 return words;
4028 /* Compute the alignment needed. We align all types to natural boundaries,
4029 with the exception of XFmode, which is aligned to 64 bits. */
4030 if (mode != VOIDmode && mode != BLKmode)
4032 int mode_alignment = GET_MODE_BITSIZE (mode);
4034 if (mode == XFmode)
4035 mode_alignment = 128;
4036 else if (mode == XCmode)
4037 mode_alignment = 256;
4038 if (COMPLEX_MODE_P (mode))
4039 mode_alignment /= 2;
4040 /* Misaligned fields are always returned in memory. */
4041 if (bit_offset % mode_alignment)
4042 return 0;
4045 /* For V1xx modes, just use the base mode. */
4046 if (VECTOR_MODE_P (mode) && mode != V1DImode
4047 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4048 mode = GET_MODE_INNER (mode);
4050 /* Classification of atomic types. */
4051 switch (mode)
4053 case SDmode:
4054 case DDmode:
4055 classes[0] = X86_64_SSE_CLASS;
4056 return 1;
4057 case TDmode:
4058 classes[0] = X86_64_SSE_CLASS;
4059 classes[1] = X86_64_SSEUP_CLASS;
4060 return 2;
4061 case DImode:
4062 case SImode:
4063 case HImode:
4064 case QImode:
4065 case CSImode:
4066 case CHImode:
4067 case CQImode:
4068 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4069 classes[0] = X86_64_INTEGERSI_CLASS;
4070 else
4071 classes[0] = X86_64_INTEGER_CLASS;
4072 return 1;
4073 case CDImode:
4074 case TImode:
4075 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4076 return 2;
4077 case CTImode:
4078 return 0;
4079 case SFmode:
4080 if (!(bit_offset % 64))
4081 classes[0] = X86_64_SSESF_CLASS;
4082 else
4083 classes[0] = X86_64_SSE_CLASS;
4084 return 1;
4085 case DFmode:
4086 classes[0] = X86_64_SSEDF_CLASS;
4087 return 1;
4088 case XFmode:
4089 classes[0] = X86_64_X87_CLASS;
4090 classes[1] = X86_64_X87UP_CLASS;
4091 return 2;
4092 case TFmode:
4093 classes[0] = X86_64_SSE_CLASS;
4094 classes[1] = X86_64_SSEUP_CLASS;
4095 return 2;
4096 case SCmode:
4097 classes[0] = X86_64_SSE_CLASS;
4098 return 1;
4099 case DCmode:
4100 classes[0] = X86_64_SSEDF_CLASS;
4101 classes[1] = X86_64_SSEDF_CLASS;
4102 return 2;
4103 case XCmode:
4104 classes[0] = X86_64_COMPLEX_X87_CLASS;
4105 return 1;
4106 case TCmode:
4107 /* This mode is larger than 16 bytes. */
4108 return 0;
4109 case V4SFmode:
4110 case V4SImode:
4111 case V16QImode:
4112 case V8HImode:
4113 case V2DFmode:
4114 case V2DImode:
4115 classes[0] = X86_64_SSE_CLASS;
4116 classes[1] = X86_64_SSEUP_CLASS;
4117 return 2;
4118 case V1DImode:
4119 case V2SFmode:
4120 case V2SImode:
4121 case V4HImode:
4122 case V8QImode:
4123 classes[0] = X86_64_SSE_CLASS;
4124 return 1;
4125 case BLKmode:
4126 case VOIDmode:
4127 return 0;
4128 default:
4129 gcc_assert (VECTOR_MODE_P (mode));
4131 if (bytes > 16)
4132 return 0;
4134 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
4136 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4137 classes[0] = X86_64_INTEGERSI_CLASS;
4138 else
4139 classes[0] = X86_64_INTEGER_CLASS;
4140 classes[1] = X86_64_INTEGER_CLASS;
4141 return 1 + (bytes > 8);
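
/* Worked example (illustrative, not part of GCC): for a hypothetical

       struct p { double x; long n; };

   classify_argument returns 2 with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, so x is passed in an SSE register and n
   in a general purpose register.  A 24-byte aggregate exceeds the 16-byte
   limit above, so 0 is returned and the argument goes on the stack.  */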
4145 /* Examine the argument and return set number of register required in each
4146 class. Return 0 iff parameter should be passed in memory. */
4147 static int
4148 examine_argument (enum machine_mode mode, const_tree type, int in_return,
4149 int *int_nregs, int *sse_nregs)
4151 enum x86_64_reg_class regclass[MAX_CLASSES];
4152 int n = classify_argument (mode, type, regclass, 0);
4154 *int_nregs = 0;
4155 *sse_nregs = 0;
4156 if (!n)
4157 return 0;
4158 for (n--; n >= 0; n--)
4159 switch (regclass[n])
4161 case X86_64_INTEGER_CLASS:
4162 case X86_64_INTEGERSI_CLASS:
4163 (*int_nregs)++;
4164 break;
4165 case X86_64_SSE_CLASS:
4166 case X86_64_SSESF_CLASS:
4167 case X86_64_SSEDF_CLASS:
4168 (*sse_nregs)++;
4169 break;
4170 case X86_64_NO_CLASS:
4171 case X86_64_SSEUP_CLASS:
4172 break;
4173 case X86_64_X87_CLASS:
4174 case X86_64_X87UP_CLASS:
4175 if (!in_return)
4176 return 0;
4177 break;
4178 case X86_64_COMPLEX_X87_CLASS:
4179 return in_return ? 2 : 0;
4180 case X86_64_MEMORY_CLASS:
4181 gcc_unreachable ();
4183 return 1;
4186 /* Construct container for the argument used by GCC interface. See
4187 FUNCTION_ARG for the detailed description. */
4189 static rtx
4190 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4191 const_tree type, int in_return, int nintregs, int nsseregs,
4192 const int *intreg, int sse_regno)
4194 /* The following variables hold the static issued_error state. */
4195 static bool issued_sse_arg_error;
4196 static bool issued_sse_ret_error;
4197 static bool issued_x87_ret_error;
4199 enum machine_mode tmpmode;
4200 int bytes =
4201 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4202 enum x86_64_reg_class regclass[MAX_CLASSES];
4203 int n;
4204 int i;
4205 int nexps = 0;
4206 int needed_sseregs, needed_intregs;
4207 rtx exp[MAX_CLASSES];
4208 rtx ret;
4210 n = classify_argument (mode, type, regclass, 0);
4211 if (!n)
4212 return NULL;
4213 if (!examine_argument (mode, type, in_return, &needed_intregs,
4214 &needed_sseregs))
4215 return NULL;
4216 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4217 return NULL;
4219 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4220 some less clueful developer tries to use floating-point anyway. */
4221 if (needed_sseregs && !TARGET_SSE)
4223 if (in_return)
4225 if (!issued_sse_ret_error)
4227 error ("SSE register return with SSE disabled");
4228 issued_sse_ret_error = true;
4231 else if (!issued_sse_arg_error)
4233 error ("SSE register argument with SSE disabled");
4234 issued_sse_arg_error = true;
4236 return NULL;
4239 /* Likewise, error if the ABI requires us to return values in the
4240 x87 registers and the user specified -mno-80387. */
4241 if (!TARGET_80387 && in_return)
4242 for (i = 0; i < n; i++)
4243 if (regclass[i] == X86_64_X87_CLASS
4244 || regclass[i] == X86_64_X87UP_CLASS
4245 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4247 if (!issued_x87_ret_error)
4249 error ("x87 register return with x87 disabled");
4250 issued_x87_ret_error = true;
4252 return NULL;
4255 /* First construct simple cases. Avoid SCmode, since we want to use
4256 single register to pass this type. */
4257 if (n == 1 && mode != SCmode)
4258 switch (regclass[0])
4260 case X86_64_INTEGER_CLASS:
4261 case X86_64_INTEGERSI_CLASS:
4262 return gen_rtx_REG (mode, intreg[0]);
4263 case X86_64_SSE_CLASS:
4264 case X86_64_SSESF_CLASS:
4265 case X86_64_SSEDF_CLASS:
4266 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4267 case X86_64_X87_CLASS:
4268 case X86_64_COMPLEX_X87_CLASS:
4269 return gen_rtx_REG (mode, FIRST_STACK_REG);
4270 case X86_64_NO_CLASS:
4271 /* Zero sized array, struct or class. */
4272 return NULL;
4273 default:
4274 gcc_unreachable ();
4276 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4277 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4278 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4280 if (n == 2
4281 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4282 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4283 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4284 && regclass[1] == X86_64_INTEGER_CLASS
4285 && (mode == CDImode || mode == TImode || mode == TFmode)
4286 && intreg[0] + 1 == intreg[1])
4287 return gen_rtx_REG (mode, intreg[0]);
4289 /* Otherwise figure out the entries of the PARALLEL. */
4290 for (i = 0; i < n; i++)
4292 switch (regclass[i])
4294 case X86_64_NO_CLASS:
4295 break;
4296 case X86_64_INTEGER_CLASS:
4297 case X86_64_INTEGERSI_CLASS:
4298 /* Merge TImodes on aligned occasions here too. */
4299 if (i * 8 + 8 > bytes)
4300 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4301 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4302 tmpmode = SImode;
4303 else
4304 tmpmode = DImode;
4305 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
4306 if (tmpmode == BLKmode)
4307 tmpmode = DImode;
4308 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4309 gen_rtx_REG (tmpmode, *intreg),
4310 GEN_INT (i*8));
4311 intreg++;
4312 break;
4313 case X86_64_SSESF_CLASS:
4314 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4315 gen_rtx_REG (SFmode,
4316 SSE_REGNO (sse_regno)),
4317 GEN_INT (i*8));
4318 sse_regno++;
4319 break;
4320 case X86_64_SSEDF_CLASS:
4321 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4322 gen_rtx_REG (DFmode,
4323 SSE_REGNO (sse_regno)),
4324 GEN_INT (i*8));
4325 sse_regno++;
4326 break;
4327 case X86_64_SSE_CLASS:
4328 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4329 tmpmode = TImode;
4330 else
4331 tmpmode = DImode;
4332 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4333 gen_rtx_REG (tmpmode,
4334 SSE_REGNO (sse_regno)),
4335 GEN_INT (i*8));
4336 if (tmpmode == TImode)
4337 i++;
4338 sse_regno++;
4339 break;
4340 default:
4341 gcc_unreachable ();
4345 /* Empty aligned struct, union or class. */
4346 if (nexps == 0)
4347 return NULL;
4349 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4350 for (i = 0; i < nexps; i++)
4351 XVECEXP (ret, 0, i) = exp [i];
4352 return ret;
4355 /* Update the data in CUM to advance over an argument of mode MODE
4356 and data type TYPE. (TYPE is null for libcalls where that information
4357 may not be available.) */
4359 static void
4360 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4361 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4363 switch (mode)
4365 default:
4366 break;
4368 case BLKmode:
4369 if (bytes < 0)
4370 break;
4371 /* FALLTHRU */
4373 case DImode:
4374 case SImode:
4375 case HImode:
4376 case QImode:
4377 cum->words += words;
4378 cum->nregs -= words;
4379 cum->regno += words;
4381 if (cum->nregs <= 0)
4383 cum->nregs = 0;
4384 cum->regno = 0;
4386 break;
4388 case DFmode:
4389 if (cum->float_in_sse < 2)
4390 break;
4391 case SFmode:
4392 if (cum->float_in_sse < 1)
4393 break;
4394 /* FALLTHRU */
4396 case TImode:
4397 case V16QImode:
4398 case V8HImode:
4399 case V4SImode:
4400 case V2DImode:
4401 case V4SFmode:
4402 case V2DFmode:
4403 if (!type || !AGGREGATE_TYPE_P (type))
4405 cum->sse_words += words;
4406 cum->sse_nregs -= 1;
4407 cum->sse_regno += 1;
4408 if (cum->sse_nregs <= 0)
4410 cum->sse_nregs = 0;
4411 cum->sse_regno = 0;
4414 break;
4416 case V8QImode:
4417 case V4HImode:
4418 case V2SImode:
4419 case V2SFmode:
4420 case V1DImode:
4421 if (!type || !AGGREGATE_TYPE_P (type))
4423 cum->mmx_words += words;
4424 cum->mmx_nregs -= 1;
4425 cum->mmx_regno += 1;
4426 if (cum->mmx_nregs <= 0)
4428 cum->mmx_nregs = 0;
4429 cum->mmx_regno = 0;
4432 break;
4436 static void
4437 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4438 tree type, HOST_WIDE_INT words)
4440 int int_nregs, sse_nregs;
4442 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4443 cum->words += words;
4444 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4446 cum->nregs -= int_nregs;
4447 cum->sse_nregs -= sse_nregs;
4448 cum->regno += int_nregs;
4449 cum->sse_regno += sse_nregs;
4451 else
4452 cum->words += words;
4455 static void
4456 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4457 HOST_WIDE_INT words)
4459 /* Otherwise, this should have been passed indirectly. */
4460 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4462 cum->words += words;
4463 if (cum->nregs > 0)
4465 cum->nregs -= 1;
4466 cum->regno += 1;
4470 void
4471 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4472 tree type, int named ATTRIBUTE_UNUSED)
4474 HOST_WIDE_INT bytes, words;
4476 if (mode == BLKmode)
4477 bytes = int_size_in_bytes (type);
4478 else
4479 bytes = GET_MODE_SIZE (mode);
4480 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4482 if (type)
4483 mode = type_natural_mode (type);
4485 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4486 function_arg_advance_ms_64 (cum, bytes, words);
4487 else if (TARGET_64BIT)
4488 function_arg_advance_64 (cum, mode, type, words);
4489 else
4490 function_arg_advance_32 (cum, mode, type, bytes, words);
4493 /* Define where to put the arguments to a function.
4494 Value is zero to push the argument on the stack,
4495 or a hard register in which to store the argument.
4497 MODE is the argument's machine mode.
4498 TYPE is the data type of the argument (as a tree).
4499 This is null for libcalls where that information may
4500 not be available.
4501 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4502 the preceding args and about the function being called.
4503 NAMED is nonzero if this argument is a named parameter
4504 (otherwise it is an extra parameter matching an ellipsis). */
4506 static rtx
4507 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4508 enum machine_mode orig_mode, tree type,
4509 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4511 static bool warnedsse, warnedmmx;
4513 /* Avoid the AL settings for the Unix64 ABI. */
4514 if (mode == VOIDmode)
4515 return constm1_rtx;
4517 switch (mode)
4519 default:
4520 break;
4522 case BLKmode:
4523 if (bytes < 0)
4524 break;
4525 /* FALLTHRU */
4526 case DImode:
4527 case SImode:
4528 case HImode:
4529 case QImode:
4530 if (words <= cum->nregs)
4532 int regno = cum->regno;
4534 /* Fastcall allocates the first two DWORD (SImode) or
4535 smaller arguments to ECX and EDX if it isn't an
4536 aggregate type. */
4537 if (cum->fastcall)
4539 if (mode == BLKmode
4540 || mode == DImode
4541 || (type && AGGREGATE_TYPE_P (type)))
4542 break;
4544 /* ECX not EAX is the first allocated register. */
4545 if (regno == AX_REG)
4546 regno = CX_REG;
4548 return gen_rtx_REG (mode, regno);
4550 break;
4552 case DFmode:
4553 if (cum->float_in_sse < 2)
4554 break;
4555 case SFmode:
4556 if (cum->float_in_sse < 1)
4557 break;
4558 /* FALLTHRU */
4559 case TImode:
4560 case V16QImode:
4561 case V8HImode:
4562 case V4SImode:
4563 case V2DImode:
4564 case V4SFmode:
4565 case V2DFmode:
4566 if (!type || !AGGREGATE_TYPE_P (type))
4568 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4570 warnedsse = true;
4571 warning (0, "SSE vector argument without SSE enabled "
4572 "changes the ABI");
4574 if (cum->sse_nregs)
4575 return gen_reg_or_parallel (mode, orig_mode,
4576 cum->sse_regno + FIRST_SSE_REG);
4578 break;
4580 case V8QImode:
4581 case V4HImode:
4582 case V2SImode:
4583 case V2SFmode:
4584 case V1DImode:
4585 if (!type || !AGGREGATE_TYPE_P (type))
4587 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4589 warnedmmx = true;
4590 warning (0, "MMX vector argument without MMX enabled "
4591 "changes the ABI");
4593 if (cum->mmx_nregs)
4594 return gen_reg_or_parallel (mode, orig_mode,
4595 cum->mmx_regno + FIRST_MMX_REG);
4597 break;
4600 return NULL_RTX;
4603 static rtx
4604 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4605 enum machine_mode orig_mode, tree type)
4607 /* Handle a hidden AL argument containing number of registers
4608 for varargs x86-64 functions. */
4609 if (mode == VOIDmode)
4610 return GEN_INT (cum->maybe_vaarg
4611 ? (cum->sse_nregs < 0
4612 ? (cum->call_abi == DEFAULT_ABI
4613 ? SSE_REGPARM_MAX
4614 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4615 : X64_SSE_REGPARM_MAX))
4616 : cum->sse_regno)
4617 : -1);
4619 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4620 cum->sse_nregs,
4621 &x86_64_int_parameter_registers [cum->regno],
4622 cum->sse_regno);
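
/* Illustrative sketch (not part of GCC): the VOIDmode case above implements
   the hidden %al argument of the x86-64 SysV varargs convention.  For a
   hypothetical call

       printf ("%f %f\n", 1.0, 2.0);

   the two doubles travel in %xmm0/%xmm1 and the caller sets %al to 2 (or to
   the SSE register maximum when the exact count is unknown), so the callee's
   register-save prologue, emitted by setup_incoming_varargs_64 below, knows
   whether the SSE registers need to be dumped.  */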
4625 static rtx
4626 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4627 enum machine_mode orig_mode, int named,
4628 HOST_WIDE_INT bytes)
4630 unsigned int regno;
4632 /* Avoid the AL settings for the Unix64 ABI. */
4633 if (mode == VOIDmode)
4634 return constm1_rtx;
4636 /* If we've run out of registers, it goes on the stack. */
4637 if (cum->nregs == 0)
4638 return NULL_RTX;
4640 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4642 /* Only floating point modes are passed in anything but integer regs. */
4643 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4645 if (named)
4646 regno = cum->regno + FIRST_SSE_REG;
4647 else
4649 rtx t1, t2;
4651 /* Unnamed floating parameters are passed in both the
4652 SSE and integer registers. */
4653 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4654 t2 = gen_rtx_REG (mode, regno);
4655 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4656 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4657 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4660 /* Handle aggregate types passed in registers. */
4661 if (orig_mode == BLKmode)
4663 if (bytes > 0 && bytes <= 8)
4664 mode = (bytes > 4 ? DImode : SImode);
4665 if (mode == BLKmode)
4666 mode = DImode;
4669 return gen_reg_or_parallel (mode, orig_mode, regno);
4673 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4674 tree type, int named)
4676 enum machine_mode mode = omode;
4677 HOST_WIDE_INT bytes, words;
4679 if (mode == BLKmode)
4680 bytes = int_size_in_bytes (type);
4681 else
4682 bytes = GET_MODE_SIZE (mode);
4683 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4685 /* To simplify the code below, represent vector types with a vector mode
4686 even if MMX/SSE are not active. */
4687 if (type && TREE_CODE (type) == VECTOR_TYPE)
4688 mode = type_natural_mode (type);
4690 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4691 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4692 else if (TARGET_64BIT)
4693 return function_arg_64 (cum, mode, omode, type);
4694 else
4695 return function_arg_32 (cum, mode, omode, type, bytes, words);
4698 /* A C expression that indicates when an argument must be passed by
4699 reference. If nonzero for an argument, a copy of that argument is
4700 made in memory and a pointer to the argument is passed instead of
4701 the argument itself. The pointer is passed in whatever way is
4702 appropriate for passing a pointer to that type. */
4704 static bool
4705 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4706 enum machine_mode mode ATTRIBUTE_UNUSED,
4707 const_tree type, bool named ATTRIBUTE_UNUSED)
4709 /* See Windows x64 Software Convention. */
4710 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4712 int msize = (int) GET_MODE_SIZE (mode);
4713 if (type)
4715 /* Arrays are passed by reference. */
4716 if (TREE_CODE (type) == ARRAY_TYPE)
4717 return true;
4719 if (AGGREGATE_TYPE_P (type))
4721 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4722 are passed by reference. */
4723 msize = int_size_in_bytes (type);
4727 /* __m128 is passed by reference. */
4728 switch (msize) {
4729 case 1: case 2: case 4: case 8:
4730 break;
4731 default:
4732 return true;
4735 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4736 return 1;
4738 return 0;
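
/* Illustrative sketch (not part of GCC): under the Windows x64 rules checked
   above, a hypothetical

       struct big { char c[24]; };
       void take (struct big b);

   passes b by reference - the caller builds a temporary copy and hands its
   address over in the argument register - because 24 is not 1, 2, 4 or 8.
   An 8-byte struct or a plain scalar goes by value, while a 16-byte __m128
   again goes by reference, and arrays are always passed by reference.  */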
4741 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4742 ABI. */
4743 static bool
4744 contains_aligned_value_p (tree type)
4746 enum machine_mode mode = TYPE_MODE (type);
4747 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
4748 || mode == TDmode
4749 || mode == TFmode)
4750 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4751 return true;
4752 if (TYPE_ALIGN (type) < 128)
4753 return false;
4755 if (AGGREGATE_TYPE_P (type))
4757 /* Walk the aggregates recursively. */
4758 switch (TREE_CODE (type))
4760 case RECORD_TYPE:
4761 case UNION_TYPE:
4762 case QUAL_UNION_TYPE:
4764 tree field;
4766 /* Walk all the structure fields. */
4767 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4769 if (TREE_CODE (field) == FIELD_DECL
4770 && contains_aligned_value_p (TREE_TYPE (field)))
4771 return true;
4773 break;
4776 case ARRAY_TYPE:
4777 /* Just in case some languages pass arrays by value. */
4778 if (contains_aligned_value_p (TREE_TYPE (type)))
4779 return true;
4780 break;
4782 default:
4783 gcc_unreachable ();
4786 return false;
4789 /* Gives the alignment boundary, in bits, of an argument with the
4790 specified mode and type. */
4793 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4795 int align;
4796 if (type)
4798 /* Since the canonical type is used for the call, convert TYPE to its
4799 canonical type if needed. */
4800 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
4801 type = TYPE_CANONICAL (type);
4802 align = TYPE_ALIGN (type);
4804 else
4805 align = GET_MODE_ALIGNMENT (mode);
4806 if (align < PARM_BOUNDARY)
4807 align = PARM_BOUNDARY;
4808 /* In 32bit, only _Decimal128 and __float128 are aligned to their
4809 natural boundaries. */
4810 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
4812 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4813 make an exception for SSE modes since these require 128bit
4814 alignment.
4816 The handling here differs from field_alignment. ICC aligns MMX
4817 arguments to 4 byte boundaries, while structure fields are aligned
4818 to 8 byte boundaries. */
4819 if (!type)
4821 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
4822 align = PARM_BOUNDARY;
4824 else
4826 if (!contains_aligned_value_p (type))
4827 align = PARM_BOUNDARY;
4830 if (align > BIGGEST_ALIGNMENT)
4831 align = BIGGEST_ALIGNMENT;
4832 return align;
4835 /* Return true if N is a possible register number of function value. */
4837 bool
4838 ix86_function_value_regno_p (int regno)
4840 switch (regno)
4842 case 0:
4843 return true;
4845 case FIRST_FLOAT_REG:
4846 /* TODO: The function should depend on current function ABI but
4847 builtins.c would need updating then. Therefore we use the
4848 default ABI. */
4849 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
4850 return false;
4851 return TARGET_FLOAT_RETURNS_IN_80387;
4853 case FIRST_SSE_REG:
4854 return TARGET_SSE;
4856 case FIRST_MMX_REG:
4857 if (TARGET_MACHO || TARGET_64BIT)
4858 return false;
4859 return TARGET_MMX;
4862 return false;
4865 /* Define how to find the value returned by a function.
4866 VALTYPE is the data type of the value (as a tree).
4867 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4868 otherwise, FUNC is 0. */
4870 static rtx
4871 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4872 const_tree fntype, const_tree fn)
4874 unsigned int regno;
4876 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4877 we normally prevent this case when mmx is not available. However
4878 some ABIs may require the result to be returned like DImode. */
4879 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4880 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4882 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4883 we prevent this case when sse is not available. However some ABIs
4884 may require the result to be returned like integer TImode. */
4885 else if (mode == TImode
4886 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4887 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4889 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4890 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4891 regno = FIRST_FLOAT_REG;
4892 else
4893 /* Most things go in %eax. */
4894 regno = AX_REG;
4896 /* Override FP return register with %xmm0 for local functions when
4897 SSE math is enabled or for functions with sseregparm attribute. */
4898 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4900 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4901 if ((sse_level >= 1 && mode == SFmode)
4902 || (sse_level == 2 && mode == DFmode))
4903 regno = FIRST_SSE_REG;
4906 return gen_rtx_REG (orig_mode, regno);
4909 static rtx
4910 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4911 const_tree valtype)
4913 rtx ret;
4915 /* Handle libcalls, which don't provide a type node. */
4916 if (valtype == NULL)
4918 switch (mode)
4920 case SFmode:
4921 case SCmode:
4922 case DFmode:
4923 case DCmode:
4924 case TFmode:
4925 case SDmode:
4926 case DDmode:
4927 case TDmode:
4928 return gen_rtx_REG (mode, FIRST_SSE_REG);
4929 case XFmode:
4930 case XCmode:
4931 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4932 case TCmode:
4933 return NULL;
4934 default:
4935 return gen_rtx_REG (mode, AX_REG);
4939 ret = construct_container (mode, orig_mode, valtype, 1,
4940 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4941 x86_64_int_return_registers, 0);
4943 /* For zero sized structures, construct_container returns NULL, but we
4944 need to keep rest of compiler happy by returning meaningful value. */
4945 if (!ret)
4946 ret = gen_rtx_REG (orig_mode, AX_REG);
4948 return ret;
4951 static rtx
4952 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4954 unsigned int regno = AX_REG;
4956 if (TARGET_SSE)
4958 switch (GET_MODE_SIZE (mode))
4960 case 16:
4961 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4962 && !COMPLEX_MODE_P (mode))
4963 regno = FIRST_SSE_REG;
4964 break;
4965 case 8:
4966 case 4:
4967 if (mode == SFmode || mode == DFmode)
4968 regno = FIRST_SSE_REG;
4969 break;
4970 default:
4971 break;
4974 return gen_rtx_REG (orig_mode, regno);
4977 static rtx
4978 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4979 enum machine_mode orig_mode, enum machine_mode mode)
4981 const_tree fn, fntype;
4983 fn = NULL_TREE;
4984 if (fntype_or_decl && DECL_P (fntype_or_decl))
4985 fn = fntype_or_decl;
4986 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4988 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
4989 return function_value_ms_64 (orig_mode, mode);
4990 else if (TARGET_64BIT)
4991 return function_value_64 (orig_mode, mode, valtype);
4992 else
4993 return function_value_32 (orig_mode, mode, fntype, fn);
4996 static rtx
4997 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4998 bool outgoing ATTRIBUTE_UNUSED)
5000 enum machine_mode mode, orig_mode;
5002 orig_mode = TYPE_MODE (valtype);
5003 mode = type_natural_mode (valtype);
5004 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
5008 ix86_libcall_value (enum machine_mode mode)
5010 return ix86_function_value_1 (NULL, NULL, mode, mode);
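
/* Illustrative sketch (not part of GCC): where some common return types land
   with the logic above, assuming default options.

       32-bit:        int -> %eax, double -> %st(0), __m128 -> %xmm0,
                      8-byte MMX vector -> %mm0
       64-bit SysV:   int -> %rax, double -> %xmm0, long double -> %st(0),
                      a 16-byte struct -> %rax:%rdx and/or %xmm0:%xmm1
                      depending on its field classes

   For 32-bit functions marked sseregparm, or local functions compiled with
   SSE math, SFmode/DFmode results are redirected to %xmm0 as noted in
   function_value_32 above.  */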
5013 /* Return true iff type is returned in memory. */
5015 static int ATTRIBUTE_UNUSED
5016 return_in_memory_32 (const_tree type, enum machine_mode mode)
5018 HOST_WIDE_INT size;
5020 if (mode == BLKmode)
5021 return 1;
5023 size = int_size_in_bytes (type);
5025 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
5026 return 0;
5028 if (VECTOR_MODE_P (mode) || mode == TImode)
5030 /* User-created vectors small enough to fit in EAX. */
5031 if (size < 8)
5032 return 0;
5034 /* MMX/3dNow values are returned in MM0,
5035 except when it doesn't exist. */
5036 if (size == 8)
5037 return (TARGET_MMX ? 0 : 1);
5039 /* SSE values are returned in XMM0, except when it doesn't exist. */
5040 if (size == 16)
5041 return (TARGET_SSE ? 0 : 1);
5044 if (mode == XFmode)
5045 return 0;
5047 if (size > 12)
5048 return 1;
5049 return 0;
5052 static int ATTRIBUTE_UNUSED
5053 return_in_memory_64 (const_tree type, enum machine_mode mode)
5055 int needed_intregs, needed_sseregs;
5056 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
5059 static int ATTRIBUTE_UNUSED
5060 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
5062 HOST_WIDE_INT size = int_size_in_bytes (type);
5064 /* __m128 is returned in xmm0. */
5065 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5066 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
5067 return 0;
5069 /* Otherwise, the size must be exactly 1, 2, 4 or 8. */
5070 return (size != 1 && size != 2 && size != 4 && size != 8);
5073 static bool
5074 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5076 #ifdef SUBTARGET_RETURN_IN_MEMORY
5077 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
5078 #else
5079 const enum machine_mode mode = type_natural_mode (type);
5081 if (TARGET_64BIT_MS_ABI)
5082 return return_in_memory_ms_64 (type, mode);
5083 else if (TARGET_64BIT)
5084 return return_in_memory_64 (type, mode);
5085 else
5086 return return_in_memory_32 (type, mode);
5087 #endif
5090 /* Return false iff TYPE is returned in memory. This version is used
5091 on Solaris 10. It is similar to the generic ix86_return_in_memory,
5092 but differs notably in that when MMX is available, 8-byte vectors
5093 are returned in memory, rather than in MMX registers. */
5095 bool
5096 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5098 int size;
5099 enum machine_mode mode = type_natural_mode (type);
5101 if (TARGET_64BIT)
5102 return return_in_memory_64 (type, mode);
5104 if (mode == BLKmode)
5105 return 1;
5107 size = int_size_in_bytes (type);
5109 if (VECTOR_MODE_P (mode))
5111 /* Return in memory only if MMX registers *are* available. This
5112 seems backwards, but it is consistent with the existing
5113 Solaris x86 ABI. */
5114 if (size == 8)
5115 return TARGET_MMX;
5116 if (size == 16)
5117 return !TARGET_SSE;
5119 else if (mode == TImode)
5120 return !TARGET_SSE;
5121 else if (mode == XFmode)
5122 return 0;
5124 return size > 12;
5127 /* When returning SSE vector types, we have a choice of either
5128 (1) being abi incompatible with a -march switch, or
5129 (2) generating an error.
5130 Given no good solution, I think the safest thing is one warning.
5131 The user won't be able to use -Werror, but....
5133 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
5134 called in response to actually generating a caller or callee that
5135 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
5136 via aggregate_value_p for general type probing from tree-ssa. */
5138 static rtx
5139 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
5141 static bool warnedsse, warnedmmx;
5143 if (!TARGET_64BIT && type)
5145 /* Look at the return type of the function, not the function type. */
5146 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
5148 if (!TARGET_SSE && !warnedsse)
5150 if (mode == TImode
5151 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5153 warnedsse = true;
5154 warning (0, "SSE vector return without SSE enabled "
5155 "changes the ABI");
5159 if (!TARGET_MMX && !warnedmmx)
5161 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5163 warnedmmx = true;
5164 warning (0, "MMX vector return without MMX enabled "
5165 "changes the ABI");
5170 return NULL;
5174 /* Create the va_list data type. */
5176 static tree
5177 ix86_build_builtin_va_list (void)
5179 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5181 /* For i386 we use plain pointer to argument area. */
5182 if (!TARGET_64BIT || ix86_cfun_abi () == MS_ABI)
5183 return build_pointer_type (char_type_node);
5185 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5186 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5188 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5189 unsigned_type_node);
5190 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5191 unsigned_type_node);
5192 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5193 ptr_type_node);
5194 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5195 ptr_type_node);
5197 va_list_gpr_counter_field = f_gpr;
5198 va_list_fpr_counter_field = f_fpr;
5200 DECL_FIELD_CONTEXT (f_gpr) = record;
5201 DECL_FIELD_CONTEXT (f_fpr) = record;
5202 DECL_FIELD_CONTEXT (f_ovf) = record;
5203 DECL_FIELD_CONTEXT (f_sav) = record;
5205 TREE_CHAIN (record) = type_decl;
5206 TYPE_NAME (record) = type_decl;
5207 TYPE_FIELDS (record) = f_gpr;
5208 TREE_CHAIN (f_gpr) = f_fpr;
5209 TREE_CHAIN (f_fpr) = f_ovf;
5210 TREE_CHAIN (f_ovf) = f_sav;
5212 layout_type (record);
5214 /* The correct type is an array type of one element. */
5215 return build_array_type (record, build_index_type (size_zero_node));
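/* The record built above corresponds to the va_list layout of the x86-64
   psABI; in C terms it is roughly:

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];
*/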
5218 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
5220 static void
5221 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5223 rtx save_area, mem;
5224 rtx label;
5225 rtx label_ref;
5226 rtx tmp_reg;
5227 rtx nsse_reg;
5228 alias_set_type set;
5229 int i;
5230 int regparm = ix86_regparm;
5232 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
5233 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
5235 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5236 return;
5238 /* Indicate to allocate space on the stack for varargs save area. */
5239 ix86_save_varrargs_registers = 1;
5240 /* We need 16-byte stack alignment to save SSE registers. If the user
5241 asked for a lower preferred_stack_boundary, let's just hope that they know
5242 what they are doing and won't pass SSE values through varargs.
5244 We may also end up assuming that only 64-bit values are stored in an SSE
5245 register, to let some floating point programs work. */
5246 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5247 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5249 save_area = frame_pointer_rtx;
5250 set = get_varargs_alias_set ();
5252 for (i = cum->regno;
5253 i < regparm
5254 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5255 i++)
5257 mem = gen_rtx_MEM (Pmode,
5258 plus_constant (save_area, i * UNITS_PER_WORD));
5259 MEM_NOTRAP_P (mem) = 1;
5260 set_mem_alias_set (mem, set);
5261 emit_move_insn (mem, gen_rtx_REG (Pmode,
5262 x86_64_int_parameter_registers[i]));
5265 if (cum->sse_nregs && cfun->va_list_fpr_size)
5267 /* Now emit code to save SSE registers. The AX parameter contains the
5268 number of SSE parameter registers used to call this function. We use
5269 the sse_prologue_save insn template, which produces a computed jump
5270 across the SSE saves. We need some preparation work to get this working. */
5272 label = gen_label_rtx ();
5273 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5275 /* Compute the address to jump to:
5276 label - eax*4 + (number of named SSE arguments)*4 */
5277 tmp_reg = gen_reg_rtx (Pmode);
5278 nsse_reg = gen_reg_rtx (Pmode);
5279 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5280 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5281 gen_rtx_MULT (Pmode, nsse_reg,
5282 GEN_INT (4))));
5283 if (cum->sse_regno)
5284 emit_move_insn
5285 (nsse_reg,
5286 gen_rtx_CONST (DImode,
5287 gen_rtx_PLUS (DImode,
5288 label_ref,
5289 GEN_INT (cum->sse_regno * 4))));
5290 else
5291 emit_move_insn (nsse_reg, label_ref);
5292 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5294 /* Compute the address of the memory block we save into. We always use a
5295 pointer pointing 127 bytes after the first byte to store - this is needed
5296 to keep each save instruction's size limited to 4 bytes. */
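/* A signed 8-bit displacement only covers -128..127; the +127 bias keeps
   every slot written by the save sequence addressable with a one-byte
   displacement, which is what keeps each save instruction at 4 bytes
   (a rough reading of the sse_prologue_save pattern, not a statement
   about its exact encoding). */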
5297 tmp_reg = gen_reg_rtx (Pmode);
5298 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5299 plus_constant (save_area,
5300 8 * X86_64_REGPARM_MAX + 127)));
5301 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5302 MEM_NOTRAP_P (mem) = 1;
5303 set_mem_alias_set (mem, set);
5304 set_mem_align (mem, BITS_PER_WORD);
5306 /* And finally do the dirty job! */
5307 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5308 GEN_INT (cum->sse_regno), label));
5312 static void
5313 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5315 alias_set_type set = get_varargs_alias_set ();
5316 int i;
5318 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
5320 rtx reg, mem;
5322 mem = gen_rtx_MEM (Pmode,
5323 plus_constant (virtual_incoming_args_rtx,
5324 i * UNITS_PER_WORD));
5325 MEM_NOTRAP_P (mem) = 1;
5326 set_mem_alias_set (mem, set);
5328 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5329 emit_move_insn (mem, reg);
5333 static void
5334 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5335 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5336 int no_rtl)
5338 CUMULATIVE_ARGS next_cum;
5339 tree fntype;
5341 /* This argument doesn't appear to be used anymore. Which is good,
5342 because the old code here didn't suppress rtl generation. */
5343 gcc_assert (!no_rtl);
5345 if (!TARGET_64BIT)
5346 return;
5348 fntype = TREE_TYPE (current_function_decl);
5350 /* For varargs, we do not want to skip the dummy va_dcl argument.
5351 For stdargs, we do want to skip the last named argument. */
5352 next_cum = *cum;
5353 if (stdarg_p (fntype))
5354 function_arg_advance (&next_cum, mode, type, 1);
5356 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5357 setup_incoming_varargs_ms_64 (&next_cum);
5358 else
5359 setup_incoming_varargs_64 (&next_cum);
5362 /* Implement va_start. */
5364 static void
5365 ix86_va_start (tree valist, rtx nextarg)
5367 HOST_WIDE_INT words, n_gpr, n_fpr;
5368 tree f_gpr, f_fpr, f_ovf, f_sav;
5369 tree gpr, fpr, ovf, sav, t;
5370 tree type;
5372 /* Only 64bit target needs something special. */
5373 if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
5375 std_expand_builtin_va_start (valist, nextarg);
5376 return;
5379 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5380 f_fpr = TREE_CHAIN (f_gpr);
5381 f_ovf = TREE_CHAIN (f_fpr);
5382 f_sav = TREE_CHAIN (f_ovf);
5384 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5385 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5386 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5387 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5388 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5390 /* Count number of gp and fp argument registers used. */
5391 words = crtl->args.info.words;
5392 n_gpr = crtl->args.info.regno;
5393 n_fpr = crtl->args.info.sse_regno;
5395 if (cfun->va_list_gpr_size)
5397 type = TREE_TYPE (gpr);
5398 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5399 build_int_cst (type, n_gpr * 8));
5400 TREE_SIDE_EFFECTS (t) = 1;
5401 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5404 if (cfun->va_list_fpr_size)
5406 type = TREE_TYPE (fpr);
5407 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5408 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
5409 TREE_SIDE_EFFECTS (t) = 1;
5410 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5413 /* Find the overflow area. */
5414 type = TREE_TYPE (ovf);
5415 t = make_tree (type, virtual_incoming_args_rtx);
5416 if (words != 0)
5417 t = build2 (POINTER_PLUS_EXPR, type, t,
5418 size_int (words * UNITS_PER_WORD));
5419 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5420 TREE_SIDE_EFFECTS (t) = 1;
5421 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5423 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5425 /* Find the register save area.
5426 The function prologue saves it right above the stack frame. */
5427 type = TREE_TYPE (sav);
5428 t = make_tree (type, frame_pointer_rtx);
5429 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5430 TREE_SIDE_EFFECTS (t) = 1;
5431 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
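/* After expansion, the va_list fields are initialized roughly as:

       gp_offset         = <named GP regs used> * 8;
       fp_offset         = 8 * X86_64_REGPARM_MAX + <named SSE regs used> * 16;
       overflow_arg_area = incoming argument area + <stack words used> * UNITS_PER_WORD;
       reg_save_area     = start of the register save area in the frame;

   matching a register save area laid out as the GP slots (8 bytes each)
   followed by the SSE slots (16 bytes each). */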
5435 /* Implement va_arg. */
5437 static tree
5438 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5440 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5441 tree f_gpr, f_fpr, f_ovf, f_sav;
5442 tree gpr, fpr, ovf, sav, t;
5443 int size, rsize;
5444 tree lab_false, lab_over = NULL_TREE;
5445 tree addr, t2;
5446 rtx container;
5447 int indirect_p = 0;
5448 tree ptrtype;
5449 enum machine_mode nat_mode;
5451 /* Only 64bit target needs something special. */
5452 if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
5453 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5455 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5456 f_fpr = TREE_CHAIN (f_gpr);
5457 f_ovf = TREE_CHAIN (f_fpr);
5458 f_sav = TREE_CHAIN (f_ovf);
5460 valist = build_va_arg_indirect_ref (valist);
5461 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5462 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5463 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5464 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5466 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5467 if (indirect_p)
5468 type = build_pointer_type (type);
5469 size = int_size_in_bytes (type);
5470 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5472 nat_mode = type_natural_mode (type);
5473 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5474 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5475 intreg, 0);
5477 /* Pull the value out of the saved registers. */
5479 addr = create_tmp_var (ptr_type_node, "addr");
5480 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5482 if (container)
5484 int needed_intregs, needed_sseregs;
5485 bool need_temp;
5486 tree int_addr, sse_addr;
5488 lab_false = create_artificial_label ();
5489 lab_over = create_artificial_label ();
5491 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5493 need_temp = (!REG_P (container)
5494 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5495 || TYPE_ALIGN (type) > 128));
5497 /* In case we are passing a structure, verify that it is a consecutive block
5498 in the register save area. If not, we need to do moves. */
5499 if (!need_temp && !REG_P (container))
5501 /* Verify that all registers are strictly consecutive */
5502 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5504 int i;
5506 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5508 rtx slot = XVECEXP (container, 0, i);
5509 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5510 || INTVAL (XEXP (slot, 1)) != i * 16)
5511 need_temp = 1;
5514 else
5516 int i;
5518 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5520 rtx slot = XVECEXP (container, 0, i);
5521 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5522 || INTVAL (XEXP (slot, 1)) != i * 8)
5523 need_temp = 1;
5527 if (!need_temp)
5529 int_addr = addr;
5530 sse_addr = addr;
5532 else
5534 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5535 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5536 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5537 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5540 /* First ensure that we fit completely in registers. */
5541 if (needed_intregs)
5543 t = build_int_cst (TREE_TYPE (gpr),
5544 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
5545 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5546 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5547 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5548 gimplify_and_add (t, pre_p);
5550 if (needed_sseregs)
5552 t = build_int_cst (TREE_TYPE (fpr),
5553 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5554 + X86_64_REGPARM_MAX * 8);
5555 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5556 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5557 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5558 gimplify_and_add (t, pre_p);
5561 /* Compute index to start of area used for integer regs. */
5562 if (needed_intregs)
5564 /* int_addr = gpr + sav; */
5565 t = fold_convert (sizetype, gpr);
5566 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5567 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5568 gimplify_and_add (t, pre_p);
5570 if (needed_sseregs)
5572 /* sse_addr = fpr + sav; */
5573 t = fold_convert (sizetype, fpr);
5574 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5575 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5576 gimplify_and_add (t, pre_p);
5578 if (need_temp)
5580 int i;
5581 tree temp = create_tmp_var (type, "va_arg_tmp");
5583 /* addr = &temp; */
5584 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5585 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5586 gimplify_and_add (t, pre_p);
5588 for (i = 0; i < XVECLEN (container, 0); i++)
5590 rtx slot = XVECEXP (container, 0, i);
5591 rtx reg = XEXP (slot, 0);
5592 enum machine_mode mode = GET_MODE (reg);
5593 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5594 tree addr_type = build_pointer_type (piece_type);
5595 tree src_addr, src;
5596 int src_offset;
5597 tree dest_addr, dest;
5599 if (SSE_REGNO_P (REGNO (reg)))
5601 src_addr = sse_addr;
5602 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5604 else
5606 src_addr = int_addr;
5607 src_offset = REGNO (reg) * 8;
5609 src_addr = fold_convert (addr_type, src_addr);
5610 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5611 size_int (src_offset));
5612 src = build_va_arg_indirect_ref (src_addr);
5614 dest_addr = fold_convert (addr_type, addr);
5615 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5616 size_int (INTVAL (XEXP (slot, 1))));
5617 dest = build_va_arg_indirect_ref (dest_addr);
5619 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5620 gimplify_and_add (t, pre_p);
5624 if (needed_intregs)
5626 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5627 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5628 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5629 gimplify_and_add (t, pre_p);
5631 if (needed_sseregs)
5633 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5634 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5635 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5636 gimplify_and_add (t, pre_p);
5639 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5640 gimplify_and_add (t, pre_p);
5642 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5643 append_to_statement_list (t, pre_p);
5646 /* ... otherwise out of the overflow area. */
5648 /* Care for on-stack alignment if needed. */
5649 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5650 || integer_zerop (TYPE_SIZE (type)))
5651 t = ovf;
5652 else
5654 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5655 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5656 size_int (align - 1));
5657 t = fold_convert (sizetype, t);
5658 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5659 size_int (-align));
5660 t = fold_convert (TREE_TYPE (ovf), t);
5662 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5664 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5665 gimplify_and_add (t2, pre_p);
5667 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5668 size_int (rsize * UNITS_PER_WORD));
5669 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5670 gimplify_and_add (t, pre_p);
5672 if (container)
5674 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5675 append_to_statement_list (t, pre_p);
5678 ptrtype = build_pointer_type (type);
5679 addr = fold_convert (ptrtype, addr);
5681 if (indirect_p)
5682 addr = build_va_arg_indirect_ref (addr);
5683 return build_va_arg_indirect_ref (addr);
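/* The gimple emitted above follows the usual psABI va_arg scheme. As a
   rough sketch for a value needing N general-purpose registers:

       if (gp_offset > 48 - N * 8)
         goto overflow;                         /+ not enough registers left +/
       addr = reg_save_area + gp_offset;
       gp_offset += N * 8;
       goto done;
     overflow:
       addr = align (overflow_arg_area);
       overflow_arg_area = addr + rounded size of the argument;
     done:
       result = *(TYPE *) addr;

   with the analogous fp_offset test and 16-byte slots for SSE-classified
   pieces, and a temporary copy when the pieces are not consecutive. */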
5686 /* Return nonzero if OPNUM's MEM should be matched
5687 in movabs* patterns. */
5690 ix86_check_movabs (rtx insn, int opnum)
5692 rtx set, mem;
5694 set = PATTERN (insn);
5695 if (GET_CODE (set) == PARALLEL)
5696 set = XVECEXP (set, 0, 0);
5697 gcc_assert (GET_CODE (set) == SET);
5698 mem = XEXP (set, opnum);
5699 while (GET_CODE (mem) == SUBREG)
5700 mem = SUBREG_REG (mem);
5701 gcc_assert (MEM_P (mem));
5702 return (volatile_ok || !MEM_VOLATILE_P (mem));
5705 /* Initialize the table of extra 80387 mathematical constants. */
5707 static void
5708 init_ext_80387_constants (void)
5710 static const char * cst[5] =
5712 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5713 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5714 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5715 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5716 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5718 int i;
5720 for (i = 0; i < 5; i++)
5722 real_from_string (&ext_80387_constants_table[i], cst[i]);
5723 /* Ensure each constant is rounded to XFmode precision. */
5724 real_convert (&ext_80387_constants_table[i],
5725 XFmode, &ext_80387_constants_table[i]);
5728 ext_80387_constants_init = 1;
5731 /* Return true if the constant is something that can be loaded with
5732 a special instruction. */
5735 standard_80387_constant_p (rtx x)
5737 enum machine_mode mode = GET_MODE (x);
5739 REAL_VALUE_TYPE r;
5741 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5742 return -1;
5744 if (x == CONST0_RTX (mode))
5745 return 1;
5746 if (x == CONST1_RTX (mode))
5747 return 2;
5749 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5751 /* For XFmode constants, try to find a special 80387 instruction when
5752 optimizing for size or on those CPUs that benefit from them. */
5753 if (mode == XFmode
5754 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5756 int i;
5758 if (! ext_80387_constants_init)
5759 init_ext_80387_constants ();
5761 for (i = 0; i < 5; i++)
5762 if (real_identical (&r, &ext_80387_constants_table[i]))
5763 return i + 3;
5766 /* Load of the constant -0.0 or -1.0 will be split as
5767 fldz;fchs or fld1;fchs sequence. */
5768 if (real_isnegzero (&r))
5769 return 8;
5770 if (real_identical (&r, &dconstm1))
5771 return 9;
5773 return 0;
5776 /* Return the opcode of the special instruction to be used to load
5777 the constant X. */
5779 const char *
5780 standard_80387_constant_opcode (rtx x)
5782 switch (standard_80387_constant_p (x))
5784 case 1:
5785 return "fldz";
5786 case 2:
5787 return "fld1";
5788 case 3:
5789 return "fldlg2";
5790 case 4:
5791 return "fldln2";
5792 case 5:
5793 return "fldl2e";
5794 case 6:
5795 return "fldl2t";
5796 case 7:
5797 return "fldpi";
5798 case 8:
5799 case 9:
5800 return "#";
5801 default:
5802 gcc_unreachable ();
5806 /* Return the CONST_DOUBLE representing the 80387 constant that is
5807 loaded by the specified special instruction. The argument IDX
5808 matches the return value from standard_80387_constant_p. */
5811 standard_80387_constant_rtx (int idx)
5813 int i;
5815 if (! ext_80387_constants_init)
5816 init_ext_80387_constants ();
5818 switch (idx)
5820 case 3:
5821 case 4:
5822 case 5:
5823 case 6:
5824 case 7:
5825 i = idx - 3;
5826 break;
5828 default:
5829 gcc_unreachable ();
5832 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5833 XFmode);
5836 /* Return 1 if mode is a valid mode for sse. */
5837 static int
5838 standard_sse_mode_p (enum machine_mode mode)
5840 switch (mode)
5842 case V16QImode:
5843 case V8HImode:
5844 case V4SImode:
5845 case V2DImode:
5846 case V4SFmode:
5847 case V2DFmode:
5848 return 1;
5850 default:
5851 return 0;
5855 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
5858 standard_sse_constant_p (rtx x)
5860 enum machine_mode mode = GET_MODE (x);
5862 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5863 return 1;
5864 if (vector_all_ones_operand (x, mode)
5865 && standard_sse_mode_p (mode))
5866 return TARGET_SSE2 ? 2 : -1;
5868 return 0;
5871 /* Return the opcode of the special instruction to be used to load
5872 the constant X. */
5874 const char *
5875 standard_sse_constant_opcode (rtx insn, rtx x)
5877 switch (standard_sse_constant_p (x))
5879 case 1:
5880 if (get_attr_mode (insn) == MODE_V4SF)
5881 return "xorps\t%0, %0";
5882 else if (get_attr_mode (insn) == MODE_V2DF)
5883 return "xorpd\t%0, %0";
5884 else
5885 return "pxor\t%0, %0";
5886 case 2:
5887 return "pcmpeqd\t%0, %0";
5889 gcc_unreachable ();
5892 /* Returns 1 if OP contains a symbol reference */
5895 symbolic_reference_mentioned_p (rtx op)
5897 const char *fmt;
5898 int i;
5900 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5901 return 1;
5903 fmt = GET_RTX_FORMAT (GET_CODE (op));
5904 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5906 if (fmt[i] == 'E')
5908 int j;
5910 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5911 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5912 return 1;
5915 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5916 return 1;
5919 return 0;
5922 /* Return 1 if it is appropriate to emit `ret' instructions in the
5923 body of a function. Do this only if the epilogue is simple, needing a
5924 couple of insns. Prior to reloading, we can't tell how many registers
5925 must be saved, so return 0 then. Return 0 if there is no frame
5926 marker to de-allocate. */
5929 ix86_can_use_return_insn_p (void)
5931 struct ix86_frame frame;
5933 if (! reload_completed || frame_pointer_needed)
5934 return 0;
5936 /* Don't allow popping more than 32768 bytes of arguments, since that's
5937 all we handle with one instruction. */
5938 if (crtl->args.pops_args
5939 && crtl->args.size >= 32768)
5940 return 0;
5942 ix86_compute_frame_layout (&frame);
5943 return frame.to_allocate == 0 && frame.nregs == 0;
5946 /* Value should be nonzero if functions must have frame pointers.
5947 Zero means the frame pointer need not be set up (and parms may
5948 be accessed via the stack pointer) in functions that seem suitable. */
5951 ix86_frame_pointer_required (void)
5953 /* If we accessed previous frames, then the generated code expects
5954 to be able to access the saved ebp value in our frame. */
5955 if (cfun->machine->accesses_prev_frame)
5956 return 1;
5958 /* Several x86 OSes need a frame pointer for other reasons,
5959 usually pertaining to setjmp. */
5960 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5961 return 1;
5963 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5964 the frame pointer by default. Turn it back on now if we've not
5965 got a leaf function. */
5966 if (TARGET_OMIT_LEAF_FRAME_POINTER
5967 && (!current_function_is_leaf
5968 || ix86_current_function_calls_tls_descriptor))
5969 return 1;
5971 if (crtl->profile)
5972 return 1;
5974 return 0;
5977 /* Record that the current function accesses previous call frames. */
5979 void
5980 ix86_setup_frame_addresses (void)
5982 cfun->machine->accesses_prev_frame = 1;
5985 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5986 # define USE_HIDDEN_LINKONCE 1
5987 #else
5988 # define USE_HIDDEN_LINKONCE 0
5989 #endif
5991 static int pic_labels_used;
5993 /* Fills in the label name that should be used for a pc thunk for
5994 the given register. */
5996 static void
5997 get_pc_thunk_name (char name[32], unsigned int regno)
5999 gcc_assert (!TARGET_64BIT);
6001 if (USE_HIDDEN_LINKONCE)
6002 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
6003 else
6004 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
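/* For example, with the PIC register in %ebx this produces the well-known
   "__i686.get_pc_thunk.bx" symbol; without hidden linkonce support an
   internal "LPR"-prefixed label is used instead. */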
6008 /* This function generates code for -fpic that loads %ebx with
6009 the return address of the caller and then returns. */
6011 void
6012 ix86_file_end (void)
6014 rtx xops[2];
6015 int regno;
6017 for (regno = 0; regno < 8; ++regno)
6019 char name[32];
6021 if (! ((pic_labels_used >> regno) & 1))
6022 continue;
6024 get_pc_thunk_name (name, regno);
6026 #if TARGET_MACHO
6027 if (TARGET_MACHO)
6029 switch_to_section (darwin_sections[text_coal_section]);
6030 fputs ("\t.weak_definition\t", asm_out_file);
6031 assemble_name (asm_out_file, name);
6032 fputs ("\n\t.private_extern\t", asm_out_file);
6033 assemble_name (asm_out_file, name);
6034 fputs ("\n", asm_out_file);
6035 ASM_OUTPUT_LABEL (asm_out_file, name);
6037 else
6038 #endif
6039 if (USE_HIDDEN_LINKONCE)
6041 tree decl;
6043 decl = build_decl (FUNCTION_DECL, get_identifier (name),
6044 error_mark_node);
6045 TREE_PUBLIC (decl) = 1;
6046 TREE_STATIC (decl) = 1;
6047 DECL_ONE_ONLY (decl) = 1;
6049 (*targetm.asm_out.unique_section) (decl, 0);
6050 switch_to_section (get_named_section (decl, NULL, 0));
6052 (*targetm.asm_out.globalize_label) (asm_out_file, name);
6053 fputs ("\t.hidden\t", asm_out_file);
6054 assemble_name (asm_out_file, name);
6055 fputc ('\n', asm_out_file);
6056 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6058 else
6060 switch_to_section (text_section);
6061 ASM_OUTPUT_LABEL (asm_out_file, name);
6064 xops[0] = gen_rtx_REG (Pmode, regno);
6065 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6066 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6067 output_asm_insn ("ret", xops);
6070 if (NEED_INDICATE_EXEC_STACK)
6071 file_end_indicate_exec_stack ();
6074 /* Emit code for the SET_GOT patterns. */
6076 const char *
6077 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
6079 rtx xops[3];
6081 xops[0] = dest;
6083 if (TARGET_VXWORKS_RTP && flag_pic)
6085 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6086 xops[2] = gen_rtx_MEM (Pmode,
6087 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6088 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6090 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6091 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6092 an unadorned address. */
6093 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6094 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6095 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6096 return "";
6099 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6101 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
6103 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6105 if (!flag_pic)
6106 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6107 else
6108 output_asm_insn ("call\t%a2", xops);
6110 #if TARGET_MACHO
6111 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6112 is what will be referenced by the Mach-O PIC subsystem. */
6113 if (!label)
6114 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6115 #endif
6117 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6118 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6120 if (flag_pic)
6121 output_asm_insn ("pop%z0\t%0", xops);
6123 else
6125 char name[32];
6126 get_pc_thunk_name (name, REGNO (dest));
6127 pic_labels_used |= 1 << REGNO (dest);
6129 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6130 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6131 output_asm_insn ("call\t%X2", xops);
6132 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6133 is what will be referenced by the Mach-O PIC subsystem. */
6134 #if TARGET_MACHO
6135 if (!label)
6136 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6137 else
6138 targetm.asm_out.internal_label (asm_out_file, "L",
6139 CODE_LABEL_NUMBER (label));
6140 #endif
6143 if (TARGET_MACHO)
6144 return "";
6146 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
6147 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6148 else
6149 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6151 return "";
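/* In the common TARGET_DEEP_BRANCH_PREDICTION case the PIC register setup
   above typically assembles to something like

       call  __i686.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   while the fallback emits a call to a local label followed by a pop of
   the return address (illustrative; the exact output depends on the
   chosen register and assembler dialect). */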
6154 /* Generate a "push" pattern for input ARG. */
6156 static rtx
6157 gen_push (rtx arg)
6159 return gen_rtx_SET (VOIDmode,
6160 gen_rtx_MEM (Pmode,
6161 gen_rtx_PRE_DEC (Pmode,
6162 stack_pointer_rtx)),
6163 arg);
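/* For instance, gen_push (hard_frame_pointer_rtx) on a 32-bit target
   builds RTL along the lines of

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   i.e. the canonical register push (a sketch; the actual mode is Pmode
   and register numbers come from the target description). */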
6166 /* Return >= 0 if there is an unused call-clobbered register available
6167 for the entire function. */
6169 static unsigned int
6170 ix86_select_alt_pic_regnum (void)
6172 if (current_function_is_leaf && !crtl->profile
6173 && !ix86_current_function_calls_tls_descriptor)
6175 int i;
6176 for (i = 2; i >= 0; --i)
6177 if (!df_regs_ever_live_p (i))
6178 return i;
6181 return INVALID_REGNUM;
6184 /* Return 1 if we need to save REGNO. */
6185 static int
6186 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6188 if (pic_offset_table_rtx
6189 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6190 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6191 || crtl->profile
6192 || crtl->calls_eh_return
6193 || crtl->uses_const_pool))
6195 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6196 return 0;
6197 return 1;
6200 if (crtl->calls_eh_return && maybe_eh_return)
6202 unsigned i;
6203 for (i = 0; ; i++)
6205 unsigned test = EH_RETURN_DATA_REGNO (i);
6206 if (test == INVALID_REGNUM)
6207 break;
6208 if (test == regno)
6209 return 1;
6213 if (cfun->machine->force_align_arg_pointer
6214 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6215 return 1;
6217 return (df_regs_ever_live_p (regno)
6218 && !call_used_regs[regno]
6219 && !fixed_regs[regno]
6220 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6223 /* Return number of registers to be saved on the stack. */
6225 static int
6226 ix86_nsaved_regs (void)
6228 int nregs = 0;
6229 int regno;
6231 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6232 if (ix86_save_reg (regno, true))
6233 nregs++;
6234 return nregs;
6237 /* Return the offset between two registers, one to be eliminated, and the other
6238 its replacement, at the start of a routine. */
6240 HOST_WIDE_INT
6241 ix86_initial_elimination_offset (int from, int to)
6243 struct ix86_frame frame;
6244 ix86_compute_frame_layout (&frame);
6246 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6247 return frame.hard_frame_pointer_offset;
6248 else if (from == FRAME_POINTER_REGNUM
6249 && to == HARD_FRAME_POINTER_REGNUM)
6250 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6251 else
6253 gcc_assert (to == STACK_POINTER_REGNUM);
6255 if (from == ARG_POINTER_REGNUM)
6256 return frame.stack_pointer_offset;
6258 gcc_assert (from == FRAME_POINTER_REGNUM);
6259 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6263 /* Fill the ix86_frame structure describing the frame of the current function. */
6265 static void
6266 ix86_compute_frame_layout (struct ix86_frame *frame)
6268 HOST_WIDE_INT total_size;
6269 unsigned int stack_alignment_needed;
6270 HOST_WIDE_INT offset;
6271 unsigned int preferred_alignment;
6272 HOST_WIDE_INT size = get_frame_size ();
6274 frame->nregs = ix86_nsaved_regs ();
6275 total_size = size;
6277 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6278 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6280 /* During reload iteration the number of registers saved can change.
6281 Recompute the value as needed. Do not recompute when the number of registers
6282 didn't change, as reload calls the function multiple times and does not
6283 expect the decision to change within a single iteration. */
6284 if (!optimize_size
6285 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6287 int count = frame->nregs;
6289 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6290 /* The fast prologue uses moves instead of pushes to save registers. This
6291 is significantly longer, but also executes faster, as modern hardware
6292 can execute the moves in parallel but can't do that for push/pop.
6294 Be careful about choosing which prologue to emit: when the function takes
6295 many instructions to execute we may as well use the slow version; the same
6296 holds when the function is known to be outside a hot spot (this is known
6297 from profile feedback only). Weight the size of the function by the number
6298 of registers to save, as it is cheap to use one or two push instructions
6299 but very slow to use many of them. */
6300 if (count)
6301 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6302 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6303 || (flag_branch_probabilities
6304 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6305 cfun->machine->use_fast_prologue_epilogue = false;
6306 else
6307 cfun->machine->use_fast_prologue_epilogue
6308 = !expensive_function_p (count);
6310 if (TARGET_PROLOGUE_USING_MOVE
6311 && cfun->machine->use_fast_prologue_epilogue)
6312 frame->save_regs_using_mov = true;
6313 else
6314 frame->save_regs_using_mov = false;
6317 /* Skip return address and saved base pointer. */
6318 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6320 frame->hard_frame_pointer_offset = offset;
6322 /* Do some sanity checking of stack_alignment_needed and
6323 preferred_alignment, since the i386 port is the only one using these
6324 features, and they may break easily. */
6326 gcc_assert (!size || stack_alignment_needed);
6327 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6328 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6329 gcc_assert (stack_alignment_needed
6330 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6332 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6333 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6335 /* Register save area */
6336 offset += frame->nregs * UNITS_PER_WORD;
6338 /* Va-arg area */
6339 if (ix86_save_varrargs_registers)
6341 offset += X86_64_VARARGS_SIZE;
6342 frame->va_arg_size = X86_64_VARARGS_SIZE;
6344 else
6345 frame->va_arg_size = 0;
6347 /* Align start of frame for local function. */
6348 frame->padding1 = ((offset + stack_alignment_needed - 1)
6349 & -stack_alignment_needed) - offset;
6351 offset += frame->padding1;
6353 /* Frame pointer points here. */
6354 frame->frame_pointer_offset = offset;
6356 offset += size;
6358 /* Add the outgoing arguments area. It can be skipped if we eliminated
6359 all the function calls as dead code.
6360 Skipping is however impossible when the function calls alloca. The alloca
6361 expander assumes that the last crtl->outgoing_args_size bytes
6362 of the stack frame are unused. */
6363 if (ACCUMULATE_OUTGOING_ARGS
6364 && (!current_function_is_leaf || cfun->calls_alloca
6365 || ix86_current_function_calls_tls_descriptor))
6367 offset += crtl->outgoing_args_size;
6368 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6370 else
6371 frame->outgoing_arguments_size = 0;
6373 /* Align stack boundary. Only needed if we're calling another function
6374 or using alloca. */
6375 if (!current_function_is_leaf || cfun->calls_alloca
6376 || ix86_current_function_calls_tls_descriptor)
6377 frame->padding2 = ((offset + preferred_alignment - 1)
6378 & -preferred_alignment) - offset;
6379 else
6380 frame->padding2 = 0;
6382 offset += frame->padding2;
6384 /* We've reached end of stack frame. */
6385 frame->stack_pointer_offset = offset;
6387 /* Size prologue needs to allocate. */
6388 frame->to_allocate =
6389 (size + frame->padding1 + frame->padding2
6390 + frame->outgoing_arguments_size + frame->va_arg_size);
6392 if ((!frame->to_allocate && frame->nregs <= 1)
6393 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6394 frame->save_regs_using_mov = false;
6396 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
6397 && current_function_is_leaf
6398 && !ix86_current_function_calls_tls_descriptor)
6400 frame->red_zone_size = frame->to_allocate;
6401 if (frame->save_regs_using_mov)
6402 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6403 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6404 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6406 else
6407 frame->red_zone_size = 0;
6408 frame->to_allocate -= frame->red_zone_size;
6409 frame->stack_pointer_offset -= frame->red_zone_size;
6410 #if 0
6411 fprintf (stderr, "\n");
6412 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6413 fprintf (stderr, "size: %ld\n", (long)size);
6414 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6415 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6416 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6417 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6418 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6419 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6420 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6421 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6422 (long)frame->hard_frame_pointer_offset);
6423 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6424 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6425 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6426 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6427 #endif
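/* Putting the pieces above together, the computed frame looks roughly like
   this, from higher to lower addresses:

       return address
       saved frame pointer             (if frame_pointer_needed)
       saved registers                 (nregs * UNITS_PER_WORD)
       va-arg register save area       (va_arg_size)
       padding1                        (aligns the local frame)
       local variables                 (get_frame_size ())
       outgoing argument area          (if ACCUMULATE_OUTGOING_ARGS)
       padding2                        (aligns the stack for calls)

   with to_allocate covering everything below the saved registers, less
   whatever can live in the red zone. */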
6430 /* Emit code to save registers in the prologue. */
6432 static void
6433 ix86_emit_save_regs (void)
6435 unsigned int regno;
6436 rtx insn;
6438 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6439 if (ix86_save_reg (regno, true))
6441 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6442 RTX_FRAME_RELATED_P (insn) = 1;
6446 /* Emit code to save registers using MOV insns. The first register
6447 is saved at POINTER + OFFSET. */
6448 static void
6449 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6451 unsigned int regno;
6452 rtx insn;
6454 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6455 if (ix86_save_reg (regno, true))
6457 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6458 Pmode, offset),
6459 gen_rtx_REG (Pmode, regno));
6460 RTX_FRAME_RELATED_P (insn) = 1;
6461 offset += UNITS_PER_WORD;
6465 /* Expand prologue or epilogue stack adjustment.
6466 The pattern exists to put a dependency on all ebp-based memory accesses.
6467 STYLE should be negative if instructions should be marked as frame related,
6468 zero if the %r11 register is live and cannot be freely used, and positive
6469 otherwise. */
6471 static void
6472 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6474 rtx insn;
6476 if (! TARGET_64BIT)
6477 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6478 else if (x86_64_immediate_operand (offset, DImode))
6479 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6480 else
6482 rtx r11;
6483 /* r11 is used by indirect sibcall return as well, set before the
6484 epilogue and used after the epilogue. ATM indirect sibcall
6485 shouldn't be used together with huge frame sizes in one
6486 function because of the frame_size check in sibcall.c. */
6487 gcc_assert (style);
6488 r11 = gen_rtx_REG (DImode, R11_REG);
6489 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6490 if (style < 0)
6491 RTX_FRAME_RELATED_P (insn) = 1;
6492 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6493 offset));
6495 if (style < 0)
6496 RTX_FRAME_RELATED_P (insn) = 1;
6499 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6501 static rtx
6502 ix86_internal_arg_pointer (void)
6504 bool has_force_align_arg_pointer =
6505 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6506 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6507 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6508 && DECL_NAME (current_function_decl)
6509 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6510 && DECL_FILE_SCOPE_P (current_function_decl))
6511 || ix86_force_align_arg_pointer
6512 || has_force_align_arg_pointer)
6514 /* Nested functions can't realign the stack due to a register
6515 conflict. */
6516 if (DECL_CONTEXT (current_function_decl)
6517 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6519 if (ix86_force_align_arg_pointer)
6520 warning (0, "-mstackrealign ignored for nested functions");
6521 if (has_force_align_arg_pointer)
6522 error ("%s not supported for nested functions",
6523 ix86_force_align_arg_pointer_string);
6524 return virtual_incoming_args_rtx;
6526 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6527 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6529 else
6530 return virtual_incoming_args_rtx;
6533 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6534 This is called from dwarf2out.c to emit call frame instructions
6535 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6536 static void
6537 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6539 rtx unspec = SET_SRC (pattern);
6540 gcc_assert (GET_CODE (unspec) == UNSPEC);
6542 switch (index)
6544 case UNSPEC_REG_SAVE:
6545 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6546 SET_DEST (pattern));
6547 break;
6548 case UNSPEC_DEF_CFA:
6549 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6550 INTVAL (XVECEXP (unspec, 0, 0)));
6551 break;
6552 default:
6553 gcc_unreachable ();
6557 /* Expand the prologue into a bunch of separate insns. */
6559 void
6560 ix86_expand_prologue (void)
6562 rtx insn;
6563 bool pic_reg_used;
6564 struct ix86_frame frame;
6565 HOST_WIDE_INT allocate;
6567 ix86_compute_frame_layout (&frame);
6569 if (cfun->machine->force_align_arg_pointer)
6571 rtx x, y;
6573 /* Grab the argument pointer. */
6574 x = plus_constant (stack_pointer_rtx, 4);
6575 y = cfun->machine->force_align_arg_pointer;
6576 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6577 RTX_FRAME_RELATED_P (insn) = 1;
6579 /* The unwind info consists of two parts: install the fafp as the cfa,
6580 and record the fafp as the "save register" of the stack pointer.
6581 The latter is there so that the unwinder can see where it
6582 should restore the stack pointer across the and insn. */
6583 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6584 x = gen_rtx_SET (VOIDmode, y, x);
6585 RTX_FRAME_RELATED_P (x) = 1;
6586 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6587 UNSPEC_REG_SAVE);
6588 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6589 RTX_FRAME_RELATED_P (y) = 1;
6590 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6591 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6592 REG_NOTES (insn) = x;
6594 /* Align the stack. */
6595 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6596 GEN_INT (-16)));
6598 /* And here we cheat like madmen with the unwind info. We force the
6599 cfa register back to sp+4, which is exactly what it was at the
6600 start of the function. Re-pushing the return address results in
6601 the return at the same spot relative to the cfa, and thus is
6602 correct wrt the unwind info. */
6603 x = cfun->machine->force_align_arg_pointer;
6604 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6605 insn = emit_insn (gen_push (x));
6606 RTX_FRAME_RELATED_P (insn) = 1;
6608 x = GEN_INT (4);
6609 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6610 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6611 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6612 REG_NOTES (insn) = x;
6615 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6616 slower on all targets. Also sdb doesn't like it. */
6618 if (frame_pointer_needed)
6620 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6621 RTX_FRAME_RELATED_P (insn) = 1;
6623 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6624 RTX_FRAME_RELATED_P (insn) = 1;
6627 allocate = frame.to_allocate;
6629 if (!frame.save_regs_using_mov)
6630 ix86_emit_save_regs ();
6631 else
6632 allocate += frame.nregs * UNITS_PER_WORD;
6634 /* When using the red zone we may start register saving before allocating
6635 the stack frame, saving one cycle of the prologue. However, avoid doing
6636 this if we are going to have to probe the stack, since at least on
6637 x86_64 the stack probe can turn into a call that clobbers
6638 a red zone location. */
6639 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
6640 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6641 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6642 : stack_pointer_rtx,
6643 -frame.nregs * UNITS_PER_WORD);
6645 if (allocate == 0)
6647 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6648 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6649 GEN_INT (-allocate), -1);
6650 else
6652 /* Only valid for Win32. */
6653 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6654 bool eax_live;
6655 rtx t;
6657 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
6659 if (cfun->machine->call_abi == MS_ABI)
6660 eax_live = false;
6661 else
6662 eax_live = ix86_eax_live_at_start_p ();
6664 if (eax_live)
6666 emit_insn (gen_push (eax));
6667 allocate -= UNITS_PER_WORD;
6670 emit_move_insn (eax, GEN_INT (allocate));
6672 if (TARGET_64BIT)
6673 insn = gen_allocate_stack_worker_64 (eax);
6674 else
6675 insn = gen_allocate_stack_worker_32 (eax);
6676 insn = emit_insn (insn);
6677 RTX_FRAME_RELATED_P (insn) = 1;
6678 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6679 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6680 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6681 t, REG_NOTES (insn));
6683 if (eax_live)
6685 if (frame_pointer_needed)
6686 t = plus_constant (hard_frame_pointer_rtx,
6687 allocate
6688 - frame.to_allocate
6689 - frame.nregs * UNITS_PER_WORD);
6690 else
6691 t = plus_constant (stack_pointer_rtx, allocate);
6692 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6696 if (frame.save_regs_using_mov
6697 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
6698 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6700 if (!frame_pointer_needed || !frame.to_allocate)
6701 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6702 else
6703 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6704 -frame.nregs * UNITS_PER_WORD);
6707 pic_reg_used = false;
6708 if (pic_offset_table_rtx
6709 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6710 || crtl->profile))
6712 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6714 if (alt_pic_reg_used != INVALID_REGNUM)
6715 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6717 pic_reg_used = true;
6720 if (pic_reg_used)
6722 if (TARGET_64BIT)
6724 if (ix86_cmodel == CM_LARGE_PIC)
6726 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6727 rtx label = gen_label_rtx ();
6728 emit_label (label);
6729 LABEL_PRESERVE_P (label) = 1;
6730 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6731 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6732 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6733 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6734 pic_offset_table_rtx, tmp_reg));
6736 else
6737 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6739 else
6740 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6743 /* Prevent function calls from being scheduled before the call to mcount.
6744 In the pic_reg_used case, make sure that the got load isn't deleted. */
6745 if (crtl->profile)
6747 if (pic_reg_used)
6748 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6749 emit_insn (gen_blockage ());
6752 /* Emit cld instruction if stringops are used in the function. */
6753 if (TARGET_CLD && ix86_current_function_needs_cld)
6754 emit_insn (gen_cld ());
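/* For a plain 32-bit function with a frame pointer the code emitted above
   boils down to the familiar sequence, roughly:

       push  %ebp
       mov   %esp, %ebp
       push  <call-saved registers>     (or movs into the allocated frame)
       sub   $to_allocate, %esp

   plus the PIC register setup, profiling blockage and cld special cases
   handled at the end (illustrative; the actual insns depend on the frame
   layout computed by ix86_compute_frame_layout). */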
6757 /* Emit code to restore saved registers using MOV insns. First register
6758 is restored from POINTER + OFFSET. */
6759 static void
6760 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6761 int maybe_eh_return)
6763 int regno;
6764 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6766 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6767 if (ix86_save_reg (regno, maybe_eh_return))
6769 /* Ensure that adjust_address won't be forced to produce a pointer
6770 outside the range allowed by the x86-64 instruction set. */
6771 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6773 rtx r11;
6775 r11 = gen_rtx_REG (DImode, R11_REG);
6776 emit_move_insn (r11, GEN_INT (offset));
6777 emit_insn (gen_adddi3 (r11, r11, pointer));
6778 base_address = gen_rtx_MEM (Pmode, r11);
6779 offset = 0;
6781 emit_move_insn (gen_rtx_REG (Pmode, regno),
6782 adjust_address (base_address, Pmode, offset));
6783 offset += UNITS_PER_WORD;
6787 /* Restore function stack, frame, and registers. */
6789 void
6790 ix86_expand_epilogue (int style)
6792 int regno;
6793 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6794 struct ix86_frame frame;
6795 HOST_WIDE_INT offset;
6797 ix86_compute_frame_layout (&frame);
6799 /* Calculate start of saved registers relative to ebp. Special care
6800 must be taken for the normal return case of a function using
6801 eh_return: the eax and edx registers are marked as saved, but not
6802 restored along this path. */
6803 offset = frame.nregs;
6804 if (crtl->calls_eh_return && style != 2)
6805 offset -= 2;
6806 offset *= -UNITS_PER_WORD;
6808 /* If we're only restoring one register and sp is not valid then
6809 use a move instruction to restore the register, since it's
6810 less work than reloading sp and popping the register.
6812 The default code results in a stack adjustment using an add/lea instruction,
6813 while this code results in a LEAVE instruction (or discrete equivalent),
6814 so it is profitable in some other cases as well, especially when there
6815 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6816 and there is exactly one register to pop. This heuristic may need some
6817 tuning in the future. */
6818 if ((!sp_valid && frame.nregs <= 1)
6819 || (TARGET_EPILOGUE_USING_MOVE
6820 && cfun->machine->use_fast_prologue_epilogue
6821 && (frame.nregs > 1 || frame.to_allocate))
6822 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6823 || (frame_pointer_needed && TARGET_USE_LEAVE
6824 && cfun->machine->use_fast_prologue_epilogue
6825 && frame.nregs == 1)
6826 || crtl->calls_eh_return)
6828 /* Restore registers. We can use ebp or esp to address the memory
6829 locations. If both are available, default to ebp, since offsets
6830 are known to be small. The only exception is esp pointing directly to
6831 the end of the block of saved registers, where we may simplify the
6832 addressing mode. */
6834 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6835 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6836 frame.to_allocate, style == 2);
6837 else
6838 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6839 offset, style == 2);
6841 /* eh_return epilogues need %ecx added to the stack pointer. */
6842 if (style == 2)
6844 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6846 if (frame_pointer_needed)
6848 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6849 tmp = plus_constant (tmp, UNITS_PER_WORD);
6850 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6852 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6853 emit_move_insn (hard_frame_pointer_rtx, tmp);
6855 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6856 const0_rtx, style);
6858 else
6860 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6861 tmp = plus_constant (tmp, (frame.to_allocate
6862 + frame.nregs * UNITS_PER_WORD));
6863 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6866 else if (!frame_pointer_needed)
6867 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6868 GEN_INT (frame.to_allocate
6869 + frame.nregs * UNITS_PER_WORD),
6870 style);
6871 /* If not an i386, mov & pop is faster than "leave". */
6872 else if (TARGET_USE_LEAVE || optimize_size
6873 || !cfun->machine->use_fast_prologue_epilogue)
6874 emit_insn ((*ix86_gen_leave) ());
6875 else
6877 pro_epilogue_adjust_stack (stack_pointer_rtx,
6878 hard_frame_pointer_rtx,
6879 const0_rtx, style);
6881 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
6884 else
6886 /* First step is to deallocate the stack frame so that we can
6887 pop the registers. */
6888 if (!sp_valid)
6890 gcc_assert (frame_pointer_needed);
6891 pro_epilogue_adjust_stack (stack_pointer_rtx,
6892 hard_frame_pointer_rtx,
6893 GEN_INT (offset), style);
6895 else if (frame.to_allocate)
6896 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6897 GEN_INT (frame.to_allocate), style);
6899 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6900 if (ix86_save_reg (regno, false))
6901 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
6902 if (frame_pointer_needed)
6904 /* Leave results in shorter dependency chains on CPUs that are
6905 able to grok it fast. */
6906 if (TARGET_USE_LEAVE)
6907 emit_insn ((*ix86_gen_leave) ());
6908 else
6909 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
6913 if (cfun->machine->force_align_arg_pointer)
6915 emit_insn (gen_addsi3 (stack_pointer_rtx,
6916 cfun->machine->force_align_arg_pointer,
6917 GEN_INT (-4)));
6920 /* Sibcall epilogues don't want a return instruction. */
6921 if (style == 0)
6922 return;
6924 if (crtl->args.pops_args && crtl->args.size)
6926 rtx popc = GEN_INT (crtl->args.pops_args);
6928 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6929 return address, do an explicit add, and jump indirectly to the
6930 caller. */
6932 if (crtl->args.pops_args >= 65536)
6934 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6936 /* There is no "pascal" calling convention in any 64bit ABI. */
6937 gcc_assert (!TARGET_64BIT);
6939 emit_insn (gen_popsi1 (ecx));
6940 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6941 emit_jump_insn (gen_return_indirect_internal (ecx));
6943 else
6944 emit_jump_insn (gen_return_pop_internal (popc));
6946 else
6947 emit_jump_insn (gen_return_internal ());
6950 /* Reset from the function's potential modifications. */
6952 static void
6953 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6954 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6956 if (pic_offset_table_rtx)
6957 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6958 #if TARGET_MACHO
6959 /* Mach-O doesn't support labels at the end of objects, so if
6960 it looks like we might want one, insert a NOP. */
6962 rtx insn = get_last_insn ();
6963 while (insn
6964 && NOTE_P (insn)
6965 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6966 insn = PREV_INSN (insn);
6967 if (insn
6968 && (LABEL_P (insn)
6969 || (NOTE_P (insn)
6970 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6971 fputs ("\tnop\n", file);
6973 #endif
6977 /* Extract the parts of an RTL expression that is a valid memory address
6978 for an instruction. Return 0 if the structure of the address is
6979 grossly off. Return -1 if the address contains ASHIFT, so it is not
6980 strictly valid, but still used for computing length of lea instruction. */
6983 ix86_decompose_address (rtx addr, struct ix86_address *out)
6985 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6986 rtx base_reg, index_reg;
6987 HOST_WIDE_INT scale = 1;
6988 rtx scale_rtx = NULL_RTX;
6989 int retval = 1;
6990 enum ix86_address_seg seg = SEG_DEFAULT;
6992 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6993 base = addr;
6994 else if (GET_CODE (addr) == PLUS)
6996 rtx addends[4], op;
6997 int n = 0, i;
6999 op = addr;
7002 if (n >= 4)
7003 return 0;
7004 addends[n++] = XEXP (op, 1);
7005 op = XEXP (op, 0);
7007 while (GET_CODE (op) == PLUS);
7008 if (n >= 4)
7009 return 0;
7010 addends[n] = op;
7012 for (i = n; i >= 0; --i)
7014 op = addends[i];
7015 switch (GET_CODE (op))
7017 case MULT:
7018 if (index)
7019 return 0;
7020 index = XEXP (op, 0);
7021 scale_rtx = XEXP (op, 1);
7022 break;
7024 case UNSPEC:
7025 if (XINT (op, 1) == UNSPEC_TP
7026 && TARGET_TLS_DIRECT_SEG_REFS
7027 && seg == SEG_DEFAULT)
7028 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
7029 else
7030 return 0;
7031 break;
7033 case REG:
7034 case SUBREG:
7035 if (!base)
7036 base = op;
7037 else if (!index)
7038 index = op;
7039 else
7040 return 0;
7041 break;
7043 case CONST:
7044 case CONST_INT:
7045 case SYMBOL_REF:
7046 case LABEL_REF:
7047 if (disp)
7048 return 0;
7049 disp = op;
7050 break;
7052 default:
7053 return 0;
7057 else if (GET_CODE (addr) == MULT)
7059 index = XEXP (addr, 0); /* index*scale */
7060 scale_rtx = XEXP (addr, 1);
7062 else if (GET_CODE (addr) == ASHIFT)
7064 rtx tmp;
7066 /* We're called for lea too, which implements ashift on occasion. */
7067 index = XEXP (addr, 0);
7068 tmp = XEXP (addr, 1);
7069 if (!CONST_INT_P (tmp))
7070 return 0;
7071 scale = INTVAL (tmp);
7072 if ((unsigned HOST_WIDE_INT) scale > 3)
7073 return 0;
7074 scale = 1 << scale;
7075 retval = -1;
7077 else
7078 disp = addr; /* displacement */
7080 /* Extract the integral value of scale. */
7081 if (scale_rtx)
7083 if (!CONST_INT_P (scale_rtx))
7084 return 0;
7085 scale = INTVAL (scale_rtx);
7088 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
7089 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
7091 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
7092 if (base_reg && index_reg && scale == 1
7093 && (index_reg == arg_pointer_rtx
7094 || index_reg == frame_pointer_rtx
7095 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
7097 rtx tmp;
7098 tmp = base, base = index, index = tmp;
7099 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
7102 /* Special case: %ebp cannot be encoded as a base without a displacement. */
7103 if ((base_reg == hard_frame_pointer_rtx
7104 || base_reg == frame_pointer_rtx
7105 || base_reg == arg_pointer_rtx) && !disp)
7106 disp = const0_rtx;
7108 /* Special case: on the K6, [%esi] forces the instruction to be vector
7109 decoded. Avoid this by transforming it to [%esi+0]. */
7110 if (TARGET_K6 && !optimize_size
7111 && base_reg && !index_reg && !disp
7112 && REG_P (base_reg)
7113 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
7114 disp = const0_rtx;
7116 /* Special case: encode reg+reg instead of reg*2. */
7117 if (!base && index && scale && scale == 2)
7118 base = index, base_reg = index_reg, scale = 1;
7120 /* Special case: scaling cannot be encoded without base or displacement. */
7121 if (!base && !disp && index && scale != 1)
7122 disp = const0_rtx;
7124 out->base = base;
7125 out->index = index;
7126 out->disp = disp;
7127 out->scale = scale;
7128 out->seg = seg;
7130 return retval;
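/* Illustrative sketch, not part of the build: how a caller typically
   consumes the decomposition above.  The address built here is
   hypothetical; real callers pass the address of a MEM operand.  */
#if 0
static void
example_decompose_address (void)
{
  /* base + index*4, i.e. (plus (mult (reg 1) (const_int 4)) (reg 0)).  */
  rtx base = gen_rtx_REG (Pmode, 0);
  rtx index = gen_rtx_REG (Pmode, 1);
  rtx addr = gen_rtx_PLUS (Pmode,
                           gen_rtx_MULT (Pmode, index, GEN_INT (4)),
                           base);
  struct ix86_address parts;

  if (ix86_decompose_address (addr, &parts) > 0)
    {
      /* parts.base == base, parts.index == index, parts.scale == 4,
         parts.disp == NULL_RTX and parts.seg == SEG_DEFAULT.  */
    }
}
#endif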
7133 /* Return cost of the memory address x.
7134 For i386, it is better to use a complex address than to let gcc copy
7135 the address into a reg and make a new pseudo. But not if the address
7136 requires two regs - that would mean more pseudos with longer
7137 lifetimes. */
7138 static int
7139 ix86_address_cost (rtx x)
7141 struct ix86_address parts;
7142 int cost = 1;
7143 int ok = ix86_decompose_address (x, &parts);
7145 gcc_assert (ok);
7147 if (parts.base && GET_CODE (parts.base) == SUBREG)
7148 parts.base = SUBREG_REG (parts.base);
7149 if (parts.index && GET_CODE (parts.index) == SUBREG)
7150 parts.index = SUBREG_REG (parts.index);
7152 /* Attempt to minimize number of registers in the address. */
7153 if ((parts.base
7154 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
7155 || (parts.index
7156 && (!REG_P (parts.index)
7157 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7158 cost++;
7160 if (parts.base
7161 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7162 && parts.index
7163 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7164 && parts.base != parts.index)
7165 cost++;
7167 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
7168 since its predecode logic can't detect the length of such instructions
7169 and they degenerate to being vector decoded. Increase the cost of such
7170 addresses here. The penalty is at least 2 cycles. It may be worthwhile
7171 to split such addresses or even to refuse them entirely.
7173 The following addressing modes are affected:
7174 [base+scale*index]
7175 [scale*index+disp]
7176 [base+index]
7178 The first and last cases may be avoidable by explicitly coding a zero
7179 displacement into the memory address, but I don't have an AMD-K6 machine
7180 handy to check this theory. */
7182 if (TARGET_K6
7183 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7184 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7185 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7186 cost += 10;
7188 return cost;
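/* Illustrative sketch of the resulting relative costs (freshly allocated
   pseudos are a hypothetical stand-in here): an address built only from
   hard registers costs 1; needing one pseudo (or non-register part) costs
   2; needing two distinct pseudos costs 3; the K6 forms penalized above
   add a further 10.  */
#if 0
  rtx r0 = gen_reg_rtx (Pmode), r1 = gen_reg_rtx (Pmode);
  /* ix86_address_cost (r0)                            -> 2
     ix86_address_cost (gen_rtx_PLUS (Pmode, r0, r1))  -> 3
     (plus 10 more on the K6, since that is a base+index form
      without a displacement).  */
#endif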
7191 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7192 this is used to form addresses to local data when -fPIC is in
7193 use. */
7195 static bool
7196 darwin_local_data_pic (rtx disp)
7198 if (GET_CODE (disp) == MINUS)
7200 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7201 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7202 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7204 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7205 if (! strcmp (sym_name, "<pic base>"))
7206 return true;
7210 return false;
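/* Illustrative sketch, with hypothetical names: the shape this predicate
   accepts is a difference between a local symbol (or label) and the
   per-function Mach-O pic base symbol.  */
#if 0
  rtx pic_base = gen_rtx_SYMBOL_REF (Pmode, "<pic base>");
  rtx local = gen_rtx_SYMBOL_REF (Pmode, "_local_data");
  rtx disp = gen_rtx_MINUS (Pmode, local, pic_base);
  /* darwin_local_data_pic (disp) -> true  */
#endif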
7213 /* Determine if a given RTX is a valid constant. We already know this
7214 satisfies CONSTANT_P. */
7216 bool
7217 legitimate_constant_p (rtx x)
7219 switch (GET_CODE (x))
7221 case CONST:
7222 x = XEXP (x, 0);
7224 if (GET_CODE (x) == PLUS)
7226 if (!CONST_INT_P (XEXP (x, 1)))
7227 return false;
7228 x = XEXP (x, 0);
7231 if (TARGET_MACHO && darwin_local_data_pic (x))
7232 return true;
7234 /* Only some unspecs are valid as "constants". */
7235 if (GET_CODE (x) == UNSPEC)
7236 switch (XINT (x, 1))
7238 case UNSPEC_GOT:
7239 case UNSPEC_GOTOFF:
7240 case UNSPEC_PLTOFF:
7241 return TARGET_64BIT;
7242 case UNSPEC_TPOFF:
7243 case UNSPEC_NTPOFF:
7244 x = XVECEXP (x, 0, 0);
7245 return (GET_CODE (x) == SYMBOL_REF
7246 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7247 case UNSPEC_DTPOFF:
7248 x = XVECEXP (x, 0, 0);
7249 return (GET_CODE (x) == SYMBOL_REF
7250 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7251 default:
7252 return false;
7255 /* We must have drilled down to a symbol. */
7256 if (GET_CODE (x) == LABEL_REF)
7257 return true;
7258 if (GET_CODE (x) != SYMBOL_REF)
7259 return false;
7260 /* FALLTHRU */
7262 case SYMBOL_REF:
7263 /* TLS symbols are never valid. */
7264 if (SYMBOL_REF_TLS_MODEL (x))
7265 return false;
7267 /* DLLIMPORT symbols are never valid. */
7268 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7269 && SYMBOL_REF_DLLIMPORT_P (x))
7270 return false;
7271 break;
7273 case CONST_DOUBLE:
7274 if (GET_MODE (x) == TImode
7275 && x != CONST0_RTX (TImode)
7276 && !TARGET_64BIT)
7277 return false;
7278 break;
7280 case CONST_VECTOR:
7281 if (x == CONST0_RTX (GET_MODE (x)))
7282 return true;
7283 return false;
7285 default:
7286 break;
7289 /* Otherwise we handle everything else in the move patterns. */
7290 return true;
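/* Illustrative sketch: plain integers (and zero vectors) are accepted,
   whereas anything that drills down to a TLS SYMBOL_REF is rejected and
   has to go through legitimize_tls_address instead.  */
#if 0
  gcc_assert (legitimate_constant_p (GEN_INT (42)));
  gcc_assert (legitimate_constant_p (CONST0_RTX (V4SImode)));
#endif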
7293 /* Determine if it's legal to put X into the constant pool. This
7294 is not possible for the address of thread-local symbols, which
7295 is checked above. */
7297 static bool
7298 ix86_cannot_force_const_mem (rtx x)
7300 /* We can always put integral constants and vectors in memory. */
7301 switch (GET_CODE (x))
7303 case CONST_INT:
7304 case CONST_DOUBLE:
7305 case CONST_VECTOR:
7306 return false;
7308 default:
7309 break;
7311 return !legitimate_constant_p (x);
7314 /* Determine if a given RTX is a valid constant address. */
7316 bool
7317 constant_address_p (rtx x)
7319 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7322 /* Nonzero if the constant value X is a legitimate general operand
7323 when generating PIC code. It is given that flag_pic is on and
7324 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7326 bool
7327 legitimate_pic_operand_p (rtx x)
7329 rtx inner;
7331 switch (GET_CODE (x))
7333 case CONST:
7334 inner = XEXP (x, 0);
7335 if (GET_CODE (inner) == PLUS
7336 && CONST_INT_P (XEXP (inner, 1)))
7337 inner = XEXP (inner, 0);
7339 /* Only some unspecs are valid as "constants". */
7340 if (GET_CODE (inner) == UNSPEC)
7341 switch (XINT (inner, 1))
7343 case UNSPEC_GOT:
7344 case UNSPEC_GOTOFF:
7345 case UNSPEC_PLTOFF:
7346 return TARGET_64BIT;
7347 case UNSPEC_TPOFF:
7348 x = XVECEXP (inner, 0, 0);
7349 return (GET_CODE (x) == SYMBOL_REF
7350 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7351 default:
7352 return false;
7354 /* FALLTHRU */
7356 case SYMBOL_REF:
7357 case LABEL_REF:
7358 return legitimate_pic_address_disp_p (x);
7360 default:
7361 return true;
7365 /* Determine if a given CONST RTX is a valid memory displacement
7366 in PIC mode. */
7369 legitimate_pic_address_disp_p (rtx disp)
7371 bool saw_plus;
7373 /* In 64bit mode we can allow direct addresses of symbols and labels
7374 when they are not dynamic symbols. */
7375 if (TARGET_64BIT)
7377 rtx op0 = disp, op1;
7379 switch (GET_CODE (disp))
7381 case LABEL_REF:
7382 return true;
7384 case CONST:
7385 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7386 break;
7387 op0 = XEXP (XEXP (disp, 0), 0);
7388 op1 = XEXP (XEXP (disp, 0), 1);
7389 if (!CONST_INT_P (op1)
7390 || INTVAL (op1) >= 16*1024*1024
7391 || INTVAL (op1) < -16*1024*1024)
7392 break;
7393 if (GET_CODE (op0) == LABEL_REF)
7394 return true;
7395 if (GET_CODE (op0) != SYMBOL_REF)
7396 break;
7397 /* FALLTHRU */
7399 case SYMBOL_REF:
7400 /* TLS references should always be enclosed in UNSPEC. */
7401 if (SYMBOL_REF_TLS_MODEL (op0))
7402 return false;
7403 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7404 && ix86_cmodel != CM_LARGE_PIC)
7405 return true;
7406 break;
7408 default:
7409 break;
7412 if (GET_CODE (disp) != CONST)
7413 return 0;
7414 disp = XEXP (disp, 0);
7416 if (TARGET_64BIT)
7418 /* It is unsafe to allow PLUS expressions here; this limits the allowed
7419 distance of GOT table references. We should not need these anyway. */
7420 if (GET_CODE (disp) != UNSPEC
7421 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7422 && XINT (disp, 1) != UNSPEC_GOTOFF
7423 && XINT (disp, 1) != UNSPEC_PLTOFF))
7424 return 0;
7426 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7427 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7428 return 0;
7429 return 1;
7432 saw_plus = false;
7433 if (GET_CODE (disp) == PLUS)
7435 if (!CONST_INT_P (XEXP (disp, 1)))
7436 return 0;
7437 disp = XEXP (disp, 0);
7438 saw_plus = true;
7441 if (TARGET_MACHO && darwin_local_data_pic (disp))
7442 return 1;
7444 if (GET_CODE (disp) != UNSPEC)
7445 return 0;
7447 switch (XINT (disp, 1))
7449 case UNSPEC_GOT:
7450 if (saw_plus)
7451 return false;
7452 /* We need to check for both symbols and labels because VxWorks loads
7453 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7454 details. */
7455 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7456 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7457 case UNSPEC_GOTOFF:
7458 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7459 While the ABI also specifies a 32bit relocation, we don't produce it in
7460 the small PIC model at all. */
7461 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7462 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7463 && !TARGET_64BIT)
7464 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7465 return false;
7466 case UNSPEC_GOTTPOFF:
7467 case UNSPEC_GOTNTPOFF:
7468 case UNSPEC_INDNTPOFF:
7469 if (saw_plus)
7470 return false;
7471 disp = XVECEXP (disp, 0, 0);
7472 return (GET_CODE (disp) == SYMBOL_REF
7473 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7474 case UNSPEC_NTPOFF:
7475 disp = XVECEXP (disp, 0, 0);
7476 return (GET_CODE (disp) == SYMBOL_REF
7477 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7478 case UNSPEC_DTPOFF:
7479 disp = XVECEXP (disp, 0, 0);
7480 return (GET_CODE (disp) == SYMBOL_REF
7481 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7484 return 0;
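/* Illustrative sketch with a hypothetical symbol: the classic 32-bit PIC
   displacement for locally-bound data is the symbol wrapped in an
   @GOTOFF unspec, exactly the form legitimize_pic_address builds
   further below.  */
#if 0
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, "my_local_var");
  rtx disp = gen_rtx_CONST (Pmode,
                            gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym),
                                            UNSPEC_GOTOFF));
  /* In 32-bit code, legitimate_pic_address_disp_p (disp) is nonzero
     provided gotoff_operand accepts SYM.  */
#endif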
7487 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7488 memory address for an instruction. The MODE argument is the machine mode
7489 for the MEM expression that wants to use this address.
7491 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
7492 convert common non-canonical forms to canonical form so that they will
7493 be recognized. */
7496 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7497 rtx addr, int strict)
7499 struct ix86_address parts;
7500 rtx base, index, disp;
7501 HOST_WIDE_INT scale;
7502 const char *reason = NULL;
7503 rtx reason_rtx = NULL_RTX;
7505 if (ix86_decompose_address (addr, &parts) <= 0)
7507 reason = "decomposition failed";
7508 goto report_error;
7511 base = parts.base;
7512 index = parts.index;
7513 disp = parts.disp;
7514 scale = parts.scale;
7516 /* Validate base register.
7518 Don't allow SUBREG's that span more than a word here. It can lead to spill
7519 failures when the base is one word out of a two word structure, which is
7520 represented internally as a DImode int. */
7522 if (base)
7524 rtx reg;
7525 reason_rtx = base;
7527 if (REG_P (base))
7528 reg = base;
7529 else if (GET_CODE (base) == SUBREG
7530 && REG_P (SUBREG_REG (base))
7531 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7532 <= UNITS_PER_WORD)
7533 reg = SUBREG_REG (base);
7534 else
7536 reason = "base is not a register";
7537 goto report_error;
7540 if (GET_MODE (base) != Pmode)
7542 reason = "base is not in Pmode";
7543 goto report_error;
7546 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7547 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7549 reason = "base is not valid";
7550 goto report_error;
7554 /* Validate index register.
7556 Don't allow SUBREG's that span more than a word here -- same as above. */
7558 if (index)
7560 rtx reg;
7561 reason_rtx = index;
7563 if (REG_P (index))
7564 reg = index;
7565 else if (GET_CODE (index) == SUBREG
7566 && REG_P (SUBREG_REG (index))
7567 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7568 <= UNITS_PER_WORD)
7569 reg = SUBREG_REG (index);
7570 else
7572 reason = "index is not a register";
7573 goto report_error;
7576 if (GET_MODE (index) != Pmode)
7578 reason = "index is not in Pmode";
7579 goto report_error;
7582 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7583 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7585 reason = "index is not valid";
7586 goto report_error;
7590 /* Validate scale factor. */
7591 if (scale != 1)
7593 reason_rtx = GEN_INT (scale);
7594 if (!index)
7596 reason = "scale without index";
7597 goto report_error;
7600 if (scale != 2 && scale != 4 && scale != 8)
7602 reason = "scale is not a valid multiplier";
7603 goto report_error;
7607 /* Validate displacement. */
7608 if (disp)
7610 reason_rtx = disp;
7612 if (GET_CODE (disp) == CONST
7613 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7614 switch (XINT (XEXP (disp, 0), 1))
7616 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7617 used. While the ABI also specifies 32bit relocations, we don't produce
7618 them at all and use IP-relative addressing instead. */
7619 case UNSPEC_GOT:
7620 case UNSPEC_GOTOFF:
7621 gcc_assert (flag_pic);
7622 if (!TARGET_64BIT)
7623 goto is_legitimate_pic;
7624 reason = "64bit address unspec";
7625 goto report_error;
7627 case UNSPEC_GOTPCREL:
7628 gcc_assert (flag_pic);
7629 goto is_legitimate_pic;
7631 case UNSPEC_GOTTPOFF:
7632 case UNSPEC_GOTNTPOFF:
7633 case UNSPEC_INDNTPOFF:
7634 case UNSPEC_NTPOFF:
7635 case UNSPEC_DTPOFF:
7636 break;
7638 default:
7639 reason = "invalid address unspec";
7640 goto report_error;
7643 else if (SYMBOLIC_CONST (disp)
7644 && (flag_pic
7645 || (TARGET_MACHO
7646 #if TARGET_MACHO
7647 && MACHOPIC_INDIRECT
7648 && !machopic_operand_p (disp)
7649 #endif
7653 is_legitimate_pic:
7654 if (TARGET_64BIT && (index || base))
7656 /* foo@dtpoff(%rX) is ok. */
7657 if (GET_CODE (disp) != CONST
7658 || GET_CODE (XEXP (disp, 0)) != PLUS
7659 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7660 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7661 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7662 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7664 reason = "non-constant pic memory reference";
7665 goto report_error;
7668 else if (! legitimate_pic_address_disp_p (disp))
7670 reason = "displacement is an invalid pic construct";
7671 goto report_error;
7674 /* This code used to verify that a symbolic pic displacement
7675 includes the pic_offset_table_rtx register.
7677 While this is a good idea, unfortunately these constructs may
7678 be created by the "adds using lea" optimization for incorrect
7679 code like:
7681 int a;
7682 int foo(int i)
7684 return *(&a+i);
7687 This code is nonsensical, but it results in addressing the
7688 GOT table with a pic_offset_table_rtx base. We can't
7689 just refuse it easily, since it gets matched by the
7690 "addsi3" pattern, which later gets split to lea when the
7691 output register differs from the input. While this
7692 could be handled by a separate addsi pattern for this case
7693 that never results in lea, disabling this test seems to be
7694 the easier and correct fix for the crash. */
7696 else if (GET_CODE (disp) != LABEL_REF
7697 && !CONST_INT_P (disp)
7698 && (GET_CODE (disp) != CONST
7699 || !legitimate_constant_p (disp))
7700 && (GET_CODE (disp) != SYMBOL_REF
7701 || !legitimate_constant_p (disp)))
7703 reason = "displacement is not constant";
7704 goto report_error;
7706 else if (TARGET_64BIT
7707 && !x86_64_immediate_operand (disp, VOIDmode))
7709 reason = "displacement is out of range";
7710 goto report_error;
7714 /* Everything looks valid. */
7715 return TRUE;
7717 report_error:
7718 return FALSE;
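/* Illustrative sketch of strict vs. non-strict checking: a pseudo
   register is an acceptable base before reload, but once STRICT is set
   only hard registers (or properly renumbered pseudos) may appear.  */
#if 0
  rtx pseudo = gen_reg_rtx (Pmode);
  rtx addr = gen_rtx_PLUS (Pmode, pseudo, GEN_INT (8));
  /* legitimate_address_p (SImode, addr, 0) -> TRUE
     legitimate_address_p (SImode, addr, 1) -> FALSE ("base is not valid")  */
#endif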
7721 /* Return a unique alias set for the GOT. */
7723 static alias_set_type
7724 ix86_GOT_alias_set (void)
7726 static alias_set_type set = -1;
7727 if (set == -1)
7728 set = new_alias_set ();
7729 return set;
7732 /* Return a legitimate reference for ORIG (an address) using the
7733 register REG. If REG is 0, a new pseudo is generated.
7735 There are two types of references that must be handled:
7737 1. Global data references must load the address from the GOT, via
7738 the PIC reg. An insn is emitted to do this load, and the reg is
7739 returned.
7741 2. Static data references, constant pool addresses, and code labels
7742 compute the address as an offset from the GOT, whose base is in
7743 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7744 differentiate them from global data objects. The returned
7745 address is the PIC reg + an unspec constant.
7747 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7748 reg also appears in the address. */
7750 static rtx
7751 legitimize_pic_address (rtx orig, rtx reg)
7753 rtx addr = orig;
7754 rtx new_rtx = orig;
7755 rtx base;
7757 #if TARGET_MACHO
7758 if (TARGET_MACHO && !TARGET_64BIT)
7760 if (reg == 0)
7761 reg = gen_reg_rtx (Pmode);
7762 /* Use the generic Mach-O PIC machinery. */
7763 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7765 #endif
7767 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7768 new_rtx = addr;
7769 else if (TARGET_64BIT
7770 && ix86_cmodel != CM_SMALL_PIC
7771 && gotoff_operand (addr, Pmode))
7773 rtx tmpreg;
7774 /* This symbol may be referenced via a displacement from the PIC
7775 base address (@GOTOFF). */
7777 if (reload_in_progress)
7778 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7779 if (GET_CODE (addr) == CONST)
7780 addr = XEXP (addr, 0);
7781 if (GET_CODE (addr) == PLUS)
7783 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7784 UNSPEC_GOTOFF);
7785 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7787 else
7788 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7789 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7790 if (!reg)
7791 tmpreg = gen_reg_rtx (Pmode);
7792 else
7793 tmpreg = reg;
7794 emit_move_insn (tmpreg, new_rtx);
7796 if (reg != 0)
7798 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7799 tmpreg, 1, OPTAB_DIRECT);
7800 new_rtx = reg;
7802 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7804 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7806 /* This symbol may be referenced via a displacement from the PIC
7807 base address (@GOTOFF). */
7809 if (reload_in_progress)
7810 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7811 if (GET_CODE (addr) == CONST)
7812 addr = XEXP (addr, 0);
7813 if (GET_CODE (addr) == PLUS)
7815 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7816 UNSPEC_GOTOFF);
7817 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7819 else
7820 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7821 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7822 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7824 if (reg != 0)
7826 emit_move_insn (reg, new_rtx);
7827 new_rtx = reg;
7830 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7831 /* We can't use @GOTOFF for text labels on VxWorks;
7832 see gotoff_operand. */
7833 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7835 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7837 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7838 return legitimize_dllimport_symbol (addr, true);
7839 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7840 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7841 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7843 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7844 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7848 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7850 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7851 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7852 new_rtx = gen_const_mem (Pmode, new_rtx);
7853 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7855 if (reg == 0)
7856 reg = gen_reg_rtx (Pmode);
7857 /* Use gen_movsi directly, otherwise the address is loaded
7858 into a register for CSE. We don't want to CSE this address;
7859 instead we CSE addresses from the GOT table, so skip this. */
7860 emit_insn (gen_movsi (reg, new_rtx));
7861 new_rtx = reg;
7863 else
7865 /* This symbol must be referenced via a load from the
7866 Global Offset Table (@GOT). */
7868 if (reload_in_progress)
7869 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7870 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7871 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7872 if (TARGET_64BIT)
7873 new_rtx = force_reg (Pmode, new_rtx);
7874 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7875 new_rtx = gen_const_mem (Pmode, new_rtx);
7876 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7878 if (reg == 0)
7879 reg = gen_reg_rtx (Pmode);
7880 emit_move_insn (reg, new_rtx);
7881 new_rtx = reg;
7884 else
7886 if (CONST_INT_P (addr)
7887 && !x86_64_immediate_operand (addr, VOIDmode))
7889 if (reg)
7891 emit_move_insn (reg, addr);
7892 new_rtx = reg;
7894 else
7895 new_rtx = force_reg (Pmode, addr);
7897 else if (GET_CODE (addr) == CONST)
7899 addr = XEXP (addr, 0);
7901 /* We must match stuff we generated before. Assume the only
7902 unspecs that can get here are ours. Not that we could do
7903 anything with them anyway.... */
7904 if (GET_CODE (addr) == UNSPEC
7905 || (GET_CODE (addr) == PLUS
7906 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7907 return orig;
7908 gcc_assert (GET_CODE (addr) == PLUS);
7910 if (GET_CODE (addr) == PLUS)
7912 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7914 /* Check first to see if this is a constant offset from a @GOTOFF
7915 symbol reference. */
7916 if (gotoff_operand (op0, Pmode)
7917 && CONST_INT_P (op1))
7919 if (!TARGET_64BIT)
7921 if (reload_in_progress)
7922 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7923 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7924 UNSPEC_GOTOFF);
7925 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7926 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7927 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7929 if (reg != 0)
7931 emit_move_insn (reg, new_rtx);
7932 new_rtx = reg;
7935 else
7937 if (INTVAL (op1) < -16*1024*1024
7938 || INTVAL (op1) >= 16*1024*1024)
7940 if (!x86_64_immediate_operand (op1, Pmode))
7941 op1 = force_reg (Pmode, op1);
7942 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7946 else
7948 base = legitimize_pic_address (XEXP (addr, 0), reg);
7949 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7950 base == reg ? NULL_RTX : reg);
7952 if (CONST_INT_P (new_rtx))
7953 new_rtx = plus_constant (base, INTVAL (new_rtx));
7954 else
7956 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7958 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7959 new_rtx = XEXP (new_rtx, 1);
7961 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7966 return new_rtx;
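/* Illustrative sketch (hypothetical symbols) of the two 32-bit shapes
   described above.  A locally-bound symbol becomes an offset from the
   PIC base:
     (plus pic_offset_table_rtx
           (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   while a global symbol causes a load of its GOT slot
     (mem (plus pic_offset_table_rtx
                (const (unspec [(symbol_ref "y")] UNSPEC_GOT))))
   to be emitted, and the pseudo holding the loaded address is what gets
   returned.  */
#if 0
  rtx ref = legitimize_pic_address (gen_rtx_SYMBOL_REF (Pmode, "y"),
                                    NULL_RTX);
#endif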
7969 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7971 static rtx
7972 get_thread_pointer (int to_reg)
7974 rtx tp, reg, insn;
7976 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7977 if (!to_reg)
7978 return tp;
7980 reg = gen_reg_rtx (Pmode);
7981 insn = gen_rtx_SET (VOIDmode, reg, tp);
7982 insn = emit_insn (insn);
7984 return reg;
7987 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7988 false if we expect this to be used for a memory address and true if
7989 we expect to load the address into a register. */
7991 static rtx
7992 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7994 rtx dest, base, off, pic, tp;
7995 int type;
7997 switch (model)
7999 case TLS_MODEL_GLOBAL_DYNAMIC:
8000 dest = gen_reg_rtx (Pmode);
8001 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8003 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8005 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
8007 start_sequence ();
8008 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
8009 insns = get_insns ();
8010 end_sequence ();
8012 RTL_CONST_CALL_P (insns) = 1;
8013 emit_libcall_block (insns, dest, rax, x);
8015 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8016 emit_insn (gen_tls_global_dynamic_64 (dest, x));
8017 else
8018 emit_insn (gen_tls_global_dynamic_32 (dest, x));
8020 if (TARGET_GNU2_TLS)
8022 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
8024 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8026 break;
8028 case TLS_MODEL_LOCAL_DYNAMIC:
8029 base = gen_reg_rtx (Pmode);
8030 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8032 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8034 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
8036 start_sequence ();
8037 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
8038 insns = get_insns ();
8039 end_sequence ();
8041 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
8042 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
8043 RTL_CONST_CALL_P (insns) = 1;
8044 emit_libcall_block (insns, base, rax, note);
8046 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8047 emit_insn (gen_tls_local_dynamic_base_64 (base));
8048 else
8049 emit_insn (gen_tls_local_dynamic_base_32 (base));
8051 if (TARGET_GNU2_TLS)
8053 rtx x = ix86_tls_module_base ();
8055 set_unique_reg_note (get_last_insn (), REG_EQUIV,
8056 gen_rtx_MINUS (Pmode, x, tp));
8059 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
8060 off = gen_rtx_CONST (Pmode, off);
8062 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
8064 if (TARGET_GNU2_TLS)
8066 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
8068 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8071 break;
8073 case TLS_MODEL_INITIAL_EXEC:
8074 if (TARGET_64BIT)
8076 pic = NULL;
8077 type = UNSPEC_GOTNTPOFF;
8079 else if (flag_pic)
8081 if (reload_in_progress)
8082 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8083 pic = pic_offset_table_rtx;
8084 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
8086 else if (!TARGET_ANY_GNU_TLS)
8088 pic = gen_reg_rtx (Pmode);
8089 emit_insn (gen_set_got (pic));
8090 type = UNSPEC_GOTTPOFF;
8092 else
8094 pic = NULL;
8095 type = UNSPEC_INDNTPOFF;
8098 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
8099 off = gen_rtx_CONST (Pmode, off);
8100 if (pic)
8101 off = gen_rtx_PLUS (Pmode, pic, off);
8102 off = gen_const_mem (Pmode, off);
8103 set_mem_alias_set (off, ix86_GOT_alias_set ());
8105 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8107 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8108 off = force_reg (Pmode, off);
8109 return gen_rtx_PLUS (Pmode, base, off);
8111 else
8113 base = get_thread_pointer (true);
8114 dest = gen_reg_rtx (Pmode);
8115 emit_insn (gen_subsi3 (dest, base, off));
8117 break;
8119 case TLS_MODEL_LOCAL_EXEC:
8120 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
8121 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8122 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
8123 off = gen_rtx_CONST (Pmode, off);
8125 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8127 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8128 return gen_rtx_PLUS (Pmode, base, off);
8130 else
8132 base = get_thread_pointer (true);
8133 dest = gen_reg_rtx (Pmode);
8134 emit_insn (gen_subsi3 (dest, base, off));
8136 break;
8138 default:
8139 gcc_unreachable ();
8142 return dest;
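/* Illustrative sketch, hypothetical symbol: for the local-exec model
   under the GNU TLS ABI the result is simply the thread pointer plus an
   @NTPOFF displacement, which ix86_decompose_address later turns into a
   %gs:/%fs: segment-relative address.  */
#if 0
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, "tls_var");
  rtx ref = legitimize_tls_address (sym, TLS_MODEL_LOCAL_EXEC, false);
  /* ref == (plus (unspec [(const_int 0)] UNSPEC_TP)
                  (const (unspec [(symbol_ref "tls_var")] UNSPEC_NTPOFF)))
     when TARGET_TLS_DIRECT_SEG_REFS; otherwise the thread pointer is
     first forced into a register.  */
#endif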
8145 /* Create or return the unique __imp_DECL dllimport symbol corresponding
8146 to symbol DECL. */
8148 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
8149 htab_t dllimport_map;
8151 static tree
8152 get_dllimport_decl (tree decl)
8154 struct tree_map *h, in;
8155 void **loc;
8156 const char *name;
8157 const char *prefix;
8158 size_t namelen, prefixlen;
8159 char *imp_name;
8160 tree to;
8161 rtx rtl;
8163 if (!dllimport_map)
8164 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8166 in.hash = htab_hash_pointer (decl);
8167 in.base.from = decl;
8168 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
8169 h = (struct tree_map *) *loc;
8170 if (h)
8171 return h->to;
8173 *loc = h = GGC_NEW (struct tree_map);
8174 h->hash = in.hash;
8175 h->base.from = decl;
8176 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8177 DECL_ARTIFICIAL (to) = 1;
8178 DECL_IGNORED_P (to) = 1;
8179 DECL_EXTERNAL (to) = 1;
8180 TREE_READONLY (to) = 1;
8182 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8183 name = targetm.strip_name_encoding (name);
8184 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8185 namelen = strlen (name);
8186 prefixlen = strlen (prefix);
8187 imp_name = (char *) alloca (namelen + prefixlen + 1);
8188 memcpy (imp_name, prefix, prefixlen);
8189 memcpy (imp_name + prefixlen, name, namelen + 1);
8191 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8192 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8193 SET_SYMBOL_REF_DECL (rtl, to);
8194 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8196 rtl = gen_const_mem (Pmode, rtl);
8197 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8199 SET_DECL_RTL (to, rtl);
8200 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8202 return to;
8205 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8206 true if we require the result be a register. */
8208 static rtx
8209 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8211 tree imp_decl;
8212 rtx x;
8214 gcc_assert (SYMBOL_REF_DECL (symbol));
8215 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8217 x = DECL_RTL (imp_decl);
8218 if (want_reg)
8219 x = force_reg (Pmode, x);
8220 return x;
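/* Illustrative sketch, hypothetical symbol: a reference to a dllimport'd
   "foo" is replaced by an indirection through its import slot, roughly
     (mem (symbol_ref "*__imp__foo"))
   which the address legitimizers use whenever they meet a
   SYMBOL_REF_DLLIMPORT_P symbol.  */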
8223 /* Try machine-dependent ways of modifying an illegitimate address
8224 to be legitimate. If we find one, return the new, valid address.
8225 This macro is used in only one place: `memory_address' in explow.c.
8227 OLDX is the address as it was before break_out_memory_refs was called.
8228 In some cases it is useful to look at this to decide what needs to be done.
8230 MODE and WIN are passed so that this macro can use
8231 GO_IF_LEGITIMATE_ADDRESS.
8233 It is always safe for this macro to do nothing. It exists to recognize
8234 opportunities to optimize the output.
8236 For the 80386, we handle X+REG by loading X into a register R and
8237 using R+REG. R will go in a general reg and indexing will be used.
8238 However, if REG is a broken-out memory address or multiplication,
8239 nothing needs to be done because REG can certainly go in a general reg.
8241 When -fpic is used, special handling is needed for symbolic references.
8242 See comments by legitimize_pic_address in i386.c for details. */
8245 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
8247 int changed = 0;
8248 unsigned log;
8250 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8251 if (log)
8252 return legitimize_tls_address (x, (enum tls_model) log, false);
8253 if (GET_CODE (x) == CONST
8254 && GET_CODE (XEXP (x, 0)) == PLUS
8255 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8256 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8258 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8259 (enum tls_model) log, false);
8260 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8263 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8265 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8266 return legitimize_dllimport_symbol (x, true);
8267 if (GET_CODE (x) == CONST
8268 && GET_CODE (XEXP (x, 0)) == PLUS
8269 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8270 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8272 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8273 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8277 if (flag_pic && SYMBOLIC_CONST (x))
8278 return legitimize_pic_address (x, 0);
8280 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8281 if (GET_CODE (x) == ASHIFT
8282 && CONST_INT_P (XEXP (x, 1))
8283 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8285 changed = 1;
8286 log = INTVAL (XEXP (x, 1));
8287 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8288 GEN_INT (1 << log));
8291 if (GET_CODE (x) == PLUS)
8293 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8295 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8296 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8297 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8299 changed = 1;
8300 log = INTVAL (XEXP (XEXP (x, 0), 1));
8301 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8302 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8303 GEN_INT (1 << log));
8306 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8307 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8308 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8310 changed = 1;
8311 log = INTVAL (XEXP (XEXP (x, 1), 1));
8312 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8313 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8314 GEN_INT (1 << log));
8317 /* Put multiply first if it isn't already. */
8318 if (GET_CODE (XEXP (x, 1)) == MULT)
8320 rtx tmp = XEXP (x, 0);
8321 XEXP (x, 0) = XEXP (x, 1);
8322 XEXP (x, 1) = tmp;
8323 changed = 1;
8326 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8327 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8328 created by virtual register instantiation, register elimination, and
8329 similar optimizations. */
8330 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8332 changed = 1;
8333 x = gen_rtx_PLUS (Pmode,
8334 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8335 XEXP (XEXP (x, 1), 0)),
8336 XEXP (XEXP (x, 1), 1));
8339 /* Canonicalize
8340 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8341 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8342 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8343 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8344 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8345 && CONSTANT_P (XEXP (x, 1)))
8347 rtx constant;
8348 rtx other = NULL_RTX;
8350 if (CONST_INT_P (XEXP (x, 1)))
8352 constant = XEXP (x, 1);
8353 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8355 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8357 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8358 other = XEXP (x, 1);
8360 else
8361 constant = 0;
8363 if (constant)
8365 changed = 1;
8366 x = gen_rtx_PLUS (Pmode,
8367 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8368 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8369 plus_constant (other, INTVAL (constant)));
8373 if (changed && legitimate_address_p (mode, x, FALSE))
8374 return x;
8376 if (GET_CODE (XEXP (x, 0)) == MULT)
8378 changed = 1;
8379 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8382 if (GET_CODE (XEXP (x, 1)) == MULT)
8384 changed = 1;
8385 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8388 if (changed
8389 && REG_P (XEXP (x, 1))
8390 && REG_P (XEXP (x, 0)))
8391 return x;
8393 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8395 changed = 1;
8396 x = legitimize_pic_address (x, 0);
8399 if (changed && legitimate_address_p (mode, x, FALSE))
8400 return x;
8402 if (REG_P (XEXP (x, 0)))
8404 rtx temp = gen_reg_rtx (Pmode);
8405 rtx val = force_operand (XEXP (x, 1), temp);
8406 if (val != temp)
8407 emit_move_insn (temp, val);
8409 XEXP (x, 1) = temp;
8410 return x;
8413 else if (REG_P (XEXP (x, 1)))
8415 rtx temp = gen_reg_rtx (Pmode);
8416 rtx val = force_operand (XEXP (x, 0), temp);
8417 if (val != temp)
8418 emit_move_insn (temp, val);
8420 XEXP (x, 0) = temp;
8421 return x;
8425 return x;
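/* Illustrative sketch of one canonicalization above, using hypothetical
   pseudos: a left shift by a small constant inside an address is
   rewritten as a multiply so later passes see the canonical index*scale
   form.  */
#if 0
  rtx r0 = gen_reg_rtx (Pmode), r1 = gen_reg_rtx (Pmode);
  rtx x = gen_rtx_PLUS (Pmode,
                        gen_rtx_ASHIFT (Pmode, r1, GEN_INT (2)),
                        r0);
  x = legitimize_address (x, x, SImode);
  /* x is now (plus (mult (reg r1) (const_int 4)) (reg r0)), which
     GO_IF_LEGITIMATE_ADDRESS accepts directly.  */
#endif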
8428 /* Print an integer constant expression in assembler syntax. Addition
8429 and subtraction are the only arithmetic that may appear in these
8430 expressions. FILE is the stdio stream to write to, X is the rtx, and
8431 CODE is the operand print code from the output string. */
8433 static void
8434 output_pic_addr_const (FILE *file, rtx x, int code)
8436 char buf[256];
8438 switch (GET_CODE (x))
8440 case PC:
8441 gcc_assert (flag_pic);
8442 putc ('.', file);
8443 break;
8445 case SYMBOL_REF:
8446 if (! TARGET_MACHO || TARGET_64BIT)
8447 output_addr_const (file, x);
8448 else
8450 const char *name = XSTR (x, 0);
8452 /* Mark the decl as referenced so that cgraph will
8453 output the function. */
8454 if (SYMBOL_REF_DECL (x))
8455 mark_decl_referenced (SYMBOL_REF_DECL (x));
8457 #if TARGET_MACHO
8458 if (MACHOPIC_INDIRECT
8459 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8460 name = machopic_indirection_name (x, /*stub_p=*/true);
8461 #endif
8462 assemble_name (file, name);
8464 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
8465 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8466 fputs ("@PLT", file);
8467 break;
8469 case LABEL_REF:
8470 x = XEXP (x, 0);
8471 /* FALLTHRU */
8472 case CODE_LABEL:
8473 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8474 assemble_name (asm_out_file, buf);
8475 break;
8477 case CONST_INT:
8478 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8479 break;
8481 case CONST:
8482 /* This used to output parentheses around the expression,
8483 but that does not work on the 386 (either ATT or BSD assembler). */
8484 output_pic_addr_const (file, XEXP (x, 0), code);
8485 break;
8487 case CONST_DOUBLE:
8488 if (GET_MODE (x) == VOIDmode)
8490 /* We can use %d if the number is <32 bits and positive. */
8491 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8492 fprintf (file, "0x%lx%08lx",
8493 (unsigned long) CONST_DOUBLE_HIGH (x),
8494 (unsigned long) CONST_DOUBLE_LOW (x));
8495 else
8496 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8498 else
8499 /* We can't handle floating point constants;
8500 PRINT_OPERAND must handle them. */
8501 output_operand_lossage ("floating constant misused");
8502 break;
8504 case PLUS:
8505 /* Some assemblers need integer constants to appear first. */
8506 if (CONST_INT_P (XEXP (x, 0)))
8508 output_pic_addr_const (file, XEXP (x, 0), code);
8509 putc ('+', file);
8510 output_pic_addr_const (file, XEXP (x, 1), code);
8512 else
8514 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8515 output_pic_addr_const (file, XEXP (x, 1), code);
8516 putc ('+', file);
8517 output_pic_addr_const (file, XEXP (x, 0), code);
8519 break;
8521 case MINUS:
8522 if (!TARGET_MACHO)
8523 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8524 output_pic_addr_const (file, XEXP (x, 0), code);
8525 putc ('-', file);
8526 output_pic_addr_const (file, XEXP (x, 1), code);
8527 if (!TARGET_MACHO)
8528 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8529 break;
8531 case UNSPEC:
8532 gcc_assert (XVECLEN (x, 0) == 1);
8533 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8534 switch (XINT (x, 1))
8536 case UNSPEC_GOT:
8537 fputs ("@GOT", file);
8538 break;
8539 case UNSPEC_GOTOFF:
8540 fputs ("@GOTOFF", file);
8541 break;
8542 case UNSPEC_PLTOFF:
8543 fputs ("@PLTOFF", file);
8544 break;
8545 case UNSPEC_GOTPCREL:
8546 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8547 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8548 break;
8549 case UNSPEC_GOTTPOFF:
8550 /* FIXME: This might be @TPOFF in Sun ld too. */
8551 fputs ("@GOTTPOFF", file);
8552 break;
8553 case UNSPEC_TPOFF:
8554 fputs ("@TPOFF", file);
8555 break;
8556 case UNSPEC_NTPOFF:
8557 if (TARGET_64BIT)
8558 fputs ("@TPOFF", file);
8559 else
8560 fputs ("@NTPOFF", file);
8561 break;
8562 case UNSPEC_DTPOFF:
8563 fputs ("@DTPOFF", file);
8564 break;
8565 case UNSPEC_GOTNTPOFF:
8566 if (TARGET_64BIT)
8567 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8568 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8569 else
8570 fputs ("@GOTNTPOFF", file);
8571 break;
8572 case UNSPEC_INDNTPOFF:
8573 fputs ("@INDNTPOFF", file);
8574 break;
8575 default:
8576 output_operand_lossage ("invalid UNSPEC as operand");
8577 break;
8579 break;
8581 default:
8582 output_operand_lossage ("invalid expression as operand");
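/* Illustrative sketch (hypothetical symbol): for
     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
   this routine prints "x@GOTOFF"; for a PLUS with a constant term, the
   integer is printed first, so x@GOTOFF plus 4 comes out as
   "4+x@GOTOFF".  */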
8586 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8587 We need to emit DTP-relative relocations. */
8589 static void ATTRIBUTE_UNUSED
8590 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8592 fputs (ASM_LONG, file);
8593 output_addr_const (file, x);
8594 fputs ("@DTPOFF", file);
8595 switch (size)
8597 case 4:
8598 break;
8599 case 8:
8600 fputs (", 0", file);
8601 break;
8602 default:
8603 gcc_unreachable ();
8607 /* In the name of slightly smaller debug output, and to cater to
8608 general assembler lossage, recognize PIC+GOTOFF and turn it back
8609 into a direct symbol reference.
8611 On Darwin, this is necessary to avoid a crash, because Darwin
8612 has a different PIC label for each routine but the DWARF debugging
8613 information is not associated with any particular routine, so it's
8614 necessary to remove references to the PIC label from RTL stored by
8615 the DWARF output code. */
8617 static rtx
8618 ix86_delegitimize_address (rtx orig_x)
8620 rtx x = orig_x;
8621 /* reg_addend is NULL or a multiple of some register. */
8622 rtx reg_addend = NULL_RTX;
8623 /* const_addend is NULL or a const_int. */
8624 rtx const_addend = NULL_RTX;
8625 /* This is the result, or NULL. */
8626 rtx result = NULL_RTX;
8628 if (MEM_P (x))
8629 x = XEXP (x, 0);
8631 if (TARGET_64BIT)
8633 if (GET_CODE (x) != CONST
8634 || GET_CODE (XEXP (x, 0)) != UNSPEC
8635 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8636 || !MEM_P (orig_x))
8637 return orig_x;
8638 return XVECEXP (XEXP (x, 0), 0, 0);
8641 if (GET_CODE (x) != PLUS
8642 || GET_CODE (XEXP (x, 1)) != CONST)
8643 return orig_x;
8645 if (REG_P (XEXP (x, 0))
8646 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8647 /* %ebx + GOT/GOTOFF */
8649 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8651 /* %ebx + %reg * scale + GOT/GOTOFF */
8652 reg_addend = XEXP (x, 0);
8653 if (REG_P (XEXP (reg_addend, 0))
8654 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8655 reg_addend = XEXP (reg_addend, 1);
8656 else if (REG_P (XEXP (reg_addend, 1))
8657 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8658 reg_addend = XEXP (reg_addend, 0);
8659 else
8660 return orig_x;
8661 if (!REG_P (reg_addend)
8662 && GET_CODE (reg_addend) != MULT
8663 && GET_CODE (reg_addend) != ASHIFT)
8664 return orig_x;
8666 else
8667 return orig_x;
8669 x = XEXP (XEXP (x, 1), 0);
8670 if (GET_CODE (x) == PLUS
8671 && CONST_INT_P (XEXP (x, 1)))
8673 const_addend = XEXP (x, 1);
8674 x = XEXP (x, 0);
8677 if (GET_CODE (x) == UNSPEC
8678 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8679 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8680 result = XVECEXP (x, 0, 0);
8682 if (TARGET_MACHO && darwin_local_data_pic (x)
8683 && !MEM_P (orig_x))
8684 result = XEXP (x, 0);
8686 if (! result)
8687 return orig_x;
8689 if (const_addend)
8690 result = gen_rtx_PLUS (Pmode, result, const_addend);
8691 if (reg_addend)
8692 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8693 return result;
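/* Illustrative sketch (hypothetical symbol): the inverse of
   legitimize_pic_address, for debug output.  A 32-bit GOT load such as
     (mem (plus (reg %ebx) (const (unspec [(symbol_ref "y")] UNSPEC_GOT))))
   delegitimizes back to (symbol_ref "y"); a @GOTOFF sum likewise drops
   the PIC register and any register addend, re-attaching a constant
   addend to the recovered symbol.  */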
8696 /* If X is a machine specific address (i.e. a symbol or label being
8697 referenced as a displacement from the GOT implemented using an
8698 UNSPEC), then return the base term. Otherwise return X. */
8701 ix86_find_base_term (rtx x)
8703 rtx term;
8705 if (TARGET_64BIT)
8707 if (GET_CODE (x) != CONST)
8708 return x;
8709 term = XEXP (x, 0);
8710 if (GET_CODE (term) == PLUS
8711 && (CONST_INT_P (XEXP (term, 1))
8712 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8713 term = XEXP (term, 0);
8714 if (GET_CODE (term) != UNSPEC
8715 || XINT (term, 1) != UNSPEC_GOTPCREL)
8716 return x;
8718 term = XVECEXP (term, 0, 0);
8720 if (GET_CODE (term) != SYMBOL_REF
8721 && GET_CODE (term) != LABEL_REF)
8722 return x;
8724 return term;
8727 term = ix86_delegitimize_address (x);
8729 if (GET_CODE (term) != SYMBOL_REF
8730 && GET_CODE (term) != LABEL_REF)
8731 return x;
8733 return term;
8736 static void
8737 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8738 int fp, FILE *file)
8740 const char *suffix;
8742 if (mode == CCFPmode || mode == CCFPUmode)
8744 enum rtx_code second_code, bypass_code;
8745 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8746 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8747 code = ix86_fp_compare_code_to_integer (code);
8748 mode = CCmode;
8750 if (reverse)
8751 code = reverse_condition (code);
8753 switch (code)
8755 case EQ:
8756 switch (mode)
8758 case CCAmode:
8759 suffix = "a";
8760 break;
8762 case CCCmode:
8763 suffix = "c";
8764 break;
8766 case CCOmode:
8767 suffix = "o";
8768 break;
8770 case CCSmode:
8771 suffix = "s";
8772 break;
8774 default:
8775 suffix = "e";
8777 break;
8778 case NE:
8779 switch (mode)
8781 case CCAmode:
8782 suffix = "na";
8783 break;
8785 case CCCmode:
8786 suffix = "nc";
8787 break;
8789 case CCOmode:
8790 suffix = "no";
8791 break;
8793 case CCSmode:
8794 suffix = "ns";
8795 break;
8797 default:
8798 suffix = "ne";
8800 break;
8801 case GT:
8802 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8803 suffix = "g";
8804 break;
8805 case GTU:
8806 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8807 Those same assemblers have the same but opposite lossage on cmov. */
8808 if (mode == CCmode)
8809 suffix = fp ? "nbe" : "a";
8810 else if (mode == CCCmode)
8811 suffix = "b";
8812 else
8813 gcc_unreachable ();
8814 break;
8815 case LT:
8816 switch (mode)
8818 case CCNOmode:
8819 case CCGOCmode:
8820 suffix = "s";
8821 break;
8823 case CCmode:
8824 case CCGCmode:
8825 suffix = "l";
8826 break;
8828 default:
8829 gcc_unreachable ();
8831 break;
8832 case LTU:
8833 gcc_assert (mode == CCmode || mode == CCCmode);
8834 suffix = "b";
8835 break;
8836 case GE:
8837 switch (mode)
8839 case CCNOmode:
8840 case CCGOCmode:
8841 suffix = "ns";
8842 break;
8844 case CCmode:
8845 case CCGCmode:
8846 suffix = "ge";
8847 break;
8849 default:
8850 gcc_unreachable ();
8852 break;
8853 case GEU:
8854 /* ??? As above. */
8855 gcc_assert (mode == CCmode || mode == CCCmode);
8856 suffix = fp ? "nb" : "ae";
8857 break;
8858 case LE:
8859 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8860 suffix = "le";
8861 break;
8862 case LEU:
8863 /* ??? As above. */
8864 if (mode == CCmode)
8865 suffix = "be";
8866 else if (mode == CCCmode)
8867 suffix = fp ? "nb" : "ae";
8868 else
8869 gcc_unreachable ();
8870 break;
8871 case UNORDERED:
8872 suffix = fp ? "u" : "p";
8873 break;
8874 case ORDERED:
8875 suffix = fp ? "nu" : "np";
8876 break;
8877 default:
8878 gcc_unreachable ();
8880 fputs (suffix, file);
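/* Illustrative sketch: a signed greater-than in CCmode prints "g", its
   reversed form prints "le", and the unsigned variant prints "a" (or
   "nbe" when FP is set, for the fcmov workaround noted above).  */
#if 0
  put_condition_code (GT, CCmode, 0, 0, asm_out_file);   /* "g"  */
  put_condition_code (GT, CCmode, 1, 0, asm_out_file);   /* "le" */
  put_condition_code (GTU, CCmode, 0, 0, asm_out_file);  /* "a"  */
#endif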
8883 /* Print the name of register X to FILE based on its machine mode and number.
8884 If CODE is 'w', pretend the mode is HImode.
8885 If CODE is 'b', pretend the mode is QImode.
8886 If CODE is 'k', pretend the mode is SImode.
8887 If CODE is 'q', pretend the mode is DImode.
8888 If CODE is 'h', pretend the reg is the 'high' byte register.
8889 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8891 void
8892 print_reg (rtx x, int code, FILE *file)
8894 gcc_assert (x == pc_rtx
8895 || (REGNO (x) != ARG_POINTER_REGNUM
8896 && REGNO (x) != FRAME_POINTER_REGNUM
8897 && REGNO (x) != FLAGS_REG
8898 && REGNO (x) != FPSR_REG
8899 && REGNO (x) != FPCR_REG));
8901 if (ASSEMBLER_DIALECT == ASM_ATT)
8902 putc ('%', file);
8904 if (x == pc_rtx)
8906 gcc_assert (TARGET_64BIT);
8907 fputs ("rip", file);
8908 return;
8911 if (code == 'w' || MMX_REG_P (x))
8912 code = 2;
8913 else if (code == 'b')
8914 code = 1;
8915 else if (code == 'k')
8916 code = 4;
8917 else if (code == 'q')
8918 code = 8;
8919 else if (code == 'y')
8920 code = 3;
8921 else if (code == 'h')
8922 code = 0;
8923 else
8924 code = GET_MODE_SIZE (GET_MODE (x));
8926 /* Irritatingly, AMD extended registers use a different naming convention
8927 from the normal registers. */
8928 if (REX_INT_REG_P (x))
8930 gcc_assert (TARGET_64BIT);
8931 switch (code)
8933 case 0:
8934 error ("extended registers have no high halves");
8935 break;
8936 case 1:
8937 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8938 break;
8939 case 2:
8940 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8941 break;
8942 case 4:
8943 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8944 break;
8945 case 8:
8946 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8947 break;
8948 default:
8949 error ("unsupported operand size for extended register");
8950 break;
8952 return;
8954 switch (code)
8956 case 3:
8957 if (STACK_TOP_P (x))
8959 fputs ("st(0)", file);
8960 break;
8962 /* FALLTHRU */
8963 case 8:
8964 case 4:
8965 case 12:
8966 if (! ANY_FP_REG_P (x))
8967 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8968 /* FALLTHRU */
8969 case 16:
8970 case 2:
8971 normal:
8972 fputs (hi_reg_name[REGNO (x)], file);
8973 break;
8974 case 1:
8975 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8976 goto normal;
8977 fputs (qi_reg_name[REGNO (x)], file);
8978 break;
8979 case 0:
8980 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8981 goto normal;
8982 fputs (qi_high_reg_name[REGNO (x)], file);
8983 break;
8984 default:
8985 gcc_unreachable ();
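/* Illustrative sketch (AT&T syntax): the same hard register printed with
   different width codes; register number 0 is the ax/eax register.  */
#if 0
  print_reg (gen_rtx_REG (SImode, 0), 'b', asm_out_file);  /* %al  */
  print_reg (gen_rtx_REG (SImode, 0), 'w', asm_out_file);  /* %ax  */
  print_reg (gen_rtx_REG (SImode, 0), 'k', asm_out_file);  /* %eax */
#endif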
8989 /* Locate some local-dynamic symbol still in use by this function
8990 so that we can print its name in some tls_local_dynamic_base
8991 pattern. */
8993 static int
8994 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8996 rtx x = *px;
8998 if (GET_CODE (x) == SYMBOL_REF
8999 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9001 cfun->machine->some_ld_name = XSTR (x, 0);
9002 return 1;
9005 return 0;
9008 static const char *
9009 get_some_local_dynamic_name (void)
9011 rtx insn;
9013 if (cfun->machine->some_ld_name)
9014 return cfun->machine->some_ld_name;
9016 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9017 if (INSN_P (insn)
9018 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9019 return cfun->machine->some_ld_name;
9021 gcc_unreachable ();
9024 /* Meaning of CODE:
9025 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
9026 C -- print opcode suffix for set/cmov insn.
9027 c -- like C, but print reversed condition
9028 E,e -- likewise, but for compare-and-branch fused insn.
9029 F,f -- likewise, but for floating-point.
9030 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
9031 otherwise nothing
9032 R -- print the prefix for register names.
9033 z -- print the opcode suffix for the size of the current operand.
9034 * -- print a star (in certain assembler syntax)
9035 A -- print an absolute memory reference.
9036 w -- print the operand as if it's a "word" (HImode) even if it isn't.
9037 s -- print a shift double count, followed by the assembler's argument
9038 delimiter.
9039 b -- print the QImode name of the register for the indicated operand.
9040 %b0 would print %al if operands[0] is reg 0.
9041 w -- likewise, print the HImode name of the register.
9042 k -- likewise, print the SImode name of the register.
9043 q -- likewise, print the DImode name of the register.
9044 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
9045 y -- print "st(0)" instead of "st" as a register.
9046 D -- print condition for SSE cmp instruction.
9047 P -- if PIC, print an @PLT suffix.
9048 X -- don't print any sort of PIC '@' suffix for a symbol.
9049 & -- print some in-use local-dynamic symbol name.
9050 H -- print a memory address offset by 8; used for sse high-parts
9051 Y -- print condition for SSE5 com* instruction.
9052 + -- print a branch hint as 'cs' or 'ds' prefix
9053 ; -- print a semicolon (after prefixes due to bug in older gas).
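/* Illustrative sketch: these are the escapes used in i386.md output
   templates.  For a hypothetical SImode register operand 0 held in
   ax/eax, AT&T output would be roughly:
     %0  -> %eax   %b0 -> %al   %w0 -> %ax   %k0 -> %eax   %z0 -> l  */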
9056 void
9057 print_operand (FILE *file, rtx x, int code)
9059 if (code)
9061 switch (code)
9063 case '*':
9064 if (ASSEMBLER_DIALECT == ASM_ATT)
9065 putc ('*', file);
9066 return;
9068 case '&':
9069 assemble_name (file, get_some_local_dynamic_name ());
9070 return;
9072 case 'A':
9073 switch (ASSEMBLER_DIALECT)
9075 case ASM_ATT:
9076 putc ('*', file);
9077 break;
9079 case ASM_INTEL:
9080 /* Intel syntax. For absolute addresses, registers should not
9081 be surrounded by brackets. */
9082 if (!REG_P (x))
9084 putc ('[', file);
9085 PRINT_OPERAND (file, x, 0);
9086 putc (']', file);
9087 return;
9089 break;
9091 default:
9092 gcc_unreachable ();
9095 PRINT_OPERAND (file, x, 0);
9096 return;
9099 case 'L':
9100 if (ASSEMBLER_DIALECT == ASM_ATT)
9101 putc ('l', file);
9102 return;
9104 case 'W':
9105 if (ASSEMBLER_DIALECT == ASM_ATT)
9106 putc ('w', file);
9107 return;
9109 case 'B':
9110 if (ASSEMBLER_DIALECT == ASM_ATT)
9111 putc ('b', file);
9112 return;
9114 case 'Q':
9115 if (ASSEMBLER_DIALECT == ASM_ATT)
9116 putc ('l', file);
9117 return;
9119 case 'S':
9120 if (ASSEMBLER_DIALECT == ASM_ATT)
9121 putc ('s', file);
9122 return;
9124 case 'T':
9125 if (ASSEMBLER_DIALECT == ASM_ATT)
9126 putc ('t', file);
9127 return;
9129 case 'z':
9130 /* 387 opcodes don't get size suffixes if the operands are
9131 registers. */
9132 if (STACK_REG_P (x))
9133 return;
9135 /* Likewise if using Intel opcodes. */
9136 if (ASSEMBLER_DIALECT == ASM_INTEL)
9137 return;
9139 /* Derive the size suffix of the op from the size of the operand. */
9140 switch (GET_MODE_SIZE (GET_MODE (x)))
9142 case 1:
9143 putc ('b', file);
9144 return;
9146 case 2:
9147 if (MEM_P (x))
9149 #ifdef HAVE_GAS_FILDS_FISTS
9150 putc ('s', file);
9151 #endif
9152 return;
9154 else
9155 putc ('w', file);
9156 return;
9158 case 4:
9159 if (GET_MODE (x) == SFmode)
9161 putc ('s', file);
9162 return;
9164 else
9165 putc ('l', file);
9166 return;
9168 case 12:
9169 case 16:
9170 putc ('t', file);
9171 return;
9173 case 8:
9174 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9176 if (MEM_P (x))
9178 #ifdef GAS_MNEMONICS
9179 putc ('q', file);
9180 #else
9181 putc ('l', file);
9182 putc ('l', file);
9183 #endif
9185 else
9186 putc ('q', file);
9188 else
9189 putc ('l', file);
9190 return;
9192 default:
9193 gcc_unreachable ();
9196 case 'b':
9197 case 'w':
9198 case 'k':
9199 case 'q':
9200 case 'h':
9201 case 'y':
9202 case 'X':
9203 case 'P':
9204 break;
9206 case 's':
9207 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9209 PRINT_OPERAND (file, x, 0);
9210 fputs (", ", file);
9212 return;
9214 case 'D':
9215 /* A little bit of braindamage here. The SSE compare instructions
9216 use completely different names for the comparisons than the
9217 fp conditional moves do. */
9218 switch (GET_CODE (x))
9220 case EQ:
9221 case UNEQ:
9222 fputs ("eq", file);
9223 break;
9224 case LT:
9225 case UNLT:
9226 fputs ("lt", file);
9227 break;
9228 case LE:
9229 case UNLE:
9230 fputs ("le", file);
9231 break;
9232 case UNORDERED:
9233 fputs ("unord", file);
9234 break;
9235 case NE:
9236 case LTGT:
9237 fputs ("neq", file);
9238 break;
9239 case UNGE:
9240 case GE:
9241 fputs ("nlt", file);
9242 break;
9243 case UNGT:
9244 case GT:
9245 fputs ("nle", file);
9246 break;
9247 case ORDERED:
9248 fputs ("ord", file);
9249 break;
9250 default:
9251 gcc_unreachable ();
9253 return;
9254 case 'O':
9255 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9256 if (ASSEMBLER_DIALECT == ASM_ATT)
9258 switch (GET_MODE (x))
9260 case HImode: putc ('w', file); break;
9261 case SImode:
9262 case SFmode: putc ('l', file); break;
9263 case DImode:
9264 case DFmode: putc ('q', file); break;
9265 default: gcc_unreachable ();
9267 putc ('.', file);
9269 #endif
9270 return;
9271 case 'C':
9272 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9273 return;
9274 case 'F':
9275 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9276 if (ASSEMBLER_DIALECT == ASM_ATT)
9277 putc ('.', file);
9278 #endif
9279 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9280 return;
9282 /* Like above, but reverse condition */
9283 case 'c':
9284 /* Check to see if argument to %c is really a constant
9285 and not a condition code which needs to be reversed. */
9286 if (!COMPARISON_P (x))
9288 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9289 return;
9291 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9292 return;
9293 case 'f':
9294 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9295 if (ASSEMBLER_DIALECT == ASM_ATT)
9296 putc ('.', file);
9297 #endif
9298 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9299 return;
9301 case 'E':
9302 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
9303 return;
9305 case 'e':
9306 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
9307 return;
9309 case 'H':
9310 /* It doesn't actually matter what mode we use here, as we're
9311 only going to use this for printing. */
9312 x = adjust_address_nv (x, DImode, 8);
9313 break;
9315 case '+':
9317 rtx x;
9319 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9320 return;
9322 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9323 if (x)
9325 int pred_val = INTVAL (XEXP (x, 0));
9327 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9328 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9330 int taken = pred_val > REG_BR_PROB_BASE / 2;
9331 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9333 /* Emit hints only where the default branch prediction
9334 heuristics would fail. */
9335 if (taken != cputaken)
9337 /* We use 3e (DS) prefix for taken branches and
9338 2e (CS) prefix for not taken branches. */
9339 if (taken)
9340 fputs ("ds ; ", file);
9341 else
9342 fputs ("cs ; ", file);
9346 return;
9349 case 'Y':
9350 switch (GET_CODE (x))
9352 case NE:
9353 fputs ("neq", file);
9354 break;
9355 case EQ:
9356 fputs ("eq", file);
9357 break;
9358 case GE:
9359 case GEU:
9360 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9361 break;
9362 case GT:
9363 case GTU:
9364 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9365 break;
9366 case LE:
9367 case LEU:
9368 fputs ("le", file);
9369 break;
9370 case LT:
9371 case LTU:
9372 fputs ("lt", file);
9373 break;
9374 case UNORDERED:
9375 fputs ("unord", file);
9376 break;
9377 case ORDERED:
9378 fputs ("ord", file);
9379 break;
9380 case UNEQ:
9381 fputs ("ueq", file);
9382 break;
9383 case UNGE:
9384 fputs ("nlt", file);
9385 break;
9386 case UNGT:
9387 fputs ("nle", file);
9388 break;
9389 case UNLE:
9390 fputs ("ule", file);
9391 break;
9392 case UNLT:
9393 fputs ("ult", file);
9394 break;
9395 case LTGT:
9396 fputs ("une", file);
9397 break;
9398 default:
9399 gcc_unreachable ();
9401 return;
9403 case ';':
9404 #if TARGET_MACHO
9405 fputs (" ; ", file);
9406 #else
9407 fputc (' ', file);
9408 #endif
9409 return;
9411 default:
9412 output_operand_lossage ("invalid operand code '%c'", code);
9416 if (REG_P (x))
9417 print_reg (x, code, file);
9419 else if (MEM_P (x))
9421 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9422 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9423 && GET_MODE (x) != BLKmode)
9425 const char * size;
9426 switch (GET_MODE_SIZE (GET_MODE (x)))
9428 case 1: size = "BYTE"; break;
9429 case 2: size = "WORD"; break;
9430 case 4: size = "DWORD"; break;
9431 case 8: size = "QWORD"; break;
9432 case 12: size = "XWORD"; break;
9433 case 16:
9434 if (GET_MODE (x) == XFmode)
9435 size = "XWORD";
9436 else
9437 size = "XMMWORD";
9438 break;
9439 default:
9440 gcc_unreachable ();
9443 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9444 if (code == 'b')
9445 size = "BYTE";
9446 else if (code == 'w')
9447 size = "WORD";
9448 else if (code == 'k')
9449 size = "DWORD";
9451 fputs (size, file);
9452 fputs (" PTR ", file);
9455 x = XEXP (x, 0);
9456 /* Avoid (%rip) for call operands. */
9457 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9458 && !CONST_INT_P (x))
9459 output_addr_const (file, x);
9460 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9461 output_operand_lossage ("invalid constraints for operand");
9462 else
9463 output_address (x);
9466 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9468 REAL_VALUE_TYPE r;
9469 long l;
9471 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9472 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9474 if (ASSEMBLER_DIALECT == ASM_ATT)
9475 putc ('$', file);
9476 fprintf (file, "0x%08lx", (long unsigned int) l);
9479 /* These float cases don't actually occur as immediate operands. */
9480 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9482 char dstr[30];
9484 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9485 fprintf (file, "%s", dstr);
9488 else if (GET_CODE (x) == CONST_DOUBLE
9489 && GET_MODE (x) == XFmode)
9491 char dstr[30];
9493 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9494 fprintf (file, "%s", dstr);
9497 else
9499 /* We have patterns that allow zero sets of memory, for instance.
9500 In 64-bit mode, we should probably support all 8-byte vectors,
9501 since we can in fact encode that into an immediate. */
9502 if (GET_CODE (x) == CONST_VECTOR)
9504 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9505 x = const0_rtx;
9508 if (code != 'P')
9510 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9512 if (ASSEMBLER_DIALECT == ASM_ATT)
9513 putc ('$', file);
9515 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9516 || GET_CODE (x) == LABEL_REF)
9518 if (ASSEMBLER_DIALECT == ASM_ATT)
9519 putc ('$', file);
9520 else
9521 fputs ("OFFSET FLAT:", file);
9524 if (CONST_INT_P (x))
9525 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9526 else if (flag_pic)
9527 output_pic_addr_const (file, x, code);
9528 else
9529 output_addr_const (file, x);
9533 /* Print a memory operand whose address is ADDR. */
9535 void
9536 print_operand_address (FILE *file, rtx addr)
9538 struct ix86_address parts;
9539 rtx base, index, disp;
9540 int scale;
9541 int ok = ix86_decompose_address (addr, &parts);
9543 gcc_assert (ok);
9545 base = parts.base;
9546 index = parts.index;
9547 disp = parts.disp;
9548 scale = parts.scale;
9550 switch (parts.seg)
9552 case SEG_DEFAULT:
9553 break;
9554 case SEG_FS:
9555 case SEG_GS:
9556 if (ASSEMBLER_DIALECT == ASM_ATT)
9557 putc ('%', file);
9558 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9559 break;
9560 default:
9561 gcc_unreachable ();
9564 /* Use one-byte-shorter RIP-relative addressing for 64-bit mode. */
9565 if (TARGET_64BIT && !base && !index)
9567 rtx symbol = disp;
9569 if (GET_CODE (disp) == CONST
9570 && GET_CODE (XEXP (disp, 0)) == PLUS
9571 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9572 symbol = XEXP (XEXP (disp, 0), 0);
9574 if (GET_CODE (symbol) == LABEL_REF
9575 || (GET_CODE (symbol) == SYMBOL_REF
9576 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9577 base = pc_rtx;
9579 if (!base && !index)
9581 /* A displacement-only address requires special attention. */
9583 if (CONST_INT_P (disp))
9585 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9586 fputs ("ds:", file);
9587 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9589 else if (flag_pic)
9590 output_pic_addr_const (file, disp, 0);
9591 else
9592 output_addr_const (file, disp);
9594 else
9596 if (ASSEMBLER_DIALECT == ASM_ATT)
9598 if (disp)
9600 if (flag_pic)
9601 output_pic_addr_const (file, disp, 0);
9602 else if (GET_CODE (disp) == LABEL_REF)
9603 output_asm_label (disp);
9604 else
9605 output_addr_const (file, disp);
9608 putc ('(', file);
9609 if (base)
9610 print_reg (base, 0, file);
9611 if (index)
9613 putc (',', file);
9614 print_reg (index, 0, file);
9615 if (scale != 1)
9616 fprintf (file, ",%d", scale);
9618 putc (')', file);
9620 else
9622 rtx offset = NULL_RTX;
9624 if (disp)
9626 /* Pull out the offset of a symbol; print any symbol itself. */
9627 if (GET_CODE (disp) == CONST
9628 && GET_CODE (XEXP (disp, 0)) == PLUS
9629 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9631 offset = XEXP (XEXP (disp, 0), 1);
9632 disp = gen_rtx_CONST (VOIDmode,
9633 XEXP (XEXP (disp, 0), 0));
9636 if (flag_pic)
9637 output_pic_addr_const (file, disp, 0);
9638 else if (GET_CODE (disp) == LABEL_REF)
9639 output_asm_label (disp);
9640 else if (CONST_INT_P (disp))
9641 offset = disp;
9642 else
9643 output_addr_const (file, disp);
9646 putc ('[', file);
9647 if (base)
9649 print_reg (base, 0, file);
9650 if (offset)
9652 if (INTVAL (offset) >= 0)
9653 putc ('+', file);
9654 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9657 else if (offset)
9658 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9659 else
9660 putc ('0', file);
9662 if (index)
9664 putc ('+', file);
9665 print_reg (index, 0, file);
9666 if (scale != 1)
9667 fprintf (file, "*%d", scale);
9669 putc (']', file);
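/* Worked example for the two dialects handled above (illustrative only):
   an address with base %ebx, index %ecx, scale 4 and displacement 8 is
   printed as "8(%ebx,%ecx,4)" in AT&T syntax and as "[ebx+8+ecx*4]" in
   Intel syntax. */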
9674 bool
9675 output_addr_const_extra (FILE *file, rtx x)
9677 rtx op;
9679 if (GET_CODE (x) != UNSPEC)
9680 return false;
9682 op = XVECEXP (x, 0, 0);
9683 switch (XINT (x, 1))
9685 case UNSPEC_GOTTPOFF:
9686 output_addr_const (file, op);
9687 /* FIXME: This might be @TPOFF in Sun ld. */
9688 fputs ("@GOTTPOFF", file);
9689 break;
9690 case UNSPEC_TPOFF:
9691 output_addr_const (file, op);
9692 fputs ("@TPOFF", file);
9693 break;
9694 case UNSPEC_NTPOFF:
9695 output_addr_const (file, op);
9696 if (TARGET_64BIT)
9697 fputs ("@TPOFF", file);
9698 else
9699 fputs ("@NTPOFF", file);
9700 break;
9701 case UNSPEC_DTPOFF:
9702 output_addr_const (file, op);
9703 fputs ("@DTPOFF", file);
9704 break;
9705 case UNSPEC_GOTNTPOFF:
9706 output_addr_const (file, op);
9707 if (TARGET_64BIT)
9708 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9709 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9710 else
9711 fputs ("@GOTNTPOFF", file);
9712 break;
9713 case UNSPEC_INDNTPOFF:
9714 output_addr_const (file, op);
9715 fputs ("@INDNTPOFF", file);
9716 break;
9718 default:
9719 return false;
9722 return true;
9725 /* Split one or more DImode RTL references into pairs of SImode
9726 references. The RTL can be REG, offsettable MEM, integer constant, or
9727 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9728 split and "num" is its length. lo_half and hi_half are output arrays
9729 that parallel "operands". */
9731 void
9732 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9734 while (num--)
9736 rtx op = operands[num];
9738 /* simplify_subreg refuses to split volatile memory addresses,
9739 but we still have to handle them. */
9740 if (MEM_P (op))
9742 lo_half[num] = adjust_address (op, SImode, 0);
9743 hi_half[num] = adjust_address (op, SImode, 4);
9745 else
9747 lo_half[num] = simplify_gen_subreg (SImode, op,
9748 GET_MODE (op) == VOIDmode
9749 ? DImode : GET_MODE (op), 0);
9750 hi_half[num] = simplify_gen_subreg (SImode, op,
9751 GET_MODE (op) == VOIDmode
9752 ? DImode : GET_MODE (op), 4);
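/* Editorial sketch of split_di's effect (assumed operand shapes): for a
   single element operands[0] == (reg:DI 1) the loop above produces
   lo_half[0] == (subreg:SI (reg:DI 1) 0) and
   hi_half[0] == (subreg:SI (reg:DI 1) 4), while an offsettable
   (mem:DI X) is split into (mem:SI X) and (mem:SI X+4). */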
9756 /* Split one or more TImode RTL references into pairs of DImode
9757 references. The RTL can be REG, offsettable MEM, integer constant, or
9758 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9759 split and "num" is its length. lo_half and hi_half are output arrays
9760 that parallel "operands". */
9762 void
9763 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9765 while (num--)
9767 rtx op = operands[num];
9769 /* simplify_subreg refuses to split volatile memory addresses, but we
9770 still have to handle them. */
9771 if (MEM_P (op))
9773 lo_half[num] = adjust_address (op, DImode, 0);
9774 hi_half[num] = adjust_address (op, DImode, 8);
9776 else
9778 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9779 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9784 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9785 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9786 is the expression of the binary operation. The output may either be
9787 emitted here, or returned to the caller, like all output_* functions.
9789 There is no guarantee that the operands are the same mode, as they
9790 might be within FLOAT or FLOAT_EXTEND expressions. */
9792 #ifndef SYSV386_COMPAT
9793 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9794 wants to fix the assemblers because that causes incompatibility
9795 with gcc. No-one wants to fix gcc because that causes
9796 incompatibility with assemblers... You can use the option of
9797 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9798 #define SYSV386_COMPAT 1
9799 #endif
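/* Illustrative output of output_387_binary_op (inferred from the templates
   below, not a new code path): an SSE SFmode PLUS yields
   "addss\t{%2, %0|%0, %2}", while the plain x87 register-register case with
   st(0) as the destination yields "fadd\t{%y2, %0|%0, %y2}". */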
9801 const char *
9802 output_387_binary_op (rtx insn, rtx *operands)
9804 static char buf[30];
9805 const char *p;
9806 const char *ssep;
9807 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9809 #ifdef ENABLE_CHECKING
9810 /* Even if we do not want to check the inputs, this documents the
9811 input constraints, which helps in understanding the following code. */
9812 if (STACK_REG_P (operands[0])
9813 && ((REG_P (operands[1])
9814 && REGNO (operands[0]) == REGNO (operands[1])
9815 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9816 || (REG_P (operands[2])
9817 && REGNO (operands[0]) == REGNO (operands[2])
9818 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9819 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9820 ; /* ok */
9821 else
9822 gcc_assert (is_sse);
9823 #endif
9825 switch (GET_CODE (operands[3]))
9827 case PLUS:
9828 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9829 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9830 p = "fiadd";
9831 else
9832 p = "fadd";
9833 ssep = "add";
9834 break;
9836 case MINUS:
9837 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9838 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9839 p = "fisub";
9840 else
9841 p = "fsub";
9842 ssep = "sub";
9843 break;
9845 case MULT:
9846 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9847 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9848 p = "fimul";
9849 else
9850 p = "fmul";
9851 ssep = "mul";
9852 break;
9854 case DIV:
9855 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9856 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9857 p = "fidiv";
9858 else
9859 p = "fdiv";
9860 ssep = "div";
9861 break;
9863 default:
9864 gcc_unreachable ();
9867 if (is_sse)
9869 strcpy (buf, ssep);
9870 if (GET_MODE (operands[0]) == SFmode)
9871 strcat (buf, "ss\t{%2, %0|%0, %2}");
9872 else
9873 strcat (buf, "sd\t{%2, %0|%0, %2}");
9874 return buf;
9876 strcpy (buf, p);
9878 switch (GET_CODE (operands[3]))
9880 case MULT:
9881 case PLUS:
9882 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9884 rtx temp = operands[2];
9885 operands[2] = operands[1];
9886 operands[1] = temp;
9889 /* We now know operands[0] == operands[1]. */
9891 if (MEM_P (operands[2]))
9893 p = "%z2\t%2";
9894 break;
9897 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9899 if (STACK_TOP_P (operands[0]))
9900 /* How is it that we are storing to a dead operand[2]?
9901 Well, presumably operands[1] is dead too. We can't
9902 store the result to st(0) as st(0) gets popped on this
9903 instruction. Instead store to operands[2] (which I
9904 think has to be st(1)). st(1) will be popped later.
9905 gcc <= 2.8.1 didn't have this check and generated
9906 assembly code that the Unixware assembler rejected. */
9907 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9908 else
9909 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9910 break;
9913 if (STACK_TOP_P (operands[0]))
9914 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9915 else
9916 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9917 break;
9919 case MINUS:
9920 case DIV:
9921 if (MEM_P (operands[1]))
9923 p = "r%z1\t%1";
9924 break;
9927 if (MEM_P (operands[2]))
9929 p = "%z2\t%2";
9930 break;
9933 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9935 #if SYSV386_COMPAT
9936 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9937 derived assemblers, confusingly reverse the direction of
9938 the operation for fsub{r} and fdiv{r} when the
9939 destination register is not st(0). The Intel assembler
9940 doesn't have this brain damage. Read !SYSV386_COMPAT to
9941 figure out what the hardware really does. */
9942 if (STACK_TOP_P (operands[0]))
9943 p = "{p\t%0, %2|rp\t%2, %0}";
9944 else
9945 p = "{rp\t%2, %0|p\t%0, %2}";
9946 #else
9947 if (STACK_TOP_P (operands[0]))
9948 /* As above for fmul/fadd, we can't store to st(0). */
9949 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9950 else
9951 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9952 #endif
9953 break;
9956 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9958 #if SYSV386_COMPAT
9959 if (STACK_TOP_P (operands[0]))
9960 p = "{rp\t%0, %1|p\t%1, %0}";
9961 else
9962 p = "{p\t%1, %0|rp\t%0, %1}";
9963 #else
9964 if (STACK_TOP_P (operands[0]))
9965 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9966 else
9967 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9968 #endif
9969 break;
9972 if (STACK_TOP_P (operands[0]))
9974 if (STACK_TOP_P (operands[1]))
9975 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9976 else
9977 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9978 break;
9980 else if (STACK_TOP_P (operands[1]))
9982 #if SYSV386_COMPAT
9983 p = "{\t%1, %0|r\t%0, %1}";
9984 #else
9985 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9986 #endif
9988 else
9990 #if SYSV386_COMPAT
9991 p = "{r\t%2, %0|\t%0, %2}";
9992 #else
9993 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9994 #endif
9996 break;
9998 default:
9999 gcc_unreachable ();
10002 strcat (buf, p);
10003 return buf;
10006 /* Return needed mode for entity in optimize_mode_switching pass. */
10009 ix86_mode_needed (int entity, rtx insn)
10011 enum attr_i387_cw mode;
10013 /* The mode UNINITIALIZED is used to store the control word after a
10014 function call or ASM pattern. The mode ANY specifies that the function
10015 has no requirements on the control word and makes no changes in the
10016 bits we are interested in. */
10018 if (CALL_P (insn)
10019 || (NONJUMP_INSN_P (insn)
10020 && (asm_noperands (PATTERN (insn)) >= 0
10021 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
10022 return I387_CW_UNINITIALIZED;
10024 if (recog_memoized (insn) < 0)
10025 return I387_CW_ANY;
10027 mode = get_attr_i387_cw (insn);
10029 switch (entity)
10031 case I387_TRUNC:
10032 if (mode == I387_CW_TRUNC)
10033 return mode;
10034 break;
10036 case I387_FLOOR:
10037 if (mode == I387_CW_FLOOR)
10038 return mode;
10039 break;
10041 case I387_CEIL:
10042 if (mode == I387_CW_CEIL)
10043 return mode;
10044 break;
10046 case I387_MASK_PM:
10047 if (mode == I387_CW_MASK_PM)
10048 return mode;
10049 break;
10051 default:
10052 gcc_unreachable ();
10055 return I387_CW_ANY;
10058 /* Output code to initialize control word copies used by trunc?f?i and
10059 rounding patterns. CURRENT_MODE is set to the current control word,
10060 while NEW_MODE is set to the new control word. */
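/* Background note (assumed x87 control-word layout, added for readability):
   the rounding-control field occupies bits 11:10, so OR-ing in 0x0c00
   selects truncation, 0x0400 selects round-down and 0x0800 round-up, while
   bit 5 (0x0020) is the precision-exception mask used for nearbyint. */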
10062 void
10063 emit_i387_cw_initialization (int mode)
10065 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
10066 rtx new_mode;
10068 enum ix86_stack_slot slot;
10070 rtx reg = gen_reg_rtx (HImode);
10072 emit_insn (gen_x86_fnstcw_1 (stored_mode));
10073 emit_move_insn (reg, copy_rtx (stored_mode));
10075 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
10077 switch (mode)
10079 case I387_CW_TRUNC:
10080 /* round toward zero (truncate) */
10081 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
10082 slot = SLOT_CW_TRUNC;
10083 break;
10085 case I387_CW_FLOOR:
10086 /* round down toward -oo */
10087 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10088 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
10089 slot = SLOT_CW_FLOOR;
10090 break;
10092 case I387_CW_CEIL:
10093 /* round up toward +oo */
10094 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10095 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
10096 slot = SLOT_CW_CEIL;
10097 break;
10099 case I387_CW_MASK_PM:
10100 /* mask precision exception for nearbyint() */
10101 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10102 slot = SLOT_CW_MASK_PM;
10103 break;
10105 default:
10106 gcc_unreachable ();
10109 else
10111 switch (mode)
10113 case I387_CW_TRUNC:
10114 /* round toward zero (truncate) */
10115 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
10116 slot = SLOT_CW_TRUNC;
10117 break;
10119 case I387_CW_FLOOR:
10120 /* round down toward -oo */
10121 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
10122 slot = SLOT_CW_FLOOR;
10123 break;
10125 case I387_CW_CEIL:
10126 /* round up toward +oo */
10127 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
10128 slot = SLOT_CW_CEIL;
10129 break;
10131 case I387_CW_MASK_PM:
10132 /* mask precision exception for nearbyint() */
10133 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10134 slot = SLOT_CW_MASK_PM;
10135 break;
10137 default:
10138 gcc_unreachable ();
10142 gcc_assert (slot < MAX_386_STACK_LOCALS);
10144 new_mode = assign_386_stack_local (HImode, slot);
10145 emit_move_insn (new_mode, reg);
10148 /* Output code for INSN to convert a float to a signed int. OPERANDS
10149 are the insn operands. The output may be [HSD]Imode and the input
10150 operand may be [SDX]Fmode. */
10152 const char *
10153 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
10155 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10156 int dimode_p = GET_MODE (operands[0]) == DImode;
10157 int round_mode = get_attr_i387_cw (insn);
10159 /* Jump through a hoop or two for DImode, since the hardware has no
10160 non-popping instruction. We used to do this a different way, but
10161 that was somewhat fragile and broke with post-reload splitters. */
10162 if ((dimode_p || fisttp) && !stack_top_dies)
10163 output_asm_insn ("fld\t%y1", operands);
10165 gcc_assert (STACK_TOP_P (operands[1]));
10166 gcc_assert (MEM_P (operands[0]));
10167 gcc_assert (GET_MODE (operands[1]) != TFmode);
10169 if (fisttp)
10170 output_asm_insn ("fisttp%z0\t%0", operands);
10171 else
10173 if (round_mode != I387_CW_ANY)
10174 output_asm_insn ("fldcw\t%3", operands);
10175 if (stack_top_dies || dimode_p)
10176 output_asm_insn ("fistp%z0\t%0", operands);
10177 else
10178 output_asm_insn ("fist%z0\t%0", operands);
10179 if (round_mode != I387_CW_ANY)
10180 output_asm_insn ("fldcw\t%2", operands);
10183 return "";
10186 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10187 have the values zero or one, indicates the ffreep insn's operand
10188 from the OPERANDS array. */
10190 static const char *
10191 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10193 if (TARGET_USE_FFREEP)
10194 #if HAVE_AS_IX86_FFREEP
10195 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
10196 #else
10198 static char retval[] = ".word\t0xc_df";
10199 int regno = REGNO (operands[opno]);
10201 gcc_assert (FP_REGNO_P (regno));
10203 retval[9] = '0' + (regno - FIRST_STACK_REG);
10204 return retval;
10206 #endif
10208 return opno ? "fstp\t%y1" : "fstp\t%y0";
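/* Encoding note for the fallback above (assumed instruction encoding):
   ffreep %st(i) is the two-byte sequence DF C0+i, so ".word 0xc_df" with
   '_' patched to the stack index emits exactly those bytes on this
   little-endian target when the assembler does not know ffreep. */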
10212 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10213 should be used. UNORDERED_P is true when fucom should be used. */
10215 const char *
10216 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10218 int stack_top_dies;
10219 rtx cmp_op0, cmp_op1;
10220 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
10222 if (eflags_p)
10224 cmp_op0 = operands[0];
10225 cmp_op1 = operands[1];
10227 else
10229 cmp_op0 = operands[1];
10230 cmp_op1 = operands[2];
10233 if (is_sse)
10235 if (GET_MODE (operands[0]) == SFmode)
10236 if (unordered_p)
10237 return "ucomiss\t{%1, %0|%0, %1}";
10238 else
10239 return "comiss\t{%1, %0|%0, %1}";
10240 else
10241 if (unordered_p)
10242 return "ucomisd\t{%1, %0|%0, %1}";
10243 else
10244 return "comisd\t{%1, %0|%0, %1}";
10247 gcc_assert (STACK_TOP_P (cmp_op0));
10249 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10251 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10253 if (stack_top_dies)
10255 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10256 return output_387_ffreep (operands, 1);
10258 else
10259 return "ftst\n\tfnstsw\t%0";
10262 if (STACK_REG_P (cmp_op1)
10263 && stack_top_dies
10264 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10265 && REGNO (cmp_op1) != FIRST_STACK_REG)
10267 /* If the top of the 387 stack dies, and the other operand
10268 is also a stack register that dies, then this must be a
10269 `fcompp' float compare. */
10271 if (eflags_p)
10273 /* There is no double popping fcomi variant. Fortunately,
10274 eflags is immune from the fstp's cc clobbering. */
10275 if (unordered_p)
10276 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10277 else
10278 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10279 return output_387_ffreep (operands, 0);
10281 else
10283 if (unordered_p)
10284 return "fucompp\n\tfnstsw\t%0";
10285 else
10286 return "fcompp\n\tfnstsw\t%0";
10289 else
10291 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10293 static const char * const alt[16] =
10295 "fcom%z2\t%y2\n\tfnstsw\t%0",
10296 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10297 "fucom%z2\t%y2\n\tfnstsw\t%0",
10298 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10300 "ficom%z2\t%y2\n\tfnstsw\t%0",
10301 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10302 NULL,
10303 NULL,
10305 "fcomi\t{%y1, %0|%0, %y1}",
10306 "fcomip\t{%y1, %0|%0, %y1}",
10307 "fucomi\t{%y1, %0|%0, %y1}",
10308 "fucomip\t{%y1, %0|%0, %y1}",
10310 NULL,
10311 NULL,
10312 NULL,
10313 NULL
10316 int mask;
10317 const char *ret;
10319 mask = eflags_p << 3;
10320 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10321 mask |= unordered_p << 1;
10322 mask |= stack_top_dies;
10324 gcc_assert (mask < 16);
10325 ret = alt[mask];
10326 gcc_assert (ret);
10328 return ret;
10332 void
10333 ix86_output_addr_vec_elt (FILE *file, int value)
10335 const char *directive = ASM_LONG;
10337 #ifdef ASM_QUAD
10338 if (TARGET_64BIT)
10339 directive = ASM_QUAD;
10340 #else
10341 gcc_assert (!TARGET_64BIT);
10342 #endif
10344 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10347 void
10348 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10350 const char *directive = ASM_LONG;
10352 #ifdef ASM_QUAD
10353 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10354 directive = ASM_QUAD;
10355 #else
10356 gcc_assert (!TARGET_64BIT);
10357 #endif
10358 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10359 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10360 fprintf (file, "%s%s%d-%s%d\n",
10361 directive, LPREFIX, value, LPREFIX, rel);
10362 else if (HAVE_AS_GOTOFF_IN_DATA)
10363 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10364 #if TARGET_MACHO
10365 else if (TARGET_MACHO)
10367 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10368 machopic_output_function_base_name (file);
10369 fprintf(file, "\n");
10371 #endif
10372 else
10373 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10374 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10377 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10378 for the target. */
10380 void
10381 ix86_expand_clear (rtx dest)
10383 rtx tmp;
10385 /* We play register width games, which are only valid after reload. */
10386 gcc_assert (reload_completed);
10388 /* Avoid HImode and its attendant prefix byte. */
10389 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10390 dest = gen_rtx_REG (SImode, REGNO (dest));
10391 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10393 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10394 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10396 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10397 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10400 emit_insn (tmp);
10403 /* X is an unchanging MEM. If it is a constant pool reference, return
10404 the constant pool rtx, else NULL. */
10407 maybe_get_pool_constant (rtx x)
10409 x = ix86_delegitimize_address (XEXP (x, 0));
10411 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10412 return get_pool_constant (x);
10414 return NULL_RTX;
10417 void
10418 ix86_expand_move (enum machine_mode mode, rtx operands[])
10420 rtx op0, op1;
10421 enum tls_model model;
10423 op0 = operands[0];
10424 op1 = operands[1];
10426 if (GET_CODE (op1) == SYMBOL_REF)
10428 model = SYMBOL_REF_TLS_MODEL (op1);
10429 if (model)
10431 op1 = legitimize_tls_address (op1, model, true);
10432 op1 = force_operand (op1, op0);
10433 if (op1 == op0)
10434 return;
10436 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10437 && SYMBOL_REF_DLLIMPORT_P (op1))
10438 op1 = legitimize_dllimport_symbol (op1, false);
10440 else if (GET_CODE (op1) == CONST
10441 && GET_CODE (XEXP (op1, 0)) == PLUS
10442 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10444 rtx addend = XEXP (XEXP (op1, 0), 1);
10445 rtx symbol = XEXP (XEXP (op1, 0), 0);
10446 rtx tmp = NULL;
10448 model = SYMBOL_REF_TLS_MODEL (symbol);
10449 if (model)
10450 tmp = legitimize_tls_address (symbol, model, true);
10451 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10452 && SYMBOL_REF_DLLIMPORT_P (symbol))
10453 tmp = legitimize_dllimport_symbol (symbol, true);
10455 if (tmp)
10457 tmp = force_operand (tmp, NULL);
10458 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10459 op0, 1, OPTAB_DIRECT);
10460 if (tmp == op0)
10461 return;
10465 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10467 if (TARGET_MACHO && !TARGET_64BIT)
10469 #if TARGET_MACHO
10470 if (MACHOPIC_PURE)
10472 rtx temp = ((reload_in_progress
10473 || ((op0 && REG_P (op0))
10474 && mode == Pmode))
10475 ? op0 : gen_reg_rtx (Pmode));
10476 op1 = machopic_indirect_data_reference (op1, temp);
10477 op1 = machopic_legitimize_pic_address (op1, mode,
10478 temp == op1 ? 0 : temp);
10480 else if (MACHOPIC_INDIRECT)
10481 op1 = machopic_indirect_data_reference (op1, 0);
10482 if (op0 == op1)
10483 return;
10484 #endif
10486 else
10488 if (MEM_P (op0))
10489 op1 = force_reg (Pmode, op1);
10490 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10492 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10493 op1 = legitimize_pic_address (op1, reg);
10494 if (op0 == op1)
10495 return;
10499 else
10501 if (MEM_P (op0)
10502 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10503 || !push_operand (op0, mode))
10504 && MEM_P (op1))
10505 op1 = force_reg (mode, op1);
10507 if (push_operand (op0, mode)
10508 && ! general_no_elim_operand (op1, mode))
10509 op1 = copy_to_mode_reg (mode, op1);
10511 /* Force large constants in 64-bit compilation into a register
10512 to get them CSEd. */
10513 if (can_create_pseudo_p ()
10514 && (mode == DImode) && TARGET_64BIT
10515 && immediate_operand (op1, mode)
10516 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10517 && !register_operand (op0, mode)
10518 && optimize)
10519 op1 = copy_to_mode_reg (mode, op1);
10521 if (can_create_pseudo_p ()
10522 && FLOAT_MODE_P (mode)
10523 && GET_CODE (op1) == CONST_DOUBLE)
10525 /* If we are loading a floating point constant to a register,
10526 force the value to memory now, since we'll get better code
10527 out of the back end. */
10529 op1 = validize_mem (force_const_mem (mode, op1));
10530 if (!register_operand (op0, mode))
10532 rtx temp = gen_reg_rtx (mode);
10533 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10534 emit_move_insn (op0, temp);
10535 return;
10540 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10543 void
10544 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10546 rtx op0 = operands[0], op1 = operands[1];
10547 unsigned int align = GET_MODE_ALIGNMENT (mode);
10549 /* Force constants other than zero into memory. We do not know how
10550 the instructions used to build constants modify the upper 64 bits
10551 of the register; once we have that information we may be able
10552 to handle some of them more efficiently. */
10553 if (can_create_pseudo_p ()
10554 && register_operand (op0, mode)
10555 && (CONSTANT_P (op1)
10556 || (GET_CODE (op1) == SUBREG
10557 && CONSTANT_P (SUBREG_REG (op1))))
10558 && standard_sse_constant_p (op1) <= 0)
10559 op1 = validize_mem (force_const_mem (mode, op1));
10561 /* We need to check memory alignment for SSE modes since attributes
10562 can make operands unaligned. */
10563 if (can_create_pseudo_p ()
10564 && SSE_REG_MODE_P (mode)
10565 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10566 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10568 rtx tmp[2];
10570 /* ix86_expand_vector_move_misalign() does not like constants ... */
10571 if (CONSTANT_P (op1)
10572 || (GET_CODE (op1) == SUBREG
10573 && CONSTANT_P (SUBREG_REG (op1))))
10574 op1 = validize_mem (force_const_mem (mode, op1));
10576 /* ... nor both arguments in memory. */
10577 if (!register_operand (op0, mode)
10578 && !register_operand (op1, mode))
10579 op1 = force_reg (mode, op1);
10581 tmp[0] = op0; tmp[1] = op1;
10582 ix86_expand_vector_move_misalign (mode, tmp);
10583 return;
10586 /* Make operand1 a register if it isn't already. */
10587 if (can_create_pseudo_p ()
10588 && !register_operand (op0, mode)
10589 && !register_operand (op1, mode))
10591 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10592 return;
10595 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10598 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10599 straight to ix86_expand_vector_move. */
10600 /* Code generation for scalar reg-reg moves of single and double precision data:
10601 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10602 movaps reg, reg
10603 else
10604 movss reg, reg
10605 if (x86_sse_partial_reg_dependency == true)
10606 movapd reg, reg
10607 else
10608 movsd reg, reg
10610 Code generation for scalar loads of double precision data:
10611 if (x86_sse_split_regs == true)
10612 movlpd mem, reg (gas syntax)
10613 else
10614 movsd mem, reg
10616 Code generation for unaligned packed loads of single precision data
10617 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10618 if (x86_sse_unaligned_move_optimal)
10619 movups mem, reg
10621 if (x86_sse_partial_reg_dependency == true)
10623 xorps reg, reg
10624 movlps mem, reg
10625 movhps mem+8, reg
10627 else
10629 movlps mem, reg
10630 movhps mem+8, reg
10633 Code generation for unaligned packed loads of double precision data
10634 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10635 if (x86_sse_unaligned_move_optimal)
10636 movupd mem, reg
10638 if (x86_sse_split_regs == true)
10640 movlpd mem, reg
10641 movhpd mem+8, reg
10643 else
10645 movsd mem, reg
10646 movhpd mem+8, reg
10650 void
10651 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10653 rtx op0, op1, m;
10655 op0 = operands[0];
10656 op1 = operands[1];
10658 if (MEM_P (op1))
10660 /* If we're optimizing for size, movups is the smallest. */
10661 if (optimize_size)
10663 op0 = gen_lowpart (V4SFmode, op0);
10664 op1 = gen_lowpart (V4SFmode, op1);
10665 emit_insn (gen_sse_movups (op0, op1));
10666 return;
10669 /* ??? If we have typed data, then it would appear that using
10670 movdqu is the only way to get unaligned data loaded with
10671 integer type. */
10672 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10674 op0 = gen_lowpart (V16QImode, op0);
10675 op1 = gen_lowpart (V16QImode, op1);
10676 emit_insn (gen_sse2_movdqu (op0, op1));
10677 return;
10680 if (TARGET_SSE2 && mode == V2DFmode)
10682 rtx zero;
10684 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10686 op0 = gen_lowpart (V2DFmode, op0);
10687 op1 = gen_lowpart (V2DFmode, op1);
10688 emit_insn (gen_sse2_movupd (op0, op1));
10689 return;
10692 /* When SSE registers are split into halves, we can avoid
10693 writing to the top half twice. */
10694 if (TARGET_SSE_SPLIT_REGS)
10696 emit_clobber (op0);
10697 zero = op0;
10699 else
10701 /* ??? Not sure about the best option for the Intel chips.
10702 The following would seem to satisfy; the register is
10703 entirely cleared, breaking the dependency chain. We
10704 then store to the upper half, with a dependency depth
10705 of one. A rumor has it that Intel recommends two movsd
10706 followed by an unpacklpd, but this is unconfirmed. And
10707 given that the dependency depth of the unpacklpd would
10708 still be one, I'm not sure why this would be better. */
10709 zero = CONST0_RTX (V2DFmode);
10712 m = adjust_address (op1, DFmode, 0);
10713 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10714 m = adjust_address (op1, DFmode, 8);
10715 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10717 else
10719 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10721 op0 = gen_lowpart (V4SFmode, op0);
10722 op1 = gen_lowpart (V4SFmode, op1);
10723 emit_insn (gen_sse_movups (op0, op1));
10724 return;
10727 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10728 emit_move_insn (op0, CONST0_RTX (mode));
10729 else
10730 emit_clobber (op0);
10732 if (mode != V4SFmode)
10733 op0 = gen_lowpart (V4SFmode, op0);
10734 m = adjust_address (op1, V2SFmode, 0);
10735 emit_insn (gen_sse_loadlps (op0, op0, m));
10736 m = adjust_address (op1, V2SFmode, 8);
10737 emit_insn (gen_sse_loadhps (op0, op0, m));
10740 else if (MEM_P (op0))
10742 /* If we're optimizing for size, movups is the smallest. */
10743 if (optimize_size)
10745 op0 = gen_lowpart (V4SFmode, op0);
10746 op1 = gen_lowpart (V4SFmode, op1);
10747 emit_insn (gen_sse_movups (op0, op1));
10748 return;
10751 /* ??? Similar to above, only less clear because of quote
10752 typeless stores unquote. */
10753 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10754 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10756 op0 = gen_lowpart (V16QImode, op0);
10757 op1 = gen_lowpart (V16QImode, op1);
10758 emit_insn (gen_sse2_movdqu (op0, op1));
10759 return;
10762 if (TARGET_SSE2 && mode == V2DFmode)
10764 m = adjust_address (op0, DFmode, 0);
10765 emit_insn (gen_sse2_storelpd (m, op1));
10766 m = adjust_address (op0, DFmode, 8);
10767 emit_insn (gen_sse2_storehpd (m, op1));
10769 else
10771 if (mode != V4SFmode)
10772 op1 = gen_lowpart (V4SFmode, op1);
10773 m = adjust_address (op0, V2SFmode, 0);
10774 emit_insn (gen_sse_storelps (m, op1));
10775 m = adjust_address (op0, V2SFmode, 8);
10776 emit_insn (gen_sse_storehps (m, op1));
10779 else
10780 gcc_unreachable ();
10783 /* Expand a push in MODE. This is some mode for which we do not support
10784 proper push instructions, at least from the registers that we expect
10785 the value to live in. */
10787 void
10788 ix86_expand_push (enum machine_mode mode, rtx x)
10790 rtx tmp;
10792 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10793 GEN_INT (-GET_MODE_SIZE (mode)),
10794 stack_pointer_rtx, 1, OPTAB_DIRECT);
10795 if (tmp != stack_pointer_rtx)
10796 emit_move_insn (stack_pointer_rtx, tmp);
10798 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10799 emit_move_insn (tmp, x);
10802 /* Helper function of ix86_fixup_binary_operands to canonicalize
10803 operand order. Returns true if the operands should be swapped. */
10805 static bool
10806 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10807 rtx operands[])
10809 rtx dst = operands[0];
10810 rtx src1 = operands[1];
10811 rtx src2 = operands[2];
10813 /* If the operation is not commutative, we can't do anything. */
10814 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10815 return false;
10817 /* Highest priority is that src1 should match dst. */
10818 if (rtx_equal_p (dst, src1))
10819 return false;
10820 if (rtx_equal_p (dst, src2))
10821 return true;
10823 /* Next highest priority is that immediate constants come second. */
10824 if (immediate_operand (src2, mode))
10825 return false;
10826 if (immediate_operand (src1, mode))
10827 return true;
10829 /* Lowest priority is that memory references should come second. */
10830 if (MEM_P (src2))
10831 return false;
10832 if (MEM_P (src1))
10833 return true;
10835 return false;
10839 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10840 destination to use for the operation. If different from the true
10841 destination in operands[0], a copy operation will be required. */
10844 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10845 rtx operands[])
10847 rtx dst = operands[0];
10848 rtx src1 = operands[1];
10849 rtx src2 = operands[2];
10851 /* Canonicalize operand order. */
10852 if (ix86_swap_binary_operands_p (code, mode, operands))
10854 rtx temp;
10856 /* It is invalid to swap operands of different modes. */
10857 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
10859 temp = src1;
10860 src1 = src2;
10861 src2 = temp;
10864 /* Both source operands cannot be in memory. */
10865 if (MEM_P (src1) && MEM_P (src2))
10867 /* Optimization: Only read from memory once. */
10868 if (rtx_equal_p (src1, src2))
10870 src2 = force_reg (mode, src2);
10871 src1 = src2;
10873 else
10874 src2 = force_reg (mode, src2);
10877 /* If the destination is memory, and we do not have matching source
10878 operands, do things in registers. */
10879 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10880 dst = gen_reg_rtx (mode);
10882 /* Source 1 cannot be a constant. */
10883 if (CONSTANT_P (src1))
10884 src1 = force_reg (mode, src1);
10886 /* Source 1 cannot be a non-matching memory. */
10887 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10888 src1 = force_reg (mode, src1);
10890 operands[1] = src1;
10891 operands[2] = src2;
10892 return dst;
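/* Editorial example of the canonicalization above (assumed operand shapes):
   for a commutative PLUS with operands[0] == operands[2] == (reg A) and
   operands[1] == (const_int 1), the swap makes src1 the register matching
   the destination and src2 the immediate, so a single two-operand add can
   be emitted without an extra copy. */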
10895 /* Similarly, but assume that the destination has already been
10896 set up properly. */
10898 void
10899 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10900 enum machine_mode mode, rtx operands[])
10902 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10903 gcc_assert (dst == operands[0]);
10906 /* Attempt to expand a binary operator. Make the expansion closer to the
10907 actual machine than just general_operand, which would allow 3 separate
10908 memory references (one output, two inputs) in a single insn. */
10910 void
10911 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10912 rtx operands[])
10914 rtx src1, src2, dst, op, clob;
10916 dst = ix86_fixup_binary_operands (code, mode, operands);
10917 src1 = operands[1];
10918 src2 = operands[2];
10920 /* Emit the instruction. */
10922 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10923 if (reload_in_progress)
10925 /* Reload doesn't know about the flags register, and doesn't know that
10926 it doesn't want to clobber it. We can only do this with PLUS. */
10927 gcc_assert (code == PLUS);
10928 emit_insn (op);
10930 else
10932 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10933 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10936 /* Fix up the destination if needed. */
10937 if (dst != operands[0])
10938 emit_move_insn (operands[0], dst);
10941 /* Return TRUE or FALSE depending on whether the binary operator meets the
10942 appropriate constraints. */
10945 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10946 rtx operands[3])
10948 rtx dst = operands[0];
10949 rtx src1 = operands[1];
10950 rtx src2 = operands[2];
10952 /* Both source operands cannot be in memory. */
10953 if (MEM_P (src1) && MEM_P (src2))
10954 return 0;
10956 /* Canonicalize operand order for commutative operators. */
10957 if (ix86_swap_binary_operands_p (code, mode, operands))
10959 rtx temp = src1;
10960 src1 = src2;
10961 src2 = temp;
10964 /* If the destination is memory, we must have a matching source operand. */
10965 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10966 return 0;
10968 /* Source 1 cannot be a constant. */
10969 if (CONSTANT_P (src1))
10970 return 0;
10972 /* Source 1 cannot be a non-matching memory. */
10973 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10974 return 0;
10976 return 1;
10979 /* Attempt to expand a unary operator. Make the expansion closer to the
10980 actual machine than just general_operand, which would allow 2 separate
10981 memory references (one output, one input) in a single insn. */
10983 void
10984 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10985 rtx operands[])
10987 int matching_memory;
10988 rtx src, dst, op, clob;
10990 dst = operands[0];
10991 src = operands[1];
10993 /* If the destination is memory, and we do not have matching source
10994 operands, do things in registers. */
10995 matching_memory = 0;
10996 if (MEM_P (dst))
10998 if (rtx_equal_p (dst, src))
10999 matching_memory = 1;
11000 else
11001 dst = gen_reg_rtx (mode);
11004 /* When source operand is memory, destination must match. */
11005 if (MEM_P (src) && !matching_memory)
11006 src = force_reg (mode, src);
11008 /* Emit the instruction. */
11010 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
11011 if (reload_in_progress || code == NOT)
11013 /* Reload doesn't know about the flags register, and doesn't know that
11014 it doesn't want to clobber it. */
11015 gcc_assert (code == NOT);
11016 emit_insn (op);
11018 else
11020 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11021 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11024 /* Fix up the destination if needed. */
11025 if (dst != operands[0])
11026 emit_move_insn (operands[0], dst);
11029 /* Return TRUE or FALSE depending on whether the unary operator meets the
11030 appropriate constraints. */
11033 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
11034 enum machine_mode mode ATTRIBUTE_UNUSED,
11035 rtx operands[2] ATTRIBUTE_UNUSED)
11037 /* If one of operands is memory, source and destination must match. */
11038 if ((MEM_P (operands[0])
11039 || MEM_P (operands[1]))
11040 && ! rtx_equal_p (operands[0], operands[1]))
11041 return FALSE;
11042 return TRUE;
11045 /* Post-reload splitter for converting an SF or DFmode value in an
11046 SSE register into an unsigned SImode. */
11048 void
11049 ix86_split_convert_uns_si_sse (rtx operands[])
11051 enum machine_mode vecmode;
11052 rtx value, large, zero_or_two31, input, two31, x;
11054 large = operands[1];
11055 zero_or_two31 = operands[2];
11056 input = operands[3];
11057 two31 = operands[4];
11058 vecmode = GET_MODE (large);
11059 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
11061 /* Load up the value into the low element. We must ensure that the other
11062 elements are valid floats -- zero is the easiest such value. */
11063 if (MEM_P (input))
11065 if (vecmode == V4SFmode)
11066 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
11067 else
11068 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
11070 else
11072 input = gen_rtx_REG (vecmode, REGNO (input));
11073 emit_move_insn (value, CONST0_RTX (vecmode));
11074 if (vecmode == V4SFmode)
11075 emit_insn (gen_sse_movss (value, value, input));
11076 else
11077 emit_insn (gen_sse2_movsd (value, value, input));
11080 emit_move_insn (large, two31);
11081 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
11083 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
11084 emit_insn (gen_rtx_SET (VOIDmode, large, x));
11086 x = gen_rtx_AND (vecmode, zero_or_two31, large);
11087 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
11089 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
11090 emit_insn (gen_rtx_SET (VOIDmode, value, x));
11092 large = gen_rtx_REG (V4SImode, REGNO (large));
11093 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
11095 x = gen_rtx_REG (V4SImode, REGNO (value));
11096 if (vecmode == V4SFmode)
11097 emit_insn (gen_sse2_cvttps2dq (x, value));
11098 else
11099 emit_insn (gen_sse2_cvttpd2dq (x, value));
11100 value = x;
11102 emit_insn (gen_xorv4si3 (value, value, large));
11105 /* Convert an unsigned DImode value into a DFmode, using only SSE.
11106 Expects the 64-bit DImode to be supplied in a pair of integral
11107 registers. Requires SSE2; will use SSE3 if available. For x86_32,
11108 -mfpmath=sse, !optimize_size only. */
11110 void
11111 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
11113 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
11114 rtx int_xmm, fp_xmm;
11115 rtx biases, exponents;
11116 rtx x;
11118 int_xmm = gen_reg_rtx (V4SImode);
11119 if (TARGET_INTER_UNIT_MOVES)
11120 emit_insn (gen_movdi_to_sse (int_xmm, input));
11121 else if (TARGET_SSE_SPLIT_REGS)
11123 emit_clobber (int_xmm);
11124 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
11126 else
11128 x = gen_reg_rtx (V2DImode);
11129 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
11130 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
11133 x = gen_rtx_CONST_VECTOR (V4SImode,
11134 gen_rtvec (4, GEN_INT (0x43300000UL),
11135 GEN_INT (0x45300000UL),
11136 const0_rtx, const0_rtx));
11137 exponents = validize_mem (force_const_mem (V4SImode, x));
11139 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
11140 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
11142 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
11143 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
11144 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
11145 (0x1.0p84 + double(fp_value_hi_xmm)).
11146 Note these exponents differ by 32. */
11148 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
11150 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
11151 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
11152 real_ldexp (&bias_lo_rvt, &dconst1, 52);
11153 real_ldexp (&bias_hi_rvt, &dconst1, 84);
11154 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
11155 x = const_double_from_real_value (bias_hi_rvt, DFmode);
11156 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
11157 biases = validize_mem (force_const_mem (V2DFmode, biases));
11158 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
11160 /* Add the upper and lower DFmode values together. */
11161 if (TARGET_SSE3)
11162 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
11163 else
11165 x = copy_to_mode_reg (V2DFmode, fp_xmm);
11166 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
11167 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
11170 ix86_expand_vector_extract (false, target, fp_xmm, 0);
11173 /* Not used, but eases macroization of patterns. */
11174 void
11175 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
11176 rtx input ATTRIBUTE_UNUSED)
11178 gcc_unreachable ();
11181 /* Convert an unsigned SImode value into a DFmode. Only currently used
11182 for SSE, but applicable anywhere. */
11184 void
11185 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
11187 REAL_VALUE_TYPE TWO31r;
11188 rtx x, fp;
11190 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11191 NULL, 1, OPTAB_DIRECT);
11193 fp = gen_reg_rtx (DFmode);
11194 emit_insn (gen_floatsidf2 (fp, x));
11196 real_ldexp (&TWO31r, &dconst1, 31);
11197 x = const_double_from_real_value (TWO31r, DFmode);
11199 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
11200 if (x != target)
11201 emit_move_insn (target, x);
11204 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11205 32-bit mode; otherwise we have a direct convert instruction. */
11207 void
11208 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11210 REAL_VALUE_TYPE TWO32r;
11211 rtx fp_lo, fp_hi, x;
11213 fp_lo = gen_reg_rtx (DFmode);
11214 fp_hi = gen_reg_rtx (DFmode);
11216 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11218 real_ldexp (&TWO32r, &dconst1, 32);
11219 x = const_double_from_real_value (TWO32r, DFmode);
11220 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11222 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11224 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
11225 0, OPTAB_DIRECT);
11226 if (x != target)
11227 emit_move_insn (target, x);
11230 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11231 For x86_32, -mfpmath=sse, !optimize_size only. */
11232 void
11233 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11235 REAL_VALUE_TYPE ONE16r;
11236 rtx fp_hi, fp_lo, int_hi, int_lo, x;
11238 real_ldexp (&ONE16r, &dconst1, 16);
11239 x = const_double_from_real_value (ONE16r, SFmode);
11240 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11241 NULL, 0, OPTAB_DIRECT);
11242 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11243 NULL, 0, OPTAB_DIRECT);
11244 fp_hi = gen_reg_rtx (SFmode);
11245 fp_lo = gen_reg_rtx (SFmode);
11246 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11247 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11248 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11249 0, OPTAB_DIRECT);
11250 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11251 0, OPTAB_DIRECT);
11252 if (!rtx_equal_p (target, fp_hi))
11253 emit_move_insn (target, fp_hi);
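/* Worked example (illustrative): for input 0x90003 the code above forms
   int_hi = 0x9 and int_lo = 0x3 and computes
   (float) 0x9 * 65536.0f + (float) 0x3 == 589827.0f; each 16-bit half is
   small enough to convert exactly, which is the point of the split. */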
11256 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
11257 then replicate the value for all elements of the vector
11258 register. */
11261 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
11263 rtvec v;
11264 switch (mode)
11266 case SImode:
11267 gcc_assert (vect);
11268 v = gen_rtvec (4, value, value, value, value);
11269 return gen_rtx_CONST_VECTOR (V4SImode, v);
11271 case DImode:
11272 gcc_assert (vect);
11273 v = gen_rtvec (2, value, value);
11274 return gen_rtx_CONST_VECTOR (V2DImode, v);
11276 case SFmode:
11277 if (vect)
11278 v = gen_rtvec (4, value, value, value, value);
11279 else
11280 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11281 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11282 return gen_rtx_CONST_VECTOR (V4SFmode, v);
11284 case DFmode:
11285 if (vect)
11286 v = gen_rtvec (2, value, value);
11287 else
11288 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11289 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11291 default:
11292 gcc_unreachable ();
11296 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11297 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11298 for an SSE register. If VECT is true, then replicate the mask for
11299 all elements of the vector register. If INVERT is true, then create
11300 a mask excluding the sign bit. */
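/* Example of the masks built below (illustrative values): for DFmode with
   INVERT false the constant's low half is 0x8000000000000000 (just the
   sign bit); with INVERT true it is 0x7fffffffffffffff. VECT merely
   broadcasts that element to every lane of the vector constant. */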
11303 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11305 enum machine_mode vec_mode, imode;
11306 HOST_WIDE_INT hi, lo;
11307 int shift = 63;
11308 rtx v;
11309 rtx mask;
11311 /* Find the sign bit, sign extended to 2*HWI. */
11312 switch (mode)
11314 case SImode:
11315 case SFmode:
11316 imode = SImode;
11317 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11318 lo = 0x80000000, hi = lo < 0;
11319 break;
11321 case DImode:
11322 case DFmode:
11323 imode = DImode;
11324 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11325 if (HOST_BITS_PER_WIDE_INT >= 64)
11326 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11327 else
11328 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11329 break;
11331 case TImode:
11332 case TFmode:
11333 vec_mode = VOIDmode;
11334 if (HOST_BITS_PER_WIDE_INT >= 64)
11336 imode = TImode;
11337 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11339 else
11341 rtvec vec;
11343 imode = DImode;
11344 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11346 if (invert)
11348 lo = ~lo, hi = ~hi;
11349 v = constm1_rtx;
11351 else
11352 v = const0_rtx;
11354 mask = immed_double_const (lo, hi, imode);
11356 vec = gen_rtvec (2, v, mask);
11357 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
11358 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
11360 return v;
11362 break;
11364 default:
11365 gcc_unreachable ();
11368 if (invert)
11369 lo = ~lo, hi = ~hi;
11371 /* Force this value into the low part of a fp vector constant. */
11372 mask = immed_double_const (lo, hi, imode);
11373 mask = gen_lowpart (mode, mask);
11375 if (vec_mode == VOIDmode)
11376 return force_reg (mode, mask);
11378 v = ix86_build_const_vector (mode, vect, mask);
11379 return force_reg (vec_mode, v);
11382 /* Generate code for floating point ABS or NEG. */
11384 void
11385 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11386 rtx operands[])
11388 rtx mask, set, use, clob, dst, src;
11389 bool use_sse = false;
11390 bool vector_mode = VECTOR_MODE_P (mode);
11391 enum machine_mode elt_mode = mode;
11393 if (vector_mode)
11395 elt_mode = GET_MODE_INNER (mode);
11396 use_sse = true;
11398 else if (mode == TFmode)
11399 use_sse = true;
11400 else if (TARGET_SSE_MATH)
11401 use_sse = SSE_FLOAT_MODE_P (mode);
11403 /* NEG and ABS performed with SSE use bitwise mask operations.
11404 Create the appropriate mask now. */
11405 if (use_sse)
11406 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11407 else
11408 mask = NULL_RTX;
11410 dst = operands[0];
11411 src = operands[1];
11413 if (vector_mode)
11415 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11416 set = gen_rtx_SET (VOIDmode, dst, set);
11417 emit_insn (set);
11419 else
11421 set = gen_rtx_fmt_e (code, mode, src);
11422 set = gen_rtx_SET (VOIDmode, dst, set);
11423 if (mask)
11425 use = gen_rtx_USE (VOIDmode, mask);
11426 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11427 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11428 gen_rtvec (3, set, use, clob)));
11430 else
11431 emit_insn (set);
11435 /* Expand a copysign operation. Special case operand 0 being a constant. */
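/* Identity for reference (editorial note): copysign (x, y) is
   (x & ~SIGN_MASK) | (y & SIGN_MASK). In the constant case below, x is
   already made nonnegative, so only the AND with the sign mask and the
   final IOR remain; the variable case also needs the complementary nmask. */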
11437 void
11438 ix86_expand_copysign (rtx operands[])
11440 enum machine_mode mode;
11441 rtx dest, op0, op1, mask, nmask;
11443 dest = operands[0];
11444 op0 = operands[1];
11445 op1 = operands[2];
11447 mode = GET_MODE (dest);
11449 if (GET_CODE (op0) == CONST_DOUBLE)
11451 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11453 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11454 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11456 if (mode == SFmode || mode == DFmode)
11458 enum machine_mode vmode;
11460 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11462 if (op0 == CONST0_RTX (mode))
11463 op0 = CONST0_RTX (vmode);
11464 else
11466 rtvec v;
11468 if (mode == SFmode)
11469 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11470 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11471 else
11472 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11474 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11477 else if (op0 != CONST0_RTX (mode))
11478 op0 = force_reg (mode, op0);
11480 mask = ix86_build_signbit_mask (mode, 0, 0);
11482 if (mode == SFmode)
11483 copysign_insn = gen_copysignsf3_const;
11484 else if (mode == DFmode)
11485 copysign_insn = gen_copysigndf3_const;
11486 else
11487 copysign_insn = gen_copysigntf3_const;
11489 emit_insn (copysign_insn (dest, op0, op1, mask));
11491 else
11493 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11495 nmask = ix86_build_signbit_mask (mode, 0, 1);
11496 mask = ix86_build_signbit_mask (mode, 0, 0);
11498 if (mode == SFmode)
11499 copysign_insn = gen_copysignsf3_var;
11500 else if (mode == DFmode)
11501 copysign_insn = gen_copysigndf3_var;
11502 else
11503 copysign_insn = gen_copysigntf3_var;
11505 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11509 /* Deconstruct a copysign operation into bit masks. The magnitude input
11510 (operands[1]) is known to be a constant, and so has already been expanded into a vector constant. */
11512 void
11513 ix86_split_copysign_const (rtx operands[])
11515 enum machine_mode mode, vmode;
11516 rtx dest, op0, op1, mask, x;
11518 dest = operands[0];
11519 op0 = operands[1];
11520 op1 = operands[2];
11521 mask = operands[3];
11523 mode = GET_MODE (dest);
11524 vmode = GET_MODE (mask);
11526 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11527 x = gen_rtx_AND (vmode, dest, mask);
11528 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11530 if (op0 != CONST0_RTX (vmode))
11532 x = gen_rtx_IOR (vmode, dest, op0);
11533 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
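/* Net effect (a sketch of the identity being implemented): assuming DEST
   already holds the sign-providing operand when the insn is split,
     result = |X| | (DEST & SIGNMASK)
   where |X| is the constant magnitude whose own sign bit was cleared by
   ix86_expand_copysign, so one AND plus an optional IOR suffice.  */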
11537 /* Deconstruct a copysign operation into bit masks. The magnitude input is a
11538 variable, so we have to apply two masks. */
11540 void
11541 ix86_split_copysign_var (rtx operands[])
11543 enum machine_mode mode, vmode;
11544 rtx dest, scratch, op0, op1, mask, nmask, x;
11546 dest = operands[0];
11547 scratch = operands[1];
11548 op0 = operands[2];
11549 op1 = operands[3];
11550 nmask = operands[4];
11551 mask = operands[5];
11553 mode = GET_MODE (dest);
11554 vmode = GET_MODE (mask);
11556 if (rtx_equal_p (op0, op1))
11558 /* Shouldn't happen often (it's useless, obviously), but when it does
11559 we'd generate incorrect code if we continue below. */
11560 emit_move_insn (dest, op0);
11561 return;
11564 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11566 gcc_assert (REGNO (op1) == REGNO (scratch));
11568 x = gen_rtx_AND (vmode, scratch, mask);
11569 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11571 dest = mask;
11572 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11573 x = gen_rtx_NOT (vmode, dest);
11574 x = gen_rtx_AND (vmode, x, op0);
11575 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11577 else
11579 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11581 x = gen_rtx_AND (vmode, scratch, mask);
11583 else /* alternative 2,4 */
11585 gcc_assert (REGNO (mask) == REGNO (scratch));
11586 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11587 x = gen_rtx_AND (vmode, scratch, op1);
11589 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11591 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11593 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11594 x = gen_rtx_AND (vmode, dest, nmask);
11596 else /* alternative 3,4 */
11598 gcc_assert (REGNO (nmask) == REGNO (dest));
11599 dest = nmask;
11600 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11601 x = gen_rtx_AND (vmode, dest, op0);
11603 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11606 x = gen_rtx_IOR (vmode, dest, scratch);
11607 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11610 /* Return TRUE or FALSE depending on whether the first SET in INSN
11611 has source and destination with matching CC modes, and that the
11612 CC mode is at least as constrained as REQ_MODE. */
11615 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11617 rtx set;
11618 enum machine_mode set_mode;
11620 set = PATTERN (insn);
11621 if (GET_CODE (set) == PARALLEL)
11622 set = XVECEXP (set, 0, 0);
11623 gcc_assert (GET_CODE (set) == SET);
11624 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11626 set_mode = GET_MODE (SET_DEST (set));
11627 switch (set_mode)
11629 case CCNOmode:
11630 if (req_mode != CCNOmode
11631 && (req_mode != CCmode
11632 || XEXP (SET_SRC (set), 1) != const0_rtx))
11633 return 0;
11634 break;
11635 case CCmode:
11636 if (req_mode == CCGCmode)
11637 return 0;
11638 /* FALLTHRU */
11639 case CCGCmode:
11640 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11641 return 0;
11642 /* FALLTHRU */
11643 case CCGOCmode:
11644 if (req_mode == CCZmode)
11645 return 0;
11646 /* FALLTHRU */
11647 case CCZmode:
11648 break;
11650 default:
11651 gcc_unreachable ();
11654 return (GET_MODE (SET_SRC (set)) == set_mode);
11657 /* Generate insn patterns to do an integer compare of OPERANDS. */
11659 static rtx
11660 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11662 enum machine_mode cmpmode;
11663 rtx tmp, flags;
11665 cmpmode = SELECT_CC_MODE (code, op0, op1);
11666 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11668 /* This is very simple, but making the interface the same as in the
11669 FP case makes the rest of the code easier. */
11670 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11671 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11673 /* Return the test that should be put into the flags user, i.e.
11674 the bcc, scc, or cmov instruction. */
11675 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11678 /* Figure out whether to use ordered or unordered fp comparisons.
11679 Return the appropriate mode to use. */
11681 enum machine_mode
11682 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11684 /* ??? In order to make all comparisons reversible, we do all comparisons
11685 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11686 between all forms of trapping and nontrapping comparisons, we can make
11687 inequality comparisons trapping again, since that results in better code
11688 when using FCOM based compares. */
11689 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11692 enum machine_mode
11693 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11695 enum machine_mode mode = GET_MODE (op0);
11697 if (SCALAR_FLOAT_MODE_P (mode))
11699 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11700 return ix86_fp_compare_mode (code);
11703 switch (code)
11705 /* Only zero flag is needed. */
11706 case EQ: /* ZF=0 */
11707 case NE: /* ZF!=0 */
11708 return CCZmode;
11709 /* Codes needing carry flag. */
11710 case GEU: /* CF=0 */
11711 case LTU: /* CF=1 */
11712 /* Detect overflow checks. They need just the carry flag. */
11713 if (GET_CODE (op0) == PLUS
11714 && rtx_equal_p (op1, XEXP (op0, 0)))
11715 return CCCmode;
11716 else
11717 return CCmode;
11718 case GTU: /* CF=0 & ZF=0 */
11719 case LEU: /* CF=1 | ZF=1 */
11720 /* Detect overflow checks. They need just the carry flag. */
11721 if (GET_CODE (op0) == MINUS
11722 && rtx_equal_p (op1, XEXP (op0, 0)))
11723 return CCCmode;
11724 else
11725 return CCmode;
11726 /* Codes possibly doable only with sign flag when
11727 comparing against zero. */
11728 case GE: /* SF=OF or SF=0 */
11729 case LT: /* SF<>OF or SF=1 */
11730 if (op1 == const0_rtx)
11731 return CCGOCmode;
11732 else
11733 /* For the other cases the carry flag is not required. */
11734 return CCGCmode;
11735 /* Codes doable with only the sign flag when comparing
11736 against zero, but we lack a jump instruction for that,
11737 so we need to use relational tests against overflow,
11738 which thus needs to be zero. */
11739 case GT: /* ZF=0 & SF=OF */
11740 case LE: /* ZF=1 | SF<>OF */
11741 if (op1 == const0_rtx)
11742 return CCNOmode;
11743 else
11744 return CCGCmode;
11745 /* The strcmp pattern does (use flags), and combine may ask us for the
11746 proper mode. */
11747 case USE:
11748 return CCmode;
11749 default:
11750 gcc_unreachable ();
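/* For example, the common unsigned overflow idiom "if (a + b < a)" is
   recognized by the GEU/LTU cases above and gets CCCmode, so only the
   carry flag set by the addition itself has to be kept live.  */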
11754 /* Return the fixed registers used for condition codes. */
11756 static bool
11757 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11759 *p1 = FLAGS_REG;
11760 *p2 = FPSR_REG;
11761 return true;
11764 /* If two condition code modes are compatible, return a condition code
11765 mode which is compatible with both. Otherwise, return
11766 VOIDmode. */
11768 static enum machine_mode
11769 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11771 if (m1 == m2)
11772 return m1;
11774 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11775 return VOIDmode;
11777 if ((m1 == CCGCmode && m2 == CCGOCmode)
11778 || (m1 == CCGOCmode && m2 == CCGCmode))
11779 return CCGCmode;
11781 switch (m1)
11783 default:
11784 gcc_unreachable ();
11786 case CCmode:
11787 case CCGCmode:
11788 case CCGOCmode:
11789 case CCNOmode:
11790 case CCAmode:
11791 case CCCmode:
11792 case CCOmode:
11793 case CCSmode:
11794 case CCZmode:
11795 switch (m2)
11797 default:
11798 return VOIDmode;
11800 case CCmode:
11801 case CCGCmode:
11802 case CCGOCmode:
11803 case CCNOmode:
11804 case CCAmode:
11805 case CCCmode:
11806 case CCOmode:
11807 case CCSmode:
11808 case CCZmode:
11809 return CCmode;
11812 case CCFPmode:
11813 case CCFPUmode:
11814 /* These are only compatible with themselves, which we already
11815 checked above. */
11816 return VOIDmode;
11820 /* Split comparison code CODE into comparisons we can do using branch
11821 instructions. BYPASS_CODE is the comparison code for a branch that will
11822 branch around FIRST_CODE and SECOND_CODE. If one of the branches
11823 is not required, its value is set to UNKNOWN.
11824 We never require more than two branches. */
11826 void
11827 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11828 enum rtx_code *first_code,
11829 enum rtx_code *second_code)
11831 *first_code = code;
11832 *bypass_code = UNKNOWN;
11833 *second_code = UNKNOWN;
11835 /* The fcomi comparison sets flags as follows:
11837       cmp   ZF  PF  CF
11838        >     0   0   0
11839        <     0   0   1
11840        =     1   0   0
11841       un     1   1   1  */
11843 switch (code)
11845 case GT: /* GTU - CF=0 & ZF=0 */
11846 case GE: /* GEU - CF=0 */
11847 case ORDERED: /* PF=0 */
11848 case UNORDERED: /* PF=1 */
11849 case UNEQ: /* EQ - ZF=1 */
11850 case UNLT: /* LTU - CF=1 */
11851 case UNLE: /* LEU - CF=1 | ZF=1 */
11852 case LTGT: /* EQ - ZF=0 */
11853 break;
11854 case LT: /* LTU - CF=1 - fails on unordered */
11855 *first_code = UNLT;
11856 *bypass_code = UNORDERED;
11857 break;
11858 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11859 *first_code = UNLE;
11860 *bypass_code = UNORDERED;
11861 break;
11862 case EQ: /* EQ - ZF=1 - fails on unordered */
11863 *first_code = UNEQ;
11864 *bypass_code = UNORDERED;
11865 break;
11866 case NE: /* NE - ZF=0 - fails on unordered */
11867 *first_code = LTGT;
11868 *second_code = UNORDERED;
11869 break;
11870 case UNGE: /* GEU - CF=0 - fails on unordered */
11871 *first_code = GE;
11872 *second_code = UNORDERED;
11873 break;
11874 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11875 *first_code = GT;
11876 *second_code = UNORDERED;
11877 break;
11878 default:
11879 gcc_unreachable ();
11881 if (!TARGET_IEEE_FP)
11883 *second_code = UNKNOWN;
11884 *bypass_code = UNKNOWN;
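/* Worked example: the flag test for "below" (CF=1) is also true for
   unordered operands (last row of the table above), so under
   TARGET_IEEE_FP a LT comparison is split into *FIRST_CODE = UNLT with
   *BYPASS_CODE = UNORDERED: first branch away on PF (unordered), then
   test CF for the ordered less-than.  */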
11888 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11889 All following functions use the number of instructions as their cost metric.
11890 In the future this should be tweaked to compute bytes for optimize_size and
11891 take into account the performance of various instructions on various CPUs. */
11892 static int
11893 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11895 if (!TARGET_IEEE_FP)
11896 return 4;
11897 /* The cost of code output by ix86_expand_fp_compare. */
11898 switch (code)
11900 case UNLE:
11901 case UNLT:
11902 case LTGT:
11903 case GT:
11904 case GE:
11905 case UNORDERED:
11906 case ORDERED:
11907 case UNEQ:
11908 return 4;
11909 break;
11910 case LT:
11911 case NE:
11912 case EQ:
11913 case UNGE:
11914 return 5;
11915 break;
11916 case LE:
11917 case UNGT:
11918 return 6;
11919 break;
11920 default:
11921 gcc_unreachable ();
11925 /* Return cost of comparison done using fcomi operation.
11926 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11927 static int
11928 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11930 enum rtx_code bypass_code, first_code, second_code;
11931 /* Return an arbitrarily high cost when the instruction is not supported -
11932 this prevents gcc from using it. */
11933 if (!TARGET_CMOVE)
11934 return 1024;
11935 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11936 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11939 /* Return cost of comparison done using sahf operation.
11940 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11941 static int
11942 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11944 enum rtx_code bypass_code, first_code, second_code;
11945 /* Return an arbitrarily high cost when the instruction is not preferred -
11946 this prevents gcc from using it. */
11947 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11948 return 1024;
11949 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11950 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11953 /* Compute cost of the comparison done using any method.
11954 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11955 static int
11956 ix86_fp_comparison_cost (enum rtx_code code)
11958 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11959 int min;
11961 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11962 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11964 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11965 if (min > sahf_cost)
11966 min = sahf_cost;
11967 if (min > fcomi_cost)
11968 min = fcomi_cost;
11969 return min;
11972 /* Return true if we should use an FCOMI instruction for this
11973 fp comparison. */
11976 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11978 enum rtx_code swapped_code = swap_condition (code);
11980 return ((ix86_fp_comparison_cost (code)
11981 == ix86_fp_comparison_fcomi_cost (code))
11982 || (ix86_fp_comparison_cost (swapped_code)
11983 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11986 /* Swap, force into registers, or otherwise massage the two operands
11987 to a fp comparison. The operands are updated in place; the new
11988 comparison code is returned. */
11990 static enum rtx_code
11991 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11993 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11994 rtx op0 = *pop0, op1 = *pop1;
11995 enum machine_mode op_mode = GET_MODE (op0);
11996 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11998 /* All of the unordered compare instructions only work on registers.
11999 The same is true of the fcomi compare instructions. The XFmode
12000 compare instructions require registers except when comparing
12001 against zero or when converting operand 1 from fixed point to
12002 floating point. */
12004 if (!is_sse
12005 && (fpcmp_mode == CCFPUmode
12006 || (op_mode == XFmode
12007 && ! (standard_80387_constant_p (op0) == 1
12008 || standard_80387_constant_p (op1) == 1)
12009 && GET_CODE (op1) != FLOAT)
12010 || ix86_use_fcomi_compare (code)))
12012 op0 = force_reg (op_mode, op0);
12013 op1 = force_reg (op_mode, op1);
12015 else
12017 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
12018 things around if they appear profitable, otherwise force op0
12019 into a register. */
12021 if (standard_80387_constant_p (op0) == 0
12022 || (MEM_P (op0)
12023 && ! (standard_80387_constant_p (op1) == 0
12024 || MEM_P (op1))))
12026 rtx tmp;
12027 tmp = op0, op0 = op1, op1 = tmp;
12028 code = swap_condition (code);
12031 if (!REG_P (op0))
12032 op0 = force_reg (op_mode, op0);
12034 if (CONSTANT_P (op1))
12036 int tmp = standard_80387_constant_p (op1);
12037 if (tmp == 0)
12038 op1 = validize_mem (force_const_mem (op_mode, op1));
12039 else if (tmp == 1)
12041 if (TARGET_CMOVE)
12042 op1 = force_reg (op_mode, op1);
12044 else
12045 op1 = force_reg (op_mode, op1);
12049 /* Try to rearrange the comparison to make it cheaper. */
12050 if (ix86_fp_comparison_cost (code)
12051 > ix86_fp_comparison_cost (swap_condition (code))
12052 && (REG_P (op1) || can_create_pseudo_p ()))
12054 rtx tmp;
12055 tmp = op0, op0 = op1, op1 = tmp;
12056 code = swap_condition (code);
12057 if (!REG_P (op0))
12058 op0 = force_reg (op_mode, op0);
12061 *pop0 = op0;
12062 *pop1 = op1;
12063 return code;
12066 /* Convert comparison codes we use to represent FP comparison to integer
12067 code that will result in proper branch. Return UNKNOWN if no such code
12068 is available. */
12070 enum rtx_code
12071 ix86_fp_compare_code_to_integer (enum rtx_code code)
12073 switch (code)
12075 case GT:
12076 return GTU;
12077 case GE:
12078 return GEU;
12079 case ORDERED:
12080 case UNORDERED:
12081 return code;
12082 break;
12083 case UNEQ:
12084 return EQ;
12085 break;
12086 case UNLT:
12087 return LTU;
12088 break;
12089 case UNLE:
12090 return LEU;
12091 break;
12092 case LTGT:
12093 return NE;
12094 break;
12095 default:
12096 return UNKNOWN;
12100 /* Generate insn patterns to do a floating point compare of OPERANDS. */
12102 static rtx
12103 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
12104 rtx *second_test, rtx *bypass_test)
12106 enum machine_mode fpcmp_mode, intcmp_mode;
12107 rtx tmp, tmp2;
12108 int cost = ix86_fp_comparison_cost (code);
12109 enum rtx_code bypass_code, first_code, second_code;
12111 fpcmp_mode = ix86_fp_compare_mode (code);
12112 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
12114 if (second_test)
12115 *second_test = NULL_RTX;
12116 if (bypass_test)
12117 *bypass_test = NULL_RTX;
12119 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12121 /* Do fcomi/sahf based test when profitable. */
12122 if (ix86_fp_comparison_arithmetics_cost (code) > cost
12123 && (bypass_code == UNKNOWN || bypass_test)
12124 && (second_code == UNKNOWN || second_test))
12126 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12127 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
12128 tmp);
12129 if (TARGET_CMOVE)
12130 emit_insn (tmp);
12131 else
12133 gcc_assert (TARGET_SAHF);
12135 if (!scratch)
12136 scratch = gen_reg_rtx (HImode);
12137 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
12139 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
12142 /* The FP codes work out to act like unsigned. */
12143 intcmp_mode = fpcmp_mode;
12144 code = first_code;
12145 if (bypass_code != UNKNOWN)
12146 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
12147 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12148 const0_rtx);
12149 if (second_code != UNKNOWN)
12150 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
12151 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12152 const0_rtx);
12154 else
12156 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
12157 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12158 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
12159 if (!scratch)
12160 scratch = gen_reg_rtx (HImode);
12161 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
12163 /* In the unordered case, we have to check C2 for NaNs, which
12164 doesn't happen to work out to anything nice combination-wise.
12165 So do some bit twiddling on the value we've got in AH to come
12166 up with an appropriate set of condition codes. */
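/* After the fnstsw above, AH holds the FPU condition bits: C0 at bit 0
   (0x01), C2 at bit 2 (0x04) and C3 at bit 6 (0x40).  The masks 0x45,
   0x44, 0x40, 0x05, 0x04 and 0x01 used below simply select combinations
   of those bits.  */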
12168 intcmp_mode = CCNOmode;
12169 switch (code)
12171 case GT:
12172 case UNGT:
12173 if (code == GT || !TARGET_IEEE_FP)
12175 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12176 code = EQ;
12178 else
12180 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12181 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12182 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
12183 intcmp_mode = CCmode;
12184 code = GEU;
12186 break;
12187 case LT:
12188 case UNLT:
12189 if (code == LT && TARGET_IEEE_FP)
12191 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12192 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
12193 intcmp_mode = CCmode;
12194 code = EQ;
12196 else
12198 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
12199 code = NE;
12201 break;
12202 case GE:
12203 case UNGE:
12204 if (code == GE || !TARGET_IEEE_FP)
12206 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
12207 code = EQ;
12209 else
12211 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12212 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12213 GEN_INT (0x01)));
12214 code = NE;
12216 break;
12217 case LE:
12218 case UNLE:
12219 if (code == LE && TARGET_IEEE_FP)
12221 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12222 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12223 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12224 intcmp_mode = CCmode;
12225 code = LTU;
12227 else
12229 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12230 code = NE;
12232 break;
12233 case EQ:
12234 case UNEQ:
12235 if (code == EQ && TARGET_IEEE_FP)
12237 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12238 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12239 intcmp_mode = CCmode;
12240 code = EQ;
12242 else
12244 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12245 code = NE;
12246 break;
12248 break;
12249 case NE:
12250 case LTGT:
12251 if (code == NE && TARGET_IEEE_FP)
12253 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12254 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12255 GEN_INT (0x40)));
12256 code = NE;
12258 else
12260 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12261 code = EQ;
12263 break;
12265 case UNORDERED:
12266 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12267 code = NE;
12268 break;
12269 case ORDERED:
12270 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12271 code = EQ;
12272 break;
12274 default:
12275 gcc_unreachable ();
12279 /* Return the test that should be put into the flags user, i.e.
12280 the bcc, scc, or cmov instruction. */
12281 return gen_rtx_fmt_ee (code, VOIDmode,
12282 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12283 const0_rtx);
12287 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12289 rtx op0, op1, ret;
12290 op0 = ix86_compare_op0;
12291 op1 = ix86_compare_op1;
12293 if (second_test)
12294 *second_test = NULL_RTX;
12295 if (bypass_test)
12296 *bypass_test = NULL_RTX;
12298 if (ix86_compare_emitted)
12300 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12301 ix86_compare_emitted = NULL_RTX;
12303 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12305 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12306 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12307 second_test, bypass_test);
12309 else
12310 ret = ix86_expand_int_compare (code, op0, op1);
12312 return ret;
12315 /* Return true if the CODE will result in a nontrivial jump sequence. */
12316 bool
12317 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12319 enum rtx_code bypass_code, first_code, second_code;
12320 if (!TARGET_CMOVE)
12321 return true;
12322 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12323 return bypass_code != UNKNOWN || second_code != UNKNOWN;
12326 void
12327 ix86_expand_branch (enum rtx_code code, rtx label)
12329 rtx tmp;
12331 /* If we have emitted a compare insn, go straight to simple.
12332 ix86_expand_compare won't emit anything if ix86_compare_emitted
12333 is non-NULL. */
12334 if (ix86_compare_emitted)
12335 goto simple;
12337 switch (GET_MODE (ix86_compare_op0))
12339 case QImode:
12340 case HImode:
12341 case SImode:
12342 simple:
12343 tmp = ix86_expand_compare (code, NULL, NULL);
12344 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12345 gen_rtx_LABEL_REF (VOIDmode, label),
12346 pc_rtx);
12347 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12348 return;
12350 case SFmode:
12351 case DFmode:
12352 case XFmode:
12354 rtvec vec;
12355 int use_fcomi;
12356 enum rtx_code bypass_code, first_code, second_code;
12358 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12359 &ix86_compare_op1);
12361 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12363 /* Check whether we will use the natural sequence with one jump. If
12364 so, we can expand the jump early. Otherwise delay expansion by
12365 creating a compound insn so as not to confuse the optimizers. */
12366 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12368 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12369 gen_rtx_LABEL_REF (VOIDmode, label),
12370 pc_rtx, NULL_RTX, NULL_RTX);
12372 else
12374 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12375 ix86_compare_op0, ix86_compare_op1);
12376 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12377 gen_rtx_LABEL_REF (VOIDmode, label),
12378 pc_rtx);
12379 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12381 use_fcomi = ix86_use_fcomi_compare (code);
12382 vec = rtvec_alloc (3 + !use_fcomi);
12383 RTVEC_ELT (vec, 0) = tmp;
12384 RTVEC_ELT (vec, 1)
12385 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12386 RTVEC_ELT (vec, 2)
12387 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12388 if (! use_fcomi)
12389 RTVEC_ELT (vec, 3)
12390 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12392 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12394 return;
12397 case DImode:
12398 if (TARGET_64BIT)
12399 goto simple;
12400 case TImode:
12401 /* Expand DImode branch into multiple compare+branch. */
12403 rtx lo[2], hi[2], label2;
12404 enum rtx_code code1, code2, code3;
12405 enum machine_mode submode;
12407 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12409 tmp = ix86_compare_op0;
12410 ix86_compare_op0 = ix86_compare_op1;
12411 ix86_compare_op1 = tmp;
12412 code = swap_condition (code);
12414 if (GET_MODE (ix86_compare_op0) == DImode)
12416 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12417 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12418 submode = SImode;
12420 else
12422 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12423 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12424 submode = DImode;
12427 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12428 avoid two branches. This costs one extra insn, so disable when
12429 optimizing for size. */
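/* E.g. a DImode "a == b" on ia32 then becomes roughly (register names
   illustrative):
     xorl  b_hi, a_hi
     xorl  b_lo, a_lo
     orl   a_hi, a_lo
   followed by a single je/jne, instead of two compare-and-branch pairs.  */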
12431 if ((code == EQ || code == NE)
12432 && (!optimize_size
12433 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12435 rtx xor0, xor1;
12437 xor1 = hi[0];
12438 if (hi[1] != const0_rtx)
12439 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12440 NULL_RTX, 0, OPTAB_WIDEN);
12442 xor0 = lo[0];
12443 if (lo[1] != const0_rtx)
12444 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12445 NULL_RTX, 0, OPTAB_WIDEN);
12447 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12448 NULL_RTX, 0, OPTAB_WIDEN);
12450 ix86_compare_op0 = tmp;
12451 ix86_compare_op1 = const0_rtx;
12452 ix86_expand_branch (code, label);
12453 return;
12456 /* Otherwise, if we are doing a less-than or greater-or-equal-than
12457 comparison, op1 is a constant, and its low word is zero, then we can
12458 just examine the high word. Similarly for a low word of -1 and
12459 less-or-equal-than or greater-than. */
12461 if (CONST_INT_P (hi[1]))
12462 switch (code)
12464 case LT: case LTU: case GE: case GEU:
12465 if (lo[1] == const0_rtx)
12467 ix86_compare_op0 = hi[0];
12468 ix86_compare_op1 = hi[1];
12469 ix86_expand_branch (code, label);
12470 return;
12472 break;
12473 case LE: case LEU: case GT: case GTU:
12474 if (lo[1] == constm1_rtx)
12476 ix86_compare_op0 = hi[0];
12477 ix86_compare_op1 = hi[1];
12478 ix86_expand_branch (code, label);
12479 return;
12481 break;
12482 default:
12483 break;
12486 /* Otherwise, we need two or three jumps. */
12488 label2 = gen_label_rtx ();
12490 code1 = code;
12491 code2 = swap_condition (code);
12492 code3 = unsigned_condition (code);
12494 switch (code)
12496 case LT: case GT: case LTU: case GTU:
12497 break;
12499 case LE: code1 = LT; code2 = GT; break;
12500 case GE: code1 = GT; code2 = LT; break;
12501 case LEU: code1 = LTU; code2 = GTU; break;
12502 case GEU: code1 = GTU; code2 = LTU; break;
12504 case EQ: code1 = UNKNOWN; code2 = NE; break;
12505 case NE: code2 = UNKNOWN; break;
12507 default:
12508 gcc_unreachable ();
12512 * a < b =>
12513 * if (hi(a) < hi(b)) goto true;
12514 * if (hi(a) > hi(b)) goto false;
12515 * if (lo(a) < lo(b)) goto true;
12516 * false:
12519 ix86_compare_op0 = hi[0];
12520 ix86_compare_op1 = hi[1];
12522 if (code1 != UNKNOWN)
12523 ix86_expand_branch (code1, label);
12524 if (code2 != UNKNOWN)
12525 ix86_expand_branch (code2, label2);
12527 ix86_compare_op0 = lo[0];
12528 ix86_compare_op1 = lo[1];
12529 ix86_expand_branch (code3, label);
12531 if (code2 != UNKNOWN)
12532 emit_label (label2);
12533 return;
12536 default:
12537 gcc_unreachable ();
12541 /* Split branch based on floating point condition. */
12542 void
12543 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12544 rtx target1, rtx target2, rtx tmp, rtx pushed)
12546 rtx second, bypass;
12547 rtx label = NULL_RTX;
12548 rtx condition;
12549 int bypass_probability = -1, second_probability = -1, probability = -1;
12550 rtx i;
12552 if (target2 != pc_rtx)
12554 rtx tmp = target2;
12555 code = reverse_condition_maybe_unordered (code);
12556 target2 = target1;
12557 target1 = tmp;
12560 condition = ix86_expand_fp_compare (code, op1, op2,
12561 tmp, &second, &bypass);
12563 /* Remove pushed operand from stack. */
12564 if (pushed)
12565 ix86_free_from_memory (GET_MODE (pushed));
12567 if (split_branch_probability >= 0)
12569 /* Distribute the probabilities across the jumps.
12570 Assume that the BYPASS and SECOND branches always test
12571 for UNORDERED. */
12572 probability = split_branch_probability;
12574 /* A value of 1 is low enough that there is no need to update the
12575 probability. Later we may run some experiments and see
12576 if unordered values are more frequent in practice. */
12577 if (bypass)
12578 bypass_probability = 1;
12579 if (second)
12580 second_probability = 1;
12582 if (bypass != NULL_RTX)
12584 label = gen_label_rtx ();
12585 i = emit_jump_insn (gen_rtx_SET
12586 (VOIDmode, pc_rtx,
12587 gen_rtx_IF_THEN_ELSE (VOIDmode,
12588 bypass,
12589 gen_rtx_LABEL_REF (VOIDmode,
12590 label),
12591 pc_rtx)));
12592 if (bypass_probability >= 0)
12593 REG_NOTES (i)
12594 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12595 GEN_INT (bypass_probability),
12596 REG_NOTES (i));
12598 i = emit_jump_insn (gen_rtx_SET
12599 (VOIDmode, pc_rtx,
12600 gen_rtx_IF_THEN_ELSE (VOIDmode,
12601 condition, target1, target2)));
12602 if (probability >= 0)
12603 REG_NOTES (i)
12604 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12605 GEN_INT (probability),
12606 REG_NOTES (i));
12607 if (second != NULL_RTX)
12609 i = emit_jump_insn (gen_rtx_SET
12610 (VOIDmode, pc_rtx,
12611 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12612 target2)));
12613 if (second_probability >= 0)
12614 REG_NOTES (i)
12615 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12616 GEN_INT (second_probability),
12617 REG_NOTES (i));
12619 if (label != NULL_RTX)
12620 emit_label (label);
12624 ix86_expand_setcc (enum rtx_code code, rtx dest)
12626 rtx ret, tmp, tmpreg, equiv;
12627 rtx second_test, bypass_test;
12629 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12630 return 0; /* FAIL */
12632 gcc_assert (GET_MODE (dest) == QImode);
12634 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12635 PUT_MODE (ret, QImode);
12637 tmp = dest;
12638 tmpreg = dest;
12640 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12641 if (bypass_test || second_test)
12643 rtx test = second_test;
12644 int bypass = 0;
12645 rtx tmp2 = gen_reg_rtx (QImode);
12646 if (bypass_test)
12648 gcc_assert (!second_test);
12649 test = bypass_test;
12650 bypass = 1;
12651 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12653 PUT_MODE (test, QImode);
12654 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12656 if (bypass)
12657 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12658 else
12659 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12662 /* Attach a REG_EQUAL note describing the comparison result. */
12663 if (ix86_compare_op0 && ix86_compare_op1)
12665 equiv = simplify_gen_relational (code, QImode,
12666 GET_MODE (ix86_compare_op0),
12667 ix86_compare_op0, ix86_compare_op1);
12668 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12671 return 1; /* DONE */
12674 /* Expand a comparison setting or clearing the carry flag. Return true
12675 when successful and set *POP to the comparison for the operation. */
12676 static bool
12677 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12679 enum machine_mode mode =
12680 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12682 /* Do not handle double-word (DImode/TImode) compares, which go through a special path. */
12683 if (mode == (TARGET_64BIT ? TImode : DImode))
12684 return false;
12686 if (SCALAR_FLOAT_MODE_P (mode))
12688 rtx second_test = NULL, bypass_test = NULL;
12689 rtx compare_op, compare_seq;
12691 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12693 /* Shortcut: the following common codes never translate
12694 into carry flag compares. */
12695 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12696 || code == ORDERED || code == UNORDERED)
12697 return false;
12699 /* These comparisons require the zero flag; swap the operands so that they no longer do. */
12700 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12701 && !TARGET_IEEE_FP)
12703 rtx tmp = op0;
12704 op0 = op1;
12705 op1 = tmp;
12706 code = swap_condition (code);
12709 /* Try to expand the comparison and verify that we end up with a
12710 carry flag based comparison. This fails only when we decide to
12711 expand the comparison using arithmetic, which is not a very
12712 common scenario. */
12713 start_sequence ();
12714 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12715 &second_test, &bypass_test);
12716 compare_seq = get_insns ();
12717 end_sequence ();
12719 if (second_test || bypass_test)
12720 return false;
12722 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12723 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12724 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12725 else
12726 code = GET_CODE (compare_op);
12728 if (code != LTU && code != GEU)
12729 return false;
12731 emit_insn (compare_seq);
12732 *pop = compare_op;
12733 return true;
12736 if (!INTEGRAL_MODE_P (mode))
12737 return false;
12739 switch (code)
12741 case LTU:
12742 case GEU:
12743 break;
12745 /* Convert a==0 into (unsigned)a<1. */
12746 case EQ:
12747 case NE:
12748 if (op1 != const0_rtx)
12749 return false;
12750 op1 = const1_rtx;
12751 code = (code == EQ ? LTU : GEU);
12752 break;
12754 /* Convert a>b into b<a or a>=b+1. */
12755 case GTU:
12756 case LEU:
12757 if (CONST_INT_P (op1))
12759 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12760 /* Bail out on overflow. We could still swap the operands, but that
12761 would force loading the constant into a register. */
12762 if (op1 == const0_rtx
12763 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12764 return false;
12765 code = (code == GTU ? GEU : LTU);
12767 else
12769 rtx tmp = op1;
12770 op1 = op0;
12771 op0 = tmp;
12772 code = (code == GTU ? LTU : GEU);
12774 break;
12776 /* Convert a>=0 into (unsigned)a<0x80000000. */
12777 case LT:
12778 case GE:
12779 if (mode == DImode || op1 != const0_rtx)
12780 return false;
12781 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12782 code = (code == LT ? GEU : LTU);
12783 break;
12784 case LE:
12785 case GT:
12786 if (mode == DImode || op1 != constm1_rtx)
12787 return false;
12788 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12789 code = (code == LE ? GEU : LTU);
12790 break;
12792 default:
12793 return false;
12795 /* Swapping operands may cause the constant to appear as the first operand. */
12796 if (!nonimmediate_operand (op0, VOIDmode))
12798 if (!can_create_pseudo_p ())
12799 return false;
12800 op0 = force_reg (mode, op0);
12802 ix86_compare_op0 = op0;
12803 ix86_compare_op1 = op1;
12804 *pop = ix86_expand_compare (code, NULL, NULL);
12805 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12806 return true;
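/* Example of the integer path above: "a == 0" is rewritten as
   (unsigned) a < 1, and "cmp $1, a" then leaves the answer in the carry
   flag alone, which the movcc/sbb code below can consume without any
   branch.  */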
12810 ix86_expand_int_movcc (rtx operands[])
12812 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12813 rtx compare_seq, compare_op;
12814 rtx second_test, bypass_test;
12815 enum machine_mode mode = GET_MODE (operands[0]);
12816 bool sign_bit_compare_p = false;
12818 start_sequence ();
12819 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12820 compare_seq = get_insns ();
12821 end_sequence ();
12823 compare_code = GET_CODE (compare_op);
12825 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12826 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12827 sign_bit_compare_p = true;
12829 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12830 HImode insns, we'd be swallowed in word prefix ops. */
12832 if ((mode != HImode || TARGET_FAST_PREFIX)
12833 && (mode != (TARGET_64BIT ? TImode : DImode))
12834 && CONST_INT_P (operands[2])
12835 && CONST_INT_P (operands[3]))
12837 rtx out = operands[0];
12838 HOST_WIDE_INT ct = INTVAL (operands[2]);
12839 HOST_WIDE_INT cf = INTVAL (operands[3]);
12840 HOST_WIDE_INT diff;
12842 diff = ct - cf;
12843 /* Sign bit compares are better done using shifts than by using
12844 sbb. */
12845 if (sign_bit_compare_p
12846 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12847 ix86_compare_op1, &compare_op))
12849 /* Detect overlap between destination and compare sources. */
12850 rtx tmp = out;
12852 if (!sign_bit_compare_p)
12854 bool fpcmp = false;
12856 compare_code = GET_CODE (compare_op);
12858 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12859 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12861 fpcmp = true;
12862 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12865 /* To simplify the rest of the code, restrict to the GEU case. */
12866 if (compare_code == LTU)
12868 HOST_WIDE_INT tmp = ct;
12869 ct = cf;
12870 cf = tmp;
12871 compare_code = reverse_condition (compare_code);
12872 code = reverse_condition (code);
12874 else
12876 if (fpcmp)
12877 PUT_CODE (compare_op,
12878 reverse_condition_maybe_unordered
12879 (GET_CODE (compare_op)));
12880 else
12881 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12883 diff = ct - cf;
12885 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12886 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12887 tmp = gen_reg_rtx (mode);
12889 if (mode == DImode)
12890 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12891 else
12892 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12894 else
12896 if (code == GT || code == GE)
12897 code = reverse_condition (code);
12898 else
12900 HOST_WIDE_INT tmp = ct;
12901 ct = cf;
12902 cf = tmp;
12903 diff = ct - cf;
12905 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12906 ix86_compare_op1, VOIDmode, 0, -1);
12909 if (diff == 1)
12912 * cmpl op0,op1
12913 * sbbl dest,dest
12914 * [addl dest, ct]
12916 * Size 5 - 8.
12918 if (ct)
12919 tmp = expand_simple_binop (mode, PLUS,
12920 tmp, GEN_INT (ct),
12921 copy_rtx (tmp), 1, OPTAB_DIRECT);
12923 else if (cf == -1)
12926 * cmpl op0,op1
12927 * sbbl dest,dest
12928 * orl $ct, dest
12930 * Size 8.
12932 tmp = expand_simple_binop (mode, IOR,
12933 tmp, GEN_INT (ct),
12934 copy_rtx (tmp), 1, OPTAB_DIRECT);
12936 else if (diff == -1 && ct)
12939 * cmpl op0,op1
12940 * sbbl dest,dest
12941 * notl dest
12942 * [addl dest, cf]
12944 * Size 8 - 11.
12946 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12947 if (cf)
12948 tmp = expand_simple_binop (mode, PLUS,
12949 copy_rtx (tmp), GEN_INT (cf),
12950 copy_rtx (tmp), 1, OPTAB_DIRECT);
12952 else
12955 * cmpl op0,op1
12956 * sbbl dest,dest
12957 * [notl dest]
12958 * andl cf - ct, dest
12959 * [addl dest, ct]
12961 * Size 8 - 11.
12964 if (cf == 0)
12966 cf = ct;
12967 ct = 0;
12968 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12971 tmp = expand_simple_binop (mode, AND,
12972 copy_rtx (tmp),
12973 gen_int_mode (cf - ct, mode),
12974 copy_rtx (tmp), 1, OPTAB_DIRECT);
12975 if (ct)
12976 tmp = expand_simple_binop (mode, PLUS,
12977 copy_rtx (tmp), GEN_INT (ct),
12978 copy_rtx (tmp), 1, OPTAB_DIRECT);
12981 if (!rtx_equal_p (tmp, out))
12982 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12984 return 1; /* DONE */
12987 if (diff < 0)
12989 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12991 HOST_WIDE_INT tmp;
12992 tmp = ct, ct = cf, cf = tmp;
12993 diff = -diff;
12995 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12997 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12999 /* We may be reversing an unordered compare to a normal compare, which
13000 is not valid in general (we may convert a non-trapping condition
13001 to a trapping one); however, on i386 we currently emit all
13002 comparisons unordered. */
13003 compare_code = reverse_condition_maybe_unordered (compare_code);
13004 code = reverse_condition_maybe_unordered (code);
13006 else
13008 compare_code = reverse_condition (compare_code);
13009 code = reverse_condition (code);
13013 compare_code = UNKNOWN;
13014 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
13015 && CONST_INT_P (ix86_compare_op1))
13017 if (ix86_compare_op1 == const0_rtx
13018 && (code == LT || code == GE))
13019 compare_code = code;
13020 else if (ix86_compare_op1 == constm1_rtx)
13022 if (code == LE)
13023 compare_code = LT;
13024 else if (code == GT)
13025 compare_code = GE;
13029 /* Optimize dest = (op0 < 0) ? -1 : cf. */
13030 if (compare_code != UNKNOWN
13031 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
13032 && (cf == -1 || ct == -1))
13034 /* If the lea code below could be used, only optimize
13035 if it results in a 2-insn sequence. */
13037 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
13038 || diff == 3 || diff == 5 || diff == 9)
13039 || (compare_code == LT && ct == -1)
13040 || (compare_code == GE && cf == -1))
13043 * notl op1 (if necessary)
13044 * sarl $31, op1
13045 * orl cf, op1
13047 if (ct != -1)
13049 cf = ct;
13050 ct = -1;
13051 code = reverse_condition (code);
13054 out = emit_store_flag (out, code, ix86_compare_op0,
13055 ix86_compare_op1, VOIDmode, 0, -1);
13057 out = expand_simple_binop (mode, IOR,
13058 out, GEN_INT (cf),
13059 out, 1, OPTAB_DIRECT);
13060 if (out != operands[0])
13061 emit_move_insn (operands[0], out);
13063 return 1; /* DONE */
13068 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
13069 || diff == 3 || diff == 5 || diff == 9)
13070 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
13071 && (mode != DImode
13072 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
13075 * xorl dest,dest
13076 * cmpl op1,op2
13077 * setcc dest
13078 * lea cf(dest*(ct-cf)),dest
13080 * Size 14.
13082 * This also catches the degenerate setcc-only case.
13085 rtx tmp;
13086 int nops;
13088 out = emit_store_flag (out, code, ix86_compare_op0,
13089 ix86_compare_op1, VOIDmode, 0, 1);
13091 nops = 0;
13092 /* On x86_64 the lea instruction operates on Pmode, so we need
13093 to do the arithmetic in the proper mode to match. */
13094 if (diff == 1)
13095 tmp = copy_rtx (out);
13096 else
13098 rtx out1;
13099 out1 = copy_rtx (out);
13100 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
13101 nops++;
13102 if (diff & 1)
13104 tmp = gen_rtx_PLUS (mode, tmp, out1);
13105 nops++;
13108 if (cf != 0)
13110 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
13111 nops++;
13113 if (!rtx_equal_p (tmp, out))
13115 if (nops == 1)
13116 out = force_operand (tmp, copy_rtx (out));
13117 else
13118 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
13120 if (!rtx_equal_p (out, operands[0]))
13121 emit_move_insn (operands[0], copy_rtx (out));
13123 return 1; /* DONE */
13127 * General case:              Jumpful:
13128 *   xorl dest,dest              cmpl op1, op2
13129 *   cmpl op1, op2               movl ct, dest
13130 *   setcc dest                  jcc 1f
13131 *   decl dest                   movl cf, dest
13132 *   andl (cf-ct),dest         1:
13133 *   addl ct,dest
13135 * Size 20.                    Size 14.
13137 * This is reasonably steep, but branch mispredict costs are
13138 * high on modern cpus, so consider failing only if optimizing
13139 * for space.
13142 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13143 && BRANCH_COST >= 2)
13145 if (cf == 0)
13147 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13149 cf = ct;
13150 ct = 0;
13152 if (SCALAR_FLOAT_MODE_P (cmp_mode))
13154 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13156 /* We may be reversing an unordered compare to a normal compare,
13157 which is not valid in general (we may convert a non-trapping
13158 condition to a trapping one); however, on i386 we currently
13159 emit all comparisons unordered. */
13160 code = reverse_condition_maybe_unordered (code);
13162 else
13164 code = reverse_condition (code);
13165 if (compare_code != UNKNOWN)
13166 compare_code = reverse_condition (compare_code);
13170 if (compare_code != UNKNOWN)
13172 /* notl op1 (if needed)
13173 sarl $31, op1
13174 andl (cf-ct), op1
13175 addl ct, op1
13177 For x < 0 (resp. x <= -1) there will be no notl,
13178 so if possible swap the constants to get rid of the
13179 complement.
13180 True/false will be -1/0 while code below (store flag
13181 followed by decrement) is 0/-1, so the constants need
13182 to be exchanged once more. */
13184 if (compare_code == GE || !cf)
13186 code = reverse_condition (code);
13187 compare_code = LT;
13189 else
13191 HOST_WIDE_INT tmp = cf;
13192 cf = ct;
13193 ct = tmp;
13196 out = emit_store_flag (out, code, ix86_compare_op0,
13197 ix86_compare_op1, VOIDmode, 0, -1);
13199 else
13201 out = emit_store_flag (out, code, ix86_compare_op0,
13202 ix86_compare_op1, VOIDmode, 0, 1);
13204 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13205 copy_rtx (out), 1, OPTAB_DIRECT);
13208 out = expand_simple_binop (mode, AND, copy_rtx (out),
13209 gen_int_mode (cf - ct, mode),
13210 copy_rtx (out), 1, OPTAB_DIRECT);
13211 if (ct)
13212 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13213 copy_rtx (out), 1, OPTAB_DIRECT);
13214 if (!rtx_equal_p (out, operands[0]))
13215 emit_move_insn (operands[0], copy_rtx (out));
13217 return 1; /* DONE */
13221 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13223 /* Try a few things more with specific constants and a variable. */
13225 optab op;
13226 rtx var, orig_out, out, tmp;
13228 if (BRANCH_COST <= 2)
13229 return 0; /* FAIL */
13231 /* If one of the two operands is 0 or -1, recursively expand the conditional
13232 move to produce a 0/-1 mask, then combine the variable operand with it using AND or IOR. */
13234 if (CONST_INT_P (operands[2]))
13236 var = operands[3];
13237 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13238 operands[3] = constm1_rtx, op = and_optab;
13239 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13240 operands[3] = const0_rtx, op = ior_optab;
13241 else
13242 return 0; /* FAIL */
13244 else if (CONST_INT_P (operands[3]))
13246 var = operands[2];
13247 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13248 operands[2] = constm1_rtx, op = and_optab;
13249 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
13250 operands[2] = const0_rtx, op = ior_optab;
13251 else
13252 return 0; /* FAIL */
13254 else
13255 return 0; /* FAIL */
13257 orig_out = operands[0];
13258 tmp = gen_reg_rtx (mode);
13259 operands[0] = tmp;
13261 /* Recurse to get the constant loaded. */
13262 if (ix86_expand_int_movcc (operands) == 0)
13263 return 0; /* FAIL */
13265 /* Mask in the interesting variable. */
13266 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13267 OPTAB_WIDEN);
13268 if (!rtx_equal_p (out, orig_out))
13269 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13271 return 1; /* DONE */
13275 * For comparison with above,
13277 * movl cf,dest
13278 * movl ct,tmp
13279 * cmpl op1,op2
13280 * cmovcc tmp,dest
13282 * Size 15.
13285 if (! nonimmediate_operand (operands[2], mode))
13286 operands[2] = force_reg (mode, operands[2]);
13287 if (! nonimmediate_operand (operands[3], mode))
13288 operands[3] = force_reg (mode, operands[3]);
13290 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13292 rtx tmp = gen_reg_rtx (mode);
13293 emit_move_insn (tmp, operands[3]);
13294 operands[3] = tmp;
13296 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13298 rtx tmp = gen_reg_rtx (mode);
13299 emit_move_insn (tmp, operands[2]);
13300 operands[2] = tmp;
13303 if (! register_operand (operands[2], VOIDmode)
13304 && (mode == QImode
13305 || ! register_operand (operands[3], VOIDmode)))
13306 operands[2] = force_reg (mode, operands[2]);
13308 if (mode == QImode
13309 && ! register_operand (operands[3], VOIDmode))
13310 operands[3] = force_reg (mode, operands[3]);
13312 emit_insn (compare_seq);
13313 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13314 gen_rtx_IF_THEN_ELSE (mode,
13315 compare_op, operands[2],
13316 operands[3])));
13317 if (bypass_test)
13318 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13319 gen_rtx_IF_THEN_ELSE (mode,
13320 bypass_test,
13321 copy_rtx (operands[3]),
13322 copy_rtx (operands[0]))));
13323 if (second_test)
13324 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13325 gen_rtx_IF_THEN_ELSE (mode,
13326 second_test,
13327 copy_rtx (operands[2]),
13328 copy_rtx (operands[0]))));
13330 return 1; /* DONE */
13333 /* Swap, force into registers, or otherwise massage the two operands
13334 to an sse comparison with a mask result. Thus we differ a bit from
13335 ix86_prepare_fp_compare_args which expects to produce a flags result.
13337 The DEST operand exists to help determine whether to commute commutative
13338 operators. The POP0/POP1 operands are updated in place. The new
13339 comparison code is returned, or UNKNOWN if not implementable. */
13341 static enum rtx_code
13342 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13343 rtx *pop0, rtx *pop1)
13345 rtx tmp;
13347 switch (code)
13349 case LTGT:
13350 case UNEQ:
13351 /* We have no LTGT as an operator. We could implement it with
13352 NE & ORDERED, but this requires an extra temporary. It's
13353 not clear that it's worth it. */
13354 return UNKNOWN;
13356 case LT:
13357 case LE:
13358 case UNGT:
13359 case UNGE:
13360 /* These are supported directly. */
13361 break;
13363 case EQ:
13364 case NE:
13365 case UNORDERED:
13366 case ORDERED:
13367 /* For commutative operators, try to canonicalize the destination
13368 operand to be first in the comparison - this helps reload to
13369 avoid extra moves. */
13370 if (!dest || !rtx_equal_p (dest, *pop1))
13371 break;
13372 /* FALLTHRU */
13374 case GE:
13375 case GT:
13376 case UNLE:
13377 case UNLT:
13378 /* These are not supported directly. Swap the comparison operands
13379 to transform into something that is supported. */
13380 tmp = *pop0;
13381 *pop0 = *pop1;
13382 *pop1 = tmp;
13383 code = swap_condition (code);
13384 break;
13386 default:
13387 gcc_unreachable ();
13390 return code;
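/* For instance GT has no direct SSE compare encoding, so "a > b" comes
   back as "b < a" (code LT with *POP0 and *POP1 swapped), which maps to
   cmpltss/cmpltps; the commutative EQ/NE/ORDERED/UNORDERED cases are only
   swapped when that lets DEST match the first operand.  */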
13393 /* Detect conditional moves that exactly match min/max operational
13394 semantics. Note that this is IEEE safe, as long as we don't
13395 interchange the operands.
13397 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13398 and TRUE if the operation is successful and instructions are emitted. */
13400 static bool
13401 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13402 rtx cmp_op1, rtx if_true, rtx if_false)
13404 enum machine_mode mode;
13405 bool is_min;
13406 rtx tmp;
13408 if (code == LT)
13410 else if (code == UNGE)
13412 tmp = if_true;
13413 if_true = if_false;
13414 if_false = tmp;
13416 else
13417 return false;
13419 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13420 is_min = true;
13421 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13422 is_min = false;
13423 else
13424 return false;
13426 mode = GET_MODE (dest);
13428 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13429 but MODE may be a vector mode and thus not appropriate. */
13430 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13432 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13433 rtvec v;
13435 if_true = force_reg (mode, if_true);
13436 v = gen_rtvec (2, if_true, if_false);
13437 tmp = gen_rtx_UNSPEC (mode, v, u);
13439 else
13441 code = is_min ? SMIN : SMAX;
13442 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13445 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13446 return true;
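/* E.g. "a < b ? a : b" is recognized here as a minimum.  Unless both
   -ffinite-math-only and -funsafe-math-optimizations are enabled, the
   UNSPEC_IEEE_MIN/MAX form is used so that later passes cannot commute
   the operands, which would change the result for NaN and signed-zero
   inputs.  */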
13449 /* Expand an sse vector comparison. Return the register with the result. */
13451 static rtx
13452 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13453 rtx op_true, rtx op_false)
13455 enum machine_mode mode = GET_MODE (dest);
13456 rtx x;
13458 cmp_op0 = force_reg (mode, cmp_op0);
13459 if (!nonimmediate_operand (cmp_op1, mode))
13460 cmp_op1 = force_reg (mode, cmp_op1);
13462 if (optimize
13463 || reg_overlap_mentioned_p (dest, op_true)
13464 || reg_overlap_mentioned_p (dest, op_false))
13465 dest = gen_reg_rtx (mode);
13467 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13468 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13470 return dest;
13473 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13474 operations. This is used for both scalar and vector conditional moves. */
13476 static void
13477 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13479 enum machine_mode mode = GET_MODE (dest);
13480 rtx t2, t3, x;
13482 if (op_false == CONST0_RTX (mode))
13484 op_true = force_reg (mode, op_true);
13485 x = gen_rtx_AND (mode, cmp, op_true);
13486 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13488 else if (op_true == CONST0_RTX (mode))
13490 op_false = force_reg (mode, op_false);
13491 x = gen_rtx_NOT (mode, cmp);
13492 x = gen_rtx_AND (mode, x, op_false);
13493 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13495 else if (TARGET_SSE5)
13497 rtx pcmov = gen_rtx_SET (mode, dest,
13498 gen_rtx_IF_THEN_ELSE (mode, cmp,
13499 op_true,
13500 op_false));
13501 emit_insn (pcmov);
13503 else
13505 op_true = force_reg (mode, op_true);
13506 op_false = force_reg (mode, op_false);
13508 t2 = gen_reg_rtx (mode);
13509 if (optimize)
13510 t3 = gen_reg_rtx (mode);
13511 else
13512 t3 = dest;
13514 x = gen_rtx_AND (mode, op_true, cmp);
13515 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13517 x = gen_rtx_NOT (mode, cmp);
13518 x = gen_rtx_AND (mode, x, op_false);
13519 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13521 x = gen_rtx_IOR (mode, t3, t2);
13522 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
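/* Without SSE5's pcmov, the general case above is the classic mask merge
     t2   = op_true  & cmp
     t3   = op_false & ~cmp
     dest = t3 | t2
   i.e. roughly an andps, andnps and orps on the comparison mask.  */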
13526 /* Expand a floating-point conditional move. Return true if successful. */
13529 ix86_expand_fp_movcc (rtx operands[])
13531 enum machine_mode mode = GET_MODE (operands[0]);
13532 enum rtx_code code = GET_CODE (operands[1]);
13533 rtx tmp, compare_op, second_test, bypass_test;
13535 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13537 enum machine_mode cmode;
13539 /* Since we have no cmove for SSE registers, don't force bad register
13540 allocation just to gain access to it. Deny movcc when the
13541 comparison mode doesn't match the move mode. */
13542 cmode = GET_MODE (ix86_compare_op0);
13543 if (cmode == VOIDmode)
13544 cmode = GET_MODE (ix86_compare_op1);
13545 if (cmode != mode)
13546 return 0;
13548 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13549 &ix86_compare_op0,
13550 &ix86_compare_op1);
13551 if (code == UNKNOWN)
13552 return 0;
13554 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13555 ix86_compare_op1, operands[2],
13556 operands[3]))
13557 return 1;
13559 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13560 ix86_compare_op1, operands[2], operands[3]);
13561 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13562 return 1;
13565 /* The floating point conditional move instructions don't directly
13566 support conditions resulting from a signed integer comparison. */
13568 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13573 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13575 gcc_assert (!second_test && !bypass_test);
13576 tmp = gen_reg_rtx (QImode);
13577 ix86_expand_setcc (code, tmp);
13578 code = NE;
13579 ix86_compare_op0 = tmp;
13580 ix86_compare_op1 = const0_rtx;
13581 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13583 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13585 tmp = gen_reg_rtx (mode);
13586 emit_move_insn (tmp, operands[3]);
13587 operands[3] = tmp;
13589 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13591 tmp = gen_reg_rtx (mode);
13592 emit_move_insn (tmp, operands[2]);
13593 operands[2] = tmp;
13596 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13597 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13598 operands[2], operands[3])));
13599 if (bypass_test)
13600 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13601 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13602 operands[3], operands[0])));
13603 if (second_test)
13604 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13605 gen_rtx_IF_THEN_ELSE (mode, second_test,
13606 operands[2], operands[0])));
13608 return 1;
13611 /* Expand a floating-point vector conditional move; a vcond operation
13612 rather than a movcc operation. */
13614 bool
13615 ix86_expand_fp_vcond (rtx operands[])
13617 enum rtx_code code = GET_CODE (operands[3]);
13618 rtx cmp;
13620 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13621 &operands[4], &operands[5]);
13622 if (code == UNKNOWN)
13623 return false;
13625 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13626 operands[5], operands[1], operands[2]))
13627 return true;
13629 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13630 operands[1], operands[2]);
13631 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13632 return true;
13635 /* Expand a signed/unsigned integral vector conditional move. */
13637 bool
13638 ix86_expand_int_vcond (rtx operands[])
13640 enum machine_mode mode = GET_MODE (operands[0]);
13641 enum rtx_code code = GET_CODE (operands[3]);
13642 bool negate = false;
13643 rtx x, cop0, cop1;
13645 cop0 = operands[4];
13646 cop1 = operands[5];
13648 /* SSE5 supports all of the comparisons on all vector int types. */
13649 if (!TARGET_SSE5)
13651 /* Canonicalize the comparison to EQ, GT, GTU. */
13652 switch (code)
13654 case EQ:
13655 case GT:
13656 case GTU:
13657 break;
13659 case NE:
13660 case LE:
13661 case LEU:
13662 code = reverse_condition (code);
13663 negate = true;
13664 break;
13666 case GE:
13667 case GEU:
13668 code = reverse_condition (code);
13669 negate = true;
13670 /* FALLTHRU */
13672 case LT:
13673 case LTU:
13674 code = swap_condition (code);
13675 x = cop0, cop0 = cop1, cop1 = x;
13676 break;
13678 default:
13679 gcc_unreachable ();
13682 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13683 if (mode == V2DImode)
13685 switch (code)
13687 case EQ:
13688 /* SSE4.1 supports EQ. */
13689 if (!TARGET_SSE4_1)
13690 return false;
13691 break;
13693 case GT:
13694 case GTU:
13695 /* SSE4.2 supports GT/GTU. */
13696 if (!TARGET_SSE4_2)
13697 return false;
13698 break;
13700 default:
13701 gcc_unreachable ();
13705 /* Unsigned parallel compare is not supported by the hardware. Play some
13706 tricks to turn this into a signed comparison against 0. */
13707 if (code == GTU)
13709 cop0 = force_reg (mode, cop0);
13711 switch (mode)
13713 case V4SImode:
13714 case V2DImode:
13716 rtx t1, t2, mask;
13718 /* Perform a parallel modulo subtraction. */
13719 t1 = gen_reg_rtx (mode);
13720 emit_insn ((mode == V4SImode
13721 ? gen_subv4si3
13722 : gen_subv2di3) (t1, cop0, cop1));
13724 /* Extract the original sign bit of op0. */
13725 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13726 true, false);
13727 t2 = gen_reg_rtx (mode);
13728 emit_insn ((mode == V4SImode
13729 ? gen_andv4si3
13730 : gen_andv2di3) (t2, cop0, mask));
13732 /* XOR it back into the result of the subtraction. This results
13733 in the sign bit set iff we saw unsigned underflow. */
13734 x = gen_reg_rtx (mode);
13735 emit_insn ((mode == V4SImode
13736 ? gen_xorv4si3
13737 : gen_xorv2di3) (x, t1, t2));
13739 code = GT;
13741 break;
13743 case V16QImode:
13744 case V8HImode:
13745 /* Perform a parallel unsigned saturating subtraction. */
13746 x = gen_reg_rtx (mode);
13747 emit_insn (gen_rtx_SET (VOIDmode, x,
13748 gen_rtx_US_MINUS (mode, cop0, cop1)));
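/* The saturating difference is zero exactly when cop0 <=u cop1, so testing
it for equality with zero and flipping NEGATE yields the desired GTU result. */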
13750 code = EQ;
13751 negate = !negate;
13752 break;
13754 default:
13755 gcc_unreachable ();
13758 cop0 = x;
13759 cop1 = CONST0_RTX (mode);
13763 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13764 operands[1+negate], operands[2-negate]);
13766 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13767 operands[2-negate]);
13768 return true;
13771 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13772 true if we should do zero extension, else sign extension. HIGH_P is
13773 true if we want the N/2 high elements, else the low elements. */
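/* (Implementation note: the extension is done by interleaving OP[1] with a
second vector SE holding the bits to shift in - all zeros for zero extension,
or a per-element sign mask (0 > OP[1], a signed compare) for sign extension.) */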
13775 void
13776 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13778 enum machine_mode imode = GET_MODE (operands[1]);
13779 rtx (*unpack)(rtx, rtx, rtx);
13780 rtx se, dest;
13782 switch (imode)
13784 case V16QImode:
13785 if (high_p)
13786 unpack = gen_vec_interleave_highv16qi;
13787 else
13788 unpack = gen_vec_interleave_lowv16qi;
13789 break;
13790 case V8HImode:
13791 if (high_p)
13792 unpack = gen_vec_interleave_highv8hi;
13793 else
13794 unpack = gen_vec_interleave_lowv8hi;
13795 break;
13796 case V4SImode:
13797 if (high_p)
13798 unpack = gen_vec_interleave_highv4si;
13799 else
13800 unpack = gen_vec_interleave_lowv4si;
13801 break;
13802 default:
13803 gcc_unreachable ();
13806 dest = gen_lowpart (imode, operands[0]);
13808 if (unsigned_p)
13809 se = force_reg (imode, CONST0_RTX (imode));
13810 else
13811 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13812 operands[1], pc_rtx, pc_rtx);
13814 emit_insn (unpack (dest, operands[1], se));
13817 /* This function performs the same task as ix86_expand_sse_unpack,
13818 but with SSE4.1 instructions. */
13820 void
13821 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13823 enum machine_mode imode = GET_MODE (operands[1]);
13824 rtx (*unpack)(rtx, rtx);
13825 rtx src, dest;
13827 switch (imode)
13829 case V16QImode:
13830 if (unsigned_p)
13831 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13832 else
13833 unpack = gen_sse4_1_extendv8qiv8hi2;
13834 break;
13835 case V8HImode:
13836 if (unsigned_p)
13837 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13838 else
13839 unpack = gen_sse4_1_extendv4hiv4si2;
13840 break;
13841 case V4SImode:
13842 if (unsigned_p)
13843 unpack = gen_sse4_1_zero_extendv2siv2di2;
13844 else
13845 unpack = gen_sse4_1_extendv2siv2di2;
13846 break;
13847 default:
13848 gcc_unreachable ();
13851 dest = operands[0];
13852 if (high_p)
13854 /* Shift higher 8 bytes to lower 8 bytes. */
13855 src = gen_reg_rtx (imode);
13856 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13857 gen_lowpart (TImode, operands[1]),
13858 GEN_INT (64)));
13860 else
13861 src = operands[1];
13863 emit_insn (unpack (dest, src));
13866 /* This function performs the same task as ix86_expand_sse_unpack,
13867 but with SSE5 instructions. */
13869 void
13870 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13872 enum machine_mode imode = GET_MODE (operands[1]);
13873 int pperm_bytes[16];
13874 int i;
13875 int h = (high_p) ? 8 : 0;
13876 int h2;
13877 int sign_extend;
13878 rtvec v = rtvec_alloc (16);
13879 rtvec vs;
13880 rtx x, p;
13881 rtx op0 = operands[0], op1 = operands[1];
13883 switch (imode)
13885 case V16QImode:
13886 vs = rtvec_alloc (8);
13887 h2 = (high_p) ? 8 : 0;
13888 for (i = 0; i < 8; i++)
13890 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13891 pperm_bytes[2*i+1] = ((unsigned_p)
13892 ? PPERM_ZERO
13893 : PPERM_SIGN | PPERM_SRC2 | i | h);
13896 for (i = 0; i < 16; i++)
13897 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13899 for (i = 0; i < 8; i++)
13900 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13902 p = gen_rtx_PARALLEL (VOIDmode, vs);
13903 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13904 if (unsigned_p)
13905 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13906 else
13907 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13908 break;
13910 case V8HImode:
13911 vs = rtvec_alloc (4);
13912 h2 = (high_p) ? 4 : 0;
13913 for (i = 0; i < 4; i++)
13915 sign_extend = ((unsigned_p)
13916 ? PPERM_ZERO
13917 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13918 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13919 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13920 pperm_bytes[4*i+2] = sign_extend;
13921 pperm_bytes[4*i+3] = sign_extend;
13924 for (i = 0; i < 16; i++)
13925 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13927 for (i = 0; i < 4; i++)
13928 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13930 p = gen_rtx_PARALLEL (VOIDmode, vs);
13931 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13932 if (unsigned_p)
13933 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13934 else
13935 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13936 break;
13938 case V4SImode:
13939 vs = rtvec_alloc (2);
13940 h2 = (high_p) ? 2 : 0;
13941 for (i = 0; i < 2; i++)
13943 sign_extend = ((unsigned_p)
13944 ? PPERM_ZERO
13945 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13946 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13947 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13948 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13949 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13950 pperm_bytes[8*i+4] = sign_extend;
13951 pperm_bytes[8*i+5] = sign_extend;
13952 pperm_bytes[8*i+6] = sign_extend;
13953 pperm_bytes[8*i+7] = sign_extend;
13956 for (i = 0; i < 16; i++)
13957 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13959 for (i = 0; i < 2; i++)
13960 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13962 p = gen_rtx_PARALLEL (VOIDmode, vs);
13963 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13964 if (unsigned_p)
13965 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13966 else
13967 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13968 break;
13970 default:
13971 gcc_unreachable ();
13974 return;
13977 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13978 next narrower integer vector type */
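/* (Implementation note: each case below builds a 16-entry byte-select control
vector - PPERM_SRC1/PPERM_SRC2 plus a byte index per result byte - so that a
single pperm instruction performs the whole pack.) */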
13979 void
13980 ix86_expand_sse5_pack (rtx operands[3])
13982 enum machine_mode imode = GET_MODE (operands[0]);
13983 int pperm_bytes[16];
13984 int i;
13985 rtvec v = rtvec_alloc (16);
13986 rtx x;
13987 rtx op0 = operands[0];
13988 rtx op1 = operands[1];
13989 rtx op2 = operands[2];
13991 switch (imode)
13993 case V16QImode:
13994 for (i = 0; i < 8; i++)
13996 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13997 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
14000 for (i = 0; i < 16; i++)
14001 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14003 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14004 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
14005 break;
14007 case V8HImode:
14008 for (i = 0; i < 4; i++)
14010 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
14011 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
14012 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
14013 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
14016 for (i = 0; i < 16; i++)
14017 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14019 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14020 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
14021 break;
14023 case V4SImode:
14024 for (i = 0; i < 2; i++)
14026 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
14027 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
14028 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
14029 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
14030 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
14031 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
14032 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
14033 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
14036 for (i = 0; i < 16; i++)
14037 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14039 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14040 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
14041 break;
14043 default:
14044 gcc_unreachable ();
14047 return;
14050 /* Expand conditional increment or decrement using adc/sbb instructions.
14051 The default case using setcc followed by the conditional move can be
14052 done by generic code. */
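/* (Roughly: once the comparison has been rewritten as a carry-flag test, an
adc or sbb of 0 or -1 against operand 2 adds or subtracts the 0/1 condition
value directly, with no setcc or conditional move needed.) */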
14053 int
14054 ix86_expand_int_addcc (rtx operands[])
14056 enum rtx_code code = GET_CODE (operands[1]);
14057 rtx compare_op;
14058 rtx val = const0_rtx;
14059 bool fpcmp = false;
14060 enum machine_mode mode = GET_MODE (operands[0]);
14062 if (operands[3] != const1_rtx
14063 && operands[3] != constm1_rtx)
14064 return 0;
14065 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14066 ix86_compare_op1, &compare_op))
14067 return 0;
14068 code = GET_CODE (compare_op);
14070 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14071 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14073 fpcmp = true;
14074 code = ix86_fp_compare_code_to_integer (code);
14077 if (code != LTU)
14079 val = constm1_rtx;
14080 if (fpcmp)
14081 PUT_CODE (compare_op,
14082 reverse_condition_maybe_unordered
14083 (GET_CODE (compare_op)));
14084 else
14085 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14087 PUT_MODE (compare_op, mode);
14089 /* Construct either adc or sbb insn. */
14090 if ((code == LTU) == (operands[3] == constm1_rtx))
14092 switch (GET_MODE (operands[0]))
14094 case QImode:
14095 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
14096 break;
14097 case HImode:
14098 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
14099 break;
14100 case SImode:
14101 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
14102 break;
14103 case DImode:
14104 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14105 break;
14106 default:
14107 gcc_unreachable ();
14110 else
14112 switch (GET_MODE (operands[0]))
14114 case QImode:
14115 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
14116 break;
14117 case HImode:
14118 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
14119 break;
14120 case SImode:
14121 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
14122 break;
14123 case DImode:
14124 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14125 break;
14126 default:
14127 gcc_unreachable ();
14130 return 1; /* DONE */
14134 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
14135 works for floating point parameters and non-offsettable memories.
14136 For pushes, it returns just stack offsets; the values will be saved
14137 in the right order. At most four parts are generated. */
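/* (For example: on a 32-bit target DImode and DFmode split into two SImode
parts and XFmode into three, while on a 64-bit target XFmode and TFmode split
into two parts.) */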
14139 static int
14140 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
14142 int size;
14144 if (!TARGET_64BIT)
14145 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
14146 else
14147 size = (GET_MODE_SIZE (mode) + 4) / 8;
14149 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
14150 gcc_assert (size >= 2 && size <= 4);
14152 /* Optimize constant pool reference to immediates. This is used by fp
14153 moves, that force all constants to memory to allow combining. */
14154 if (MEM_P (operand) && MEM_READONLY_P (operand))
14156 rtx tmp = maybe_get_pool_constant (operand);
14157 if (tmp)
14158 operand = tmp;
14161 if (MEM_P (operand) && !offsettable_memref_p (operand))
14163 /* The only non-offsettable memories we handle are pushes. */
14164 int ok = push_operand (operand, VOIDmode);
14166 gcc_assert (ok);
14168 operand = copy_rtx (operand);
14169 PUT_MODE (operand, Pmode);
14170 parts[0] = parts[1] = parts[2] = parts[3] = operand;
14171 return size;
14174 if (GET_CODE (operand) == CONST_VECTOR)
14176 enum machine_mode imode = int_mode_for_mode (mode);
14177 /* Caution: if we looked through a constant pool memory above,
14178 the operand may actually have a different mode now. That's
14179 ok, since we want to pun this all the way back to an integer. */
14180 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
14181 gcc_assert (operand != NULL);
14182 mode = imode;
14185 if (!TARGET_64BIT)
14187 if (mode == DImode)
14188 split_di (&operand, 1, &parts[0], &parts[1]);
14189 else
14191 int i;
14193 if (REG_P (operand))
14195 gcc_assert (reload_completed);
14196 for (i = 0; i < size; i++)
14197 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
14199 else if (offsettable_memref_p (operand))
14201 operand = adjust_address (operand, SImode, 0);
14202 parts[0] = operand;
14203 for (i = 1; i < size; i++)
14204 parts[i] = adjust_address (operand, SImode, 4 * i);
14206 else if (GET_CODE (operand) == CONST_DOUBLE)
14208 REAL_VALUE_TYPE r;
14209 long l[4];
14211 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14212 switch (mode)
14214 case TFmode:
14215 real_to_target (l, &r, mode);
14216 parts[3] = gen_int_mode (l[3], SImode);
14217 parts[2] = gen_int_mode (l[2], SImode);
14218 break;
14219 case XFmode:
14220 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14221 parts[2] = gen_int_mode (l[2], SImode);
14222 break;
14223 case DFmode:
14224 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14225 break;
14226 default:
14227 gcc_unreachable ();
14229 parts[1] = gen_int_mode (l[1], SImode);
14230 parts[0] = gen_int_mode (l[0], SImode);
14232 else
14233 gcc_unreachable ();
14236 else
14238 if (mode == TImode)
14239 split_ti (&operand, 1, &parts[0], &parts[1]);
14240 if (mode == XFmode || mode == TFmode)
14242 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14243 if (REG_P (operand))
14245 gcc_assert (reload_completed);
14246 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14247 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14249 else if (offsettable_memref_p (operand))
14251 operand = adjust_address (operand, DImode, 0);
14252 parts[0] = operand;
14253 parts[1] = adjust_address (operand, upper_mode, 8);
14255 else if (GET_CODE (operand) == CONST_DOUBLE)
14257 REAL_VALUE_TYPE r;
14258 long l[4];
14260 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14261 real_to_target (l, &r, mode);
14263 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14264 if (HOST_BITS_PER_WIDE_INT >= 64)
14265 parts[0]
14266 = gen_int_mode
14267 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14268 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14269 DImode);
14270 else
14271 parts[0] = immed_double_const (l[0], l[1], DImode);
14273 if (upper_mode == SImode)
14274 parts[1] = gen_int_mode (l[2], SImode);
14275 else if (HOST_BITS_PER_WIDE_INT >= 64)
14276 parts[1]
14277 = gen_int_mode
14278 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14279 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14280 DImode);
14281 else
14282 parts[1] = immed_double_const (l[2], l[3], DImode);
14284 else
14285 gcc_unreachable ();
14289 return size;
14292 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
14293 The value is split into parts: operands 2 and up receive the
14294 destination parts and operands 6 and up the source parts, and the
14295 moves are emitted in an order that avoids clobbering the source. */
14297 void
14298 ix86_split_long_move (rtx operands[])
14300 rtx part[2][4];
14301 int nparts, i, j;
14302 int push = 0;
14303 int collisions = 0;
14304 enum machine_mode mode = GET_MODE (operands[0]);
14305 bool collisionparts[4];
14307 /* The DFmode expanders may ask us to move a double.
14308 For a 64-bit target this is a single move. By hiding that fact
14309 here we simplify the i386.md splitters. */
14310 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14312 /* Optimize constant pool reference to immediates. This is used by
14313 fp moves, that force all constants to memory to allow combining. */
14315 if (MEM_P (operands[1])
14316 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14317 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14318 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14319 if (push_operand (operands[0], VOIDmode))
14321 operands[0] = copy_rtx (operands[0]);
14322 PUT_MODE (operands[0], Pmode);
14324 else
14325 operands[0] = gen_lowpart (DImode, operands[0]);
14326 operands[1] = gen_lowpart (DImode, operands[1]);
14327 emit_move_insn (operands[0], operands[1]);
14328 return;
14331 /* The only non-offsettable memory we handle is push. */
14332 if (push_operand (operands[0], VOIDmode))
14333 push = 1;
14334 else
14335 gcc_assert (!MEM_P (operands[0])
14336 || offsettable_memref_p (operands[0]));
14338 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14339 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14341 /* When emitting push, take care for source operands on the stack. */
14342 if (push && MEM_P (operands[1])
14343 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14344 for (i = 0; i < nparts - 1; i++)
14345 part[1][i] = change_address (part[1][i],
14346 GET_MODE (part[1][i]),
14347 XEXP (part[1][i + 1], 0));
14349 /* We need to do the copy in the right order in case an address register
14350 of the source overlaps the destination. */
14351 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14353 rtx tmp;
14355 for (i = 0; i < nparts; i++)
14357 collisionparts[i]
14358 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
14359 if (collisionparts[i])
14360 collisions++;
14363 /* Collision in the middle part can be handled by reordering. */
14364 if (collisions == 1 && nparts == 3 && collisionparts [1])
14366 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14367 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14369 else if (collisions == 1
14370 && nparts == 4
14371 && (collisionparts [1] || collisionparts [2]))
14373 if (collisionparts [1])
14375 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14376 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14378 else
14380 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
14381 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
14385 /* If there are more collisions, we can't handle it by reordering.
14386 Do an lea to the last part and use only one colliding move. */
14387 else if (collisions > 1)
14389 rtx base;
14391 collisions = 1;
14393 base = part[0][nparts - 1];
14395 /* Handle the case when the last part isn't valid for lea.
14396 Happens in 64-bit mode storing the 12-byte XFmode. */
14397 if (GET_MODE (base) != Pmode)
14398 base = gen_rtx_REG (Pmode, REGNO (base));
14400 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14401 part[1][0] = replace_equiv_address (part[1][0], base);
14402 for (i = 1; i < nparts; i++)
14404 tmp = plus_constant (base, UNITS_PER_WORD * i);
14405 part[1][i] = replace_equiv_address (part[1][i], tmp);
14410 if (push)
14412 if (!TARGET_64BIT)
14414 if (nparts == 3)
14416 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14417 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14418 emit_move_insn (part[0][2], part[1][2]);
14420 else if (nparts == 4)
14422 emit_move_insn (part[0][3], part[1][3]);
14423 emit_move_insn (part[0][2], part[1][2]);
14426 else
14428 /* In 64-bit mode we don't have a 32-bit push available. If the operand
14429 is a register, that is fine - we just use the larger counterpart. We
14430 also retype the memory - this comes from an attempt to avoid a REX
14431 prefix when moving the second half of a TFmode value. */
14432 if (GET_MODE (part[1][1]) == SImode)
14434 switch (GET_CODE (part[1][1]))
14436 case MEM:
14437 part[1][1] = adjust_address (part[1][1], DImode, 0);
14438 break;
14440 case REG:
14441 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14442 break;
14444 default:
14445 gcc_unreachable ();
14448 if (GET_MODE (part[1][0]) == SImode)
14449 part[1][0] = part[1][1];
14452 emit_move_insn (part[0][1], part[1][1]);
14453 emit_move_insn (part[0][0], part[1][0]);
14454 return;
14457 /* Choose correct order to not overwrite the source before it is copied. */
14458 if ((REG_P (part[0][0])
14459 && REG_P (part[1][1])
14460 && (REGNO (part[0][0]) == REGNO (part[1][1])
14461 || (nparts == 3
14462 && REGNO (part[0][0]) == REGNO (part[1][2]))
14463 || (nparts == 4
14464 && REGNO (part[0][0]) == REGNO (part[1][3]))))
14465 || (collisions > 0
14466 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14468 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
14470 operands[2 + i] = part[0][j];
14471 operands[6 + i] = part[1][j];
14474 else
14476 for (i = 0; i < nparts; i++)
14478 operands[2 + i] = part[0][i];
14479 operands[6 + i] = part[1][i];
14483 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14484 if (optimize_size)
14486 for (j = 0; j < nparts - 1; j++)
14487 if (CONST_INT_P (operands[6 + j])
14488 && operands[6 + j] != const0_rtx
14489 && REG_P (operands[2 + j]))
14490 for (i = j; i < nparts - 1; i++)
14491 if (CONST_INT_P (operands[7 + i])
14492 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
14493 operands[7 + i] = operands[2 + j];
14496 for (i = 0; i < nparts; i++)
14497 emit_move_insn (operands[2 + i], operands[6 + i]);
14499 return;
14502 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14503 left shift by a constant, either using a single shift or
14504 a sequence of add instructions. */
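/* (For instance, when not optimizing for size a shift left by 2 may be
emitted as two "add reg, reg" instructions if two adds are no more expensive
than a single shift by a constant on the target.) */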
14506 static void
14507 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14509 if (count == 1)
14511 emit_insn ((mode == DImode
14512 ? gen_addsi3
14513 : gen_adddi3) (operand, operand, operand));
14515 else if (!optimize_size
14516 && count * ix86_cost->add <= ix86_cost->shift_const)
14518 int i;
14519 for (i=0; i<count; i++)
14521 emit_insn ((mode == DImode
14522 ? gen_addsi3
14523 : gen_adddi3) (operand, operand, operand));
14526 else
14527 emit_insn ((mode == DImode
14528 ? gen_ashlsi3
14529 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14532 void
14533 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14535 rtx low[2], high[2];
14536 int count;
14537 const int single_width = mode == DImode ? 32 : 64;
14539 if (CONST_INT_P (operands[2]))
14541 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14542 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14544 if (count >= single_width)
14546 emit_move_insn (high[0], low[1]);
14547 emit_move_insn (low[0], const0_rtx);
14549 if (count > single_width)
14550 ix86_expand_ashl_const (high[0], count - single_width, mode);
14552 else
14554 if (!rtx_equal_p (operands[0], operands[1]))
14555 emit_move_insn (operands[0], operands[1]);
14556 emit_insn ((mode == DImode
14557 ? gen_x86_shld
14558 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14559 ix86_expand_ashl_const (low[0], count, mode);
14561 return;
14564 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14566 if (operands[1] == const1_rtx)
14568 /* Assuming we've chosen QImode-capable registers, 1 << N
14569 can be done with two 32/64-bit shifts, no branches, no cmoves. */
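/* (In other words: bit 5 of the count - bit 6 for TImode - decides whether
the 1 lands in the low or the high half; a setcc on that bit puts 0/1 into
the two halves, and the ordinary shifts by the count, taken modulo the half
width, finish the job.) */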
14570 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14572 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14574 ix86_expand_clear (low[0]);
14575 ix86_expand_clear (high[0]);
14576 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14578 d = gen_lowpart (QImode, low[0]);
14579 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14580 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14581 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14583 d = gen_lowpart (QImode, high[0]);
14584 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14585 s = gen_rtx_NE (QImode, flags, const0_rtx);
14586 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14589 /* Otherwise, we can get the same results by manually performing
14590 a bit extract operation on bit 5/6, and then performing the two
14591 shifts. The two methods of getting 0/1 into low/high are exactly
14592 the same size. Avoiding the shift in the bit extract case helps
14593 pentium4 a bit; no one else seems to care much either way. */
14594 else
14596 rtx x;
14598 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14599 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14600 else
14601 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14602 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14604 emit_insn ((mode == DImode
14605 ? gen_lshrsi3
14606 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14607 emit_insn ((mode == DImode
14608 ? gen_andsi3
14609 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14610 emit_move_insn (low[0], high[0]);
14611 emit_insn ((mode == DImode
14612 ? gen_xorsi3
14613 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14616 emit_insn ((mode == DImode
14617 ? gen_ashlsi3
14618 : gen_ashldi3) (low[0], low[0], operands[2]));
14619 emit_insn ((mode == DImode
14620 ? gen_ashlsi3
14621 : gen_ashldi3) (high[0], high[0], operands[2]));
14622 return;
14625 if (operands[1] == constm1_rtx)
14627 /* For -1 << N, we can avoid the shld instruction, because we
14628 know that we're shifting 0...31/63 ones into a -1. */
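/* (The ones shifted into the high half from the all-ones low half are
themselves ones, so the high half simply stays -1; only the shift of the
low half and the usual adjustment for counts of 32/64 or more remain.) */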
14629 emit_move_insn (low[0], constm1_rtx);
14630 if (optimize_size)
14631 emit_move_insn (high[0], low[0]);
14632 else
14633 emit_move_insn (high[0], constm1_rtx);
14635 else
14637 if (!rtx_equal_p (operands[0], operands[1]))
14638 emit_move_insn (operands[0], operands[1]);
14640 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14641 emit_insn ((mode == DImode
14642 ? gen_x86_shld
14643 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14646 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14648 if (TARGET_CMOVE && scratch)
14650 ix86_expand_clear (scratch);
14651 emit_insn ((mode == DImode
14652 ? gen_x86_shift_adj_1
14653 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14655 else
14656 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14659 void
14660 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14662 rtx low[2], high[2];
14663 int count;
14664 const int single_width = mode == DImode ? 32 : 64;
14666 if (CONST_INT_P (operands[2]))
14668 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14669 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14671 if (count == single_width * 2 - 1)
14673 emit_move_insn (high[0], high[1]);
14674 emit_insn ((mode == DImode
14675 ? gen_ashrsi3
14676 : gen_ashrdi3) (high[0], high[0],
14677 GEN_INT (single_width - 1)));
14678 emit_move_insn (low[0], high[0]);
14681 else if (count >= single_width)
14683 emit_move_insn (low[0], high[1]);
14684 emit_move_insn (high[0], low[0]);
14685 emit_insn ((mode == DImode
14686 ? gen_ashrsi3
14687 : gen_ashrdi3) (high[0], high[0],
14688 GEN_INT (single_width - 1)));
14689 if (count > single_width)
14690 emit_insn ((mode == DImode
14691 ? gen_ashrsi3
14692 : gen_ashrdi3) (low[0], low[0],
14693 GEN_INT (count - single_width)));
14695 else
14697 if (!rtx_equal_p (operands[0], operands[1]))
14698 emit_move_insn (operands[0], operands[1]);
14699 emit_insn ((mode == DImode
14700 ? gen_x86_shrd
14701 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14702 emit_insn ((mode == DImode
14703 ? gen_ashrsi3
14704 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14707 else
14709 if (!rtx_equal_p (operands[0], operands[1]))
14710 emit_move_insn (operands[0], operands[1]);
14712 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14714 emit_insn ((mode == DImode
14715 ? gen_x86_shrd
14716 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14717 emit_insn ((mode == DImode
14718 ? gen_ashrsi3
14719 : gen_ashrdi3) (high[0], high[0], operands[2]));
14721 if (TARGET_CMOVE && scratch)
14723 emit_move_insn (scratch, high[0]);
14724 emit_insn ((mode == DImode
14725 ? gen_ashrsi3
14726 : gen_ashrdi3) (scratch, scratch,
14727 GEN_INT (single_width - 1)));
14728 emit_insn ((mode == DImode
14729 ? gen_x86_shift_adj_1
14730 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14731 scratch));
14733 else
14734 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14738 void
14739 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14741 rtx low[2], high[2];
14742 int count;
14743 const int single_width = mode == DImode ? 32 : 64;
14745 if (CONST_INT_P (operands[2]))
14747 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14748 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14750 if (count >= single_width)
14752 emit_move_insn (low[0], high[1]);
14753 ix86_expand_clear (high[0]);
14755 if (count > single_width)
14756 emit_insn ((mode == DImode
14757 ? gen_lshrsi3
14758 : gen_lshrdi3) (low[0], low[0],
14759 GEN_INT (count - single_width)));
14761 else
14763 if (!rtx_equal_p (operands[0], operands[1]))
14764 emit_move_insn (operands[0], operands[1]);
14765 emit_insn ((mode == DImode
14766 ? gen_x86_shrd
14767 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14768 emit_insn ((mode == DImode
14769 ? gen_lshrsi3
14770 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14773 else
14775 if (!rtx_equal_p (operands[0], operands[1]))
14776 emit_move_insn (operands[0], operands[1]);
14778 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14780 emit_insn ((mode == DImode
14781 ? gen_x86_shrd
14782 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14783 emit_insn ((mode == DImode
14784 ? gen_lshrsi3
14785 : gen_lshrdi3) (high[0], high[0], operands[2]));
14787 /* Heh. By reversing the arguments, we can reuse this pattern. */
14788 if (TARGET_CMOVE && scratch)
14790 ix86_expand_clear (scratch);
14791 emit_insn ((mode == DImode
14792 ? gen_x86_shift_adj_1
14793 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14794 scratch));
14796 else
14797 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14801 /* Predict just emitted jump instruction to be taken with probability PROB. */
14802 static void
14803 predict_jump (int prob)
14805 rtx insn = get_last_insn ();
14806 gcc_assert (JUMP_P (insn));
14807 REG_NOTES (insn)
14808 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14809 GEN_INT (prob),
14810 REG_NOTES (insn));
14813 /* Helper function for the string operations below. Test VARIABLE whether
14814 it is aligned to VALUE bytes. If true, jump to the label. */
14815 static rtx
14816 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14818 rtx label = gen_label_rtx ();
14819 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14820 if (GET_MODE (variable) == DImode)
14821 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14822 else
14823 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14824 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14825 1, label);
14826 if (epilogue)
14827 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14828 else
14829 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14830 return label;
14833 /* Decrease COUNTREG by VALUE; used to account for bytes already copied or set. */
14834 static void
14835 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14837 if (GET_MODE (countreg) == DImode)
14838 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14839 else
14840 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14843 /* Zero-extend EXP, which may be SImode, into a Pmode register. */
14844 rtx
14845 ix86_zero_extend_to_Pmode (rtx exp)
14847 rtx r;
14848 if (GET_MODE (exp) == VOIDmode)
14849 return force_reg (Pmode, exp);
14850 if (GET_MODE (exp) == Pmode)
14851 return copy_to_mode_reg (Pmode, exp);
14852 r = gen_reg_rtx (Pmode);
14853 emit_insn (gen_zero_extendsidi2 (r, exp));
14854 return r;
14857 /* Divide COUNTREG by SCALE. */
14858 static rtx
14859 scale_counter (rtx countreg, int scale)
14861 rtx sc;
14862 rtx piece_size_mask;
14864 if (scale == 1)
14865 return countreg;
14866 if (CONST_INT_P (countreg))
14867 return GEN_INT (INTVAL (countreg) / scale);
14868 gcc_assert (REG_P (countreg));
14870 piece_size_mask = GEN_INT (scale - 1);
14871 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14872 GEN_INT (exact_log2 (scale)),
14873 NULL, 1, OPTAB_DIRECT);
14874 return sc;
14877 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14878 DImode for constant loop counts. */
14880 static enum machine_mode
14881 counter_mode (rtx count_exp)
14883 if (GET_MODE (count_exp) != VOIDmode)
14884 return GET_MODE (count_exp);
14885 if (GET_CODE (count_exp) != CONST_INT)
14886 return Pmode;
14887 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14888 return DImode;
14889 return SImode;
14892 /* When SRCPTR is non-NULL, output a simple loop that moves memory
14893 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
14894 the overall size is COUNT bytes. When SRCPTR is NULL, output the
14895 equivalent loop that sets memory to VALUE (assumed to be in MODE).
14897 The size is rounded down to a whole number of chunks moved at once.
14898 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
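/* (A rough sketch of the emitted code, with chunk = UNROLL * GET_MODE_SIZE (MODE):

       size = count & -chunk;  iter = 0;
       // when the chunk is a single byte, first jump out if size == 0
       do
         {
           copy or set UNROLL pieces of MODE at DESTPTR + iter (and SRCPTR + iter);
           iter += chunk;
         }
       while (iter < size);
       DESTPTR += iter;  if copying, SRCPTR += iter;  */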
14901 static void
14902 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14903 rtx destptr, rtx srcptr, rtx value,
14904 rtx count, enum machine_mode mode, int unroll,
14905 int expected_size)
14907 rtx out_label, top_label, iter, tmp;
14908 enum machine_mode iter_mode = counter_mode (count);
14909 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14910 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14911 rtx size;
14912 rtx x_addr;
14913 rtx y_addr;
14914 int i;
14916 top_label = gen_label_rtx ();
14917 out_label = gen_label_rtx ();
14918 iter = gen_reg_rtx (iter_mode);
14920 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14921 NULL, 1, OPTAB_DIRECT);
14922 /* Those two should combine. */
14923 if (piece_size == const1_rtx)
14925 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14926 true, out_label);
14927 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14929 emit_move_insn (iter, const0_rtx);
14931 emit_label (top_label);
14933 tmp = convert_modes (Pmode, iter_mode, iter, true);
14934 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14935 destmem = change_address (destmem, mode, x_addr);
14937 if (srcmem)
14939 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14940 srcmem = change_address (srcmem, mode, y_addr);
14942 /* When unrolling for chips that reorder memory reads and writes,
14943 we can save registers by using a single temporary.
14944 Using 4 temporaries is also overkill in 32-bit mode. */
14945 if (!TARGET_64BIT && 0)
14947 for (i = 0; i < unroll; i++)
14949 if (i)
14951 destmem =
14952 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14953 srcmem =
14954 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14956 emit_move_insn (destmem, srcmem);
14959 else
14961 rtx tmpreg[4];
14962 gcc_assert (unroll <= 4);
14963 for (i = 0; i < unroll; i++)
14965 tmpreg[i] = gen_reg_rtx (mode);
14966 if (i)
14968 srcmem =
14969 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14971 emit_move_insn (tmpreg[i], srcmem);
14973 for (i = 0; i < unroll; i++)
14975 if (i)
14977 destmem =
14978 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14980 emit_move_insn (destmem, tmpreg[i]);
14984 else
14985 for (i = 0; i < unroll; i++)
14987 if (i)
14988 destmem =
14989 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14990 emit_move_insn (destmem, value);
14993 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14994 true, OPTAB_LIB_WIDEN);
14995 if (tmp != iter)
14996 emit_move_insn (iter, tmp);
14998 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14999 true, top_label);
15000 if (expected_size != -1)
15002 expected_size /= GET_MODE_SIZE (mode) * unroll;
15003 if (expected_size == 0)
15004 predict_jump (0);
15005 else if (expected_size > REG_BR_PROB_BASE)
15006 predict_jump (REG_BR_PROB_BASE - 1);
15007 else
15008 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
15010 else
15011 predict_jump (REG_BR_PROB_BASE * 80 / 100);
15012 iter = ix86_zero_extend_to_Pmode (iter);
15013 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
15014 true, OPTAB_LIB_WIDEN);
15015 if (tmp != destptr)
15016 emit_move_insn (destptr, tmp);
15017 if (srcptr)
15019 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
15020 true, OPTAB_LIB_WIDEN);
15021 if (tmp != srcptr)
15022 emit_move_insn (srcptr, tmp);
15024 emit_label (out_label);
15027 /* Output a "rep; mov" instruction.
15028 Arguments have the same meaning as for the previous function. */
15029 static void
15030 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
15031 rtx destptr, rtx srcptr,
15032 rtx count,
15033 enum machine_mode mode)
15035 rtx destexp;
15036 rtx srcexp;
15037 rtx countreg;
15039 /* If the size is known to be a multiple of 4, it is shorter to use rep movsl than rep movsb. */
15040 if (mode == QImode && CONST_INT_P (count)
15041 && !(INTVAL (count) & 3))
15042 mode = SImode;
15044 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15045 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15046 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
15047 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
15048 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
15049 if (mode != QImode)
15051 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15052 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15053 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15054 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
15055 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15056 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
15058 else
15060 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15061 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
15063 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
15064 destexp, srcexp));
15067 /* Output a "rep; stos" instruction.
15068 Arguments have the same meaning as for the previous function. */
15069 static void
15070 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
15071 rtx count,
15072 enum machine_mode mode)
15074 rtx destexp;
15075 rtx countreg;
15077 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15078 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15079 value = force_reg (mode, gen_lowpart (mode, value));
15080 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
15081 if (mode != QImode)
15083 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15084 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15085 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15087 else
15088 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15089 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
15092 static void
15093 emit_strmov (rtx destmem, rtx srcmem,
15094 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
15096 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
15097 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
15098 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15101 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
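/* (For a compile-time COUNT the tail is emitted directly from its low bits -
e.g. a remainder of 7 becomes one SImode, one HImode and one QImode move.
For a run-time COUNT, a small byte loop or alignment tests on the low bits
of the count select the moves instead.) */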
15102 static void
15103 expand_movmem_epilogue (rtx destmem, rtx srcmem,
15104 rtx destptr, rtx srcptr, rtx count, int max_size)
15106 rtx src, dest;
15107 if (CONST_INT_P (count))
15109 HOST_WIDE_INT countval = INTVAL (count);
15110 int offset = 0;
15112 if ((countval & 0x10) && max_size > 16)
15114 if (TARGET_64BIT)
15116 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15117 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
15119 else
15120 gcc_unreachable ();
15121 offset += 16;
15123 if ((countval & 0x08) && max_size > 8)
15125 if (TARGET_64BIT)
15126 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15127 else
15129 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15130 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
15132 offset += 8;
15134 if ((countval & 0x04) && max_size > 4)
15136 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15137 offset += 4;
15139 if ((countval & 0x02) && max_size > 2)
15141 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
15142 offset += 2;
15144 if ((countval & 0x01) && max_size > 1)
15146 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
15147 offset += 1;
15149 return;
15151 if (max_size > 8)
15153 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
15154 count, 1, OPTAB_DIRECT);
15155 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
15156 count, QImode, 1, 4);
15157 return;
15160 /* When there are stringops, we can cheaply increase dest and src pointers.
15161 Otherwise we save code size by maintaining offset (zero is readily
15162 available from the preceding rep operation) and using x86 addressing modes. */
15164 if (TARGET_SINGLE_STRINGOP)
15166 if (max_size > 4)
15168 rtx label = ix86_expand_aligntest (count, 4, true);
15169 src = change_address (srcmem, SImode, srcptr);
15170 dest = change_address (destmem, SImode, destptr);
15171 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15172 emit_label (label);
15173 LABEL_NUSES (label) = 1;
15175 if (max_size > 2)
15177 rtx label = ix86_expand_aligntest (count, 2, true);
15178 src = change_address (srcmem, HImode, srcptr);
15179 dest = change_address (destmem, HImode, destptr);
15180 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15181 emit_label (label);
15182 LABEL_NUSES (label) = 1;
15184 if (max_size > 1)
15186 rtx label = ix86_expand_aligntest (count, 1, true);
15187 src = change_address (srcmem, QImode, srcptr);
15188 dest = change_address (destmem, QImode, destptr);
15189 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15190 emit_label (label);
15191 LABEL_NUSES (label) = 1;
15194 else
15196 rtx offset = force_reg (Pmode, const0_rtx);
15197 rtx tmp;
15199 if (max_size > 4)
15201 rtx label = ix86_expand_aligntest (count, 4, true);
15202 src = change_address (srcmem, SImode, srcptr);
15203 dest = change_address (destmem, SImode, destptr);
15204 emit_move_insn (dest, src);
15205 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15206 true, OPTAB_LIB_WIDEN);
15207 if (tmp != offset)
15208 emit_move_insn (offset, tmp);
15209 emit_label (label);
15210 LABEL_NUSES (label) = 1;
15212 if (max_size > 2)
15214 rtx label = ix86_expand_aligntest (count, 2, true);
15215 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15216 src = change_address (srcmem, HImode, tmp);
15217 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15218 dest = change_address (destmem, HImode, tmp);
15219 emit_move_insn (dest, src);
15220 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15221 true, OPTAB_LIB_WIDEN);
15222 if (tmp != offset)
15223 emit_move_insn (offset, tmp);
15224 emit_label (label);
15225 LABEL_NUSES (label) = 1;
15227 if (max_size > 1)
15229 rtx label = ix86_expand_aligntest (count, 1, true);
15230 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15231 src = change_address (srcmem, QImode, tmp);
15232 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15233 dest = change_address (destmem, QImode, tmp);
15234 emit_move_insn (dest, src);
15235 emit_label (label);
15236 LABEL_NUSES (label) = 1;
15241 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
15242 static void
15243 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15244 rtx count, int max_size)
15246 count =
15247 expand_simple_binop (counter_mode (count), AND, count,
15248 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15249 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15250 gen_lowpart (QImode, value), count, QImode,
15251 1, max_size / 2);
15254 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
15255 static void
15256 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15258 rtx dest;
15260 if (CONST_INT_P (count))
15262 HOST_WIDE_INT countval = INTVAL (count);
15263 int offset = 0;
15265 if ((countval & 0x10) && max_size > 16)
15267 if (TARGET_64BIT)
15269 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15270 emit_insn (gen_strset (destptr, dest, value));
15271 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15272 emit_insn (gen_strset (destptr, dest, value));
15274 else
15275 gcc_unreachable ();
15276 offset += 16;
15278 if ((countval & 0x08) && max_size > 8)
15280 if (TARGET_64BIT)
15282 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15283 emit_insn (gen_strset (destptr, dest, value));
15285 else
15287 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15288 emit_insn (gen_strset (destptr, dest, value));
15289 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15290 emit_insn (gen_strset (destptr, dest, value));
15292 offset += 8;
15294 if ((countval & 0x04) && max_size > 4)
15296 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15297 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15298 offset += 4;
15300 if ((countval & 0x02) && max_size > 2)
15302 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15303 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15304 offset += 2;
15306 if ((countval & 0x01) && max_size > 1)
15308 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15309 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15310 offset += 1;
15312 return;
15314 if (max_size > 32)
15316 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15317 return;
15319 if (max_size > 16)
15321 rtx label = ix86_expand_aligntest (count, 16, true);
15322 if (TARGET_64BIT)
15324 dest = change_address (destmem, DImode, destptr);
15325 emit_insn (gen_strset (destptr, dest, value));
15326 emit_insn (gen_strset (destptr, dest, value));
15328 else
15330 dest = change_address (destmem, SImode, destptr);
15331 emit_insn (gen_strset (destptr, dest, value));
15332 emit_insn (gen_strset (destptr, dest, value));
15333 emit_insn (gen_strset (destptr, dest, value));
15334 emit_insn (gen_strset (destptr, dest, value));
15336 emit_label (label);
15337 LABEL_NUSES (label) = 1;
15339 if (max_size > 8)
15341 rtx label = ix86_expand_aligntest (count, 8, true);
15342 if (TARGET_64BIT)
15344 dest = change_address (destmem, DImode, destptr);
15345 emit_insn (gen_strset (destptr, dest, value));
15347 else
15349 dest = change_address (destmem, SImode, destptr);
15350 emit_insn (gen_strset (destptr, dest, value));
15351 emit_insn (gen_strset (destptr, dest, value));
15353 emit_label (label);
15354 LABEL_NUSES (label) = 1;
15356 if (max_size > 4)
15358 rtx label = ix86_expand_aligntest (count, 4, true);
15359 dest = change_address (destmem, SImode, destptr);
15360 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15361 emit_label (label);
15362 LABEL_NUSES (label) = 1;
15364 if (max_size > 2)
15366 rtx label = ix86_expand_aligntest (count, 2, true);
15367 dest = change_address (destmem, HImode, destptr);
15368 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15369 emit_label (label);
15370 LABEL_NUSES (label) = 1;
15372 if (max_size > 1)
15374 rtx label = ix86_expand_aligntest (count, 1, true);
15375 dest = change_address (destmem, QImode, destptr);
15376 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15377 emit_label (label);
15378 LABEL_NUSES (label) = 1;
15382 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
15383 to DESIRED_ALIGNMENT. */
15384 static void
15385 expand_movmem_prologue (rtx destmem, rtx srcmem,
15386 rtx destptr, rtx srcptr, rtx count,
15387 int align, int desired_alignment)
15389 if (align <= 1 && desired_alignment > 1)
15391 rtx label = ix86_expand_aligntest (destptr, 1, false);
15392 srcmem = change_address (srcmem, QImode, srcptr);
15393 destmem = change_address (destmem, QImode, destptr);
15394 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15395 ix86_adjust_counter (count, 1);
15396 emit_label (label);
15397 LABEL_NUSES (label) = 1;
15399 if (align <= 2 && desired_alignment > 2)
15401 rtx label = ix86_expand_aligntest (destptr, 2, false);
15402 srcmem = change_address (srcmem, HImode, srcptr);
15403 destmem = change_address (destmem, HImode, destptr);
15404 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15405 ix86_adjust_counter (count, 2);
15406 emit_label (label);
15407 LABEL_NUSES (label) = 1;
15409 if (align <= 4 && desired_alignment > 4)
15411 rtx label = ix86_expand_aligntest (destptr, 4, false);
15412 srcmem = change_address (srcmem, SImode, srcptr);
15413 destmem = change_address (destmem, SImode, destptr);
15414 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15415 ix86_adjust_counter (count, 4);
15416 emit_label (label);
15417 LABEL_NUSES (label) = 1;
15419 gcc_assert (desired_alignment <= 8);
15422 /* Set enough of DEST to align DEST, known to be aligned by ALIGN,
15423 to DESIRED_ALIGNMENT. */
15424 static void
15425 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15426 int align, int desired_alignment)
15428 if (align <= 1 && desired_alignment > 1)
15430 rtx label = ix86_expand_aligntest (destptr, 1, false);
15431 destmem = change_address (destmem, QImode, destptr);
15432 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15433 ix86_adjust_counter (count, 1);
15434 emit_label (label);
15435 LABEL_NUSES (label) = 1;
15437 if (align <= 2 && desired_alignment > 2)
15439 rtx label = ix86_expand_aligntest (destptr, 2, false);
15440 destmem = change_address (destmem, HImode, destptr);
15441 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15442 ix86_adjust_counter (count, 2);
15443 emit_label (label);
15444 LABEL_NUSES (label) = 1;
15446 if (align <= 4 && desired_alignment > 4)
15448 rtx label = ix86_expand_aligntest (destptr, 4, false);
15449 destmem = change_address (destmem, SImode, destptr);
15450 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15451 ix86_adjust_counter (count, 4);
15452 emit_label (label);
15453 LABEL_NUSES (label) = 1;
15455 gcc_assert (desired_alignment <= 8);
15458 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
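/* (Decision order, roughly: an explicitly requested stringop_alg wins; when
optimizing for size a rep-prefixed variant is used if the rep string registers
are available, else a loop; otherwise the per-size entries of the cost tables
pick the algorithm, falling back to the unknown-size entry or a libcall.) */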
15459 static enum stringop_alg
15460 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15461 int *dynamic_check)
15463 const struct stringop_algs * algs;
15464 /* Algorithms using the rep prefix want at least edi and ecx;
15465 additionally, memset wants eax and memcpy wants esi. Don't
15466 consider such algorithms if the user has appropriated those
15467 registers for their own purposes. */
15468 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15469 || (memset
15470 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15472 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15473 || (alg != rep_prefix_1_byte \
15474 && alg != rep_prefix_4_byte \
15475 && alg != rep_prefix_8_byte))
15477 *dynamic_check = -1;
15478 if (memset)
15479 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15480 else
15481 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15482 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15483 return stringop_alg;
15484 /* rep; movq or rep; movl is the smallest variant. */
15485 else if (optimize_size)
15487 if (!count || (count & 3))
15488 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15489 else
15490 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15492 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
15494 else if (expected_size != -1 && expected_size < 4)
15495 return loop_1_byte;
15496 else if (expected_size != -1)
15498 unsigned int i;
15499 enum stringop_alg alg = libcall;
15500 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15502 /* We get here if the algorithms that were not libcall-based
15503 were rep-prefix based and we are unable to use rep prefixes
15504 based on global register usage. Break out of the loop and
15505 use the heuristic below. */
15506 if (algs->size[i].max == 0)
15507 break;
15508 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15510 enum stringop_alg candidate = algs->size[i].alg;
15512 if (candidate != libcall && ALG_USABLE_P (candidate))
15513 alg = candidate;
15514 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15515 last non-libcall inline algorithm. */
15516 if (TARGET_INLINE_ALL_STRINGOPS)
15518 /* When the current size is best to be copied by a libcall,
15519 but we are still forced to inline, run the heuristic below
15520 that will pick code for medium sized blocks. */
15521 if (alg != libcall)
15522 return alg;
15523 break;
15525 else if (ALG_USABLE_P (candidate))
15526 return candidate;
15529 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15531 /* When asked to inline the call anyway, try to pick a meaningful choice.
15532 We look for the maximal size of block that is faster to copy by hand and
15533 take blocks of at most that size, guessing that the average size will
15534 be roughly half of the block.
15536 If this turns out to be bad, we might simply specify the preferred
15537 choice in ix86_costs. */
15538 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15539 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15541 int max = -1;
15542 enum stringop_alg alg;
15543 int i;
15544 bool any_alg_usable_p = true;
15546 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15548 enum stringop_alg candidate = algs->size[i].alg;
15549 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15551 if (candidate != libcall && candidate
15552 && ALG_USABLE_P (candidate))
15553 max = algs->size[i].max;
15555 /* If there aren't any usable algorithms, then recursing on
15556 smaller sizes isn't going to find anything. Just return the
15557 simple byte-at-a-time copy loop. */
15558 if (!any_alg_usable_p)
15560 /* Pick something reasonable. */
15561 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15562 *dynamic_check = 128;
15563 return loop_1_byte;
15565 if (max == -1)
15566 max = 4096;
15567 alg = decide_alg (count, max / 2, memset, dynamic_check);
15568 gcc_assert (*dynamic_check == -1);
15569 gcc_assert (alg != libcall);
15570 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15571 *dynamic_check = max;
15572 return alg;
15574 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15575 #undef ALG_USABLE_P
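/* Worked example (hypothetical cost table, for illustration only): if
   algs->size were {{256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}},
   then decide_alg picks "loop" for an expected size of 100 and
   "rep_prefix_4_byte" for 1000; for larger or unknown sizes it falls back
   to a libcall unless forced to inline, in which case it recurses with
   max / 2 (here 4096) to pick code tuned for medium-sized blocks.  */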
15578 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15579 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15580 static int
15581 decide_alignment (int align,
15582 enum stringop_alg alg,
15583 int expected_size)
15585 int desired_align = 0;
15586 switch (alg)
15588 case no_stringop:
15589 gcc_unreachable ();
15590 case loop:
15591 case unrolled_loop:
15592 desired_align = GET_MODE_SIZE (Pmode);
15593 break;
15594 case rep_prefix_8_byte:
15595 desired_align = 8;
15596 break;
15597 case rep_prefix_4_byte:
15598 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
15599 copying a whole cache line at once. */
15600 if (TARGET_PENTIUMPRO)
15601 desired_align = 8;
15602 else
15603 desired_align = 4;
15604 break;
15605 case rep_prefix_1_byte:
15606 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
15607 copying a whole cache line at once. */
15608 if (TARGET_PENTIUMPRO)
15609 desired_align = 8;
15610 else
15611 desired_align = 1;
15612 break;
15613 case loop_1_byte:
15614 desired_align = 1;
15615 break;
15616 case libcall:
15617 return 0;
15620 if (optimize_size)
15621 desired_align = 1;
15622 if (desired_align < align)
15623 desired_align = align;
15624 if (expected_size != -1 && expected_size < 4)
15625 desired_align = align;
15626 return desired_align;
15629 /* Return the smallest power of 2 greater than VAL. */
15630 static int
15631 smallest_pow2_greater_than (int val)
15633 int ret = 1;
15634 while (ret <= val)
15635 ret <<= 1;
15636 return ret;
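/* For illustration only: the helper above returns the next power of two
   strictly greater than VAL, e.g.

     smallest_pow2_greater_than (0) == 1
     smallest_pow2_greater_than (7) == 8
     smallest_pow2_greater_than (8) == 16

   so the epilogue size computed from it is always a power of two larger
   than the number of bytes the epilogue may have to handle.  */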
15639 /* Expand string move (memcpy) operation. Use i386 string operations when
15640 profitable. expand_setmem contains similar code. The code depends upon
15641 architecture, block size and alignment, but always has the same
15642 overall structure:
15644 1) Prologue guard: Conditional that jumps up to epilogues for small
15645 blocks that can be handled by the epilogue alone. This is faster, but
15646 also needed for correctness, since the prologue assumes the block is larger
15647 than the desired alignment.
15649 Optional dynamic check for size and libcall for large
15650 blocks is emitted here too, with -minline-stringops-dynamically.
15652 2) Prologue: copy first few bytes in order to get destination aligned
15653 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15654 DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15655 We emit either a jump tree on power of two sized blocks, or a byte loop.
15657 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15658 with specified algorithm.
15660 4) Epilogue: code copying tail of the block that is too small to be
15661 handled by main body (or up to size guarded by prologue guard). */
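/* A rough sketch of the code shape produced for a variable-sized copy
   (illustrative only; the tests and chunk sizes are simplified, not the
   literal RTL emitted below):

     if (count < epilogue_size_needed) goto epilogue;            // 1) prologue guard
     copy 1/2/4 byte pieces until dst is DESIRED_ALIGN aligned;  // 2) prologue
     copy SIZE_NEEDED chunks via rep movs or an (unrolled) loop; // 3) main body
   epilogue:
     copy the remaining count & (epilogue_size_needed - 1) bytes; // 4) epilogue
*/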
15664 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15665 rtx expected_align_exp, rtx expected_size_exp)
15667 rtx destreg;
15668 rtx srcreg;
15669 rtx label = NULL;
15670 rtx tmp;
15671 rtx jump_around_label = NULL;
15672 HOST_WIDE_INT align = 1;
15673 unsigned HOST_WIDE_INT count = 0;
15674 HOST_WIDE_INT expected_size = -1;
15675 int size_needed = 0, epilogue_size_needed;
15676 int desired_align = 0;
15677 enum stringop_alg alg;
15678 int dynamic_check;
15680 if (CONST_INT_P (align_exp))
15681 align = INTVAL (align_exp);
15682 /* i386 can do misaligned access at a reasonably increased cost. */
15683 if (CONST_INT_P (expected_align_exp)
15684 && INTVAL (expected_align_exp) > align)
15685 align = INTVAL (expected_align_exp);
15686 if (CONST_INT_P (count_exp))
15687 count = expected_size = INTVAL (count_exp);
15688 if (CONST_INT_P (expected_size_exp) && count == 0)
15689 expected_size = INTVAL (expected_size_exp);
15691 /* Make sure we don't need to care about overflow later on. */
15692 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15693 return 0;
15695 /* Step 0: Decide on preferred algorithm, desired alignment and
15696 size of chunks to be copied by main loop. */
15698 alg = decide_alg (count, expected_size, false, &dynamic_check);
15699 desired_align = decide_alignment (align, alg, expected_size);
15701 if (!TARGET_ALIGN_STRINGOPS)
15702 align = desired_align;
15704 if (alg == libcall)
15705 return 0;
15706 gcc_assert (alg != no_stringop);
15707 if (!count)
15708 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15709 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15710 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15711 switch (alg)
15713 case libcall:
15714 case no_stringop:
15715 gcc_unreachable ();
15716 case loop:
15717 size_needed = GET_MODE_SIZE (Pmode);
15718 break;
15719 case unrolled_loop:
15720 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15721 break;
15722 case rep_prefix_8_byte:
15723 size_needed = 8;
15724 break;
15725 case rep_prefix_4_byte:
15726 size_needed = 4;
15727 break;
15728 case rep_prefix_1_byte:
15729 case loop_1_byte:
15730 size_needed = 1;
15731 break;
15734 epilogue_size_needed = size_needed;
15736 /* Step 1: Prologue guard. */
15738 /* Alignment code needs count to be in register. */
15739 if (CONST_INT_P (count_exp) && desired_align > align)
15740 count_exp = force_reg (counter_mode (count_exp), count_exp);
15741 gcc_assert (desired_align >= 1 && align >= 1);
15743 /* Ensure that alignment prologue won't copy past end of block. */
15744 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15746 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15747 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15748 Make sure it is power of 2. */
15749 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15751 if (CONST_INT_P (count_exp))
15753 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15754 goto epilogue;
15756 else
15758 label = gen_label_rtx ();
15759 emit_cmp_and_jump_insns (count_exp,
15760 GEN_INT (epilogue_size_needed),
15761 LTU, 0, counter_mode (count_exp), 1, label);
15762 if (expected_size == -1 || expected_size < epilogue_size_needed)
15763 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15764 else
15765 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15769 /* Emit code to decide on runtime whether library call or inline should be
15770 used. */
15771 if (dynamic_check != -1)
15773 if (CONST_INT_P (count_exp))
15775 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15777 emit_block_move_via_libcall (dst, src, count_exp, false);
15778 count_exp = const0_rtx;
15779 goto epilogue;
15782 else
15784 rtx hot_label = gen_label_rtx ();
15785 jump_around_label = gen_label_rtx ();
15786 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15787 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15788 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15789 emit_block_move_via_libcall (dst, src, count_exp, false);
15790 emit_jump (jump_around_label);
15791 emit_label (hot_label);
15795 /* Step 2: Alignment prologue. */
15797 if (desired_align > align)
15799 /* Except for the first move in the epilogue, we no longer know
15800 the constant offset in the aliasing info. It doesn't seem worth
15801 the pain to maintain it for the first move, so throw away
15802 the info early. */
15803 src = change_address (src, BLKmode, srcreg);
15804 dst = change_address (dst, BLKmode, destreg);
15805 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15806 desired_align);
15808 if (label && size_needed == 1)
15810 emit_label (label);
15811 LABEL_NUSES (label) = 1;
15812 label = NULL;
15815 /* Step 3: Main loop. */
15817 switch (alg)
15819 case libcall:
15820 case no_stringop:
15821 gcc_unreachable ();
15822 case loop_1_byte:
15823 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15824 count_exp, QImode, 1, expected_size);
15825 break;
15826 case loop:
15827 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15828 count_exp, Pmode, 1, expected_size);
15829 break;
15830 case unrolled_loop:
15831 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15832 registers for 4 temporaries anyway. */
15833 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15834 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15835 expected_size);
15836 break;
15837 case rep_prefix_8_byte:
15838 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15839 DImode);
15840 break;
15841 case rep_prefix_4_byte:
15842 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15843 SImode);
15844 break;
15845 case rep_prefix_1_byte:
15846 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15847 QImode);
15848 break;
15850 /* Properly adjust the offsets of the src and dest memory for aliasing. */
15851 if (CONST_INT_P (count_exp))
15853 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15854 (count / size_needed) * size_needed);
15855 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15856 (count / size_needed) * size_needed);
15858 else
15860 src = change_address (src, BLKmode, srcreg);
15861 dst = change_address (dst, BLKmode, destreg);
15864 /* Step 4: Epilogue to copy the remaining bytes. */
15865 epilogue:
15866 if (label)
15868 /* When the main loop is done, COUNT_EXP might hold original count,
15869 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15870 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15871 bytes. Compensate if needed. */
15873 if (size_needed < epilogue_size_needed)
15875 tmp =
15876 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15877 GEN_INT (size_needed - 1), count_exp, 1,
15878 OPTAB_DIRECT);
15879 if (tmp != count_exp)
15880 emit_move_insn (count_exp, tmp);
15882 emit_label (label);
15883 LABEL_NUSES (label) = 1;
15886 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15887 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15888 epilogue_size_needed);
15889 if (jump_around_label)
15890 emit_label (jump_around_label);
15891 return 1;
15894 /* Helper function for memcpy. For the QImode value 0xXY produce
15895 0xXYXYXYXY of the width specified by MODE. This is essentially
15896 a * 0x01010101, but we can do slightly better than
15897 synth_mult by unwinding the sequence by hand on CPUs with
15898 a slow multiply. */
15899 static rtx
15900 promote_duplicated_reg (enum machine_mode mode, rtx val)
15902 enum machine_mode valmode = GET_MODE (val);
15903 rtx tmp;
15904 int nops = mode == DImode ? 3 : 2;
15906 gcc_assert (mode == SImode || mode == DImode);
15907 if (val == const0_rtx)
15908 return copy_to_mode_reg (mode, const0_rtx);
15909 if (CONST_INT_P (val))
15911 HOST_WIDE_INT v = INTVAL (val) & 255;
15913 v |= v << 8;
15914 v |= v << 16;
15915 if (mode == DImode)
15916 v |= (v << 16) << 16;
15917 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15920 if (valmode == VOIDmode)
15921 valmode = QImode;
15922 if (valmode != QImode)
15923 val = gen_lowpart (QImode, val);
15924 if (mode == QImode)
15925 return val;
15926 if (!TARGET_PARTIAL_REG_STALL)
15927 nops--;
15928 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15929 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15930 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15931 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15933 rtx reg = convert_modes (mode, QImode, val, true);
15934 tmp = promote_duplicated_reg (mode, const1_rtx);
15935 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15936 OPTAB_DIRECT);
15938 else
15940 rtx reg = convert_modes (mode, QImode, val, true);
15942 if (!TARGET_PARTIAL_REG_STALL)
15943 if (mode == SImode)
15944 emit_insn (gen_movsi_insv_1 (reg, reg));
15945 else
15946 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15947 else
15949 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15950 NULL, 1, OPTAB_DIRECT);
15951 reg =
15952 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15954 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15955 NULL, 1, OPTAB_DIRECT);
15956 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15957 if (mode == SImode)
15958 return reg;
15959 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15960 NULL, 1, OPTAB_DIRECT);
15961 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15962 return reg;
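/* Worked example (illustration only): starting from the QImode value 0x5A,
   the shift/or sequence above computes

     0x5A | (0x5A << 8)                 == 0x5A5A
     0x5A5A | (0x5A5A << 16)            == 0x5A5A5A5A
     0x5A5A5A5A | (0x5A5A5A5A << 32)    == 0x5A5A5A5A5A5A5A5A  (DImode only)

   which matches the multiply-based path used when the multiply is cheap
   enough.  */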
15966 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15967 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15968 alignment from ALIGN to DESIRED_ALIGN. */
15969 static rtx
15970 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15972 rtx promoted_val;
15974 if (TARGET_64BIT
15975 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15976 promoted_val = promote_duplicated_reg (DImode, val);
15977 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15978 promoted_val = promote_duplicated_reg (SImode, val);
15979 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15980 promoted_val = promote_duplicated_reg (HImode, val);
15981 else
15982 promoted_val = val;
15984 return promoted_val;
15987 /* Expand string clear operation (bzero). Use i386 string operations when
15988 profitable. See expand_movmem comment for explanation of individual
15989 steps performed. */
15991 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15992 rtx expected_align_exp, rtx expected_size_exp)
15994 rtx destreg;
15995 rtx label = NULL;
15996 rtx tmp;
15997 rtx jump_around_label = NULL;
15998 HOST_WIDE_INT align = 1;
15999 unsigned HOST_WIDE_INT count = 0;
16000 HOST_WIDE_INT expected_size = -1;
16001 int size_needed = 0, epilogue_size_needed;
16002 int desired_align = 0;
16003 enum stringop_alg alg;
16004 rtx promoted_val = NULL;
16005 bool force_loopy_epilogue = false;
16006 int dynamic_check;
16008 if (CONST_INT_P (align_exp))
16009 align = INTVAL (align_exp);
16010 /* i386 can do misaligned access at a reasonably increased cost. */
16011 if (CONST_INT_P (expected_align_exp)
16012 && INTVAL (expected_align_exp) > align)
16013 align = INTVAL (expected_align_exp);
16014 if (CONST_INT_P (count_exp))
16015 count = expected_size = INTVAL (count_exp);
16016 if (CONST_INT_P (expected_size_exp) && count == 0)
16017 expected_size = INTVAL (expected_size_exp);
16019 /* Make sure we don't need to care about overflow later on. */
16020 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
16021 return 0;
16023 /* Step 0: Decide on preferred algorithm, desired alignment and
16024 size of chunks to be copied by main loop. */
16026 alg = decide_alg (count, expected_size, true, &dynamic_check);
16027 desired_align = decide_alignment (align, alg, expected_size);
16029 if (!TARGET_ALIGN_STRINGOPS)
16030 align = desired_align;
16032 if (alg == libcall)
16033 return 0;
16034 gcc_assert (alg != no_stringop);
16035 if (!count)
16036 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
16037 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16038 switch (alg)
16040 case libcall:
16041 case no_stringop:
16042 gcc_unreachable ();
16043 case loop:
16044 size_needed = GET_MODE_SIZE (Pmode);
16045 break;
16046 case unrolled_loop:
16047 size_needed = GET_MODE_SIZE (Pmode) * 4;
16048 break;
16049 case rep_prefix_8_byte:
16050 size_needed = 8;
16051 break;
16052 case rep_prefix_4_byte:
16053 size_needed = 4;
16054 break;
16055 case rep_prefix_1_byte:
16056 case loop_1_byte:
16057 size_needed = 1;
16058 break;
16060 epilogue_size_needed = size_needed;
16062 /* Step 1: Prologue guard. */
16064 /* Alignment code needs count to be in register. */
16065 if (CONST_INT_P (count_exp) && desired_align > align)
16067 enum machine_mode mode = SImode;
16068 if (TARGET_64BIT && (count & ~0xffffffff))
16069 mode = DImode;
16070 count_exp = force_reg (mode, count_exp);
16072 /* Do the cheap promotion to allow better CSE across the
16073 main loop and epilogue (i.e. one load of the big constant in the
16074 front of all code). */
16075 if (CONST_INT_P (val_exp))
16076 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16077 desired_align, align);
16078 /* Ensure that alignment prologue won't copy past end of block. */
16079 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
16081 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
16082 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
16083 Make sure it is power of 2. */
16084 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
16086 /* To improve performance of small blocks, we jump around the VAL
16087 promoting mode. This means that if the promoted VAL is not constant,
16088 we might not use it in the epilogue and have to use the byte
16089 loop variant. */
16090 if (epilogue_size_needed > 2 && !promoted_val)
16091 force_loopy_epilogue = true;
16092 label = gen_label_rtx ();
16093 emit_cmp_and_jump_insns (count_exp,
16094 GEN_INT (epilogue_size_needed),
16095 LTU, 0, counter_mode (count_exp), 1, label);
16096 if (GET_CODE (count_exp) == CONST_INT)
16098 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
16099 predict_jump (REG_BR_PROB_BASE * 60 / 100);
16100 else
16101 predict_jump (REG_BR_PROB_BASE * 20 / 100);
16103 if (dynamic_check != -1)
16105 rtx hot_label = gen_label_rtx ();
16106 jump_around_label = gen_label_rtx ();
16107 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
16108 LEU, 0, counter_mode (count_exp), 1, hot_label);
16109 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16110 set_storage_via_libcall (dst, count_exp, val_exp, false);
16111 emit_jump (jump_around_label);
16112 emit_label (hot_label);
16115 /* Step 2: Alignment prologue. */
16117 /* Do the expensive promotion once we branched off the small blocks. */
16118 if (!promoted_val)
16119 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16120 desired_align, align);
16121 gcc_assert (desired_align >= 1 && align >= 1);
16123 if (desired_align > align)
16125 /* Except for the first move in the epilogue, we no longer know
16126 the constant offset in the aliasing info. It doesn't seem worth
16127 the pain to maintain it for the first move, so throw away
16128 the info early. */
16129 dst = change_address (dst, BLKmode, destreg);
16130 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
16131 desired_align);
16133 if (label && size_needed == 1)
16135 emit_label (label);
16136 LABEL_NUSES (label) = 1;
16137 label = NULL;
16140 /* Step 3: Main loop. */
16142 switch (alg)
16144 case libcall:
16145 case no_stringop:
16146 gcc_unreachable ();
16147 case loop_1_byte:
16148 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16149 count_exp, QImode, 1, expected_size);
16150 break;
16151 case loop:
16152 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16153 count_exp, Pmode, 1, expected_size);
16154 break;
16155 case unrolled_loop:
16156 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16157 count_exp, Pmode, 4, expected_size);
16158 break;
16159 case rep_prefix_8_byte:
16160 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16161 DImode);
16162 break;
16163 case rep_prefix_4_byte:
16164 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16165 SImode);
16166 break;
16167 case rep_prefix_1_byte:
16168 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16169 QImode);
16170 break;
16172 /* Properly adjust the offset of the src and dest memory for aliasing. */
16173 if (CONST_INT_P (count_exp))
16174 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16175 (count / size_needed) * size_needed);
16176 else
16177 dst = change_address (dst, BLKmode, destreg);
16179 /* Step 4: Epilogue to copy the remaining bytes. */
16181 if (label)
16183 /* When the main loop is done, COUNT_EXP might hold original count,
16184 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16185 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16186 bytes. Compensate if needed. */
16188 if (size_needed < desired_align - align)
16190 tmp =
16191 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16192 GEN_INT (size_needed - 1), count_exp, 1,
16193 OPTAB_DIRECT);
16194 size_needed = desired_align - align + 1;
16195 if (tmp != count_exp)
16196 emit_move_insn (count_exp, tmp);
16198 emit_label (label);
16199 LABEL_NUSES (label) = 1;
16201 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16203 if (force_loopy_epilogue)
16204 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16205 size_needed);
16206 else
16207 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16208 size_needed);
16210 if (jump_around_label)
16211 emit_label (jump_around_label);
16212 return 1;
16215 /* Expand the appropriate insns for doing strlen if not just doing
16216 repnz; scasb
16218 out = result, initialized with the start address
16219 align_rtx = alignment of the address.
16220 scratch = scratch register, initialized with the start address when
16221 not aligned, otherwise undefined
16223 This is just the body. It needs the initializations mentioned above and
16224 some address computing at the end. These things are done in i386.md. */
16226 static void
16227 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16229 int align;
16230 rtx tmp;
16231 rtx align_2_label = NULL_RTX;
16232 rtx align_3_label = NULL_RTX;
16233 rtx align_4_label = gen_label_rtx ();
16234 rtx end_0_label = gen_label_rtx ();
16235 rtx mem;
16236 rtx tmpreg = gen_reg_rtx (SImode);
16237 rtx scratch = gen_reg_rtx (SImode);
16238 rtx cmp;
16240 align = 0;
16241 if (CONST_INT_P (align_rtx))
16242 align = INTVAL (align_rtx);
16244 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16246 /* Is there a known alignment and is it less than 4? */
16247 if (align < 4)
16249 rtx scratch1 = gen_reg_rtx (Pmode);
16250 emit_move_insn (scratch1, out);
16251 /* Is there a known alignment and is it not 2? */
16252 if (align != 2)
16254 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16255 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16257 /* Leave just the 3 lower bits. */
16258 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16259 NULL_RTX, 0, OPTAB_WIDEN);
16261 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16262 Pmode, 1, align_4_label);
16263 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16264 Pmode, 1, align_2_label);
16265 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16266 Pmode, 1, align_3_label);
16268 else
16270 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16271 check if it is aligned to a 4-byte boundary. */
16273 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16274 NULL_RTX, 0, OPTAB_WIDEN);
16276 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16277 Pmode, 1, align_4_label);
16280 mem = change_address (src, QImode, out);
16282 /* Now compare the bytes. */
16284 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
16285 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16286 QImode, 1, end_0_label);
16288 /* Increment the address. */
16289 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16291 /* Not needed with an alignment of 2 */
16292 if (align != 2)
16294 emit_label (align_2_label);
16296 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16297 end_0_label);
16299 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16301 emit_label (align_3_label);
16304 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16305 end_0_label);
16307 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16310 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
16311 align this loop; that only makes programs huge and does not help to
16312 speed them up. */
16313 emit_label (align_4_label);
16315 mem = change_address (src, SImode, out);
16316 emit_move_insn (scratch, mem);
16317 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
16319 /* This formula yields a nonzero result iff one of the bytes is zero.
16320 This saves three branches inside the loop and many cycles. */
16322 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16323 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16324 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16325 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16326 gen_int_mode (0x80808080, SImode)));
16327 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16328 align_4_label);
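/* Illustration (not from the original sources) of the zero-byte test
   emitted above, written as plain C:

     has_zero = (x - 0x01010101) & ~x & 0x80808080;

   e.g. x = 0x41420043 (a zero in byte 1) gives 0x00008000, while
   x = 0x41424344 gives 0, so the loop above exits exactly when the
   loaded word contains a zero byte.  */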
16330 if (TARGET_CMOVE)
16332 rtx reg = gen_reg_rtx (SImode);
16333 rtx reg2 = gen_reg_rtx (Pmode);
16334 emit_move_insn (reg, tmpreg);
16335 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16337 /* If zero is not in the first two bytes, move two bytes forward. */
16338 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16339 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16340 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16341 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16342 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16343 reg,
16344 tmpreg)));
16345 /* Emit lea manually to avoid clobbering of flags. */
16346 emit_insn (gen_rtx_SET (SImode, reg2,
16347 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16349 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16350 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16351 emit_insn (gen_rtx_SET (VOIDmode, out,
16352 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16353 reg2,
16354 out)));
16357 else
16359 rtx end_2_label = gen_label_rtx ();
16360 /* Is zero in the first two bytes? */
16362 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16363 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16364 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16365 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16366 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16367 pc_rtx);
16368 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16369 JUMP_LABEL (tmp) = end_2_label;
16371 /* Not in the first two. Move two bytes forward. */
16372 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16373 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
16375 emit_label (end_2_label);
16379 /* Avoid branch in fixing the byte. */
16380 tmpreg = gen_lowpart (QImode, tmpreg);
16381 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16382 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16383 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
16385 emit_label (end_0_label);
16388 /* Expand strlen. */
16391 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16393 rtx addr, scratch1, scratch2, scratch3, scratch4;
16395 /* The generic case of the strlen expander is long. Avoid
16396 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
16398 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16399 && !TARGET_INLINE_ALL_STRINGOPS
16400 && !optimize_size
16401 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16402 return 0;
16404 addr = force_reg (Pmode, XEXP (src, 0));
16405 scratch1 = gen_reg_rtx (Pmode);
16407 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16408 && !optimize_size)
16410 /* Well it seems that some optimizer does not combine a call like
16411 foo(strlen(bar), strlen(bar));
16412 when the move and the subtraction are done here. It does calculate
16413 the length just once when these instructions are done inside of
16414 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16415 often used and I use one fewer register for the lifetime of
16416 output_strlen_unroll() this is better. */
16418 emit_move_insn (out, addr);
16420 ix86_expand_strlensi_unroll_1 (out, src, align);
16422 /* strlensi_unroll_1 returns the address of the zero at the end of
16423 the string, like memchr(), so compute the length by subtracting
16424 the start address. */
16425 emit_insn ((*ix86_gen_sub3) (out, out, addr));
16427 else
16429 rtx unspec;
16431 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16432 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16433 return false;
16435 scratch2 = gen_reg_rtx (Pmode);
16436 scratch3 = gen_reg_rtx (Pmode);
16437 scratch4 = force_reg (Pmode, constm1_rtx);
16439 emit_move_insn (scratch3, addr);
16440 eoschar = force_reg (QImode, eoschar);
16442 src = replace_equiv_address_nv (src, scratch3);
16444 /* If .md starts supporting :P, this can be done in .md. */
16445 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16446 scratch4), UNSPEC_SCAS);
16447 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16448 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
16449 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
16451 return 1;
16454 /* For a given symbol (function), construct code to compute the address of its
16455 PLT entry in the large x86-64 PIC model. */
16457 construct_plt_address (rtx symbol)
16459 rtx tmp = gen_reg_rtx (Pmode);
16460 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16462 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16463 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16465 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16466 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16467 return tmp;
16470 void
16471 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16472 rtx callarg2 ATTRIBUTE_UNUSED,
16473 rtx pop, int sibcall)
16475 rtx use = NULL, call;
16477 if (pop == const0_rtx)
16478 pop = NULL;
16479 gcc_assert (!TARGET_64BIT || !pop);
16481 if (TARGET_MACHO && !TARGET_64BIT)
16483 #if TARGET_MACHO
16484 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16485 fnaddr = machopic_indirect_call_target (fnaddr);
16486 #endif
16488 else
16490 /* Static functions and indirect calls don't need the pic register. */
16491 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16492 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16493 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16494 use_reg (&use, pic_offset_table_rtx);
16497 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16499 rtx al = gen_rtx_REG (QImode, AX_REG);
16500 emit_move_insn (al, callarg2);
16501 use_reg (&use, al);
16504 if (ix86_cmodel == CM_LARGE_PIC
16505 && GET_CODE (fnaddr) == MEM
16506 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16507 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16508 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16509 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16511 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16512 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16514 if (sibcall && TARGET_64BIT
16515 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16517 rtx addr;
16518 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16519 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16520 emit_move_insn (fnaddr, addr);
16521 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16524 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16525 if (retval)
16526 call = gen_rtx_SET (VOIDmode, retval, call);
16527 if (pop)
16529 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16530 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16531 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16534 call = emit_call_insn (call);
16535 if (use)
16536 CALL_INSN_FUNCTION_USAGE (call) = use;
16540 /* Clear stack slot assignments remembered from previous functions.
16541 This is called from INIT_EXPANDERS once before RTL is emitted for each
16542 function. */
16544 static struct machine_function *
16545 ix86_init_machine_status (void)
16547 struct machine_function *f;
16549 f = GGC_CNEW (struct machine_function);
16550 f->use_fast_prologue_epilogue_nregs = -1;
16551 f->tls_descriptor_call_expanded_p = 0;
16552 f->call_abi = DEFAULT_ABI;
16554 return f;
16557 /* Return a MEM corresponding to a stack slot with mode MODE.
16558 Allocate a new slot if necessary.
16560 The RTL for a function can have several slots available: N is
16561 which slot to use. */
16564 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16566 struct stack_local_entry *s;
16568 gcc_assert (n < MAX_386_STACK_LOCALS);
16570 /* Virtual slot is valid only before vregs are instantiated. */
16571 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16573 for (s = ix86_stack_locals; s; s = s->next)
16574 if (s->mode == mode && s->n == n)
16575 return copy_rtx (s->rtl);
16577 s = (struct stack_local_entry *)
16578 ggc_alloc (sizeof (struct stack_local_entry));
16579 s->n = n;
16580 s->mode = mode;
16581 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16583 s->next = ix86_stack_locals;
16584 ix86_stack_locals = s;
16585 return s->rtl;
16588 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16590 static GTY(()) rtx ix86_tls_symbol;
16592 ix86_tls_get_addr (void)
16595 if (!ix86_tls_symbol)
16597 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16598 (TARGET_ANY_GNU_TLS
16599 && !TARGET_64BIT)
16600 ? "___tls_get_addr"
16601 : "__tls_get_addr");
16604 return ix86_tls_symbol;
16607 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16609 static GTY(()) rtx ix86_tls_module_base_symbol;
16611 ix86_tls_module_base (void)
16614 if (!ix86_tls_module_base_symbol)
16616 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16617 "_TLS_MODULE_BASE_");
16618 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16619 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16622 return ix86_tls_module_base_symbol;
16625 /* Calculate the length of the memory address in the instruction
16626 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16629 memory_address_length (rtx addr)
16631 struct ix86_address parts;
16632 rtx base, index, disp;
16633 int len;
16634 int ok;
16636 if (GET_CODE (addr) == PRE_DEC
16637 || GET_CODE (addr) == POST_INC
16638 || GET_CODE (addr) == PRE_MODIFY
16639 || GET_CODE (addr) == POST_MODIFY)
16640 return 0;
16642 ok = ix86_decompose_address (addr, &parts);
16643 gcc_assert (ok);
16645 if (parts.base && GET_CODE (parts.base) == SUBREG)
16646 parts.base = SUBREG_REG (parts.base);
16647 if (parts.index && GET_CODE (parts.index) == SUBREG)
16648 parts.index = SUBREG_REG (parts.index);
16650 base = parts.base;
16651 index = parts.index;
16652 disp = parts.disp;
16653 len = 0;
16655 /* Rule of thumb:
16656 - esp as the base always wants an index,
16657 - ebp as the base always wants a displacement. */
16659 /* Register Indirect. */
16660 if (base && !index && !disp)
16662 /* esp (for its index) and ebp (for its displacement) need
16663 the two-byte modrm form. */
16664 if (addr == stack_pointer_rtx
16665 || addr == arg_pointer_rtx
16666 || addr == frame_pointer_rtx
16667 || addr == hard_frame_pointer_rtx)
16668 len = 1;
16671 /* Direct Addressing. */
16672 else if (disp && !base && !index)
16673 len = 4;
16675 else
16677 /* Find the length of the displacement constant. */
16678 if (disp)
16680 if (base && satisfies_constraint_K (disp))
16681 len = 1;
16682 else
16683 len = 4;
16685 /* ebp always wants a displacement. */
16686 else if (base == hard_frame_pointer_rtx)
16687 len = 1;
16689 /* An index requires the two-byte modrm form.... */
16690 if (index
16691 /* ...like esp, which always wants an index. */
16692 || base == stack_pointer_rtx
16693 || base == arg_pointer_rtx
16694 || base == frame_pointer_rtx)
16695 len += 1;
16698 return len;
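/* Illustration only (hypothetical operands): the extra address bytes counted
   above, beyond the modrm/opcode/prefix bytes, are for example

     (%eax)            -> 0
     (%esp)            -> 1   (needs a SIB byte)
     8(%ebp)           -> 1   (disp8)
     symbol            -> 4   (disp32 with no base or index)
     16(%eax,%ecx,4)   -> 2   (SIB byte + disp8)  */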
16701 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
16702 is set, expect that the insn has an 8-bit immediate alternative. */
16704 ix86_attr_length_immediate_default (rtx insn, int shortform)
16706 int len = 0;
16707 int i;
16708 extract_insn_cached (insn);
16709 for (i = recog_data.n_operands - 1; i >= 0; --i)
16710 if (CONSTANT_P (recog_data.operand[i]))
16712 gcc_assert (!len);
16713 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16714 len = 1;
16715 else
16717 switch (get_attr_mode (insn))
16719 case MODE_QI:
16720 len+=1;
16721 break;
16722 case MODE_HI:
16723 len+=2;
16724 break;
16725 case MODE_SI:
16726 len+=4;
16727 break;
16728 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
16729 case MODE_DI:
16730 len+=4;
16731 break;
16732 default:
16733 fatal_insn ("unknown insn mode", insn);
16737 return len;
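/* Illustration (hypothetical insns): "addl $3, %eax" with a short-form
   alternative counts 1 immediate byte, "addl $1000, %eax" counts 4, and a
   DImode "addq $1000, %rax" also counts 4, because DImode immediates are
   encoded as 32-bit sign-extended values.  */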
16739 /* Compute default value for "length_address" attribute. */
16741 ix86_attr_length_address_default (rtx insn)
16743 int i;
16745 if (get_attr_type (insn) == TYPE_LEA)
16747 rtx set = PATTERN (insn);
16749 if (GET_CODE (set) == PARALLEL)
16750 set = XVECEXP (set, 0, 0);
16752 gcc_assert (GET_CODE (set) == SET);
16754 return memory_address_length (SET_SRC (set));
16757 extract_insn_cached (insn);
16758 for (i = recog_data.n_operands - 1; i >= 0; --i)
16759 if (MEM_P (recog_data.operand[i]))
16761 return memory_address_length (XEXP (recog_data.operand[i], 0));
16762 break;
16764 return 0;
16767 /* Return the maximum number of instructions a cpu can issue. */
16769 static int
16770 ix86_issue_rate (void)
16772 switch (ix86_tune)
16774 case PROCESSOR_PENTIUM:
16775 case PROCESSOR_K6:
16776 return 2;
16778 case PROCESSOR_PENTIUMPRO:
16779 case PROCESSOR_PENTIUM4:
16780 case PROCESSOR_ATHLON:
16781 case PROCESSOR_K8:
16782 case PROCESSOR_AMDFAM10:
16783 case PROCESSOR_NOCONA:
16784 case PROCESSOR_GENERIC32:
16785 case PROCESSOR_GENERIC64:
16786 return 3;
16788 case PROCESSOR_CORE2:
16789 return 4;
16791 default:
16792 return 1;
16796 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16797 by DEP_INSN and nothing set by DEP_INSN. */
16799 static int
16800 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16802 rtx set, set2;
16804 /* Simplify the test for uninteresting insns. */
16805 if (insn_type != TYPE_SETCC
16806 && insn_type != TYPE_ICMOV
16807 && insn_type != TYPE_FCMOV
16808 && insn_type != TYPE_IBR)
16809 return 0;
16811 if ((set = single_set (dep_insn)) != 0)
16813 set = SET_DEST (set);
16814 set2 = NULL_RTX;
16816 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16817 && XVECLEN (PATTERN (dep_insn), 0) == 2
16818 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16819 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16821 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16822 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16824 else
16825 return 0;
16827 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16828 return 0;
16830 /* This test is true if the dependent insn reads the flags but
16831 not any other potentially set register. */
16832 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16833 return 0;
16835 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16836 return 0;
16838 return 1;
16841 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16842 address with operands set by DEP_INSN. */
16844 static int
16845 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16847 rtx addr;
16849 if (insn_type == TYPE_LEA
16850 && TARGET_PENTIUM)
16852 addr = PATTERN (insn);
16854 if (GET_CODE (addr) == PARALLEL)
16855 addr = XVECEXP (addr, 0, 0);
16857 gcc_assert (GET_CODE (addr) == SET);
16859 addr = SET_SRC (addr);
16861 else
16863 int i;
16864 extract_insn_cached (insn);
16865 for (i = recog_data.n_operands - 1; i >= 0; --i)
16866 if (MEM_P (recog_data.operand[i]))
16868 addr = XEXP (recog_data.operand[i], 0);
16869 goto found;
16871 return 0;
16872 found:;
16875 return modified_in_p (addr, dep_insn);
16878 static int
16879 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16881 enum attr_type insn_type, dep_insn_type;
16882 enum attr_memory memory;
16883 rtx set, set2;
16884 int dep_insn_code_number;
16886 /* Anti and output dependencies have zero cost on all CPUs. */
16887 if (REG_NOTE_KIND (link) != 0)
16888 return 0;
16890 dep_insn_code_number = recog_memoized (dep_insn);
16892 /* If we can't recognize the insns, we can't really do anything. */
16893 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16894 return cost;
16896 insn_type = get_attr_type (insn);
16897 dep_insn_type = get_attr_type (dep_insn);
16899 switch (ix86_tune)
16901 case PROCESSOR_PENTIUM:
16902 /* Address Generation Interlock adds a cycle of latency. */
16903 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16904 cost += 1;
16906 /* ??? Compares pair with jump/setcc. */
16907 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16908 cost = 0;
16910 /* Floating point stores require value to be ready one cycle earlier. */
16911 if (insn_type == TYPE_FMOV
16912 && get_attr_memory (insn) == MEMORY_STORE
16913 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16914 cost += 1;
16915 break;
16917 case PROCESSOR_PENTIUMPRO:
16918 memory = get_attr_memory (insn);
16920 /* INT->FP conversion is expensive. */
16921 if (get_attr_fp_int_src (dep_insn))
16922 cost += 5;
16924 /* There is one cycle extra latency between an FP op and a store. */
16925 if (insn_type == TYPE_FMOV
16926 && (set = single_set (dep_insn)) != NULL_RTX
16927 && (set2 = single_set (insn)) != NULL_RTX
16928 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16929 && MEM_P (SET_DEST (set2)))
16930 cost += 1;
16932 /* Show the ability of the reorder buffer to hide the latency of a load by
16933 executing it in parallel with the previous instruction, in case the
16934 previous instruction is not needed to compute the address. */
16935 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16936 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16938 /* Claim moves to take one cycle, as the core can issue one load
16939 at a time and the next load can start a cycle later. */
16940 if (dep_insn_type == TYPE_IMOV
16941 || dep_insn_type == TYPE_FMOV)
16942 cost = 1;
16943 else if (cost > 1)
16944 cost--;
16946 break;
16948 case PROCESSOR_K6:
16949 memory = get_attr_memory (insn);
16951 /* The esp dependency is resolved before the instruction is really
16952 finished. */
16953 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16954 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16955 return 1;
16957 /* INT->FP conversion is expensive. */
16958 if (get_attr_fp_int_src (dep_insn))
16959 cost += 5;
16961 /* Show the ability of the reorder buffer to hide the latency of a load by
16962 executing it in parallel with the previous instruction, in case the
16963 previous instruction is not needed to compute the address. */
16964 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16965 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16967 /* Claim moves to take one cycle, as the core can issue one load
16968 at a time and the next load can start a cycle later. */
16969 if (dep_insn_type == TYPE_IMOV
16970 || dep_insn_type == TYPE_FMOV)
16971 cost = 1;
16972 else if (cost > 2)
16973 cost -= 2;
16974 else
16975 cost = 1;
16977 break;
16979 case PROCESSOR_ATHLON:
16980 case PROCESSOR_K8:
16981 case PROCESSOR_AMDFAM10:
16982 case PROCESSOR_GENERIC32:
16983 case PROCESSOR_GENERIC64:
16984 memory = get_attr_memory (insn);
16986 /* Show the ability of the reorder buffer to hide the latency of a load by
16987 executing it in parallel with the previous instruction, in case the
16988 previous instruction is not needed to compute the address. */
16989 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16990 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16992 enum attr_unit unit = get_attr_unit (insn);
16993 int loadcost = 3;
16995 /* Because of the difference between the length of integer and
16996 floating unit pipeline preparation stages, the memory operands
16997 for floating point are cheaper.
16999 ??? For Athlon the difference is most probably 2. */
17000 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
17001 loadcost = 3;
17002 else
17003 loadcost = TARGET_ATHLON ? 2 : 0;
17005 if (cost >= loadcost)
17006 cost -= loadcost;
17007 else
17008 cost = 0;
17011 default:
17012 break;
17015 return cost;
17018 /* How many alternative schedules to try. This should be as wide as the
17019 scheduling freedom in the DFA, but no wider. Making this value too
17020 large results in extra work for the scheduler. */
17022 static int
17023 ia32_multipass_dfa_lookahead (void)
17025 switch (ix86_tune)
17027 case PROCESSOR_PENTIUM:
17028 return 2;
17030 case PROCESSOR_PENTIUMPRO:
17031 case PROCESSOR_K6:
17032 return 1;
17034 default:
17035 return 0;
17040 /* Compute the alignment given to a constant that is being placed in memory.
17041 EXP is the constant and ALIGN is the alignment that the object would
17042 ordinarily have.
17043 The value of this function is used instead of that alignment to align
17044 the object. */
17047 ix86_constant_alignment (tree exp, int align)
17049 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17050 || TREE_CODE (exp) == INTEGER_CST)
17052 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
17053 return 64;
17054 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
17055 return 128;
17057 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17058 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17059 return BITS_PER_WORD;
17061 return align;
17064 /* Compute the alignment for a static variable.
17065 TYPE is the data type, and ALIGN is the alignment that
17066 the object would ordinarily have. The value of this function is used
17067 instead of that alignment to align the object. */
17070 ix86_data_alignment (tree type, int align)
17072 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
17074 if (AGGREGATE_TYPE_P (type)
17075 && TYPE_SIZE (type)
17076 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17077 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
17078 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
17079 && align < max_align)
17080 align = max_align;
17082 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
17083 to a 16-byte boundary. */
17084 if (TARGET_64BIT)
17086 if (AGGREGATE_TYPE_P (type)
17087 && TYPE_SIZE (type)
17088 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17089 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
17090 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17091 return 128;
17094 if (TREE_CODE (type) == ARRAY_TYPE)
17096 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17097 return 64;
17098 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17099 return 128;
17101 else if (TREE_CODE (type) == COMPLEX_TYPE)
17104 if (TYPE_MODE (type) == DCmode && align < 64)
17105 return 64;
17106 if (TYPE_MODE (type) == XCmode && align < 128)
17107 return 128;
17109 else if ((TREE_CODE (type) == RECORD_TYPE
17110 || TREE_CODE (type) == UNION_TYPE
17111 || TREE_CODE (type) == QUAL_UNION_TYPE)
17112 && TYPE_FIELDS (type))
17114 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17115 return 64;
17116 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17117 return 128;
17119 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17120 || TREE_CODE (type) == INTEGER_TYPE)
17122 if (TYPE_MODE (type) == DFmode && align < 64)
17123 return 64;
17124 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17125 return 128;
17128 return align;
17131 /* Compute the alignment for a local variable or a stack slot. TYPE is
17132 the data type, MODE is the widest mode available and ALIGN is the
17133 alignment that the object would ordinarily have. The value of this
17134 macro is used instead of that alignment to align the object. */
17136 unsigned int
17137 ix86_local_alignment (tree type, enum machine_mode mode,
17138 unsigned int align)
17140 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17141 register in MODE. We will return the largest alignment of XF
17142 and DF. */
17143 if (!type)
17145 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17146 align = GET_MODE_ALIGNMENT (DFmode);
17147 return align;
17150 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
17151 to a 16-byte boundary. */
17152 if (TARGET_64BIT)
17154 if (AGGREGATE_TYPE_P (type)
17155 && TYPE_SIZE (type)
17156 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17157 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
17158 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17159 return 128;
17161 if (TREE_CODE (type) == ARRAY_TYPE)
17163 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17164 return 64;
17165 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17166 return 128;
17168 else if (TREE_CODE (type) == COMPLEX_TYPE)
17170 if (TYPE_MODE (type) == DCmode && align < 64)
17171 return 64;
17172 if (TYPE_MODE (type) == XCmode && align < 128)
17173 return 128;
17175 else if ((TREE_CODE (type) == RECORD_TYPE
17176 || TREE_CODE (type) == UNION_TYPE
17177 || TREE_CODE (type) == QUAL_UNION_TYPE)
17178 && TYPE_FIELDS (type))
17180 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17181 return 64;
17182 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17183 return 128;
17185 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17186 || TREE_CODE (type) == INTEGER_TYPE)
17189 if (TYPE_MODE (type) == DFmode && align < 64)
17190 return 64;
17191 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17192 return 128;
17194 return align;
17197 /* Emit RTL insns to initialize the variable parts of a trampoline.
17198 FNADDR is an RTX for the address of the function's pure code.
17199 CXT is an RTX for the static chain value for the function. */
17200 void
17201 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17203 if (!TARGET_64BIT)
17205 /* Compute offset from the end of the jmp to the target function. */
17206 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17207 plus_constant (tramp, 10),
17208 NULL_RTX, 1, OPTAB_DIRECT);
17209 emit_move_insn (gen_rtx_MEM (QImode, tramp),
17210 gen_int_mode (0xb9, QImode));
17211 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17212 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17213 gen_int_mode (0xe9, QImode));
17214 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17216 else
17218 int offset = 0;
17219 /* Try to load the address using the shorter movl instead of movabs.
17220 We may want to support movq for kernel mode, but the kernel does not use
17221 trampolines at the moment. */
17222 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17224 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17225 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17226 gen_int_mode (0xbb41, HImode));
17227 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17228 gen_lowpart (SImode, fnaddr));
17229 offset += 6;
17231 else
17233 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17234 gen_int_mode (0xbb49, HImode));
17235 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17236 fnaddr);
17237 offset += 10;
17239 /* Load static chain using movabs to r10. */
17240 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17241 gen_int_mode (0xba49, HImode));
17242 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17243 cxt);
17244 offset += 10;
17245 /* Jump to r11.  */
17246 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17247 gen_int_mode (0xff49, HImode));
17248 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17249 gen_int_mode (0xe3, QImode));
17250 offset += 3;
17251 gcc_assert (offset <= TRAMPOLINE_SIZE);
17254 #ifdef ENABLE_EXECUTE_STACK
17255 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17256 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17257 #endif
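/* Illustration only: the bytes stored by the code above form, for the
   32-bit trampoline,

     b9 <cxt:4>        movl   $cxt, %ecx
     e9 <disp:4>       jmp    fnaddr       (disp relative to the end of the jmp)

   and for the 64-bit trampoline roughly

     49 bb <fnaddr:8>  movabs $fnaddr, %r11   (or 41 bb <imm:4> when a
                                               zero-extended 32-bit immediate fits)
     49 ba <cxt:8>     movabs $cxt, %r10
     49 ff e3          jmp    *%r11  */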
17260 /* Codes for all the SSE/MMX builtins. */
17261 enum ix86_builtins
17263 IX86_BUILTIN_ADDPS,
17264 IX86_BUILTIN_ADDSS,
17265 IX86_BUILTIN_DIVPS,
17266 IX86_BUILTIN_DIVSS,
17267 IX86_BUILTIN_MULPS,
17268 IX86_BUILTIN_MULSS,
17269 IX86_BUILTIN_SUBPS,
17270 IX86_BUILTIN_SUBSS,
17272 IX86_BUILTIN_CMPEQPS,
17273 IX86_BUILTIN_CMPLTPS,
17274 IX86_BUILTIN_CMPLEPS,
17275 IX86_BUILTIN_CMPGTPS,
17276 IX86_BUILTIN_CMPGEPS,
17277 IX86_BUILTIN_CMPNEQPS,
17278 IX86_BUILTIN_CMPNLTPS,
17279 IX86_BUILTIN_CMPNLEPS,
17280 IX86_BUILTIN_CMPNGTPS,
17281 IX86_BUILTIN_CMPNGEPS,
17282 IX86_BUILTIN_CMPORDPS,
17283 IX86_BUILTIN_CMPUNORDPS,
17284 IX86_BUILTIN_CMPEQSS,
17285 IX86_BUILTIN_CMPLTSS,
17286 IX86_BUILTIN_CMPLESS,
17287 IX86_BUILTIN_CMPNEQSS,
17288 IX86_BUILTIN_CMPNLTSS,
17289 IX86_BUILTIN_CMPNLESS,
17290 IX86_BUILTIN_CMPNGTSS,
17291 IX86_BUILTIN_CMPNGESS,
17292 IX86_BUILTIN_CMPORDSS,
17293 IX86_BUILTIN_CMPUNORDSS,
17295 IX86_BUILTIN_COMIEQSS,
17296 IX86_BUILTIN_COMILTSS,
17297 IX86_BUILTIN_COMILESS,
17298 IX86_BUILTIN_COMIGTSS,
17299 IX86_BUILTIN_COMIGESS,
17300 IX86_BUILTIN_COMINEQSS,
17301 IX86_BUILTIN_UCOMIEQSS,
17302 IX86_BUILTIN_UCOMILTSS,
17303 IX86_BUILTIN_UCOMILESS,
17304 IX86_BUILTIN_UCOMIGTSS,
17305 IX86_BUILTIN_UCOMIGESS,
17306 IX86_BUILTIN_UCOMINEQSS,
17308 IX86_BUILTIN_CVTPI2PS,
17309 IX86_BUILTIN_CVTPS2PI,
17310 IX86_BUILTIN_CVTSI2SS,
17311 IX86_BUILTIN_CVTSI642SS,
17312 IX86_BUILTIN_CVTSS2SI,
17313 IX86_BUILTIN_CVTSS2SI64,
17314 IX86_BUILTIN_CVTTPS2PI,
17315 IX86_BUILTIN_CVTTSS2SI,
17316 IX86_BUILTIN_CVTTSS2SI64,
17318 IX86_BUILTIN_MAXPS,
17319 IX86_BUILTIN_MAXSS,
17320 IX86_BUILTIN_MINPS,
17321 IX86_BUILTIN_MINSS,
17323 IX86_BUILTIN_LOADUPS,
17324 IX86_BUILTIN_STOREUPS,
17325 IX86_BUILTIN_MOVSS,
17327 IX86_BUILTIN_MOVHLPS,
17328 IX86_BUILTIN_MOVLHPS,
17329 IX86_BUILTIN_LOADHPS,
17330 IX86_BUILTIN_LOADLPS,
17331 IX86_BUILTIN_STOREHPS,
17332 IX86_BUILTIN_STORELPS,
17334 IX86_BUILTIN_MASKMOVQ,
17335 IX86_BUILTIN_MOVMSKPS,
17336 IX86_BUILTIN_PMOVMSKB,
17338 IX86_BUILTIN_MOVNTPS,
17339 IX86_BUILTIN_MOVNTQ,
17341 IX86_BUILTIN_LOADDQU,
17342 IX86_BUILTIN_STOREDQU,
17344 IX86_BUILTIN_PACKSSWB,
17345 IX86_BUILTIN_PACKSSDW,
17346 IX86_BUILTIN_PACKUSWB,
17348 IX86_BUILTIN_PADDB,
17349 IX86_BUILTIN_PADDW,
17350 IX86_BUILTIN_PADDD,
17351 IX86_BUILTIN_PADDQ,
17352 IX86_BUILTIN_PADDSB,
17353 IX86_BUILTIN_PADDSW,
17354 IX86_BUILTIN_PADDUSB,
17355 IX86_BUILTIN_PADDUSW,
17356 IX86_BUILTIN_PSUBB,
17357 IX86_BUILTIN_PSUBW,
17358 IX86_BUILTIN_PSUBD,
17359 IX86_BUILTIN_PSUBQ,
17360 IX86_BUILTIN_PSUBSB,
17361 IX86_BUILTIN_PSUBSW,
17362 IX86_BUILTIN_PSUBUSB,
17363 IX86_BUILTIN_PSUBUSW,
17365 IX86_BUILTIN_PAND,
17366 IX86_BUILTIN_PANDN,
17367 IX86_BUILTIN_POR,
17368 IX86_BUILTIN_PXOR,
17370 IX86_BUILTIN_PAVGB,
17371 IX86_BUILTIN_PAVGW,
17373 IX86_BUILTIN_PCMPEQB,
17374 IX86_BUILTIN_PCMPEQW,
17375 IX86_BUILTIN_PCMPEQD,
17376 IX86_BUILTIN_PCMPGTB,
17377 IX86_BUILTIN_PCMPGTW,
17378 IX86_BUILTIN_PCMPGTD,
17380 IX86_BUILTIN_PMADDWD,
17382 IX86_BUILTIN_PMAXSW,
17383 IX86_BUILTIN_PMAXUB,
17384 IX86_BUILTIN_PMINSW,
17385 IX86_BUILTIN_PMINUB,
17387 IX86_BUILTIN_PMULHUW,
17388 IX86_BUILTIN_PMULHW,
17389 IX86_BUILTIN_PMULLW,
17391 IX86_BUILTIN_PSADBW,
17392 IX86_BUILTIN_PSHUFW,
17394 IX86_BUILTIN_PSLLW,
17395 IX86_BUILTIN_PSLLD,
17396 IX86_BUILTIN_PSLLQ,
17397 IX86_BUILTIN_PSRAW,
17398 IX86_BUILTIN_PSRAD,
17399 IX86_BUILTIN_PSRLW,
17400 IX86_BUILTIN_PSRLD,
17401 IX86_BUILTIN_PSRLQ,
17402 IX86_BUILTIN_PSLLWI,
17403 IX86_BUILTIN_PSLLDI,
17404 IX86_BUILTIN_PSLLQI,
17405 IX86_BUILTIN_PSRAWI,
17406 IX86_BUILTIN_PSRADI,
17407 IX86_BUILTIN_PSRLWI,
17408 IX86_BUILTIN_PSRLDI,
17409 IX86_BUILTIN_PSRLQI,
17411 IX86_BUILTIN_PUNPCKHBW,
17412 IX86_BUILTIN_PUNPCKHWD,
17413 IX86_BUILTIN_PUNPCKHDQ,
17414 IX86_BUILTIN_PUNPCKLBW,
17415 IX86_BUILTIN_PUNPCKLWD,
17416 IX86_BUILTIN_PUNPCKLDQ,
17418 IX86_BUILTIN_SHUFPS,
17420 IX86_BUILTIN_RCPPS,
17421 IX86_BUILTIN_RCPSS,
17422 IX86_BUILTIN_RSQRTPS,
17423 IX86_BUILTIN_RSQRTPS_NR,
17424 IX86_BUILTIN_RSQRTSS,
17425 IX86_BUILTIN_RSQRTF,
17426 IX86_BUILTIN_SQRTPS,
17427 IX86_BUILTIN_SQRTPS_NR,
17428 IX86_BUILTIN_SQRTSS,
17430 IX86_BUILTIN_UNPCKHPS,
17431 IX86_BUILTIN_UNPCKLPS,
17433 IX86_BUILTIN_ANDPS,
17434 IX86_BUILTIN_ANDNPS,
17435 IX86_BUILTIN_ORPS,
17436 IX86_BUILTIN_XORPS,
17438 IX86_BUILTIN_EMMS,
17439 IX86_BUILTIN_LDMXCSR,
17440 IX86_BUILTIN_STMXCSR,
17441 IX86_BUILTIN_SFENCE,
17443 /* 3DNow! Original */
17444 IX86_BUILTIN_FEMMS,
17445 IX86_BUILTIN_PAVGUSB,
17446 IX86_BUILTIN_PF2ID,
17447 IX86_BUILTIN_PFACC,
17448 IX86_BUILTIN_PFADD,
17449 IX86_BUILTIN_PFCMPEQ,
17450 IX86_BUILTIN_PFCMPGE,
17451 IX86_BUILTIN_PFCMPGT,
17452 IX86_BUILTIN_PFMAX,
17453 IX86_BUILTIN_PFMIN,
17454 IX86_BUILTIN_PFMUL,
17455 IX86_BUILTIN_PFRCP,
17456 IX86_BUILTIN_PFRCPIT1,
17457 IX86_BUILTIN_PFRCPIT2,
17458 IX86_BUILTIN_PFRSQIT1,
17459 IX86_BUILTIN_PFRSQRT,
17460 IX86_BUILTIN_PFSUB,
17461 IX86_BUILTIN_PFSUBR,
17462 IX86_BUILTIN_PI2FD,
17463 IX86_BUILTIN_PMULHRW,
17465 /* 3DNow! Athlon Extensions */
17466 IX86_BUILTIN_PF2IW,
17467 IX86_BUILTIN_PFNACC,
17468 IX86_BUILTIN_PFPNACC,
17469 IX86_BUILTIN_PI2FW,
17470 IX86_BUILTIN_PSWAPDSI,
17471 IX86_BUILTIN_PSWAPDSF,
17473 /* SSE2 */
17474 IX86_BUILTIN_ADDPD,
17475 IX86_BUILTIN_ADDSD,
17476 IX86_BUILTIN_DIVPD,
17477 IX86_BUILTIN_DIVSD,
17478 IX86_BUILTIN_MULPD,
17479 IX86_BUILTIN_MULSD,
17480 IX86_BUILTIN_SUBPD,
17481 IX86_BUILTIN_SUBSD,
17483 IX86_BUILTIN_CMPEQPD,
17484 IX86_BUILTIN_CMPLTPD,
17485 IX86_BUILTIN_CMPLEPD,
17486 IX86_BUILTIN_CMPGTPD,
17487 IX86_BUILTIN_CMPGEPD,
17488 IX86_BUILTIN_CMPNEQPD,
17489 IX86_BUILTIN_CMPNLTPD,
17490 IX86_BUILTIN_CMPNLEPD,
17491 IX86_BUILTIN_CMPNGTPD,
17492 IX86_BUILTIN_CMPNGEPD,
17493 IX86_BUILTIN_CMPORDPD,
17494 IX86_BUILTIN_CMPUNORDPD,
17495 IX86_BUILTIN_CMPEQSD,
17496 IX86_BUILTIN_CMPLTSD,
17497 IX86_BUILTIN_CMPLESD,
17498 IX86_BUILTIN_CMPNEQSD,
17499 IX86_BUILTIN_CMPNLTSD,
17500 IX86_BUILTIN_CMPNLESD,
17501 IX86_BUILTIN_CMPORDSD,
17502 IX86_BUILTIN_CMPUNORDSD,
17504 IX86_BUILTIN_COMIEQSD,
17505 IX86_BUILTIN_COMILTSD,
17506 IX86_BUILTIN_COMILESD,
17507 IX86_BUILTIN_COMIGTSD,
17508 IX86_BUILTIN_COMIGESD,
17509 IX86_BUILTIN_COMINEQSD,
17510 IX86_BUILTIN_UCOMIEQSD,
17511 IX86_BUILTIN_UCOMILTSD,
17512 IX86_BUILTIN_UCOMILESD,
17513 IX86_BUILTIN_UCOMIGTSD,
17514 IX86_BUILTIN_UCOMIGESD,
17515 IX86_BUILTIN_UCOMINEQSD,
17517 IX86_BUILTIN_MAXPD,
17518 IX86_BUILTIN_MAXSD,
17519 IX86_BUILTIN_MINPD,
17520 IX86_BUILTIN_MINSD,
17522 IX86_BUILTIN_ANDPD,
17523 IX86_BUILTIN_ANDNPD,
17524 IX86_BUILTIN_ORPD,
17525 IX86_BUILTIN_XORPD,
17527 IX86_BUILTIN_SQRTPD,
17528 IX86_BUILTIN_SQRTSD,
17530 IX86_BUILTIN_UNPCKHPD,
17531 IX86_BUILTIN_UNPCKLPD,
17533 IX86_BUILTIN_SHUFPD,
17535 IX86_BUILTIN_LOADUPD,
17536 IX86_BUILTIN_STOREUPD,
17537 IX86_BUILTIN_MOVSD,
17539 IX86_BUILTIN_LOADHPD,
17540 IX86_BUILTIN_LOADLPD,
17542 IX86_BUILTIN_CVTDQ2PD,
17543 IX86_BUILTIN_CVTDQ2PS,
17545 IX86_BUILTIN_CVTPD2DQ,
17546 IX86_BUILTIN_CVTPD2PI,
17547 IX86_BUILTIN_CVTPD2PS,
17548 IX86_BUILTIN_CVTTPD2DQ,
17549 IX86_BUILTIN_CVTTPD2PI,
17551 IX86_BUILTIN_CVTPI2PD,
17552 IX86_BUILTIN_CVTSI2SD,
17553 IX86_BUILTIN_CVTSI642SD,
17555 IX86_BUILTIN_CVTSD2SI,
17556 IX86_BUILTIN_CVTSD2SI64,
17557 IX86_BUILTIN_CVTSD2SS,
17558 IX86_BUILTIN_CVTSS2SD,
17559 IX86_BUILTIN_CVTTSD2SI,
17560 IX86_BUILTIN_CVTTSD2SI64,
17562 IX86_BUILTIN_CVTPS2DQ,
17563 IX86_BUILTIN_CVTPS2PD,
17564 IX86_BUILTIN_CVTTPS2DQ,
17566 IX86_BUILTIN_MOVNTI,
17567 IX86_BUILTIN_MOVNTPD,
17568 IX86_BUILTIN_MOVNTDQ,
17570 /* SSE2 MMX */
17571 IX86_BUILTIN_MASKMOVDQU,
17572 IX86_BUILTIN_MOVMSKPD,
17573 IX86_BUILTIN_PMOVMSKB128,
17575 IX86_BUILTIN_PACKSSWB128,
17576 IX86_BUILTIN_PACKSSDW128,
17577 IX86_BUILTIN_PACKUSWB128,
17579 IX86_BUILTIN_PADDB128,
17580 IX86_BUILTIN_PADDW128,
17581 IX86_BUILTIN_PADDD128,
17582 IX86_BUILTIN_PADDQ128,
17583 IX86_BUILTIN_PADDSB128,
17584 IX86_BUILTIN_PADDSW128,
17585 IX86_BUILTIN_PADDUSB128,
17586 IX86_BUILTIN_PADDUSW128,
17587 IX86_BUILTIN_PSUBB128,
17588 IX86_BUILTIN_PSUBW128,
17589 IX86_BUILTIN_PSUBD128,
17590 IX86_BUILTIN_PSUBQ128,
17591 IX86_BUILTIN_PSUBSB128,
17592 IX86_BUILTIN_PSUBSW128,
17593 IX86_BUILTIN_PSUBUSB128,
17594 IX86_BUILTIN_PSUBUSW128,
17596 IX86_BUILTIN_PAND128,
17597 IX86_BUILTIN_PANDN128,
17598 IX86_BUILTIN_POR128,
17599 IX86_BUILTIN_PXOR128,
17601 IX86_BUILTIN_PAVGB128,
17602 IX86_BUILTIN_PAVGW128,
17604 IX86_BUILTIN_PCMPEQB128,
17605 IX86_BUILTIN_PCMPEQW128,
17606 IX86_BUILTIN_PCMPEQD128,
17607 IX86_BUILTIN_PCMPGTB128,
17608 IX86_BUILTIN_PCMPGTW128,
17609 IX86_BUILTIN_PCMPGTD128,
17611 IX86_BUILTIN_PMADDWD128,
17613 IX86_BUILTIN_PMAXSW128,
17614 IX86_BUILTIN_PMAXUB128,
17615 IX86_BUILTIN_PMINSW128,
17616 IX86_BUILTIN_PMINUB128,
17618 IX86_BUILTIN_PMULUDQ,
17619 IX86_BUILTIN_PMULUDQ128,
17620 IX86_BUILTIN_PMULHUW128,
17621 IX86_BUILTIN_PMULHW128,
17622 IX86_BUILTIN_PMULLW128,
17624 IX86_BUILTIN_PSADBW128,
17625 IX86_BUILTIN_PSHUFHW,
17626 IX86_BUILTIN_PSHUFLW,
17627 IX86_BUILTIN_PSHUFD,
17629 IX86_BUILTIN_PSLLDQI128,
17630 IX86_BUILTIN_PSLLWI128,
17631 IX86_BUILTIN_PSLLDI128,
17632 IX86_BUILTIN_PSLLQI128,
17633 IX86_BUILTIN_PSRAWI128,
17634 IX86_BUILTIN_PSRADI128,
17635 IX86_BUILTIN_PSRLDQI128,
17636 IX86_BUILTIN_PSRLWI128,
17637 IX86_BUILTIN_PSRLDI128,
17638 IX86_BUILTIN_PSRLQI128,
17640 IX86_BUILTIN_PSLLDQ128,
17641 IX86_BUILTIN_PSLLW128,
17642 IX86_BUILTIN_PSLLD128,
17643 IX86_BUILTIN_PSLLQ128,
17644 IX86_BUILTIN_PSRAW128,
17645 IX86_BUILTIN_PSRAD128,
17646 IX86_BUILTIN_PSRLW128,
17647 IX86_BUILTIN_PSRLD128,
17648 IX86_BUILTIN_PSRLQ128,
17650 IX86_BUILTIN_PUNPCKHBW128,
17651 IX86_BUILTIN_PUNPCKHWD128,
17652 IX86_BUILTIN_PUNPCKHDQ128,
17653 IX86_BUILTIN_PUNPCKHQDQ128,
17654 IX86_BUILTIN_PUNPCKLBW128,
17655 IX86_BUILTIN_PUNPCKLWD128,
17656 IX86_BUILTIN_PUNPCKLDQ128,
17657 IX86_BUILTIN_PUNPCKLQDQ128,
17659 IX86_BUILTIN_CLFLUSH,
17660 IX86_BUILTIN_MFENCE,
17661 IX86_BUILTIN_LFENCE,
17663 /* SSE3. */
17664 IX86_BUILTIN_ADDSUBPS,
17665 IX86_BUILTIN_HADDPS,
17666 IX86_BUILTIN_HSUBPS,
17667 IX86_BUILTIN_MOVSHDUP,
17668 IX86_BUILTIN_MOVSLDUP,
17669 IX86_BUILTIN_ADDSUBPD,
17670 IX86_BUILTIN_HADDPD,
17671 IX86_BUILTIN_HSUBPD,
17672 IX86_BUILTIN_LDDQU,
17674 IX86_BUILTIN_MONITOR,
17675 IX86_BUILTIN_MWAIT,
17677 /* SSSE3. */
17678 IX86_BUILTIN_PHADDW,
17679 IX86_BUILTIN_PHADDD,
17680 IX86_BUILTIN_PHADDSW,
17681 IX86_BUILTIN_PHSUBW,
17682 IX86_BUILTIN_PHSUBD,
17683 IX86_BUILTIN_PHSUBSW,
17684 IX86_BUILTIN_PMADDUBSW,
17685 IX86_BUILTIN_PMULHRSW,
17686 IX86_BUILTIN_PSHUFB,
17687 IX86_BUILTIN_PSIGNB,
17688 IX86_BUILTIN_PSIGNW,
17689 IX86_BUILTIN_PSIGND,
17690 IX86_BUILTIN_PALIGNR,
17691 IX86_BUILTIN_PABSB,
17692 IX86_BUILTIN_PABSW,
17693 IX86_BUILTIN_PABSD,
17695 IX86_BUILTIN_PHADDW128,
17696 IX86_BUILTIN_PHADDD128,
17697 IX86_BUILTIN_PHADDSW128,
17698 IX86_BUILTIN_PHSUBW128,
17699 IX86_BUILTIN_PHSUBD128,
17700 IX86_BUILTIN_PHSUBSW128,
17701 IX86_BUILTIN_PMADDUBSW128,
17702 IX86_BUILTIN_PMULHRSW128,
17703 IX86_BUILTIN_PSHUFB128,
17704 IX86_BUILTIN_PSIGNB128,
17705 IX86_BUILTIN_PSIGNW128,
17706 IX86_BUILTIN_PSIGND128,
17707 IX86_BUILTIN_PALIGNR128,
17708 IX86_BUILTIN_PABSB128,
17709 IX86_BUILTIN_PABSW128,
17710 IX86_BUILTIN_PABSD128,
17712 /* AMDFAM10 - SSE4A New Instructions. */
17713 IX86_BUILTIN_MOVNTSD,
17714 IX86_BUILTIN_MOVNTSS,
17715 IX86_BUILTIN_EXTRQI,
17716 IX86_BUILTIN_EXTRQ,
17717 IX86_BUILTIN_INSERTQI,
17718 IX86_BUILTIN_INSERTQ,
17720 /* SSE4.1. */
17721 IX86_BUILTIN_BLENDPD,
17722 IX86_BUILTIN_BLENDPS,
17723 IX86_BUILTIN_BLENDVPD,
17724 IX86_BUILTIN_BLENDVPS,
17725 IX86_BUILTIN_PBLENDVB128,
17726 IX86_BUILTIN_PBLENDW128,
17728 IX86_BUILTIN_DPPD,
17729 IX86_BUILTIN_DPPS,
17731 IX86_BUILTIN_INSERTPS128,
17733 IX86_BUILTIN_MOVNTDQA,
17734 IX86_BUILTIN_MPSADBW128,
17735 IX86_BUILTIN_PACKUSDW128,
17736 IX86_BUILTIN_PCMPEQQ,
17737 IX86_BUILTIN_PHMINPOSUW128,
17739 IX86_BUILTIN_PMAXSB128,
17740 IX86_BUILTIN_PMAXSD128,
17741 IX86_BUILTIN_PMAXUD128,
17742 IX86_BUILTIN_PMAXUW128,
17744 IX86_BUILTIN_PMINSB128,
17745 IX86_BUILTIN_PMINSD128,
17746 IX86_BUILTIN_PMINUD128,
17747 IX86_BUILTIN_PMINUW128,
17749 IX86_BUILTIN_PMOVSXBW128,
17750 IX86_BUILTIN_PMOVSXBD128,
17751 IX86_BUILTIN_PMOVSXBQ128,
17752 IX86_BUILTIN_PMOVSXWD128,
17753 IX86_BUILTIN_PMOVSXWQ128,
17754 IX86_BUILTIN_PMOVSXDQ128,
17756 IX86_BUILTIN_PMOVZXBW128,
17757 IX86_BUILTIN_PMOVZXBD128,
17758 IX86_BUILTIN_PMOVZXBQ128,
17759 IX86_BUILTIN_PMOVZXWD128,
17760 IX86_BUILTIN_PMOVZXWQ128,
17761 IX86_BUILTIN_PMOVZXDQ128,
17763 IX86_BUILTIN_PMULDQ128,
17764 IX86_BUILTIN_PMULLD128,
17766 IX86_BUILTIN_ROUNDPD,
17767 IX86_BUILTIN_ROUNDPS,
17768 IX86_BUILTIN_ROUNDSD,
17769 IX86_BUILTIN_ROUNDSS,
17771 IX86_BUILTIN_PTESTZ,
17772 IX86_BUILTIN_PTESTC,
17773 IX86_BUILTIN_PTESTNZC,
17775 IX86_BUILTIN_VEC_INIT_V2SI,
17776 IX86_BUILTIN_VEC_INIT_V4HI,
17777 IX86_BUILTIN_VEC_INIT_V8QI,
17778 IX86_BUILTIN_VEC_EXT_V2DF,
17779 IX86_BUILTIN_VEC_EXT_V2DI,
17780 IX86_BUILTIN_VEC_EXT_V4SF,
17781 IX86_BUILTIN_VEC_EXT_V4SI,
17782 IX86_BUILTIN_VEC_EXT_V8HI,
17783 IX86_BUILTIN_VEC_EXT_V2SI,
17784 IX86_BUILTIN_VEC_EXT_V4HI,
17785 IX86_BUILTIN_VEC_EXT_V16QI,
17786 IX86_BUILTIN_VEC_SET_V2DI,
17787 IX86_BUILTIN_VEC_SET_V4SF,
17788 IX86_BUILTIN_VEC_SET_V4SI,
17789 IX86_BUILTIN_VEC_SET_V8HI,
17790 IX86_BUILTIN_VEC_SET_V4HI,
17791 IX86_BUILTIN_VEC_SET_V16QI,
17793 IX86_BUILTIN_VEC_PACK_SFIX,
17795 /* SSE4.2. */
17796 IX86_BUILTIN_CRC32QI,
17797 IX86_BUILTIN_CRC32HI,
17798 IX86_BUILTIN_CRC32SI,
17799 IX86_BUILTIN_CRC32DI,
17801 IX86_BUILTIN_PCMPESTRI128,
17802 IX86_BUILTIN_PCMPESTRM128,
17803 IX86_BUILTIN_PCMPESTRA128,
17804 IX86_BUILTIN_PCMPESTRC128,
17805 IX86_BUILTIN_PCMPESTRO128,
17806 IX86_BUILTIN_PCMPESTRS128,
17807 IX86_BUILTIN_PCMPESTRZ128,
17808 IX86_BUILTIN_PCMPISTRI128,
17809 IX86_BUILTIN_PCMPISTRM128,
17810 IX86_BUILTIN_PCMPISTRA128,
17811 IX86_BUILTIN_PCMPISTRC128,
17812 IX86_BUILTIN_PCMPISTRO128,
17813 IX86_BUILTIN_PCMPISTRS128,
17814 IX86_BUILTIN_PCMPISTRZ128,
17816 IX86_BUILTIN_PCMPGTQ,
17818 /* AES instructions */
17819 IX86_BUILTIN_AESENC128,
17820 IX86_BUILTIN_AESENCLAST128,
17821 IX86_BUILTIN_AESDEC128,
17822 IX86_BUILTIN_AESDECLAST128,
17823 IX86_BUILTIN_AESIMC128,
17824 IX86_BUILTIN_AESKEYGENASSIST128,
17826 /* PCLMUL instruction */
17827 IX86_BUILTIN_PCLMULQDQ128,
17829 /* TFmode support builtins. */
17830 IX86_BUILTIN_INFQ,
17831 IX86_BUILTIN_FABSQ,
17832 IX86_BUILTIN_COPYSIGNQ,
17834 /* SSE5 instructions */
17835 IX86_BUILTIN_FMADDSS,
17836 IX86_BUILTIN_FMADDSD,
17837 IX86_BUILTIN_FMADDPS,
17838 IX86_BUILTIN_FMADDPD,
17839 IX86_BUILTIN_FMSUBSS,
17840 IX86_BUILTIN_FMSUBSD,
17841 IX86_BUILTIN_FMSUBPS,
17842 IX86_BUILTIN_FMSUBPD,
17843 IX86_BUILTIN_FNMADDSS,
17844 IX86_BUILTIN_FNMADDSD,
17845 IX86_BUILTIN_FNMADDPS,
17846 IX86_BUILTIN_FNMADDPD,
17847 IX86_BUILTIN_FNMSUBSS,
17848 IX86_BUILTIN_FNMSUBSD,
17849 IX86_BUILTIN_FNMSUBPS,
17850 IX86_BUILTIN_FNMSUBPD,
17851 IX86_BUILTIN_PCMOV_V2DI,
17852 IX86_BUILTIN_PCMOV_V4SI,
17853 IX86_BUILTIN_PCMOV_V8HI,
17854 IX86_BUILTIN_PCMOV_V16QI,
17855 IX86_BUILTIN_PCMOV_V4SF,
17856 IX86_BUILTIN_PCMOV_V2DF,
17857 IX86_BUILTIN_PPERM,
17858 IX86_BUILTIN_PERMPS,
17859 IX86_BUILTIN_PERMPD,
17860 IX86_BUILTIN_PMACSSWW,
17861 IX86_BUILTIN_PMACSWW,
17862 IX86_BUILTIN_PMACSSWD,
17863 IX86_BUILTIN_PMACSWD,
17864 IX86_BUILTIN_PMACSSDD,
17865 IX86_BUILTIN_PMACSDD,
17866 IX86_BUILTIN_PMACSSDQL,
17867 IX86_BUILTIN_PMACSSDQH,
17868 IX86_BUILTIN_PMACSDQL,
17869 IX86_BUILTIN_PMACSDQH,
17870 IX86_BUILTIN_PMADCSSWD,
17871 IX86_BUILTIN_PMADCSWD,
17872 IX86_BUILTIN_PHADDBW,
17873 IX86_BUILTIN_PHADDBD,
17874 IX86_BUILTIN_PHADDBQ,
17875 IX86_BUILTIN_PHADDWD,
17876 IX86_BUILTIN_PHADDWQ,
17877 IX86_BUILTIN_PHADDDQ,
17878 IX86_BUILTIN_PHADDUBW,
17879 IX86_BUILTIN_PHADDUBD,
17880 IX86_BUILTIN_PHADDUBQ,
17881 IX86_BUILTIN_PHADDUWD,
17882 IX86_BUILTIN_PHADDUWQ,
17883 IX86_BUILTIN_PHADDUDQ,
17884 IX86_BUILTIN_PHSUBBW,
17885 IX86_BUILTIN_PHSUBWD,
17886 IX86_BUILTIN_PHSUBDQ,
17887 IX86_BUILTIN_PROTB,
17888 IX86_BUILTIN_PROTW,
17889 IX86_BUILTIN_PROTD,
17890 IX86_BUILTIN_PROTQ,
17891 IX86_BUILTIN_PROTB_IMM,
17892 IX86_BUILTIN_PROTW_IMM,
17893 IX86_BUILTIN_PROTD_IMM,
17894 IX86_BUILTIN_PROTQ_IMM,
17895 IX86_BUILTIN_PSHLB,
17896 IX86_BUILTIN_PSHLW,
17897 IX86_BUILTIN_PSHLD,
17898 IX86_BUILTIN_PSHLQ,
17899 IX86_BUILTIN_PSHAB,
17900 IX86_BUILTIN_PSHAW,
17901 IX86_BUILTIN_PSHAD,
17902 IX86_BUILTIN_PSHAQ,
17903 IX86_BUILTIN_FRCZSS,
17904 IX86_BUILTIN_FRCZSD,
17905 IX86_BUILTIN_FRCZPS,
17906 IX86_BUILTIN_FRCZPD,
17907 IX86_BUILTIN_CVTPH2PS,
17908 IX86_BUILTIN_CVTPS2PH,
17910 IX86_BUILTIN_COMEQSS,
17911 IX86_BUILTIN_COMNESS,
17912 IX86_BUILTIN_COMLTSS,
17913 IX86_BUILTIN_COMLESS,
17914 IX86_BUILTIN_COMGTSS,
17915 IX86_BUILTIN_COMGESS,
17916 IX86_BUILTIN_COMUEQSS,
17917 IX86_BUILTIN_COMUNESS,
17918 IX86_BUILTIN_COMULTSS,
17919 IX86_BUILTIN_COMULESS,
17920 IX86_BUILTIN_COMUGTSS,
17921 IX86_BUILTIN_COMUGESS,
17922 IX86_BUILTIN_COMORDSS,
17923 IX86_BUILTIN_COMUNORDSS,
17924 IX86_BUILTIN_COMFALSESS,
17925 IX86_BUILTIN_COMTRUESS,
17927 IX86_BUILTIN_COMEQSD,
17928 IX86_BUILTIN_COMNESD,
17929 IX86_BUILTIN_COMLTSD,
17930 IX86_BUILTIN_COMLESD,
17931 IX86_BUILTIN_COMGTSD,
17932 IX86_BUILTIN_COMGESD,
17933 IX86_BUILTIN_COMUEQSD,
17934 IX86_BUILTIN_COMUNESD,
17935 IX86_BUILTIN_COMULTSD,
17936 IX86_BUILTIN_COMULESD,
17937 IX86_BUILTIN_COMUGTSD,
17938 IX86_BUILTIN_COMUGESD,
17939 IX86_BUILTIN_COMORDSD,
17940 IX86_BUILTIN_COMUNORDSD,
17941 IX86_BUILTIN_COMFALSESD,
17942 IX86_BUILTIN_COMTRUESD,
17944 IX86_BUILTIN_COMEQPS,
17945 IX86_BUILTIN_COMNEPS,
17946 IX86_BUILTIN_COMLTPS,
17947 IX86_BUILTIN_COMLEPS,
17948 IX86_BUILTIN_COMGTPS,
17949 IX86_BUILTIN_COMGEPS,
17950 IX86_BUILTIN_COMUEQPS,
17951 IX86_BUILTIN_COMUNEPS,
17952 IX86_BUILTIN_COMULTPS,
17953 IX86_BUILTIN_COMULEPS,
17954 IX86_BUILTIN_COMUGTPS,
17955 IX86_BUILTIN_COMUGEPS,
17956 IX86_BUILTIN_COMORDPS,
17957 IX86_BUILTIN_COMUNORDPS,
17958 IX86_BUILTIN_COMFALSEPS,
17959 IX86_BUILTIN_COMTRUEPS,
17961 IX86_BUILTIN_COMEQPD,
17962 IX86_BUILTIN_COMNEPD,
17963 IX86_BUILTIN_COMLTPD,
17964 IX86_BUILTIN_COMLEPD,
17965 IX86_BUILTIN_COMGTPD,
17966 IX86_BUILTIN_COMGEPD,
17967 IX86_BUILTIN_COMUEQPD,
17968 IX86_BUILTIN_COMUNEPD,
17969 IX86_BUILTIN_COMULTPD,
17970 IX86_BUILTIN_COMULEPD,
17971 IX86_BUILTIN_COMUGTPD,
17972 IX86_BUILTIN_COMUGEPD,
17973 IX86_BUILTIN_COMORDPD,
17974 IX86_BUILTIN_COMUNORDPD,
17975 IX86_BUILTIN_COMFALSEPD,
17976 IX86_BUILTIN_COMTRUEPD,
17978 IX86_BUILTIN_PCOMEQUB,
17979 IX86_BUILTIN_PCOMNEUB,
17980 IX86_BUILTIN_PCOMLTUB,
17981 IX86_BUILTIN_PCOMLEUB,
17982 IX86_BUILTIN_PCOMGTUB,
17983 IX86_BUILTIN_PCOMGEUB,
17984 IX86_BUILTIN_PCOMFALSEUB,
17985 IX86_BUILTIN_PCOMTRUEUB,
17986 IX86_BUILTIN_PCOMEQUW,
17987 IX86_BUILTIN_PCOMNEUW,
17988 IX86_BUILTIN_PCOMLTUW,
17989 IX86_BUILTIN_PCOMLEUW,
17990 IX86_BUILTIN_PCOMGTUW,
17991 IX86_BUILTIN_PCOMGEUW,
17992 IX86_BUILTIN_PCOMFALSEUW,
17993 IX86_BUILTIN_PCOMTRUEUW,
17994 IX86_BUILTIN_PCOMEQUD,
17995 IX86_BUILTIN_PCOMNEUD,
17996 IX86_BUILTIN_PCOMLTUD,
17997 IX86_BUILTIN_PCOMLEUD,
17998 IX86_BUILTIN_PCOMGTUD,
17999 IX86_BUILTIN_PCOMGEUD,
18000 IX86_BUILTIN_PCOMFALSEUD,
18001 IX86_BUILTIN_PCOMTRUEUD,
18002 IX86_BUILTIN_PCOMEQUQ,
18003 IX86_BUILTIN_PCOMNEUQ,
18004 IX86_BUILTIN_PCOMLTUQ,
18005 IX86_BUILTIN_PCOMLEUQ,
18006 IX86_BUILTIN_PCOMGTUQ,
18007 IX86_BUILTIN_PCOMGEUQ,
18008 IX86_BUILTIN_PCOMFALSEUQ,
18009 IX86_BUILTIN_PCOMTRUEUQ,
18011 IX86_BUILTIN_PCOMEQB,
18012 IX86_BUILTIN_PCOMNEB,
18013 IX86_BUILTIN_PCOMLTB,
18014 IX86_BUILTIN_PCOMLEB,
18015 IX86_BUILTIN_PCOMGTB,
18016 IX86_BUILTIN_PCOMGEB,
18017 IX86_BUILTIN_PCOMFALSEB,
18018 IX86_BUILTIN_PCOMTRUEB,
18019 IX86_BUILTIN_PCOMEQW,
18020 IX86_BUILTIN_PCOMNEW,
18021 IX86_BUILTIN_PCOMLTW,
18022 IX86_BUILTIN_PCOMLEW,
18023 IX86_BUILTIN_PCOMGTW,
18024 IX86_BUILTIN_PCOMGEW,
18025 IX86_BUILTIN_PCOMFALSEW,
18026 IX86_BUILTIN_PCOMTRUEW,
18027 IX86_BUILTIN_PCOMEQD,
18028 IX86_BUILTIN_PCOMNED,
18029 IX86_BUILTIN_PCOMLTD,
18030 IX86_BUILTIN_PCOMLED,
18031 IX86_BUILTIN_PCOMGTD,
18032 IX86_BUILTIN_PCOMGED,
18033 IX86_BUILTIN_PCOMFALSED,
18034 IX86_BUILTIN_PCOMTRUED,
18035 IX86_BUILTIN_PCOMEQQ,
18036 IX86_BUILTIN_PCOMNEQ,
18037 IX86_BUILTIN_PCOMLTQ,
18038 IX86_BUILTIN_PCOMLEQ,
18039 IX86_BUILTIN_PCOMGTQ,
18040 IX86_BUILTIN_PCOMGEQ,
18041 IX86_BUILTIN_PCOMFALSEQ,
18042 IX86_BUILTIN_PCOMTRUEQ,
18044 IX86_BUILTIN_MAX
18045 };
18047 /* Table for the ix86 builtin decls. */
18048 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
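/* IX86_BUILTIN_MAX (the final enumerator above) also serves as the size of
   this table.  A slot stays NULL_TREE when the corresponding builtin's ISA
   mask is not enabled, so code looking decls up here has to be prepared for
   a missing entry (see def_builtin below).  */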
18050 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
18051  * only if the current ix86_isa_flags include one of the bits in MASK (and,
18052  * for 64-bit-only builtins, only when TARGET_64BIT).  Stores the function
18053  * decl in the ix86_builtins array; returns it, or NULL_TREE if not added. */
18055 static inline tree
18056 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
18057 {
18058 tree decl = NULL_TREE;
18060 if (mask & ix86_isa_flags
18061 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
18062 {
18063 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
18064 NULL, NULL_TREE);
18065 ix86_builtins[(int) code] = decl;
18066 }
18068 return decl;
18069 }
18071 /* Like def_builtin, but also marks the function decl "const". */
18073 static inline tree
18074 def_builtin_const (int mask, const char *name, tree type,
18075 enum ix86_builtins code)
18076 {
18077 tree decl = def_builtin (mask, name, type, code);
18078 if (decl)
18079 TREE_READONLY (decl) = 1;
18080 return decl;
18081 }
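/* A hand-registered builtin would look roughly like the sketch below; the
   type argument ("v4sf_ftype_v4sf_v4sf") stands in for a FUNCTION_TYPE tree
   that the caller builds first, so this is illustrative rather than verbatim:

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                        v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   Most builtins below are instead described once in the bdesc_* tables and
   registered in bulk from those tables later in this file.  */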
18083 /* Bits for builtin_description.flag. */
18085 /* Set when we don't support the comparison natively, and should
18086    swap the comparison operands in order to support it. */
18087 #define BUILTIN_DESC_SWAP_OPERANDS 1
18089 struct builtin_description
18090 {
18091 const unsigned int mask;
18092 const enum insn_code icode;
18093 const char *const name;
18094 const enum ix86_builtins code;
18095 const enum rtx_code comparison;
18096 const int flag;
18097 };
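/* How to read a row in the bdesc_* tables that follow: MASK names the ISA
   option(s) that must be enabled, ICODE is the insn pattern used to expand
   the builtin, NAME is the user-visible __builtin_ia32_* identifier, CODE is
   its index into ix86_builtins, and COMPARISON supplies an rtx comparison
   code where one is needed.  FLAG is overloaded per table: the comparison
   tables may carry BUILTIN_DESC_SWAP_OPERANDS, the pcmpestr/pcmpistr tables
   store an (int)-cast condition-code mode, and the argument tables store an
   (int)-cast ix86_builtin_type or ix86_special_builtin_type.  */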
18099 static const struct builtin_description bdesc_comi[] =
18100 {
18101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
18102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
18103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
18104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
18105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
18106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
18107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
18108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
18109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
18110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
18111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
18112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
18113 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
18114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
18115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
18116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
18117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
18118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
18119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
18120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
18121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
18122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
18123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
18124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
18125 };
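/* The comi/ucomi builtins above back the _mm_comi*_ss/_sd and
   _mm_ucomi*_ss/_sd intrinsics; <xmmintrin.h> wraps them roughly as in this
   sketch (not the verbatim header text):

     extern __inline int
     _mm_comieq_ss (__m128 __A, __m128 __B)
     {
       return __builtin_ia32_comieq ((__v4sf) __A, (__v4sf) __B);
     }
*/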
18127 static const struct builtin_description bdesc_pcmpestr[] =
18128 {
18129 /* SSE4.2 */
18130 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
18131 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
18132 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
18133 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
18134 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
18135 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
18136 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
18137 };
18139 static const struct builtin_description bdesc_pcmpistr[] =
18140 {
18141 /* SSE4.2 */
18142 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
18143 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
18144 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
18145 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
18146 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
18147 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
18148 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
18149 };
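/* In the two string-compare tables above the comparison field is unused
   (UNKNOWN); the flag field instead records which condition-code mode the
   ...a/...c/...o/...s/...z variants read back from the flags register
   (CCAmode, CCCmode, CCOmode, CCSmode, CCZmode).  The plain ...i and ...m
   variants return the match index or mask directly rather than a flag.  */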
18151 /* Special builtin types */
18152 enum ix86_special_builtin_type
18153 {
18154 SPECIAL_FTYPE_UNKNOWN,
18155 VOID_FTYPE_VOID,
18156 V16QI_FTYPE_PCCHAR,
18157 V4SF_FTYPE_PCFLOAT,
18158 V2DF_FTYPE_PCDOUBLE,
18159 V4SF_FTYPE_V4SF_PCV2SF,
18160 V2DF_FTYPE_V2DF_PCDOUBLE,
18161 V2DI_FTYPE_PV2DI,
18162 VOID_FTYPE_PV2SF_V4SF,
18163 VOID_FTYPE_PV2DI_V2DI,
18164 VOID_FTYPE_PCHAR_V16QI,
18165 VOID_FTYPE_PFLOAT_V4SF,
18166 VOID_FTYPE_PDOUBLE_V2DF,
18167 VOID_FTYPE_PDI_DI,
18168 VOID_FTYPE_PINT_INT
18169 };
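/* The type names follow a RESULT_FTYPE_ARGUMENTS convention: for example,
   V4SF_FTYPE_PCFLOAT is a builtin returning a V4SFmode vector and taking a
   pointer to const float, while VOID_FTYPE_PFLOAT_V4SF is a store taking a
   float pointer and a V4SFmode value.  The "special" types are the ones
   involving pointers or void, i.e. builtins with memory side effects.  */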
18171 /* Builtin types */
18172 enum ix86_builtin_type
18173 {
18174 FTYPE_UNKNOWN,
18175 FLOAT128_FTYPE_FLOAT128,
18176 FLOAT_FTYPE_FLOAT,
18177 FLOAT128_FTYPE_FLOAT128_FLOAT128,
18178 INT_FTYPE_V2DI_V2DI_PTEST,
18179 INT64_FTYPE_V4SF,
18180 INT64_FTYPE_V2DF,
18181 INT_FTYPE_V16QI,
18182 INT_FTYPE_V8QI,
18183 INT_FTYPE_V4SF,
18184 INT_FTYPE_V2DF,
18185 V16QI_FTYPE_V16QI,
18186 V8HI_FTYPE_V8HI,
18187 V8HI_FTYPE_V16QI,
18188 V8QI_FTYPE_V8QI,
18189 V4SI_FTYPE_V4SI,
18190 V4SI_FTYPE_V16QI,
18191 V4SI_FTYPE_V8HI,
18192 V4SI_FTYPE_V4SF,
18193 V4SI_FTYPE_V2DF,
18194 V4HI_FTYPE_V4HI,
18195 V4SF_FTYPE_V4SF,
18196 V4SF_FTYPE_V4SF_VEC_MERGE,
18197 V4SF_FTYPE_V4SI,
18198 V4SF_FTYPE_V2DF,
18199 V2DI_FTYPE_V2DI,
18200 V2DI_FTYPE_V16QI,
18201 V2DI_FTYPE_V8HI,
18202 V2DI_FTYPE_V4SI,
18203 V2DF_FTYPE_V2DF,
18204 V2DF_FTYPE_V2DF_VEC_MERGE,
18205 V2DF_FTYPE_V4SI,
18206 V2DF_FTYPE_V4SF,
18207 V2DF_FTYPE_V2SI,
18208 V2SI_FTYPE_V2SI,
18209 V2SI_FTYPE_V4SF,
18210 V2SI_FTYPE_V2SF,
18211 V2SI_FTYPE_V2DF,
18212 V2SF_FTYPE_V2SF,
18213 V2SF_FTYPE_V2SI,
18214 V16QI_FTYPE_V16QI_V16QI,
18215 V16QI_FTYPE_V8HI_V8HI,
18216 V8QI_FTYPE_V8QI_V8QI,
18217 V8QI_FTYPE_V4HI_V4HI,
18218 V8HI_FTYPE_V8HI_V8HI,
18219 V8HI_FTYPE_V8HI_V8HI_COUNT,
18220 V8HI_FTYPE_V16QI_V16QI,
18221 V8HI_FTYPE_V4SI_V4SI,
18222 V8HI_FTYPE_V8HI_SI_COUNT,
18223 V4SI_FTYPE_V4SI_V4SI,
18224 V4SI_FTYPE_V4SI_V4SI_COUNT,
18225 V4SI_FTYPE_V8HI_V8HI,
18226 V4SI_FTYPE_V4SF_V4SF,
18227 V4SI_FTYPE_V2DF_V2DF,
18228 V4SI_FTYPE_V4SI_SI_COUNT,
18229 V4HI_FTYPE_V4HI_V4HI,
18230 V4HI_FTYPE_V4HI_V4HI_COUNT,
18231 V4HI_FTYPE_V8QI_V8QI,
18232 V4HI_FTYPE_V2SI_V2SI,
18233 V4HI_FTYPE_V4HI_SI_COUNT,
18234 V4SF_FTYPE_V4SF_V4SF,
18235 V4SF_FTYPE_V4SF_V4SF_SWAP,
18236 V4SF_FTYPE_V4SF_V2SI,
18237 V4SF_FTYPE_V4SF_V2DF,
18238 V4SF_FTYPE_V4SF_DI,
18239 V4SF_FTYPE_V4SF_SI,
18240 V2DI_FTYPE_V2DI_V2DI,
18241 V2DI_FTYPE_V2DI_V2DI_COUNT,
18242 V2DI_FTYPE_V16QI_V16QI,
18243 V2DI_FTYPE_V4SI_V4SI,
18244 V2DI_FTYPE_V2DI_V16QI,
18245 V2DI_FTYPE_V2DF_V2DF,
18246 V2DI_FTYPE_V2DI_SI_COUNT,
18247 V2SI_FTYPE_V2SI_V2SI,
18248 V2SI_FTYPE_V2SI_V2SI_COUNT,
18249 V2SI_FTYPE_V4HI_V4HI,
18250 V2SI_FTYPE_V2SF_V2SF,
18251 V2SI_FTYPE_V2SI_SI_COUNT,
18252 V2DF_FTYPE_V2DF_V2DF,
18253 V2DF_FTYPE_V2DF_V2DF_SWAP,
18254 V2DF_FTYPE_V2DF_V4SF,
18255 V2DF_FTYPE_V2DF_DI,
18256 V2DF_FTYPE_V2DF_SI,
18257 V2SF_FTYPE_V2SF_V2SF,
18258 V1DI_FTYPE_V1DI_V1DI,
18259 V1DI_FTYPE_V1DI_V1DI_COUNT,
18260 V1DI_FTYPE_V8QI_V8QI,
18261 V1DI_FTYPE_V2SI_V2SI,
18262 V1DI_FTYPE_V1DI_SI_COUNT,
18263 UINT64_FTYPE_UINT64_UINT64,
18264 UINT_FTYPE_UINT_UINT,
18265 UINT_FTYPE_UINT_USHORT,
18266 UINT_FTYPE_UINT_UCHAR,
18267 V8HI_FTYPE_V8HI_INT,
18268 V4SI_FTYPE_V4SI_INT,
18269 V4HI_FTYPE_V4HI_INT,
18270 V4SF_FTYPE_V4SF_INT,
18271 V2DI_FTYPE_V2DI_INT,
18272 V2DI2TI_FTYPE_V2DI_INT,
18273 V2DF_FTYPE_V2DF_INT,
18274 V16QI_FTYPE_V16QI_V16QI_V16QI,
18275 V4SF_FTYPE_V4SF_V4SF_V4SF,
18276 V2DF_FTYPE_V2DF_V2DF_V2DF,
18277 V16QI_FTYPE_V16QI_V16QI_INT,
18278 V8HI_FTYPE_V8HI_V8HI_INT,
18279 V4SI_FTYPE_V4SI_V4SI_INT,
18280 V4SF_FTYPE_V4SF_V4SF_INT,
18281 V2DI_FTYPE_V2DI_V2DI_INT,
18282 V2DI2TI_FTYPE_V2DI_V2DI_INT,
18283 V1DI2DI_FTYPE_V1DI_V1DI_INT,
18284 V2DF_FTYPE_V2DF_V2DF_INT,
18285 V2DI_FTYPE_V2DI_UINT_UINT,
18286 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
18287 };
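/* The same RESULT_FTYPE_ARGUMENTS convention applies here; in addition,
   several suffixes give the expander extra hints (this reading is inferred
   from how the types are used in the tables below): _COUNT marks shifts
   whose last operand is a shift count, _SWAP marks comparisons whose
   operands are swapped before expansion (cmpgt/cmpge via the lt/le
   patterns), _VEC_MERGE marks the scalar sqrtss/rsqrtss/rcpss forms, and
   _PTEST marks the ptest condition builtins.  */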
18289 /* Special builtins with variable number of arguments. */
18290 static const struct builtin_description bdesc_special_args[] =
18291 {
18292 /* MMX */
18293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18295 /* 3DNow! */
18296 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18298 /* SSE */
18299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
18303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18308 /* SSE or 3DNow!A */
18309 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18310 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
18312 /* SSE2 */
18313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
18317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
18319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
18320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
18321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18326 /* SSE3 */
18327 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18329 /* SSE4.1 */
18330 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
18332 /* SSE4A */
18333 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18334 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18335 };
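/* Every entry in bdesc_special_args reads or writes memory (loads, stores,
   non-temporal moves) or has other side effects (fences, emms, femms), so
   these are kept apart from the side-effect-free arithmetic builtins in
   bdesc_args below; marking them TREE_READONLY via def_builtin_const would
   be wrong for builtins that touch memory.  */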
18337 /* Builtins with variable number of arguments. */
18338 static const struct builtin_description bdesc_args[] =
18339 {
18340 /* MMX */
18341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
18377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
18379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
18381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
18385 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18390 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18392 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18393 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18397 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18399 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18400 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18401 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18404 /* 3DNow! */
18405 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18406 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18407 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18408 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18410 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18411 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18412 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18413 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18414 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18415 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18416 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18417 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18418 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18419 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18420 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18421 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18422 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18423 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18424 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18426 /* 3DNow!A */
18427 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18428 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18429 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18430 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18431 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18432 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18434 /* SSE */
18435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
18436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18437 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18439 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18443 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18446 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18450 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18451 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18452 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18482 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18483 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18487 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18489 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18490 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18494 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18495 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18496 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18500 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
18502 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18504 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18505 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18508 /* SSE MMX or 3DNow!A */
18509 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18510 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18511 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18513 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18514 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18515 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18516 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18518 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
18519 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
18521 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
18523 /* SSE2 */
18524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
18527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
18528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
18529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
18530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
18532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
18535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
18540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18542 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18543 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
18547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18549 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18550 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18551 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18552 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18580 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18584 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18586 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18587 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18593 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
18595 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18596 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18597 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18598 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18599 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18600 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18601 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18602 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18613 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18614 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18616 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18618 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18619 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18631 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18632 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18633 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18649 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
18652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
18653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
18657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
18658 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
18659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
18660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
18662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18663 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18664 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18665 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18666 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18667 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18668 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18671 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18672 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18673 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18674 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18675 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18676 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18678 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18679 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18680 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18681 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
18684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
18689 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
18690 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
18692 /* SSE2 MMX */
18693 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18694 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18696 /* SSE3 */
18697 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18698 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18700 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18701 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18702 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18703 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18704 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18705 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18707 /* SSSE3 */
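/* Note: each SSSE3 operation in this group is listed twice, once as the
   128-bit XMM form (the "...128" builtins) and once as the 64-bit MMX
   form.  */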
18708 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
18709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
18710 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
18712 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
18713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18715 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18716 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18717 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18718 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18719 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18720 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18721 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18722 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18723 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18724 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
18728 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
18729 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18730 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18731 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18732 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18733 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18734 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18735 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18736 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18737 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18738 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18740 /* SSSE3. */
18741 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
18742 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
18744 /* SSE4.1 */
18745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18757 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18758 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18759 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18760 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18767 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18772 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18773 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18774 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18780 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18783 /* SSE4.1 and SSE5 */
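/* The rounding and ptest builtins below are keyed off OPTION_MASK_ISA_ROUND
   rather than a single ISA bit; per the section comment they are usable
   with either SSE4.1 or SSE5.  */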
18784 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18785 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18786 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18787 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18789 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18790 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18791 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18793 /* SSE4.2 */
18794 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18795 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
18796 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
18797 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
18798 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
18800 /* SSE4A */
18801 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
18802 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
18803 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
18804 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18806 /* AES */
18807 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
18808 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
18810 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18811 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18812 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18813 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18815 /* PCLMUL */
18816 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
18817 };
18819 /* SSE5 */
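/* The MULTI_ARG_* values below classify the SSE5 multi-operand builtins:
   the digit gives the operand count and the suffix names the element
   mode(s), e.g. MULTI_ARG_3_SF for a three-operand V4SFmode operation,
   MULTI_ARG_2_DI_CMP for a two-operand V2DImode comparison, and
   mixed-mode variants such as MULTI_ARG_3_SI_DI for operations whose
   inputs and result use different element widths.  */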
18820 enum multi_arg_type {
18821 MULTI_ARG_UNKNOWN,
18822 MULTI_ARG_3_SF,
18823 MULTI_ARG_3_DF,
18824 MULTI_ARG_3_DI,
18825 MULTI_ARG_3_SI,
18826 MULTI_ARG_3_SI_DI,
18827 MULTI_ARG_3_HI,
18828 MULTI_ARG_3_HI_SI,
18829 MULTI_ARG_3_QI,
18830 MULTI_ARG_3_PERMPS,
18831 MULTI_ARG_3_PERMPD,
18832 MULTI_ARG_2_SF,
18833 MULTI_ARG_2_DF,
18834 MULTI_ARG_2_DI,
18835 MULTI_ARG_2_SI,
18836 MULTI_ARG_2_HI,
18837 MULTI_ARG_2_QI,
18838 MULTI_ARG_2_DI_IMM,
18839 MULTI_ARG_2_SI_IMM,
18840 MULTI_ARG_2_HI_IMM,
18841 MULTI_ARG_2_QI_IMM,
18842 MULTI_ARG_2_SF_CMP,
18843 MULTI_ARG_2_DF_CMP,
18844 MULTI_ARG_2_DI_CMP,
18845 MULTI_ARG_2_SI_CMP,
18846 MULTI_ARG_2_HI_CMP,
18847 MULTI_ARG_2_QI_CMP,
18848 MULTI_ARG_2_DI_TF,
18849 MULTI_ARG_2_SI_TF,
18850 MULTI_ARG_2_HI_TF,
18851 MULTI_ARG_2_QI_TF,
18852 MULTI_ARG_2_SF_TF,
18853 MULTI_ARG_2_DF_TF,
18854 MULTI_ARG_1_SF,
18855 MULTI_ARG_1_DF,
18856 MULTI_ARG_1_DI,
18857 MULTI_ARG_1_SI,
18858 MULTI_ARG_1_HI,
18859 MULTI_ARG_1_QI,
18860 MULTI_ARG_1_SI_DI,
18861 MULTI_ARG_1_HI_DI,
18862 MULTI_ARG_1_HI_SI,
18863 MULTI_ARG_1_QI_DI,
18864 MULTI_ARG_1_QI_SI,
18865 MULTI_ARG_1_QI_HI,
18866 MULTI_ARG_1_PH2PS,
18867 MULTI_ARG_1_PS2PH
18868 };
18870 static const struct builtin_description bdesc_multi_arg[] =
18871 {
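  /* Each entry lists the ISA mask required, the insn pattern used to
     expand the builtin, its user-visible name, the IX86_BUILTIN_* code,
     the comparison/sub-operation code passed to the expander (0 when
     unused), and the MULTI_ARG_* classification defined above.  */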
18872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, 0, (int)MULTI_ARG_3_QI },
18893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
19000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
19003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
19004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
19005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
19006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
19007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
19010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
19011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
19012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
19013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
19014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
19017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
19020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
19021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
19022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
19024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
19025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
19028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
19029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
19030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
19032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
19033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
19036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
19037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
19038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
19040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
19044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
19045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
19046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
19048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
19049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
19052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
19053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
19054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
19056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
19057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
19060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
19061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
19062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
19064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
19065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
19068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
19069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
19070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
19072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
19076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
19077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
19078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
19080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
19081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
19082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
19083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
19084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
19085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
19086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
19087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
19089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
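/* Illustrative sketch, not part of i386.c: how one bdesc_multi_arg entry
   above reaches user code.  The "__builtin_ia32_pcomltq" entry registers a
   builtin with the v2di_ftype_v2di_v2di signature and passes LT as the
   instruction's comparison sub-code, so each 64-bit lane of the result is
   expected to be an all-ones mask where a < b (signed).  Assumes a compiler
   with SSE5 support and compilation with -msse5; the typedef name below is
   made up for the example.  */
typedef long long example_v2di __attribute__ ((vector_size (16)));

static example_v2di
pcomltq_example (example_v2di a, example_v2di b)
{
  return __builtin_ia32_pcomltq (a, b);
}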
19108 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
19109 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
19110 builtins. */
19111 static void
19112 ix86_init_mmx_sse_builtins (void)
19114 const struct builtin_description * d;
19115 size_t i;
19117 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
19118 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19119 tree V1DI_type_node
19120 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
19121 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
19122 tree V2DI_type_node
19123 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
19124 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
19125 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
19126 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
19127 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19128 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
19129 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
19131 tree pchar_type_node = build_pointer_type (char_type_node);
19132 tree pcchar_type_node
19133 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
19134 tree pfloat_type_node = build_pointer_type (float_type_node);
19135 tree pcfloat_type_node
19136 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
19137 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
19138 tree pcv2sf_type_node
19139 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
19140 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
19141 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
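/* Illustrative sketch, not part of i386.c: the vector tree nodes built
   above describe the same types that user code spells with GCC's
   vector_size attribute (the intrinsic headers name them __v4sf, __v2df,
   and so on).  Rough user-level equivalents, with made-up typedef names:  */
typedef float     example_v4sf __attribute__ ((vector_size (16))); /* V4SF_type_node */
typedef double    example_v2df __attribute__ ((vector_size (16))); /* V2DF_type_node */
typedef int       example_v4si __attribute__ ((vector_size (16))); /* V4SI_type_node */
typedef short     example_v8hi __attribute__ ((vector_size (16))); /* V8HI_type_node */
typedef short     example_v4hi __attribute__ ((vector_size (8)));  /* V4HI_type_node */
typedef long long example_v1di __attribute__ ((vector_size (8)));  /* V1DI_type_node */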
19143 /* Comparisons. */
19144 tree int_ftype_v4sf_v4sf
19145 = build_function_type_list (integer_type_node,
19146 V4SF_type_node, V4SF_type_node, NULL_TREE);
19147 tree v4si_ftype_v4sf_v4sf
19148 = build_function_type_list (V4SI_type_node,
19149 V4SF_type_node, V4SF_type_node, NULL_TREE);
19150 /* MMX/SSE/integer conversions. */
19151 tree int_ftype_v4sf
19152 = build_function_type_list (integer_type_node,
19153 V4SF_type_node, NULL_TREE);
19154 tree int64_ftype_v4sf
19155 = build_function_type_list (long_long_integer_type_node,
19156 V4SF_type_node, NULL_TREE);
19157 tree int_ftype_v8qi
19158 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
19159 tree v4sf_ftype_v4sf_int
19160 = build_function_type_list (V4SF_type_node,
19161 V4SF_type_node, integer_type_node, NULL_TREE);
19162 tree v4sf_ftype_v4sf_int64
19163 = build_function_type_list (V4SF_type_node,
19164 V4SF_type_node, long_long_integer_type_node,
19165 NULL_TREE);
19166 tree v4sf_ftype_v4sf_v2si
19167 = build_function_type_list (V4SF_type_node,
19168 V4SF_type_node, V2SI_type_node, NULL_TREE);
19170 /* Miscellaneous. */
19171 tree v8qi_ftype_v4hi_v4hi
19172 = build_function_type_list (V8QI_type_node,
19173 V4HI_type_node, V4HI_type_node, NULL_TREE);
19174 tree v4hi_ftype_v2si_v2si
19175 = build_function_type_list (V4HI_type_node,
19176 V2SI_type_node, V2SI_type_node, NULL_TREE);
19177 tree v4sf_ftype_v4sf_v4sf_int
19178 = build_function_type_list (V4SF_type_node,
19179 V4SF_type_node, V4SF_type_node,
19180 integer_type_node, NULL_TREE);
19181 tree v2si_ftype_v4hi_v4hi
19182 = build_function_type_list (V2SI_type_node,
19183 V4HI_type_node, V4HI_type_node, NULL_TREE);
19184 tree v4hi_ftype_v4hi_int
19185 = build_function_type_list (V4HI_type_node,
19186 V4HI_type_node, integer_type_node, NULL_TREE);
19187 tree v2si_ftype_v2si_int
19188 = build_function_type_list (V2SI_type_node,
19189 V2SI_type_node, integer_type_node, NULL_TREE);
19190 tree v1di_ftype_v1di_int
19191 = build_function_type_list (V1DI_type_node,
19192 V1DI_type_node, integer_type_node, NULL_TREE);
19194 tree void_ftype_void
19195 = build_function_type (void_type_node, void_list_node);
19196 tree void_ftype_unsigned
19197 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
19198 tree void_ftype_unsigned_unsigned
19199 = build_function_type_list (void_type_node, unsigned_type_node,
19200 unsigned_type_node, NULL_TREE);
19201 tree void_ftype_pcvoid_unsigned_unsigned
19202 = build_function_type_list (void_type_node, const_ptr_type_node,
19203 unsigned_type_node, unsigned_type_node,
19204 NULL_TREE);
19205 tree unsigned_ftype_void
19206 = build_function_type (unsigned_type_node, void_list_node);
19207 tree v2si_ftype_v4sf
19208 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
19209 /* Loads/stores. */
19210 tree void_ftype_v8qi_v8qi_pchar
19211 = build_function_type_list (void_type_node,
19212 V8QI_type_node, V8QI_type_node,
19213 pchar_type_node, NULL_TREE);
19214 tree v4sf_ftype_pcfloat
19215 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
19216 tree v4sf_ftype_v4sf_pcv2sf
19217 = build_function_type_list (V4SF_type_node,
19218 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19219 tree void_ftype_pv2sf_v4sf
19220 = build_function_type_list (void_type_node,
19221 pv2sf_type_node, V4SF_type_node, NULL_TREE);
19222 tree void_ftype_pfloat_v4sf
19223 = build_function_type_list (void_type_node,
19224 pfloat_type_node, V4SF_type_node, NULL_TREE);
19225 tree void_ftype_pdi_di
19226 = build_function_type_list (void_type_node,
19227 pdi_type_node, long_long_unsigned_type_node,
19228 NULL_TREE);
19229 tree void_ftype_pv2di_v2di
19230 = build_function_type_list (void_type_node,
19231 pv2di_type_node, V2DI_type_node, NULL_TREE);
19232 /* Normal vector unops. */
19233 tree v4sf_ftype_v4sf
19234 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
19235 tree v16qi_ftype_v16qi
19236 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19237 tree v8hi_ftype_v8hi
19238 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19239 tree v4si_ftype_v4si
19240 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19241 tree v8qi_ftype_v8qi
19242 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19243 tree v4hi_ftype_v4hi
19244 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
19246 /* Normal vector binops. */
19247 tree v4sf_ftype_v4sf_v4sf
19248 = build_function_type_list (V4SF_type_node,
19249 V4SF_type_node, V4SF_type_node, NULL_TREE);
19250 tree v8qi_ftype_v8qi_v8qi
19251 = build_function_type_list (V8QI_type_node,
19252 V8QI_type_node, V8QI_type_node, NULL_TREE);
19253 tree v4hi_ftype_v4hi_v4hi
19254 = build_function_type_list (V4HI_type_node,
19255 V4HI_type_node, V4HI_type_node, NULL_TREE);
19256 tree v2si_ftype_v2si_v2si
19257 = build_function_type_list (V2SI_type_node,
19258 V2SI_type_node, V2SI_type_node, NULL_TREE);
19259 tree v1di_ftype_v1di_v1di
19260 = build_function_type_list (V1DI_type_node,
19261 V1DI_type_node, V1DI_type_node, NULL_TREE);
19262 tree v1di_ftype_v1di_v1di_int
19263 = build_function_type_list (V1DI_type_node,
19264 V1DI_type_node, V1DI_type_node,
19265 integer_type_node, NULL_TREE);
19266 tree v2si_ftype_v2sf
19267 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
19268 tree v2sf_ftype_v2si
19269 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
19270 tree v2si_ftype_v2si
19271 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
19272 tree v2sf_ftype_v2sf
19273 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
19274 tree v2sf_ftype_v2sf_v2sf
19275 = build_function_type_list (V2SF_type_node,
19276 V2SF_type_node, V2SF_type_node, NULL_TREE);
19277 tree v2si_ftype_v2sf_v2sf
19278 = build_function_type_list (V2SI_type_node,
19279 V2SF_type_node, V2SF_type_node, NULL_TREE);
19280 tree pint_type_node = build_pointer_type (integer_type_node);
19281 tree pdouble_type_node = build_pointer_type (double_type_node);
19282 tree pcdouble_type_node = build_pointer_type (
19283 build_type_variant (double_type_node, 1, 0));
19284 tree int_ftype_v2df_v2df
19285 = build_function_type_list (integer_type_node,
19286 V2DF_type_node, V2DF_type_node, NULL_TREE);
19288 tree void_ftype_pcvoid
19289 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
19290 tree v4sf_ftype_v4si
19291 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
19292 tree v4si_ftype_v4sf
19293 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
19294 tree v2df_ftype_v4si
19295 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
19296 tree v4si_ftype_v2df
19297 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
19298 tree v4si_ftype_v2df_v2df
19299 = build_function_type_list (V4SI_type_node,
19300 V2DF_type_node, V2DF_type_node, NULL_TREE);
19301 tree v2si_ftype_v2df
19302 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
19303 tree v4sf_ftype_v2df
19304 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
19305 tree v2df_ftype_v2si
19306 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
19307 tree v2df_ftype_v4sf
19308 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
19309 tree int_ftype_v2df
19310 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
19311 tree int64_ftype_v2df
19312 = build_function_type_list (long_long_integer_type_node,
19313 V2DF_type_node, NULL_TREE);
19314 tree v2df_ftype_v2df_int
19315 = build_function_type_list (V2DF_type_node,
19316 V2DF_type_node, integer_type_node, NULL_TREE);
19317 tree v2df_ftype_v2df_int64
19318 = build_function_type_list (V2DF_type_node,
19319 V2DF_type_node, long_long_integer_type_node,
19320 NULL_TREE);
19321 tree v4sf_ftype_v4sf_v2df
19322 = build_function_type_list (V4SF_type_node,
19323 V4SF_type_node, V2DF_type_node, NULL_TREE);
19324 tree v2df_ftype_v2df_v4sf
19325 = build_function_type_list (V2DF_type_node,
19326 V2DF_type_node, V4SF_type_node, NULL_TREE);
19327 tree v2df_ftype_v2df_v2df_int
19328 = build_function_type_list (V2DF_type_node,
19329 V2DF_type_node, V2DF_type_node,
19330 integer_type_node,
19331 NULL_TREE);
19332 tree v2df_ftype_v2df_pcdouble
19333 = build_function_type_list (V2DF_type_node,
19334 V2DF_type_node, pcdouble_type_node, NULL_TREE);
19335 tree void_ftype_pdouble_v2df
19336 = build_function_type_list (void_type_node,
19337 pdouble_type_node, V2DF_type_node, NULL_TREE);
19338 tree void_ftype_pint_int
19339 = build_function_type_list (void_type_node,
19340 pint_type_node, integer_type_node, NULL_TREE);
19341 tree void_ftype_v16qi_v16qi_pchar
19342 = build_function_type_list (void_type_node,
19343 V16QI_type_node, V16QI_type_node,
19344 pchar_type_node, NULL_TREE);
19345 tree v2df_ftype_pcdouble
19346 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
19347 tree v2df_ftype_v2df_v2df
19348 = build_function_type_list (V2DF_type_node,
19349 V2DF_type_node, V2DF_type_node, NULL_TREE);
19350 tree v16qi_ftype_v16qi_v16qi
19351 = build_function_type_list (V16QI_type_node,
19352 V16QI_type_node, V16QI_type_node, NULL_TREE);
19353 tree v8hi_ftype_v8hi_v8hi
19354 = build_function_type_list (V8HI_type_node,
19355 V8HI_type_node, V8HI_type_node, NULL_TREE);
19356 tree v4si_ftype_v4si_v4si
19357 = build_function_type_list (V4SI_type_node,
19358 V4SI_type_node, V4SI_type_node, NULL_TREE);
19359 tree v2di_ftype_v2di_v2di
19360 = build_function_type_list (V2DI_type_node,
19361 V2DI_type_node, V2DI_type_node, NULL_TREE);
19362 tree v2di_ftype_v2df_v2df
19363 = build_function_type_list (V2DI_type_node,
19364 V2DF_type_node, V2DF_type_node, NULL_TREE);
19365 tree v2df_ftype_v2df
19366 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
19367 tree v2di_ftype_v2di_int
19368 = build_function_type_list (V2DI_type_node,
19369 V2DI_type_node, integer_type_node, NULL_TREE);
19370 tree v2di_ftype_v2di_v2di_int
19371 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19372 V2DI_type_node, integer_type_node, NULL_TREE);
19373 tree v4si_ftype_v4si_int
19374 = build_function_type_list (V4SI_type_node,
19375 V4SI_type_node, integer_type_node, NULL_TREE);
19376 tree v8hi_ftype_v8hi_int
19377 = build_function_type_list (V8HI_type_node,
19378 V8HI_type_node, integer_type_node, NULL_TREE);
19379 tree v4si_ftype_v8hi_v8hi
19380 = build_function_type_list (V4SI_type_node,
19381 V8HI_type_node, V8HI_type_node, NULL_TREE);
19382 tree v1di_ftype_v8qi_v8qi
19383 = build_function_type_list (V1DI_type_node,
19384 V8QI_type_node, V8QI_type_node, NULL_TREE);
19385 tree v1di_ftype_v2si_v2si
19386 = build_function_type_list (V1DI_type_node,
19387 V2SI_type_node, V2SI_type_node, NULL_TREE);
19388 tree v2di_ftype_v16qi_v16qi
19389 = build_function_type_list (V2DI_type_node,
19390 V16QI_type_node, V16QI_type_node, NULL_TREE);
19391 tree v2di_ftype_v4si_v4si
19392 = build_function_type_list (V2DI_type_node,
19393 V4SI_type_node, V4SI_type_node, NULL_TREE);
19394 tree int_ftype_v16qi
19395 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
19396 tree v16qi_ftype_pcchar
19397 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
19398 tree void_ftype_pchar_v16qi
19399 = build_function_type_list (void_type_node,
19400 pchar_type_node, V16QI_type_node, NULL_TREE);
19402 tree v2di_ftype_v2di_unsigned_unsigned
19403 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19404 unsigned_type_node, unsigned_type_node,
19405 NULL_TREE);
19406 tree v2di_ftype_v2di_v2di_unsigned_unsigned
19407 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19408 unsigned_type_node, unsigned_type_node,
19409 NULL_TREE);
19410 tree v2di_ftype_v2di_v16qi
19411 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19412 NULL_TREE);
19413 tree v2df_ftype_v2df_v2df_v2df
19414 = build_function_type_list (V2DF_type_node,
19415 V2DF_type_node, V2DF_type_node,
19416 V2DF_type_node, NULL_TREE);
19417 tree v4sf_ftype_v4sf_v4sf_v4sf
19418 = build_function_type_list (V4SF_type_node,
19419 V4SF_type_node, V4SF_type_node,
19420 V4SF_type_node, NULL_TREE);
19421 tree v8hi_ftype_v16qi
19422 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19423 NULL_TREE);
19424 tree v4si_ftype_v16qi
19425 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19426 NULL_TREE);
19427 tree v2di_ftype_v16qi
19428 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19429 NULL_TREE);
19430 tree v4si_ftype_v8hi
19431 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19432 NULL_TREE);
19433 tree v2di_ftype_v8hi
19434 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19435 NULL_TREE);
19436 tree v2di_ftype_v4si
19437 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19438 NULL_TREE);
19439 tree v2di_ftype_pv2di
19440 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19441 NULL_TREE);
19442 tree v16qi_ftype_v16qi_v16qi_int
19443 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19444 V16QI_type_node, integer_type_node,
19445 NULL_TREE);
19446 tree v16qi_ftype_v16qi_v16qi_v16qi
19447 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19448 V16QI_type_node, V16QI_type_node,
19449 NULL_TREE);
19450 tree v8hi_ftype_v8hi_v8hi_int
19451 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19452 V8HI_type_node, integer_type_node,
19453 NULL_TREE);
19454 tree v4si_ftype_v4si_v4si_int
19455 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19456 V4SI_type_node, integer_type_node,
19457 NULL_TREE);
19458 tree int_ftype_v2di_v2di
19459 = build_function_type_list (integer_type_node,
19460 V2DI_type_node, V2DI_type_node,
19461 NULL_TREE);
19462 tree int_ftype_v16qi_int_v16qi_int_int
19463 = build_function_type_list (integer_type_node,
19464 V16QI_type_node,
19465 integer_type_node,
19466 V16QI_type_node,
19467 integer_type_node,
19468 integer_type_node,
19469 NULL_TREE);
19470 tree v16qi_ftype_v16qi_int_v16qi_int_int
19471 = build_function_type_list (V16QI_type_node,
19472 V16QI_type_node,
19473 integer_type_node,
19474 V16QI_type_node,
19475 integer_type_node,
19476 integer_type_node,
19477 NULL_TREE);
19478 tree int_ftype_v16qi_v16qi_int
19479 = build_function_type_list (integer_type_node,
19480 V16QI_type_node,
19481 V16QI_type_node,
19482 integer_type_node,
19483 NULL_TREE);
19485 /* SSE5 instructions */
19486 tree v2di_ftype_v2di_v2di_v2di
19487 = build_function_type_list (V2DI_type_node,
19488 V2DI_type_node,
19489 V2DI_type_node,
19490 V2DI_type_node,
19491 NULL_TREE);
19493 tree v4si_ftype_v4si_v4si_v4si
19494 = build_function_type_list (V4SI_type_node,
19495 V4SI_type_node,
19496 V4SI_type_node,
19497 V4SI_type_node,
19498 NULL_TREE);
19500 tree v4si_ftype_v4si_v4si_v2di
19501 = build_function_type_list (V4SI_type_node,
19502 V4SI_type_node,
19503 V4SI_type_node,
19504 V2DI_type_node,
19505 NULL_TREE);
19507 tree v8hi_ftype_v8hi_v8hi_v8hi
19508 = build_function_type_list (V8HI_type_node,
19509 V8HI_type_node,
19510 V8HI_type_node,
19511 V8HI_type_node,
19512 NULL_TREE);
19514 tree v8hi_ftype_v8hi_v8hi_v4si
19515 = build_function_type_list (V8HI_type_node,
19516 V8HI_type_node,
19517 V8HI_type_node,
19518 V4SI_type_node,
19519 NULL_TREE);
19521 tree v2df_ftype_v2df_v2df_v16qi
19522 = build_function_type_list (V2DF_type_node,
19523 V2DF_type_node,
19524 V2DF_type_node,
19525 V16QI_type_node,
19526 NULL_TREE);
19528 tree v4sf_ftype_v4sf_v4sf_v16qi
19529 = build_function_type_list (V4SF_type_node,
19530 V4SF_type_node,
19531 V4SF_type_node,
19532 V16QI_type_node,
19533 NULL_TREE);
19535 tree v2di_ftype_v2di_si
19536 = build_function_type_list (V2DI_type_node,
19537 V2DI_type_node,
19538 integer_type_node,
19539 NULL_TREE);
19541 tree v4si_ftype_v4si_si
19542 = build_function_type_list (V4SI_type_node,
19543 V4SI_type_node,
19544 integer_type_node,
19545 NULL_TREE);
19547 tree v8hi_ftype_v8hi_si
19548 = build_function_type_list (V8HI_type_node,
19549 V8HI_type_node,
19550 integer_type_node,
19551 NULL_TREE);
19553 tree v16qi_ftype_v16qi_si
19554 = build_function_type_list (V16QI_type_node,
19555 V16QI_type_node,
19556 integer_type_node,
19557 NULL_TREE);
19558 tree v4sf_ftype_v4hi
19559 = build_function_type_list (V4SF_type_node,
19560 V4HI_type_node,
19561 NULL_TREE);
19563 tree v4hi_ftype_v4sf
19564 = build_function_type_list (V4HI_type_node,
19565 V4SF_type_node,
19566 NULL_TREE);
19568 tree v2di_ftype_v2di
19569 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19571 tree v16qi_ftype_v8hi_v8hi
19572 = build_function_type_list (V16QI_type_node,
19573 V8HI_type_node, V8HI_type_node,
19574 NULL_TREE);
19575 tree v8hi_ftype_v4si_v4si
19576 = build_function_type_list (V8HI_type_node,
19577 V4SI_type_node, V4SI_type_node,
19578 NULL_TREE);
19579 tree v8hi_ftype_v16qi_v16qi
19580 = build_function_type_list (V8HI_type_node,
19581 V16QI_type_node, V16QI_type_node,
19582 NULL_TREE);
19583 tree v4hi_ftype_v8qi_v8qi
19584 = build_function_type_list (V4HI_type_node,
19585 V8QI_type_node, V8QI_type_node,
19586 NULL_TREE);
19587 tree unsigned_ftype_unsigned_uchar
19588 = build_function_type_list (unsigned_type_node,
19589 unsigned_type_node,
19590 unsigned_char_type_node,
19591 NULL_TREE);
19592 tree unsigned_ftype_unsigned_ushort
19593 = build_function_type_list (unsigned_type_node,
19594 unsigned_type_node,
19595 short_unsigned_type_node,
19596 NULL_TREE);
19597 tree unsigned_ftype_unsigned_unsigned
19598 = build_function_type_list (unsigned_type_node,
19599 unsigned_type_node,
19600 unsigned_type_node,
19601 NULL_TREE);
19602 tree uint64_ftype_uint64_uint64
19603 = build_function_type_list (long_long_unsigned_type_node,
19604 long_long_unsigned_type_node,
19605 long_long_unsigned_type_node,
19606 NULL_TREE);
19607 tree float_ftype_float
19608 = build_function_type_list (float_type_node,
19609 float_type_node,
19610 NULL_TREE);
19612 tree ftype;
19614 /* Add all special builtins with a variable number of operands. */
19615 for (i = 0, d = bdesc_special_args;
19616 i < ARRAY_SIZE (bdesc_special_args);
19617 i++, d++)
19619 tree type;
19621 if (d->name == 0)
19622 continue;
19624 switch ((enum ix86_special_builtin_type) d->flag)
19626 case VOID_FTYPE_VOID:
19627 type = void_ftype_void;
19628 break;
19629 case V16QI_FTYPE_PCCHAR:
19630 type = v16qi_ftype_pcchar;
19631 break;
19632 case V4SF_FTYPE_PCFLOAT:
19633 type = v4sf_ftype_pcfloat;
19634 break;
19635 case V2DI_FTYPE_PV2DI:
19636 type = v2di_ftype_pv2di;
19637 break;
19638 case V2DF_FTYPE_PCDOUBLE:
19639 type = v2df_ftype_pcdouble;
19640 break;
19641 case V4SF_FTYPE_V4SF_PCV2SF:
19642 type = v4sf_ftype_v4sf_pcv2sf;
19643 break;
19644 case V2DF_FTYPE_V2DF_PCDOUBLE:
19645 type = v2df_ftype_v2df_pcdouble;
19646 break;
19647 case VOID_FTYPE_PV2SF_V4SF:
19648 type = void_ftype_pv2sf_v4sf;
19649 break;
19650 case VOID_FTYPE_PV2DI_V2DI:
19651 type = void_ftype_pv2di_v2di;
19652 break;
19653 case VOID_FTYPE_PCHAR_V16QI:
19654 type = void_ftype_pchar_v16qi;
19655 break;
19656 case VOID_FTYPE_PFLOAT_V4SF:
19657 type = void_ftype_pfloat_v4sf;
19658 break;
19659 case VOID_FTYPE_PDOUBLE_V2DF:
19660 type = void_ftype_pdouble_v2df;
19661 break;
19662 case VOID_FTYPE_PDI_DI:
19663 type = void_ftype_pdi_di;
19664 break;
19665 case VOID_FTYPE_PINT_INT:
19666 type = void_ftype_pint_int;
19667 break;
19668 default:
19669 gcc_unreachable ();
19672 def_builtin (d->mask, d->name, type, d->code);
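/* Illustrative sketch, not part of i386.c: user-level view of one builtin
   registered by the loop above.  Assuming bdesc_special_args carries the
   usual SSE unaligned-load entry "__builtin_ia32_loadups" with flag
   V4SF_FTYPE_PCFLOAT, the loop gives it the v4sf_ftype_pcfloat signature,
   so it can be called directly (compile with -msse):  */
typedef float example_loadups_v4sf __attribute__ ((vector_size (16)));

static example_loadups_v4sf
loadups_example (const float *p)
{
  return __builtin_ia32_loadups (p);  /* unaligned 4 x float load */
}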
19675 /* Add all builtins with a variable number of operands. */
19676 for (i = 0, d = bdesc_args;
19677 i < ARRAY_SIZE (bdesc_args);
19678 i++, d++)
19680 tree type;
19682 if (d->name == 0)
19683 continue;
19685 switch ((enum ix86_builtin_type) d->flag)
19687 case FLOAT_FTYPE_FLOAT:
19688 type = float_ftype_float;
19689 break;
19690 case INT_FTYPE_V2DI_V2DI_PTEST:
19691 type = int_ftype_v2di_v2di;
19692 break;
19693 case INT64_FTYPE_V4SF:
19694 type = int64_ftype_v4sf;
19695 break;
19696 case INT64_FTYPE_V2DF:
19697 type = int64_ftype_v2df;
19698 break;
19699 case INT_FTYPE_V16QI:
19700 type = int_ftype_v16qi;
19701 break;
19702 case INT_FTYPE_V8QI:
19703 type = int_ftype_v8qi;
19704 break;
19705 case INT_FTYPE_V4SF:
19706 type = int_ftype_v4sf;
19707 break;
19708 case INT_FTYPE_V2DF:
19709 type = int_ftype_v2df;
19710 break;
19711 case V16QI_FTYPE_V16QI:
19712 type = v16qi_ftype_v16qi;
19713 break;
19714 case V8HI_FTYPE_V8HI:
19715 type = v8hi_ftype_v8hi;
19716 break;
19717 case V8HI_FTYPE_V16QI:
19718 type = v8hi_ftype_v16qi;
19719 break;
19720 case V8QI_FTYPE_V8QI:
19721 type = v8qi_ftype_v8qi;
19722 break;
19723 case V4SI_FTYPE_V4SI:
19724 type = v4si_ftype_v4si;
19725 break;
19726 case V4SI_FTYPE_V16QI:
19727 type = v4si_ftype_v16qi;
19728 break;
19729 case V4SI_FTYPE_V8HI:
19730 type = v4si_ftype_v8hi;
19731 break;
19732 case V4SI_FTYPE_V4SF:
19733 type = v4si_ftype_v4sf;
19734 break;
19735 case V4SI_FTYPE_V2DF:
19736 type = v4si_ftype_v2df;
19737 break;
19738 case V4HI_FTYPE_V4HI:
19739 type = v4hi_ftype_v4hi;
19740 break;
19741 case V4SF_FTYPE_V4SF:
19742 case V4SF_FTYPE_V4SF_VEC_MERGE:
19743 type = v4sf_ftype_v4sf;
19744 break;
19745 case V4SF_FTYPE_V4SI:
19746 type = v4sf_ftype_v4si;
19747 break;
19748 case V4SF_FTYPE_V2DF:
19749 type = v4sf_ftype_v2df;
19750 break;
19751 case V2DI_FTYPE_V2DI:
19752 type = v2di_ftype_v2di;
19753 break;
19754 case V2DI_FTYPE_V16QI:
19755 type = v2di_ftype_v16qi;
19756 break;
19757 case V2DI_FTYPE_V8HI:
19758 type = v2di_ftype_v8hi;
19759 break;
19760 case V2DI_FTYPE_V4SI:
19761 type = v2di_ftype_v4si;
19762 break;
19763 case V2SI_FTYPE_V2SI:
19764 type = v2si_ftype_v2si;
19765 break;
19766 case V2SI_FTYPE_V4SF:
19767 type = v2si_ftype_v4sf;
19768 break;
19769 case V2SI_FTYPE_V2DF:
19770 type = v2si_ftype_v2df;
19771 break;
19772 case V2SI_FTYPE_V2SF:
19773 type = v2si_ftype_v2sf;
19774 break;
19775 case V2DF_FTYPE_V4SF:
19776 type = v2df_ftype_v4sf;
19777 break;
19778 case V2DF_FTYPE_V2DF:
19779 case V2DF_FTYPE_V2DF_VEC_MERGE:
19780 type = v2df_ftype_v2df;
19781 break;
19782 case V2DF_FTYPE_V2SI:
19783 type = v2df_ftype_v2si;
19784 break;
19785 case V2DF_FTYPE_V4SI:
19786 type = v2df_ftype_v4si;
19787 break;
19788 case V2SF_FTYPE_V2SF:
19789 type = v2sf_ftype_v2sf;
19790 break;
19791 case V2SF_FTYPE_V2SI:
19792 type = v2sf_ftype_v2si;
19793 break;
19794 case V16QI_FTYPE_V16QI_V16QI:
19795 type = v16qi_ftype_v16qi_v16qi;
19796 break;
19797 case V16QI_FTYPE_V8HI_V8HI:
19798 type = v16qi_ftype_v8hi_v8hi;
19799 break;
19800 case V8QI_FTYPE_V8QI_V8QI:
19801 type = v8qi_ftype_v8qi_v8qi;
19802 break;
19803 case V8QI_FTYPE_V4HI_V4HI:
19804 type = v8qi_ftype_v4hi_v4hi;
19805 break;
19806 case V8HI_FTYPE_V8HI_V8HI:
19807 case V8HI_FTYPE_V8HI_V8HI_COUNT:
19808 type = v8hi_ftype_v8hi_v8hi;
19809 break;
19810 case V8HI_FTYPE_V16QI_V16QI:
19811 type = v8hi_ftype_v16qi_v16qi;
19812 break;
19813 case V8HI_FTYPE_V4SI_V4SI:
19814 type = v8hi_ftype_v4si_v4si;
19815 break;
19816 case V8HI_FTYPE_V8HI_SI_COUNT:
19817 type = v8hi_ftype_v8hi_int;
19818 break;
19819 case V4SI_FTYPE_V4SI_V4SI:
19820 case V4SI_FTYPE_V4SI_V4SI_COUNT:
19821 type = v4si_ftype_v4si_v4si;
19822 break;
19823 case V4SI_FTYPE_V8HI_V8HI:
19824 type = v4si_ftype_v8hi_v8hi;
19825 break;
19826 case V4SI_FTYPE_V4SF_V4SF:
19827 type = v4si_ftype_v4sf_v4sf;
19828 break;
19829 case V4SI_FTYPE_V2DF_V2DF:
19830 type = v4si_ftype_v2df_v2df;
19831 break;
19832 case V4SI_FTYPE_V4SI_SI_COUNT:
19833 type = v4si_ftype_v4si_int;
19834 break;
19835 case V4HI_FTYPE_V4HI_V4HI:
19836 case V4HI_FTYPE_V4HI_V4HI_COUNT:
19837 type = v4hi_ftype_v4hi_v4hi;
19838 break;
19839 case V4HI_FTYPE_V8QI_V8QI:
19840 type = v4hi_ftype_v8qi_v8qi;
19841 break;
19842 case V4HI_FTYPE_V2SI_V2SI:
19843 type = v4hi_ftype_v2si_v2si;
19844 break;
19845 case V4HI_FTYPE_V4HI_SI_COUNT:
19846 type = v4hi_ftype_v4hi_int;
19847 break;
19848 case V4SF_FTYPE_V4SF_V4SF:
19849 case V4SF_FTYPE_V4SF_V4SF_SWAP:
19850 type = v4sf_ftype_v4sf_v4sf;
19851 break;
19852 case V4SF_FTYPE_V4SF_V2SI:
19853 type = v4sf_ftype_v4sf_v2si;
19854 break;
19855 case V4SF_FTYPE_V4SF_V2DF:
19856 type = v4sf_ftype_v4sf_v2df;
19857 break;
19858 case V4SF_FTYPE_V4SF_DI:
19859 type = v4sf_ftype_v4sf_int64;
19860 break;
19861 case V4SF_FTYPE_V4SF_SI:
19862 type = v4sf_ftype_v4sf_int;
19863 break;
19864 case V2DI_FTYPE_V2DI_V2DI:
19865 case V2DI_FTYPE_V2DI_V2DI_COUNT:
19866 type = v2di_ftype_v2di_v2di;
19867 break;
19868 case V2DI_FTYPE_V16QI_V16QI:
19869 type = v2di_ftype_v16qi_v16qi;
19870 break;
19871 case V2DI_FTYPE_V4SI_V4SI:
19872 type = v2di_ftype_v4si_v4si;
19873 break;
19874 case V2DI_FTYPE_V2DI_V16QI:
19875 type = v2di_ftype_v2di_v16qi;
19876 break;
19877 case V2DI_FTYPE_V2DF_V2DF:
19878 type = v2di_ftype_v2df_v2df;
19879 break;
19880 case V2DI_FTYPE_V2DI_SI_COUNT:
19881 type = v2di_ftype_v2di_int;
19882 break;
19883 case V2SI_FTYPE_V2SI_V2SI:
19884 case V2SI_FTYPE_V2SI_V2SI_COUNT:
19885 type = v2si_ftype_v2si_v2si;
19886 break;
19887 case V2SI_FTYPE_V4HI_V4HI:
19888 type = v2si_ftype_v4hi_v4hi;
19889 break;
19890 case V2SI_FTYPE_V2SF_V2SF:
19891 type = v2si_ftype_v2sf_v2sf;
19892 break;
19893 case V2SI_FTYPE_V2SI_SI_COUNT:
19894 type = v2si_ftype_v2si_int;
19895 break;
19896 case V2DF_FTYPE_V2DF_V2DF:
19897 case V2DF_FTYPE_V2DF_V2DF_SWAP:
19898 type = v2df_ftype_v2df_v2df;
19899 break;
19900 case V2DF_FTYPE_V2DF_V4SF:
19901 type = v2df_ftype_v2df_v4sf;
19902 break;
19903 case V2DF_FTYPE_V2DF_DI:
19904 type = v2df_ftype_v2df_int64;
19905 break;
19906 case V2DF_FTYPE_V2DF_SI:
19907 type = v2df_ftype_v2df_int;
19908 break;
19909 case V2SF_FTYPE_V2SF_V2SF:
19910 type = v2sf_ftype_v2sf_v2sf;
19911 break;
19912 case V1DI_FTYPE_V1DI_V1DI:
19913 case V1DI_FTYPE_V1DI_V1DI_COUNT:
19914 type = v1di_ftype_v1di_v1di;
19915 break;
19916 case V1DI_FTYPE_V8QI_V8QI:
19917 type = v1di_ftype_v8qi_v8qi;
19918 break;
19919 case V1DI_FTYPE_V2SI_V2SI:
19920 type = v1di_ftype_v2si_v2si;
19921 break;
19922 case V1DI_FTYPE_V1DI_SI_COUNT:
19923 type = v1di_ftype_v1di_int;
19924 break;
19925 case UINT64_FTYPE_UINT64_UINT64:
19926 type = uint64_ftype_uint64_uint64;
19927 break;
19928 case UINT_FTYPE_UINT_UINT:
19929 type = unsigned_ftype_unsigned_unsigned;
19930 break;
19931 case UINT_FTYPE_UINT_USHORT:
19932 type = unsigned_ftype_unsigned_ushort;
19933 break;
19934 case UINT_FTYPE_UINT_UCHAR:
19935 type = unsigned_ftype_unsigned_uchar;
19936 break;
19937 case V8HI_FTYPE_V8HI_INT:
19938 type = v8hi_ftype_v8hi_int;
19939 break;
19940 case V4SI_FTYPE_V4SI_INT:
19941 type = v4si_ftype_v4si_int;
19942 break;
19943 case V4HI_FTYPE_V4HI_INT:
19944 type = v4hi_ftype_v4hi_int;
19945 break;
19946 case V4SF_FTYPE_V4SF_INT:
19947 type = v4sf_ftype_v4sf_int;
19948 break;
19949 case V2DI_FTYPE_V2DI_INT:
19950 case V2DI2TI_FTYPE_V2DI_INT:
19951 type = v2di_ftype_v2di_int;
19952 break;
19953 case V2DF_FTYPE_V2DF_INT:
19954 type = v2df_ftype_v2df_int;
19955 break;
19956 case V16QI_FTYPE_V16QI_V16QI_V16QI:
19957 type = v16qi_ftype_v16qi_v16qi_v16qi;
19958 break;
19959 case V4SF_FTYPE_V4SF_V4SF_V4SF:
19960 type = v4sf_ftype_v4sf_v4sf_v4sf;
19961 break;
19962 case V2DF_FTYPE_V2DF_V2DF_V2DF:
19963 type = v2df_ftype_v2df_v2df_v2df;
19964 break;
19965 case V16QI_FTYPE_V16QI_V16QI_INT:
19966 type = v16qi_ftype_v16qi_v16qi_int;
19967 break;
19968 case V8HI_FTYPE_V8HI_V8HI_INT:
19969 type = v8hi_ftype_v8hi_v8hi_int;
19970 break;
19971 case V4SI_FTYPE_V4SI_V4SI_INT:
19972 type = v4si_ftype_v4si_v4si_int;
19973 break;
19974 case V4SF_FTYPE_V4SF_V4SF_INT:
19975 type = v4sf_ftype_v4sf_v4sf_int;
19976 break;
19977 case V2DI_FTYPE_V2DI_V2DI_INT:
19978 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
19979 type = v2di_ftype_v2di_v2di_int;
19980 break;
19981 case V2DF_FTYPE_V2DF_V2DF_INT:
19982 type = v2df_ftype_v2df_v2df_int;
19983 break;
19984 case V2DI_FTYPE_V2DI_UINT_UINT:
19985 type = v2di_ftype_v2di_unsigned_unsigned;
19986 break;
19987 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
19988 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
19989 break;
19990 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
19991 type = v1di_ftype_v1di_v1di_int;
19992 break;
19993 default:
19994 gcc_unreachable ();
19997 def_builtin_const (d->mask, d->name, type, d->code);
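/* Illustrative sketch, not part of i386.c: builtins registered through
   def_builtin_const above are pure value computations, so the optimizers
   may merge identical calls.  Assuming the usual bdesc_args entry for
   "__builtin_ia32_addps" (flag V4SF_FTYPE_V4SF_V4SF), the two calls below
   are candidates for a single ADDPS (compile with -msse):  */
typedef float example_addps_v4sf __attribute__ ((vector_size (16)));

static example_addps_v4sf
addps_cse_example (example_addps_v4sf a, example_addps_v4sf b)
{
  example_addps_v4sf x = __builtin_ia32_addps (a, b);
  example_addps_v4sf y = __builtin_ia32_addps (a, b);  /* same value as x */
  return x + y;
}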
20000 /* pcmpestr[im] insns. */
20001 for (i = 0, d = bdesc_pcmpestr;
20002 i < ARRAY_SIZE (bdesc_pcmpestr);
20003 i++, d++)
20005 if (d->code == IX86_BUILTIN_PCMPESTRM128)
20006 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
20007 else
20008 ftype = int_ftype_v16qi_int_v16qi_int_int;
20009 def_builtin_const (d->mask, d->name, ftype, d->code);
20012 /* pcmpistr[im] insns. */
20013 for (i = 0, d = bdesc_pcmpistr;
20014 i < ARRAY_SIZE (bdesc_pcmpistr);
20015 i++, d++)
20017 if (d->code == IX86_BUILTIN_PCMPISTRM128)
20018 ftype = v16qi_ftype_v16qi_v16qi_int;
20019 else
20020 ftype = int_ftype_v16qi_v16qi_int;
20021 def_builtin_const (d->mask, d->name, ftype, d->code);
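/* Illustrative sketch, not part of i386.c: the pcmpistr[im] builtins set up
   above take their control byte as a compile-time constant.  Assuming the
   usual SSE4.2 entry "__builtin_ia32_pcmpistri128" (an "i" variant, so it
   receives the int_ftype_v16qi_v16qi_int signature), a substring-search
   style call looks like this (compile with -msse4.2):  */
typedef char example_pcmpistr_v16qi __attribute__ ((vector_size (16)));

static int
pcmpistri_example (example_pcmpistr_v16qi needle, example_pcmpistr_v16qi haystack)
{
  /* 0x0c selects unsigned byte elements, equal-ordered comparison.  */
  return __builtin_ia32_pcmpistri128 (needle, haystack, 0x0c);
}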
20024 /* comi/ucomi insns. */
20025 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
20026 if (d->mask == OPTION_MASK_ISA_SSE2)
20027 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
20028 else
20029 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
20031 /* SSE */
20032 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
20033 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
20035 /* SSE or 3DNow!A */
20036 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
20038 /* SSE2 */
20039 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
20041 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
20042 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
20044 /* SSE3. */
20045 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
20046 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
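/* Illustrative sketch, not part of i386.c: the two SSE3 builtins registered
   just above are what <pmmintrin.h> exposes as _mm_monitor and _mm_mwait.
   A minimal caller, assuming -msse3 and an environment where user-mode
   MONITOR/MWAIT is permitted:  */
static void
mwait_example (const volatile int *flag)
{
  __builtin_ia32_monitor ((const void *) flag, 0, 0);
  if (!*flag)
    __builtin_ia32_mwait (0, 0);
}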
20048 /* AES */
20049 if (TARGET_AES)
20051 /* Define AES built-in functions only if AES is enabled. */
20052 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
20053 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
20054 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
20055 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
20056 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
20057 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
20060 /* PCLMUL */
20061 if (TARGET_PCLMUL)
20063 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
20064 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
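/* Illustrative sketch, not part of i386.c: the single PCLMUL builtin above
   performs a carry-less 64 x 64 -> 128 bit multiply; <wmmintrin.h> wraps it
   as _mm_clmulepi64_si128.  The immediate selects which 64-bit half of each
   source participates (compile with -mpclmul):  */
typedef long long example_pclmul_v2di __attribute__ ((vector_size (16)));

static example_pclmul_v2di
clmul_low_example (example_pclmul_v2di a, example_pclmul_v2di b)
{
  return __builtin_ia32_pclmulqdq128 (a, b, 0x00);  /* low half of a, low half of b */
}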
20067 /* Access to the vec_init patterns. */
20068 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
20069 integer_type_node, NULL_TREE);
20070 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
20072 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
20073 short_integer_type_node,
20074 short_integer_type_node,
20075 short_integer_type_node, NULL_TREE);
20076 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
20078 ftype = build_function_type_list (V8QI_type_node, char_type_node,
20079 char_type_node, char_type_node,
20080 char_type_node, char_type_node,
20081 char_type_node, char_type_node,
20082 char_type_node, NULL_TREE);
20083 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
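/* Illustrative sketch, not part of i386.c: the vec_init builtins above are
   the internal constructors behind the element-wise MMX _mm_set* intrinsics
   in <mmintrin.h>.  The element order is assumed here to be lowest element
   first (compile with -mmmx):  */
typedef int example_vecinit_v2si __attribute__ ((vector_size (8)));

static example_vecinit_v2si
vec_init_example (int e0, int e1)
{
  return __builtin_ia32_vec_init_v2si (e0, e1);
}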
20085 /* Access to the vec_extract patterns. */
20086 ftype = build_function_type_list (double_type_node, V2DF_type_node,
20087 integer_type_node, NULL_TREE);
20088 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
20090 ftype = build_function_type_list (long_long_integer_type_node,
20091 V2DI_type_node, integer_type_node,
20092 NULL_TREE);
20093 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
20095 ftype = build_function_type_list (float_type_node, V4SF_type_node,
20096 integer_type_node, NULL_TREE);
20097 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
20099 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
20100 integer_type_node, NULL_TREE);
20101 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
20103 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20104 integer_type_node, NULL_TREE);
20105 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
20107 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20108 integer_type_node, NULL_TREE);
20109 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
20111 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20112 integer_type_node, NULL_TREE);
20113 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
20115 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20116 integer_type_node, NULL_TREE);
20117 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
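/* Illustrative sketch, not part of i386.c: the vec_ext builtins above read
   one element out of a vector; <xmmintrin.h> implements _mm_cvtss_f32 as an
   extraction of element 0, for example.  The index operand must be a
   constant (compile with -msse):  */
typedef float example_vecext_v4sf __attribute__ ((vector_size (16)));

static float
lowest_float_example (example_vecext_v4sf v)
{
  return __builtin_ia32_vec_ext_v4sf (v, 0);
}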
20119 /* Access to the vec_set patterns. */
20120 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20121 intDI_type_node,
20122 integer_type_node, NULL_TREE);
20123 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
20125 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20126 float_type_node,
20127 integer_type_node, NULL_TREE);
20128 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
20130 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20131 intSI_type_node,
20132 integer_type_node, NULL_TREE);
20133 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
20135 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20136 intHI_type_node,
20137 integer_type_node, NULL_TREE);
20138 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
20140 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20141 intHI_type_node,
20142 integer_type_node, NULL_TREE);
20143 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
20145 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20146 intQI_type_node,
20147 integer_type_node, NULL_TREE);
20148 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
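/* Illustrative sketch, not part of i386.c: the vec_set builtins above
   replace a single element of a vector; <emmintrin.h> builds
   _mm_insert_epi16 on top of "__builtin_ia32_vec_set_v8hi", for example.
   The element index must be a constant (compile with -msse2):  */
typedef short example_vecset_v8hi __attribute__ ((vector_size (16)));

static example_vecset_v8hi
insert_epi16_example (example_vecset_v8hi v, short x)
{
  return __builtin_ia32_vec_set_v8hi (v, x, 3);  /* put x into element 3 */
}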
20150 /* Add the SSE5 multi-argument instructions. */
20151 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20153 tree mtype = NULL_TREE;
20155 if (d->name == 0)
20156 continue;
20158 switch ((enum multi_arg_type)d->flag)
20160 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
20161 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
20162 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
20163 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
20164 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
20165 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
20166 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
20167 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20168 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
20169 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
20170 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
20171 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
20172 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
20173 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
20174 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
20175 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
20176 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
20177 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
20178 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
20179 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
20180 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
20181 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
20182 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
20183 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
20184 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
20185 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
20186 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
20187 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
20188 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
20189 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
20190 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
20191 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
20192 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
20193 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
20194 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
20195 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
20196 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
20197 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
20198 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
20199 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
20200 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
20201 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
20202 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
20203 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
20204 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
20205 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
20206 case MULTI_ARG_UNKNOWN:
20207 default:
20208 gcc_unreachable ();
20211 if (mtype)
20212 def_builtin_const (d->mask, d->name, mtype, d->code);
20216 static void
20217 ix86_init_builtins (void)
20219 tree float128_type_node = make_node (REAL_TYPE);
20220 tree ftype, decl;
20222 /* The __float80 type. */
20223 if (TYPE_MODE (long_double_type_node) == XFmode)
20224 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
20225 "__float80");
20226 else
20228 /* The __float80 type. */
20229 tree float80_type_node = make_node (REAL_TYPE);
20231 TYPE_PRECISION (float80_type_node) = 80;
20232 layout_type (float80_type_node);
20233 (*lang_hooks.types.register_builtin_type) (float80_type_node,
20234 "__float80");
20237 /* The __float128 type. */
20238 TYPE_PRECISION (float128_type_node) = 128;
20239 layout_type (float128_type_node);
20240 (*lang_hooks.types.register_builtin_type) (float128_type_node,
20241 "__float128");
20243 /* TFmode support builtins. */
20244 ftype = build_function_type (float128_type_node, void_list_node);
20245 decl = add_builtin_function ("__builtin_infq", ftype,
20246 IX86_BUILTIN_INFQ, BUILT_IN_MD,
20247 NULL, NULL_TREE);
20248 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
20250 ftype = build_function_type_list (float128_type_node,
20251 float128_type_node,
20252 NULL_TREE);
20253 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
20255 ftype = build_function_type_list (float128_type_node,
20256 float128_type_node,
20257 float128_type_node,
20258 NULL_TREE);
20259 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
20261 if (TARGET_MMX)
20262 ix86_init_mmx_sse_builtins ();
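/* Illustrative sketch, not part of i386.c: user-level view of the TFmode
   support registered by ix86_init_builtins above.  On x86-64, __float128 is
   the 128-bit type laid out here, and __builtin_infq / __builtin_fabsq
   operate on it (the comparison goes through the libgcc soft-fp routines):  */
static int
is_infinite_example (__float128 x)
{
  return __builtin_fabsq (x) == __builtin_infq ();
}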
20265 /* Errors in the source file can cause expand_expr to return const0_rtx
20266 where we expect a vector. To avoid crashing, use one of the vector
20267 clear instructions. */
20268 static rtx
20269 safe_vector_operand (rtx x, enum machine_mode mode)
20271 if (x == const0_rtx)
20272 x = CONST0_RTX (mode);
20273 return x;
20276 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
20278 static rtx
20279 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
20281 rtx pat;
20282 tree arg0 = CALL_EXPR_ARG (exp, 0);
20283 tree arg1 = CALL_EXPR_ARG (exp, 1);
20284 rtx op0 = expand_normal (arg0);
20285 rtx op1 = expand_normal (arg1);
20286 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20287 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20288 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20290 if (VECTOR_MODE_P (mode0))
20291 op0 = safe_vector_operand (op0, mode0);
20292 if (VECTOR_MODE_P (mode1))
20293 op1 = safe_vector_operand (op1, mode1);
20295 if (optimize || !target
20296 || GET_MODE (target) != tmode
20297 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20298 target = gen_reg_rtx (tmode);
20300 if (GET_MODE (op1) == SImode && mode1 == TImode)
20302 rtx x = gen_reg_rtx (V4SImode);
20303 emit_insn (gen_sse2_loadd (x, op1));
20304 op1 = gen_lowpart (TImode, x);
20307 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20308 op0 = copy_to_mode_reg (mode0, op0);
20309 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20310 op1 = copy_to_mode_reg (mode1, op1);
20312 pat = GEN_FCN (icode) (target, op0, op1);
20313 if (! pat)
20314 return 0;
20316 emit_insn (pat);
20318 return target;
20321 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20323 static rtx
20324 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20325 enum multi_arg_type m_type,
20326 enum insn_code sub_code)
20328 rtx pat;
20329 int i;
20330 int nargs;
20331 bool comparison_p = false;
20332 bool tf_p = false;
20333 bool last_arg_constant = false;
20334 int num_memory = 0;
20335 struct {
20336 rtx op;
20337 enum machine_mode mode;
20338 } args[4];
20340 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20342 switch (m_type)
20344 case MULTI_ARG_3_SF:
20345 case MULTI_ARG_3_DF:
20346 case MULTI_ARG_3_DI:
20347 case MULTI_ARG_3_SI:
20348 case MULTI_ARG_3_SI_DI:
20349 case MULTI_ARG_3_HI:
20350 case MULTI_ARG_3_HI_SI:
20351 case MULTI_ARG_3_QI:
20352 case MULTI_ARG_3_PERMPS:
20353 case MULTI_ARG_3_PERMPD:
20354 nargs = 3;
20355 break;
20357 case MULTI_ARG_2_SF:
20358 case MULTI_ARG_2_DF:
20359 case MULTI_ARG_2_DI:
20360 case MULTI_ARG_2_SI:
20361 case MULTI_ARG_2_HI:
20362 case MULTI_ARG_2_QI:
20363 nargs = 2;
20364 break;
20366 case MULTI_ARG_2_DI_IMM:
20367 case MULTI_ARG_2_SI_IMM:
20368 case MULTI_ARG_2_HI_IMM:
20369 case MULTI_ARG_2_QI_IMM:
20370 nargs = 2;
20371 last_arg_constant = true;
20372 break;
20374 case MULTI_ARG_1_SF:
20375 case MULTI_ARG_1_DF:
20376 case MULTI_ARG_1_DI:
20377 case MULTI_ARG_1_SI:
20378 case MULTI_ARG_1_HI:
20379 case MULTI_ARG_1_QI:
20380 case MULTI_ARG_1_SI_DI:
20381 case MULTI_ARG_1_HI_DI:
20382 case MULTI_ARG_1_HI_SI:
20383 case MULTI_ARG_1_QI_DI:
20384 case MULTI_ARG_1_QI_SI:
20385 case MULTI_ARG_1_QI_HI:
20386 case MULTI_ARG_1_PH2PS:
20387 case MULTI_ARG_1_PS2PH:
20388 nargs = 1;
20389 break;
20391 case MULTI_ARG_2_SF_CMP:
20392 case MULTI_ARG_2_DF_CMP:
20393 case MULTI_ARG_2_DI_CMP:
20394 case MULTI_ARG_2_SI_CMP:
20395 case MULTI_ARG_2_HI_CMP:
20396 case MULTI_ARG_2_QI_CMP:
20397 nargs = 2;
20398 comparison_p = true;
20399 break;
20401 case MULTI_ARG_2_SF_TF:
20402 case MULTI_ARG_2_DF_TF:
20403 case MULTI_ARG_2_DI_TF:
20404 case MULTI_ARG_2_SI_TF:
20405 case MULTI_ARG_2_HI_TF:
20406 case MULTI_ARG_2_QI_TF:
20407 nargs = 2;
20408 tf_p = true;
20409 break;
20411 case MULTI_ARG_UNKNOWN:
20412 default:
20413 gcc_unreachable ();
20416 if (optimize || !target
20417 || GET_MODE (target) != tmode
20418 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20419 target = gen_reg_rtx (tmode);
20421 gcc_assert (nargs <= 4);
20423 for (i = 0; i < nargs; i++)
20425 tree arg = CALL_EXPR_ARG (exp, i);
20426 rtx op = expand_normal (arg);
20427 int adjust = (comparison_p) ? 1 : 0;
20428 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20430 if (last_arg_constant && i == nargs-1)
20432 if (GET_CODE (op) != CONST_INT)
20434 error ("last argument must be an immediate");
20435 return gen_reg_rtx (tmode);
20438 else
20440 if (VECTOR_MODE_P (mode))
20441 op = safe_vector_operand (op, mode);
20443 /* If we aren't optimizing, only allow one memory operand to be
20444 generated. */
20445 if (memory_operand (op, mode))
20446 num_memory++;
20448 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20450 if (optimize
20451 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20452 || num_memory > 1)
20453 op = force_reg (mode, op);
20456 args[i].op = op;
20457 args[i].mode = mode;
20460 switch (nargs)
20462 case 1:
20463 pat = GEN_FCN (icode) (target, args[0].op);
20464 break;
20466 case 2:
20467 if (tf_p)
20468 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20469 GEN_INT ((int)sub_code));
20470 else if (! comparison_p)
20471 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20472 else
20474 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20475 args[0].op,
20476 args[1].op);
20478 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20480 break;
20482 case 3:
20483 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20484 break;
20486 default:
20487 gcc_unreachable ();
20490 if (! pat)
20491 return 0;
20493 emit_insn (pat);
20494 return target;
20497 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20498 insns with vec_merge. */
20500 static rtx
20501 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
20502 rtx target)
20504 rtx pat;
20505 tree arg0 = CALL_EXPR_ARG (exp, 0);
20506 rtx op1, op0 = expand_normal (arg0);
20507 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20508 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20510 if (optimize || !target
20511 || GET_MODE (target) != tmode
20512 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20513 target = gen_reg_rtx (tmode);
20515 if (VECTOR_MODE_P (mode0))
20516 op0 = safe_vector_operand (op0, mode0);
20518 if ((optimize && !register_operand (op0, mode0))
20519 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20520 op0 = copy_to_mode_reg (mode0, op0);
20522 op1 = op0;
20523 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20524 op1 = copy_to_mode_reg (mode0, op1);
20526 pat = GEN_FCN (icode) (target, op0, op1);
20527 if (! pat)
20528 return 0;
20529 emit_insn (pat);
20530 return target;
20533 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20535 static rtx
20536 ix86_expand_sse_compare (const struct builtin_description *d,
20537 tree exp, rtx target, bool swap)
20539 rtx pat;
20540 tree arg0 = CALL_EXPR_ARG (exp, 0);
20541 tree arg1 = CALL_EXPR_ARG (exp, 1);
20542 rtx op0 = expand_normal (arg0);
20543 rtx op1 = expand_normal (arg1);
20544 rtx op2;
20545 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20546 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20547 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20548 enum rtx_code comparison = d->comparison;
20550 if (VECTOR_MODE_P (mode0))
20551 op0 = safe_vector_operand (op0, mode0);
20552 if (VECTOR_MODE_P (mode1))
20553 op1 = safe_vector_operand (op1, mode1);
20555 /* Swap operands if we have a comparison that isn't available in
20556 hardware. */
20557 if (swap)
20559 rtx tmp = gen_reg_rtx (mode1);
20560 emit_move_insn (tmp, op1);
20561 op1 = op0;
20562 op0 = tmp;
20565 if (optimize || !target
20566 || GET_MODE (target) != tmode
20567 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20568 target = gen_reg_rtx (tmode);
20570 if ((optimize && !register_operand (op0, mode0))
20571 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20572 op0 = copy_to_mode_reg (mode0, op0);
20573 if ((optimize && !register_operand (op1, mode1))
20574 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20575 op1 = copy_to_mode_reg (mode1, op1);
20577 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20578 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20579 if (! pat)
20580 return 0;
20581 emit_insn (pat);
20582 return target;
20585 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20587 static rtx
20588 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20589 rtx target)
20591 rtx pat;
20592 tree arg0 = CALL_EXPR_ARG (exp, 0);
20593 tree arg1 = CALL_EXPR_ARG (exp, 1);
20594 rtx op0 = expand_normal (arg0);
20595 rtx op1 = expand_normal (arg1);
20596 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20597 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20598 enum rtx_code comparison = d->comparison;
20600 if (VECTOR_MODE_P (mode0))
20601 op0 = safe_vector_operand (op0, mode0);
20602 if (VECTOR_MODE_P (mode1))
20603 op1 = safe_vector_operand (op1, mode1);
20605 /* Swap operands if we have a comparison that isn't available in
20606 hardware. */
20607 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20609 rtx tmp = op1;
20610 op1 = op0;
20611 op0 = tmp;
20614 target = gen_reg_rtx (SImode);
20615 emit_move_insn (target, const0_rtx);
20616 target = gen_rtx_SUBREG (QImode, target, 0);
20618 if ((optimize && !register_operand (op0, mode0))
20619 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20620 op0 = copy_to_mode_reg (mode0, op0);
20621 if ((optimize && !register_operand (op1, mode1))
20622 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20623 op1 = copy_to_mode_reg (mode1, op1);
20625 pat = GEN_FCN (d->icode) (op0, op1);
20626 if (! pat)
20627 return 0;
20628 emit_insn (pat);
20629 emit_insn (gen_rtx_SET (VOIDmode,
20630 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20631 gen_rtx_fmt_ee (comparison, QImode,
20632 SET_DEST (pat),
20633 const0_rtx)));
20635 return SUBREG_REG (target);
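/* The generated sequence is, in effect, a flags-setting compare (comiss
   or comisd) followed by a set<cc> of the low byte of a zero-initialized
   SImode pseudo; SUBREG_REG (target) above hands back that SImode pseudo
   as the int result of the builtin.  */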
20638 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20640 static rtx
20641 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20642 rtx target)
20644 rtx pat;
20645 tree arg0 = CALL_EXPR_ARG (exp, 0);
20646 tree arg1 = CALL_EXPR_ARG (exp, 1);
20647 rtx op0 = expand_normal (arg0);
20648 rtx op1 = expand_normal (arg1);
20649 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20650 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20651 enum rtx_code comparison = d->comparison;
20653 if (VECTOR_MODE_P (mode0))
20654 op0 = safe_vector_operand (op0, mode0);
20655 if (VECTOR_MODE_P (mode1))
20656 op1 = safe_vector_operand (op1, mode1);
20658 target = gen_reg_rtx (SImode);
20659 emit_move_insn (target, const0_rtx);
20660 target = gen_rtx_SUBREG (QImode, target, 0);
20662 if ((optimize && !register_operand (op0, mode0))
20663 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20664 op0 = copy_to_mode_reg (mode0, op0);
20665 if ((optimize && !register_operand (op1, mode1))
20666 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20667 op1 = copy_to_mode_reg (mode1, op1);
20669 pat = GEN_FCN (d->icode) (op0, op1);
20670 if (! pat)
20671 return 0;
20672 emit_insn (pat);
20673 emit_insn (gen_rtx_SET (VOIDmode,
20674 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20675 gen_rtx_fmt_ee (comparison, QImode,
20676 SET_DEST (pat),
20677 const0_rtx)));
20679 return SUBREG_REG (target);
20682 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20684 static rtx
20685 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20686 tree exp, rtx target)
20688 rtx pat;
20689 tree arg0 = CALL_EXPR_ARG (exp, 0);
20690 tree arg1 = CALL_EXPR_ARG (exp, 1);
20691 tree arg2 = CALL_EXPR_ARG (exp, 2);
20692 tree arg3 = CALL_EXPR_ARG (exp, 3);
20693 tree arg4 = CALL_EXPR_ARG (exp, 4);
20694 rtx scratch0, scratch1;
20695 rtx op0 = expand_normal (arg0);
20696 rtx op1 = expand_normal (arg1);
20697 rtx op2 = expand_normal (arg2);
20698 rtx op3 = expand_normal (arg3);
20699 rtx op4 = expand_normal (arg4);
20700 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20702 tmode0 = insn_data[d->icode].operand[0].mode;
20703 tmode1 = insn_data[d->icode].operand[1].mode;
20704 modev2 = insn_data[d->icode].operand[2].mode;
20705 modei3 = insn_data[d->icode].operand[3].mode;
20706 modev4 = insn_data[d->icode].operand[4].mode;
20707 modei5 = insn_data[d->icode].operand[5].mode;
20708 modeimm = insn_data[d->icode].operand[6].mode;
20710 if (VECTOR_MODE_P (modev2))
20711 op0 = safe_vector_operand (op0, modev2);
20712 if (VECTOR_MODE_P (modev4))
20713 op2 = safe_vector_operand (op2, modev4);
20715 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20716 op0 = copy_to_mode_reg (modev2, op0);
20717 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20718 op1 = copy_to_mode_reg (modei3, op1);
20719 if ((optimize && !register_operand (op2, modev4))
20720 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20721 op2 = copy_to_mode_reg (modev4, op2);
20722 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20723 op3 = copy_to_mode_reg (modei5, op3);
20725 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20727 error ("the fifth argument must be a 8-bit immediate");
20728 return const0_rtx;
20731 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20733 if (optimize || !target
20734 || GET_MODE (target) != tmode0
20735 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20736 target = gen_reg_rtx (tmode0);
20738 scratch1 = gen_reg_rtx (tmode1);
20740 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20742 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20744 if (optimize || !target
20745 || GET_MODE (target) != tmode1
20746 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20747 target = gen_reg_rtx (tmode1);
20749 scratch0 = gen_reg_rtx (tmode0);
20751 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20753 else
20755 gcc_assert (d->flag);
20757 scratch0 = gen_reg_rtx (tmode0);
20758 scratch1 = gen_reg_rtx (tmode1);
20760 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20763 if (! pat)
20764 return 0;
20766 emit_insn (pat);
20768 if (d->flag)
20770 target = gen_reg_rtx (SImode);
20771 emit_move_insn (target, const0_rtx);
20772 target = gen_rtx_SUBREG (QImode, target, 0);
20774 emit_insn
20775 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20776 gen_rtx_fmt_ee (EQ, QImode,
20777 gen_rtx_REG ((enum machine_mode) d->flag,
20778 FLAGS_REG),
20779 const0_rtx)));
20780 return SUBREG_REG (target);
20782 else
20783 return target;
20787 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20789 static rtx
20790 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20791 tree exp, rtx target)
20793 rtx pat;
20794 tree arg0 = CALL_EXPR_ARG (exp, 0);
20795 tree arg1 = CALL_EXPR_ARG (exp, 1);
20796 tree arg2 = CALL_EXPR_ARG (exp, 2);
20797 rtx scratch0, scratch1;
20798 rtx op0 = expand_normal (arg0);
20799 rtx op1 = expand_normal (arg1);
20800 rtx op2 = expand_normal (arg2);
20801 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20803 tmode0 = insn_data[d->icode].operand[0].mode;
20804 tmode1 = insn_data[d->icode].operand[1].mode;
20805 modev2 = insn_data[d->icode].operand[2].mode;
20806 modev3 = insn_data[d->icode].operand[3].mode;
20807 modeimm = insn_data[d->icode].operand[4].mode;
20809 if (VECTOR_MODE_P (modev2))
20810 op0 = safe_vector_operand (op0, modev2);
20811 if (VECTOR_MODE_P (modev3))
20812 op1 = safe_vector_operand (op1, modev3);
20814 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20815 op0 = copy_to_mode_reg (modev2, op0);
20816 if ((optimize && !register_operand (op1, modev3))
20817 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20818 op1 = copy_to_mode_reg (modev3, op1);
20820 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20822 error ("the third argument must be a 8-bit immediate");
20823 return const0_rtx;
20826 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20828 if (optimize || !target
20829 || GET_MODE (target) != tmode0
20830 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20831 target = gen_reg_rtx (tmode0);
20833 scratch1 = gen_reg_rtx (tmode1);
20835 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20837 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20839 if (optimize || !target
20840 || GET_MODE (target) != tmode1
20841 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20842 target = gen_reg_rtx (tmode1);
20844 scratch0 = gen_reg_rtx (tmode0);
20846 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20848 else
20850 gcc_assert (d->flag);
20852 scratch0 = gen_reg_rtx (tmode0);
20853 scratch1 = gen_reg_rtx (tmode1);
20855 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20858 if (! pat)
20859 return 0;
20861 emit_insn (pat);
20863 if (d->flag)
20865 target = gen_reg_rtx (SImode);
20866 emit_move_insn (target, const0_rtx);
20867 target = gen_rtx_SUBREG (QImode, target, 0);
20869 emit_insn
20870 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20871 gen_rtx_fmt_ee (EQ, QImode,
20872 gen_rtx_REG ((enum machine_mode) d->flag,
20873 FLAGS_REG),
20874 const0_rtx)));
20875 return SUBREG_REG (target);
20877 else
20878 return target;
20881 /* Subroutine of ix86_expand_builtin to take care of insns with
20882 variable number of operands. */
20884 static rtx
20885 ix86_expand_args_builtin (const struct builtin_description *d,
20886 tree exp, rtx target)
20888 rtx pat, real_target;
20889 unsigned int i, nargs;
20890 unsigned int nargs_constant = 0;
20891 int num_memory = 0;
20892 struct
20894 rtx op;
20895 enum machine_mode mode;
20896 } args[4];
20897 bool last_arg_count = false;
20898 enum insn_code icode = d->icode;
20899 const struct insn_data *insn_p = &insn_data[icode];
20900 enum machine_mode tmode = insn_p->operand[0].mode;
20901 enum machine_mode rmode = VOIDmode;
20902 bool swap = false;
20903 enum rtx_code comparison = d->comparison;
20905 switch ((enum ix86_builtin_type) d->flag)
20907 case INT_FTYPE_V2DI_V2DI_PTEST:
20908 return ix86_expand_sse_ptest (d, exp, target);
20909 case FLOAT128_FTYPE_FLOAT128:
20910 case FLOAT_FTYPE_FLOAT:
20911 case INT64_FTYPE_V4SF:
20912 case INT64_FTYPE_V2DF:
20913 case INT_FTYPE_V16QI:
20914 case INT_FTYPE_V8QI:
20915 case INT_FTYPE_V4SF:
20916 case INT_FTYPE_V2DF:
20917 case V16QI_FTYPE_V16QI:
20918 case V8HI_FTYPE_V8HI:
20919 case V8HI_FTYPE_V16QI:
20920 case V8QI_FTYPE_V8QI:
20921 case V4SI_FTYPE_V4SI:
20922 case V4SI_FTYPE_V16QI:
20923 case V4SI_FTYPE_V4SF:
20924 case V4SI_FTYPE_V8HI:
20925 case V4SI_FTYPE_V2DF:
20926 case V4HI_FTYPE_V4HI:
20927 case V4SF_FTYPE_V4SF:
20928 case V4SF_FTYPE_V4SI:
20929 case V4SF_FTYPE_V2DF:
20930 case V2DI_FTYPE_V2DI:
20931 case V2DI_FTYPE_V16QI:
20932 case V2DI_FTYPE_V8HI:
20933 case V2DI_FTYPE_V4SI:
20934 case V2DF_FTYPE_V2DF:
20935 case V2DF_FTYPE_V4SI:
20936 case V2DF_FTYPE_V4SF:
20937 case V2DF_FTYPE_V2SI:
20938 case V2SI_FTYPE_V2SI:
20939 case V2SI_FTYPE_V4SF:
20940 case V2SI_FTYPE_V2SF:
20941 case V2SI_FTYPE_V2DF:
20942 case V2SF_FTYPE_V2SF:
20943 case V2SF_FTYPE_V2SI:
20944 nargs = 1;
20945 break;
20946 case V4SF_FTYPE_V4SF_VEC_MERGE:
20947 case V2DF_FTYPE_V2DF_VEC_MERGE:
20948 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
20949 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
20950 case V16QI_FTYPE_V16QI_V16QI:
20951 case V16QI_FTYPE_V8HI_V8HI:
20952 case V8QI_FTYPE_V8QI_V8QI:
20953 case V8QI_FTYPE_V4HI_V4HI:
20954 case V8HI_FTYPE_V8HI_V8HI:
20955 case V8HI_FTYPE_V16QI_V16QI:
20956 case V8HI_FTYPE_V4SI_V4SI:
20957 case V4SI_FTYPE_V4SI_V4SI:
20958 case V4SI_FTYPE_V8HI_V8HI:
20959 case V4SI_FTYPE_V4SF_V4SF:
20960 case V4SI_FTYPE_V2DF_V2DF:
20961 case V4HI_FTYPE_V4HI_V4HI:
20962 case V4HI_FTYPE_V8QI_V8QI:
20963 case V4HI_FTYPE_V2SI_V2SI:
20964 case V4SF_FTYPE_V4SF_V4SF:
20965 case V4SF_FTYPE_V4SF_V2SI:
20966 case V4SF_FTYPE_V4SF_V2DF:
20967 case V4SF_FTYPE_V4SF_DI:
20968 case V4SF_FTYPE_V4SF_SI:
20969 case V2DI_FTYPE_V2DI_V2DI:
20970 case V2DI_FTYPE_V16QI_V16QI:
20971 case V2DI_FTYPE_V4SI_V4SI:
20972 case V2DI_FTYPE_V2DI_V16QI:
20973 case V2DI_FTYPE_V2DF_V2DF:
20974 case V2SI_FTYPE_V2SI_V2SI:
20975 case V2SI_FTYPE_V4HI_V4HI:
20976 case V2SI_FTYPE_V2SF_V2SF:
20977 case V2DF_FTYPE_V2DF_V2DF:
20978 case V2DF_FTYPE_V2DF_V4SF:
20979 case V2DF_FTYPE_V2DF_DI:
20980 case V2DF_FTYPE_V2DF_SI:
20981 case V2SF_FTYPE_V2SF_V2SF:
20982 case V1DI_FTYPE_V1DI_V1DI:
20983 case V1DI_FTYPE_V8QI_V8QI:
20984 case V1DI_FTYPE_V2SI_V2SI:
20985 if (comparison == UNKNOWN)
20986 return ix86_expand_binop_builtin (icode, exp, target);
20987 nargs = 2;
20988 break;
20989 case V4SF_FTYPE_V4SF_V4SF_SWAP:
20990 case V2DF_FTYPE_V2DF_V2DF_SWAP:
20991 gcc_assert (comparison != UNKNOWN);
20992 nargs = 2;
20993 swap = true;
20994 break;
20995 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20996 case V8HI_FTYPE_V8HI_SI_COUNT:
20997 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20998 case V4SI_FTYPE_V4SI_SI_COUNT:
20999 case V4HI_FTYPE_V4HI_V4HI_COUNT:
21000 case V4HI_FTYPE_V4HI_SI_COUNT:
21001 case V2DI_FTYPE_V2DI_V2DI_COUNT:
21002 case V2DI_FTYPE_V2DI_SI_COUNT:
21003 case V2SI_FTYPE_V2SI_V2SI_COUNT:
21004 case V2SI_FTYPE_V2SI_SI_COUNT:
21005 case V1DI_FTYPE_V1DI_V1DI_COUNT:
21006 case V1DI_FTYPE_V1DI_SI_COUNT:
21007 nargs = 2;
21008 last_arg_count = true;
21009 break;
21010 case UINT64_FTYPE_UINT64_UINT64:
21011 case UINT_FTYPE_UINT_UINT:
21012 case UINT_FTYPE_UINT_USHORT:
21013 case UINT_FTYPE_UINT_UCHAR:
21014 nargs = 2;
21015 break;
21016 case V2DI2TI_FTYPE_V2DI_INT:
21017 nargs = 2;
21018 rmode = V2DImode;
21019 nargs_constant = 1;
21020 break;
21021 case V8HI_FTYPE_V8HI_INT:
21022 case V4SI_FTYPE_V4SI_INT:
21023 case V4HI_FTYPE_V4HI_INT:
21024 case V4SF_FTYPE_V4SF_INT:
21025 case V2DI_FTYPE_V2DI_INT:
21026 case V2DF_FTYPE_V2DF_INT:
21027 nargs = 2;
21028 nargs_constant = 1;
21029 break;
21030 case V16QI_FTYPE_V16QI_V16QI_V16QI:
21031 case V4SF_FTYPE_V4SF_V4SF_V4SF:
21032 case V2DF_FTYPE_V2DF_V2DF_V2DF:
21033 nargs = 3;
21034 break;
21035 case V16QI_FTYPE_V16QI_V16QI_INT:
21036 case V8HI_FTYPE_V8HI_V8HI_INT:
21037 case V4SI_FTYPE_V4SI_V4SI_INT:
21038 case V4SF_FTYPE_V4SF_V4SF_INT:
21039 case V2DI_FTYPE_V2DI_V2DI_INT:
21040 case V2DF_FTYPE_V2DF_V2DF_INT:
21041 nargs = 3;
21042 nargs_constant = 1;
21043 break;
21044 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
21045 nargs = 3;
21046 rmode = V2DImode;
21047 nargs_constant = 1;
21048 break;
21049 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
21050 nargs = 3;
21051 rmode = DImode;
21052 nargs_constant = 1;
21053 break;
21054 case V2DI_FTYPE_V2DI_UINT_UINT:
21055 nargs = 3;
21056 nargs_constant = 2;
21057 break;
21058 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
21059 nargs = 4;
21060 nargs_constant = 2;
21061 break;
21062 default:
21063 gcc_unreachable ();
21066 gcc_assert (nargs <= ARRAY_SIZE (args));
21068 if (comparison != UNKNOWN)
21070 gcc_assert (nargs == 2);
21071 return ix86_expand_sse_compare (d, exp, target, swap);
21074 if (rmode == VOIDmode || rmode == tmode)
21076 if (optimize
21077 || target == 0
21078 || GET_MODE (target) != tmode
21079 || ! (*insn_p->operand[0].predicate) (target, tmode))
21080 target = gen_reg_rtx (tmode);
21081 real_target = target;
21083 else
21085 target = gen_reg_rtx (rmode);
21086 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
21089 for (i = 0; i < nargs; i++)
21091 tree arg = CALL_EXPR_ARG (exp, i);
21092 rtx op = expand_normal (arg);
21093 enum machine_mode mode = insn_p->operand[i + 1].mode;
21094 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
21096 if (last_arg_count && (i + 1) == nargs)
21098 /* SIMD shift insns take either an 8-bit immediate or a
21099 register as the count, but the builtin functions take an int
21100 as the count. If the count operand doesn't match, put it in a register. */
21101 if (!match)
21103 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
21104 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
21105 op = copy_to_reg (op);
21108 else if ((nargs - i) <= nargs_constant)
21110 if (!match)
21111 switch (icode)
21113 case CODE_FOR_sse4_1_roundpd:
21114 case CODE_FOR_sse4_1_roundps:
21115 case CODE_FOR_sse4_1_roundsd:
21116 case CODE_FOR_sse4_1_roundss:
21117 case CODE_FOR_sse4_1_blendps:
21118 error ("the last argument must be a 4-bit immediate");
21119 return const0_rtx;
21121 case CODE_FOR_sse4_1_blendpd:
21122 error ("the last argument must be a 2-bit immediate");
21123 return const0_rtx;
21125 default:
21126 switch (nargs_constant)
21128 case 2:
21129 if ((nargs - i) == nargs_constant)
21131 error ("the next to last argument must be an 8-bit immediate");
21132 break;
21134 case 1:
21135 error ("the last argument must be an 8-bit immediate");
21136 break;
21137 default:
21138 gcc_unreachable ();
21140 return const0_rtx;
21143 else
21145 if (VECTOR_MODE_P (mode))
21146 op = safe_vector_operand (op, mode);
21148 /* If we aren't optimizing, only allow one memory operand to
21149 be generated. */
21150 if (memory_operand (op, mode))
21151 num_memory++;
21153 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
21155 if (optimize || !match || num_memory > 1)
21156 op = copy_to_mode_reg (mode, op);
21158 else
21160 op = copy_to_reg (op);
21161 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
21165 args[i].op = op;
21166 args[i].mode = mode;
21169 switch (nargs)
21171 case 1:
21172 pat = GEN_FCN (icode) (real_target, args[0].op);
21173 break;
21174 case 2:
21175 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
21176 break;
21177 case 3:
21178 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21179 args[2].op);
21180 break;
21181 case 4:
21182 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21183 args[2].op, args[3].op);
21184 break;
21185 default:
21186 gcc_unreachable ();
21189 if (! pat)
21190 return 0;
21192 emit_insn (pat);
21193 return target;
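/* As an illustration of the classification above (assuming the usual
   SSE2 builtin descriptions), a shuffle such as

     __v4si y = __builtin_ia32_pshufd (x, 0x1b);

   is described as V4SI_FTYPE_V4SI_INT: nargs is 2 and nargs_constant is 1,
   so the vector operand may be copied into a register, while the final
   operand must already be an 8-bit immediate or the error paths above
   reject it.  */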
21196 /* Subroutine of ix86_expand_builtin to take care of special insns
21197 with variable number of operands. */
21199 static rtx
21200 ix86_expand_special_args_builtin (const struct builtin_description *d,
21201 tree exp, rtx target)
21203 tree arg;
21204 rtx pat, op;
21205 unsigned int i, nargs, arg_adjust, memory;
21206 struct
21208 rtx op;
21209 enum machine_mode mode;
21210 } args[2];
21211 enum insn_code icode = d->icode;
21212 bool last_arg_constant = false;
21213 const struct insn_data *insn_p = &insn_data[icode];
21214 enum machine_mode tmode = insn_p->operand[0].mode;
21215 enum { load, store } class;
21217 switch ((enum ix86_special_builtin_type) d->flag)
21219 case VOID_FTYPE_VOID:
21220 emit_insn (GEN_FCN (icode) (target));
21221 return 0;
21222 case V2DI_FTYPE_PV2DI:
21223 case V16QI_FTYPE_PCCHAR:
21224 case V4SF_FTYPE_PCFLOAT:
21225 case V2DF_FTYPE_PCDOUBLE:
21226 nargs = 1;
21227 class = load;
21228 memory = 0;
21229 break;
21230 case VOID_FTYPE_PV2SF_V4SF:
21231 case VOID_FTYPE_PV2DI_V2DI:
21232 case VOID_FTYPE_PCHAR_V16QI:
21233 case VOID_FTYPE_PFLOAT_V4SF:
21234 case VOID_FTYPE_PDOUBLE_V2DF:
21235 case VOID_FTYPE_PDI_DI:
21236 case VOID_FTYPE_PINT_INT:
21237 nargs = 1;
21238 class = store;
21239 /* Reserve memory operand for target. */
21240 memory = ARRAY_SIZE (args);
21241 break;
21242 case V4SF_FTYPE_V4SF_PCV2SF:
21243 case V2DF_FTYPE_V2DF_PCDOUBLE:
21244 nargs = 2;
21245 class = load;
21246 memory = 1;
21247 break;
21248 default:
21249 gcc_unreachable ();
21252 gcc_assert (nargs <= ARRAY_SIZE (args));
21254 if (class == store)
21256 arg = CALL_EXPR_ARG (exp, 0);
21257 op = expand_normal (arg);
21258 gcc_assert (target == 0);
21259 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
21260 arg_adjust = 1;
21262 else
21264 arg_adjust = 0;
21265 if (optimize
21266 || target == 0
21267 || GET_MODE (target) != tmode
21268 || ! (*insn_p->operand[0].predicate) (target, tmode))
21269 target = gen_reg_rtx (tmode);
21272 for (i = 0; i < nargs; i++)
21274 enum machine_mode mode = insn_p->operand[i + 1].mode;
21275 bool match;
21277 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
21278 op = expand_normal (arg);
21279 match = (*insn_p->operand[i + 1].predicate) (op, mode);
21281 if (last_arg_constant && (i + 1) == nargs)
21283 if (!match)
21284 switch (icode)
21286 default:
21287 error ("the last argument must be an 8-bit immediate");
21288 return const0_rtx;
21291 else
21293 if (i == memory)
21295 /* This must be the memory operand. */
21296 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
21297 gcc_assert (GET_MODE (op) == mode
21298 || GET_MODE (op) == VOIDmode);
21300 else
21302 /* This must be a register. */
21303 if (VECTOR_MODE_P (mode))
21304 op = safe_vector_operand (op, mode);
21306 gcc_assert (GET_MODE (op) == mode
21307 || GET_MODE (op) == VOIDmode);
21308 op = copy_to_mode_reg (mode, op);
21312 args[i].op = op;
21313 args[i].mode = mode;
21316 switch (nargs)
21318 case 1:
21319 pat = GEN_FCN (icode) (target, args[0].op);
21320 break;
21321 case 2:
21322 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21323 break;
21324 default:
21325 gcc_unreachable ();
21328 if (! pat)
21329 return 0;
21330 emit_insn (pat);
21331 return class == store ? 0 : target;
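/* For example (assuming the usual bdesc_special_args entries), an
   unaligned load builtin of type V2DF_FTYPE_PCDOUBLE such as
   __builtin_ia32_loadupd takes the load path: its pointer argument
   becomes the memory operand and the result lands in TARGET.  A store
   builtin of type VOID_FTYPE_PDOUBLE_V2DF such as __builtin_ia32_storeupd
   takes the store path instead, with the pointer turned into the
   destination MEM and 0 returned.  */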
21334 /* Return the integer constant in ARG. Constrain it to be in the range
21335 of the subparts of VEC_TYPE; issue an error if not. */
21337 static int
21338 get_element_number (tree vec_type, tree arg)
21340 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
21342 if (!host_integerp (arg, 1)
21343 || (elt = tree_low_cst (arg, 1), elt > max))
21345 error ("selector must be an integer constant in the range 0..%wi", max);
21346 return 0;
21349 return elt;
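/* For instance, with a V4SF argument TYPE_VECTOR_SUBPARTS is 4, so the
   selector must be a constant in 0..3; a call along the lines of

     float f = __builtin_ia32_vec_ext_v4sf (v, 4);

   would be diagnosed by the error above.  */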
21352 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21353 ix86_expand_vector_init. We DO have language-level syntax for this, in
21354 the form of (type){ init-list }. Except that since we can't place emms
21355 instructions from inside the compiler, we can't allow the use of MMX
21356 registers unless the user explicitly asks for it. So we do *not* define
21357 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
21358 we have builtins invoked by mmintrin.h that give us license to emit
21359 these sorts of instructions. */
21361 static rtx
21362 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
21364 enum machine_mode tmode = TYPE_MODE (type);
21365 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
21366 int i, n_elt = GET_MODE_NUNITS (tmode);
21367 rtvec v = rtvec_alloc (n_elt);
21369 gcc_assert (VECTOR_MODE_P (tmode));
21370 gcc_assert (call_expr_nargs (exp) == n_elt);
21372 for (i = 0; i < n_elt; ++i)
21374 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
21375 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
21378 if (!target || !register_operand (target, tmode))
21379 target = gen_reg_rtx (tmode);
21381 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
21382 return target;
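/* A typical caller is the MMX set intrinsics; for example, _mm_set_pi32
   in mmintrin.h is normally implemented roughly as

     (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1)

   and arrives here with two scalar arguments that get packed into a
   V2SImode register by ix86_expand_vector_init.  */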
21385 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21386 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
21387 had a language-level syntax for referencing vector elements. */
21389 static rtx
21390 ix86_expand_vec_ext_builtin (tree exp, rtx target)
21392 enum machine_mode tmode, mode0;
21393 tree arg0, arg1;
21394 int elt;
21395 rtx op0;
21397 arg0 = CALL_EXPR_ARG (exp, 0);
21398 arg1 = CALL_EXPR_ARG (exp, 1);
21400 op0 = expand_normal (arg0);
21401 elt = get_element_number (TREE_TYPE (arg0), arg1);
21403 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21404 mode0 = TYPE_MODE (TREE_TYPE (arg0));
21405 gcc_assert (VECTOR_MODE_P (mode0));
21407 op0 = force_reg (mode0, op0);
21409 if (optimize || !target || !register_operand (target, tmode))
21410 target = gen_reg_rtx (tmode);
21412 ix86_expand_vector_extract (true, target, op0, elt);
21414 return target;
21417 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21418 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
21419 a language-level syntax for referencing vector elements. */
21421 static rtx
21422 ix86_expand_vec_set_builtin (tree exp)
21424 enum machine_mode tmode, mode1;
21425 tree arg0, arg1, arg2;
21426 int elt;
21427 rtx op0, op1, target;
21429 arg0 = CALL_EXPR_ARG (exp, 0);
21430 arg1 = CALL_EXPR_ARG (exp, 1);
21431 arg2 = CALL_EXPR_ARG (exp, 2);
21433 tmode = TYPE_MODE (TREE_TYPE (arg0));
21434 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21435 gcc_assert (VECTOR_MODE_P (tmode));
21437 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
21438 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
21439 elt = get_element_number (TREE_TYPE (arg0), arg2);
21441 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
21442 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
21444 op0 = force_reg (tmode, op0);
21445 op1 = force_reg (mode1, op1);
21447 /* OP0 is the source of these builtin functions and shouldn't be
21448 modified. Create a copy, use it and return it as target. */
21449 target = gen_reg_rtx (tmode);
21450 emit_move_insn (target, op0);
21451 ix86_expand_vector_set (true, target, op1, elt);
21453 return target;
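/* For example, _mm_insert_epi16 in emmintrin.h is normally a thin wrapper
   around __builtin_ia32_vec_set_v8hi: the vector operand is copied into a
   fresh register here, the scalar is inserted at the constant element
   index, and the copy is returned so the original operand is left
   unmodified.  */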
21456 /* Expand an expression EXP that calls a built-in function,
21457 with result going to TARGET if that's convenient
21458 (and in mode MODE if that's convenient).
21459 SUBTARGET may be used as the target for computing one of EXP's operands.
21460 IGNORE is nonzero if the value is to be ignored. */
21462 static rtx
21463 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
21464 enum machine_mode mode ATTRIBUTE_UNUSED,
21465 int ignore ATTRIBUTE_UNUSED)
21467 const struct builtin_description *d;
21468 size_t i;
21469 enum insn_code icode;
21470 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21471 tree arg0, arg1, arg2;
21472 rtx op0, op1, op2, pat;
21473 enum machine_mode mode0, mode1, mode2;
21474 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
21476 switch (fcode)
21478 case IX86_BUILTIN_MASKMOVQ:
21479 case IX86_BUILTIN_MASKMOVDQU:
21480 icode = (fcode == IX86_BUILTIN_MASKMOVQ
21481 ? CODE_FOR_mmx_maskmovq
21482 : CODE_FOR_sse2_maskmovdqu);
21483 /* Note the arg order is different from the operand order. */
21484 arg1 = CALL_EXPR_ARG (exp, 0);
21485 arg2 = CALL_EXPR_ARG (exp, 1);
21486 arg0 = CALL_EXPR_ARG (exp, 2);
21487 op0 = expand_normal (arg0);
21488 op1 = expand_normal (arg1);
21489 op2 = expand_normal (arg2);
21490 mode0 = insn_data[icode].operand[0].mode;
21491 mode1 = insn_data[icode].operand[1].mode;
21492 mode2 = insn_data[icode].operand[2].mode;
21494 op0 = force_reg (Pmode, op0);
21495 op0 = gen_rtx_MEM (mode1, op0);
21497 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
21498 op0 = copy_to_mode_reg (mode0, op0);
21499 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
21500 op1 = copy_to_mode_reg (mode1, op1);
21501 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
21502 op2 = copy_to_mode_reg (mode2, op2);
21503 pat = GEN_FCN (icode) (op0, op1, op2);
21504 if (! pat)
21505 return 0;
21506 emit_insn (pat);
21507 return 0;
21509 case IX86_BUILTIN_LDMXCSR:
21510 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
21511 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21512 emit_move_insn (target, op0);
21513 emit_insn (gen_sse_ldmxcsr (target));
21514 return 0;
21516 case IX86_BUILTIN_STMXCSR:
21517 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21518 emit_insn (gen_sse_stmxcsr (target));
21519 return copy_to_mode_reg (SImode, target);
21521 case IX86_BUILTIN_CLFLUSH:
21522 arg0 = CALL_EXPR_ARG (exp, 0);
21523 op0 = expand_normal (arg0);
21524 icode = CODE_FOR_sse2_clflush;
21525 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21526 op0 = copy_to_mode_reg (Pmode, op0);
21528 emit_insn (gen_sse2_clflush (op0));
21529 return 0;
21531 case IX86_BUILTIN_MONITOR:
21532 arg0 = CALL_EXPR_ARG (exp, 0);
21533 arg1 = CALL_EXPR_ARG (exp, 1);
21534 arg2 = CALL_EXPR_ARG (exp, 2);
21535 op0 = expand_normal (arg0);
21536 op1 = expand_normal (arg1);
21537 op2 = expand_normal (arg2);
21538 if (!REG_P (op0))
21539 op0 = copy_to_mode_reg (Pmode, op0);
21540 if (!REG_P (op1))
21541 op1 = copy_to_mode_reg (SImode, op1);
21542 if (!REG_P (op2))
21543 op2 = copy_to_mode_reg (SImode, op2);
21544 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
21545 return 0;
21547 case IX86_BUILTIN_MWAIT:
21548 arg0 = CALL_EXPR_ARG (exp, 0);
21549 arg1 = CALL_EXPR_ARG (exp, 1);
21550 op0 = expand_normal (arg0);
21551 op1 = expand_normal (arg1);
21552 if (!REG_P (op0))
21553 op0 = copy_to_mode_reg (SImode, op0);
21554 if (!REG_P (op1))
21555 op1 = copy_to_mode_reg (SImode, op1);
21556 emit_insn (gen_sse3_mwait (op0, op1));
21557 return 0;
21559 case IX86_BUILTIN_VEC_INIT_V2SI:
21560 case IX86_BUILTIN_VEC_INIT_V4HI:
21561 case IX86_BUILTIN_VEC_INIT_V8QI:
21562 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21564 case IX86_BUILTIN_VEC_EXT_V2DF:
21565 case IX86_BUILTIN_VEC_EXT_V2DI:
21566 case IX86_BUILTIN_VEC_EXT_V4SF:
21567 case IX86_BUILTIN_VEC_EXT_V4SI:
21568 case IX86_BUILTIN_VEC_EXT_V8HI:
21569 case IX86_BUILTIN_VEC_EXT_V2SI:
21570 case IX86_BUILTIN_VEC_EXT_V4HI:
21571 case IX86_BUILTIN_VEC_EXT_V16QI:
21572 return ix86_expand_vec_ext_builtin (exp, target);
21574 case IX86_BUILTIN_VEC_SET_V2DI:
21575 case IX86_BUILTIN_VEC_SET_V4SF:
21576 case IX86_BUILTIN_VEC_SET_V4SI:
21577 case IX86_BUILTIN_VEC_SET_V8HI:
21578 case IX86_BUILTIN_VEC_SET_V4HI:
21579 case IX86_BUILTIN_VEC_SET_V16QI:
21580 return ix86_expand_vec_set_builtin (exp);
21582 case IX86_BUILTIN_INFQ:
21584 REAL_VALUE_TYPE inf;
21585 rtx tmp;
21587 real_inf (&inf);
21588 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21590 tmp = validize_mem (force_const_mem (mode, tmp));
21592 if (target == 0)
21593 target = gen_reg_rtx (mode);
21595 emit_move_insn (target, tmp);
21596 return target;
21599 default:
21600 break;
21603 for (i = 0, d = bdesc_special_args;
21604 i < ARRAY_SIZE (bdesc_special_args);
21605 i++, d++)
21606 if (d->code == fcode)
21607 return ix86_expand_special_args_builtin (d, exp, target);
21609 for (i = 0, d = bdesc_args;
21610 i < ARRAY_SIZE (bdesc_args);
21611 i++, d++)
21612 if (d->code == fcode)
21613 return ix86_expand_args_builtin (d, exp, target);
21615 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21616 if (d->code == fcode)
21617 return ix86_expand_sse_comi (d, exp, target);
21619 for (i = 0, d = bdesc_pcmpestr;
21620 i < ARRAY_SIZE (bdesc_pcmpestr);
21621 i++, d++)
21622 if (d->code == fcode)
21623 return ix86_expand_sse_pcmpestr (d, exp, target);
21625 for (i = 0, d = bdesc_pcmpistr;
21626 i < ARRAY_SIZE (bdesc_pcmpistr);
21627 i++, d++)
21628 if (d->code == fcode)
21629 return ix86_expand_sse_pcmpistr (d, exp, target);
21631 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21632 if (d->code == fcode)
21633 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21634 (enum multi_arg_type)d->flag,
21635 d->comparison);
21637 gcc_unreachable ();
21640 /* Returns a function decl for a vectorized version of the builtin function
21641 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21642 if it is not available. */
21644 static tree
21645 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21646 tree type_in)
21648 enum machine_mode in_mode, out_mode;
21649 int in_n, out_n;
21651 if (TREE_CODE (type_out) != VECTOR_TYPE
21652 || TREE_CODE (type_in) != VECTOR_TYPE)
21653 return NULL_TREE;
21655 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21656 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21657 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21658 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21660 switch (fn)
21662 case BUILT_IN_SQRT:
21663 if (out_mode == DFmode && out_n == 2
21664 && in_mode == DFmode && in_n == 2)
21665 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21666 break;
21668 case BUILT_IN_SQRTF:
21669 if (out_mode == SFmode && out_n == 4
21670 && in_mode == SFmode && in_n == 4)
21671 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21672 break;
21674 case BUILT_IN_LRINT:
21675 if (out_mode == SImode && out_n == 4
21676 && in_mode == DFmode && in_n == 2)
21677 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21678 break;
21680 case BUILT_IN_LRINTF:
21681 if (out_mode == SImode && out_n == 4
21682 && in_mode == SFmode && in_n == 4)
21683 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21684 break;
21686 default:
21690 /* Dispatch to a handler for a vectorization library. */
21691 if (ix86_veclib_handler)
21692 return (*ix86_veclib_handler)(fn, type_out, type_in);
21694 return NULL_TREE;
21697 /* Handler for an SVML-style interface to
21698 a library with vectorized intrinsics. */
21700 static tree
21701 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21703 char name[20];
21704 tree fntype, new_fndecl, args;
21705 unsigned arity;
21706 const char *bname;
21707 enum machine_mode el_mode, in_mode;
21708 int n, in_n;
21710 /* SVML is suitable for unsafe math only. */
21711 if (!flag_unsafe_math_optimizations)
21712 return NULL_TREE;
21714 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21715 n = TYPE_VECTOR_SUBPARTS (type_out);
21716 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21717 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21718 if (el_mode != in_mode
21719 || n != in_n)
21720 return NULL_TREE;
21722 switch (fn)
21724 case BUILT_IN_EXP:
21725 case BUILT_IN_LOG:
21726 case BUILT_IN_LOG10:
21727 case BUILT_IN_POW:
21728 case BUILT_IN_TANH:
21729 case BUILT_IN_TAN:
21730 case BUILT_IN_ATAN:
21731 case BUILT_IN_ATAN2:
21732 case BUILT_IN_ATANH:
21733 case BUILT_IN_CBRT:
21734 case BUILT_IN_SINH:
21735 case BUILT_IN_SIN:
21736 case BUILT_IN_ASINH:
21737 case BUILT_IN_ASIN:
21738 case BUILT_IN_COSH:
21739 case BUILT_IN_COS:
21740 case BUILT_IN_ACOSH:
21741 case BUILT_IN_ACOS:
21742 if (el_mode != DFmode || n != 2)
21743 return NULL_TREE;
21744 break;
21746 case BUILT_IN_EXPF:
21747 case BUILT_IN_LOGF:
21748 case BUILT_IN_LOG10F:
21749 case BUILT_IN_POWF:
21750 case BUILT_IN_TANHF:
21751 case BUILT_IN_TANF:
21752 case BUILT_IN_ATANF:
21753 case BUILT_IN_ATAN2F:
21754 case BUILT_IN_ATANHF:
21755 case BUILT_IN_CBRTF:
21756 case BUILT_IN_SINHF:
21757 case BUILT_IN_SINF:
21758 case BUILT_IN_ASINHF:
21759 case BUILT_IN_ASINF:
21760 case BUILT_IN_COSHF:
21761 case BUILT_IN_COSF:
21762 case BUILT_IN_ACOSHF:
21763 case BUILT_IN_ACOSF:
21764 if (el_mode != SFmode || n != 4)
21765 return NULL_TREE;
21766 break;
21768 default:
21769 return NULL_TREE;
21772 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21774 if (fn == BUILT_IN_LOGF)
21775 strcpy (name, "vmlsLn4");
21776 else if (fn == BUILT_IN_LOG)
21777 strcpy (name, "vmldLn2");
21778 else if (n == 4)
21780 sprintf (name, "vmls%s", bname+10);
21781 name[strlen (name)-1] = '4';
21783 else
21784 sprintf (name, "vmld%s2", bname+10);
21786 /* Convert to uppercase. */
21787 name[4] &= ~0x20;
21789 arity = 0;
21790 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21791 args = TREE_CHAIN (args))
21792 arity++;
21794 if (arity == 1)
21795 fntype = build_function_type_list (type_out, type_in, NULL);
21796 else
21797 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21799 /* Build a function declaration for the vectorized function. */
21800 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21801 TREE_PUBLIC (new_fndecl) = 1;
21802 DECL_EXTERNAL (new_fndecl) = 1;
21803 DECL_IS_NOVOPS (new_fndecl) = 1;
21804 TREE_READONLY (new_fndecl) = 1;
21806 return new_fndecl;
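/* Examples of the resulting SVML names (assuming the builtin decls are
   named "__builtin_<func>"): BUILT_IN_SIN maps to "vmldSin2",
   BUILT_IN_SINF to "vmlsSin4", and the special-cased BUILT_IN_LOG and
   BUILT_IN_LOGF to "vmldLn2" and "vmlsLn4".  The trailing digit is the
   vector width, and the "&= ~0x20" above upper-cases the first letter of
   the original function name.  */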
21809 /* Handler for an ACML-style interface to
21810 a library with vectorized intrinsics. */
21812 static tree
21813 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21815 char name[20] = "__vr.._";
21816 tree fntype, new_fndecl, args;
21817 unsigned arity;
21818 const char *bname;
21819 enum machine_mode el_mode, in_mode;
21820 int n, in_n;
21822 /* ACML is 64-bit only and suitable for unsafe math only, as it
21823 does not correctly support parts of IEEE arithmetic, such as
21824 denormals, with the required precision. */
21825 if (!TARGET_64BIT
21826 || !flag_unsafe_math_optimizations)
21827 return NULL_TREE;
21829 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21830 n = TYPE_VECTOR_SUBPARTS (type_out);
21831 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21832 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21833 if (el_mode != in_mode
21834 || n != in_n)
21835 return NULL_TREE;
21837 switch (fn)
21839 case BUILT_IN_SIN:
21840 case BUILT_IN_COS:
21841 case BUILT_IN_EXP:
21842 case BUILT_IN_LOG:
21843 case BUILT_IN_LOG2:
21844 case BUILT_IN_LOG10:
21845 name[4] = 'd';
21846 name[5] = '2';
21847 if (el_mode != DFmode
21848 || n != 2)
21849 return NULL_TREE;
21850 break;
21852 case BUILT_IN_SINF:
21853 case BUILT_IN_COSF:
21854 case BUILT_IN_EXPF:
21855 case BUILT_IN_POWF:
21856 case BUILT_IN_LOGF:
21857 case BUILT_IN_LOG2F:
21858 case BUILT_IN_LOG10F:
21859 name[4] = 's';
21860 name[5] = '4';
21861 if (el_mode != SFmode
21862 || n != 4)
21863 return NULL_TREE;
21864 break;
21866 default:
21867 return NULL_TREE;
21870 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21871 sprintf (name + 7, "%s", bname+10);
21873 arity = 0;
21874 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21875 args = TREE_CHAIN (args))
21876 arity++;
21878 if (arity == 1)
21879 fntype = build_function_type_list (type_out, type_in, NULL);
21880 else
21881 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21883 /* Build a function declaration for the vectorized function. */
21884 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21885 TREE_PUBLIC (new_fndecl) = 1;
21886 DECL_EXTERNAL (new_fndecl) = 1;
21887 DECL_IS_NOVOPS (new_fndecl) = 1;
21888 TREE_READONLY (new_fndecl) = 1;
21890 return new_fndecl;
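/* Examples of the resulting ACML names: BUILT_IN_SIN maps to "__vrd2_sin"
   (double, 2 lanes) and BUILT_IN_SINF to "__vrs4_sinf" (float, 4 lanes);
   the two dots in the "__vr.._" template are filled in with the element
   kind and vector width above.  */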
21894 /* Returns a decl of a function that implements conversion of the
21895 input vector of type TYPE, or NULL_TREE if it is not available. */
21897 static tree
21898 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21900 if (TREE_CODE (type) != VECTOR_TYPE)
21901 return NULL_TREE;
21903 switch (code)
21905 case FLOAT_EXPR:
21906 switch (TYPE_MODE (type))
21908 case V4SImode:
21909 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21910 default:
21911 return NULL_TREE;
21914 case FIX_TRUNC_EXPR:
21915 switch (TYPE_MODE (type))
21917 case V4SFmode:
21918 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21919 default:
21920 return NULL_TREE;
21922 default:
21923 return NULL_TREE;
21928 /* Returns a code for a target-specific builtin that implements
21929 reciprocal of the function, or NULL_TREE if not available. */
21931 static tree
21932 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21933 bool sqrt ATTRIBUTE_UNUSED)
21935 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21936 && flag_finite_math_only && !flag_trapping_math
21937 && flag_unsafe_math_optimizations))
21938 return NULL_TREE;
21940 if (md_fn)
21941 /* Machine dependent builtins. */
21942 switch (fn)
21944 /* Vectorized version of sqrt to rsqrt conversion. */
21945 case IX86_BUILTIN_SQRTPS_NR:
21946 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21948 default:
21949 return NULL_TREE;
21951 else
21952 /* Normal builtins. */
21953 switch (fn)
21955 /* Sqrt to rsqrt conversion. */
21956 case BUILT_IN_SQRTF:
21957 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21959 default:
21960 return NULL_TREE;
21964 /* Store OPERAND to the memory after reload is completed. This means
21965 that we can't easily use assign_stack_local. */
21967 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21969 rtx result;
21971 gcc_assert (reload_completed);
21972 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
21974 result = gen_rtx_MEM (mode,
21975 gen_rtx_PLUS (Pmode,
21976 stack_pointer_rtx,
21977 GEN_INT (-RED_ZONE_SIZE)));
21978 emit_move_insn (result, operand);
21980 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
21982 switch (mode)
21984 case HImode:
21985 case SImode:
21986 operand = gen_lowpart (DImode, operand);
21987 /* FALLTHRU */
21988 case DImode:
21989 emit_insn (
21990 gen_rtx_SET (VOIDmode,
21991 gen_rtx_MEM (DImode,
21992 gen_rtx_PRE_DEC (DImode,
21993 stack_pointer_rtx)),
21994 operand));
21995 break;
21996 default:
21997 gcc_unreachable ();
21999 result = gen_rtx_MEM (mode, stack_pointer_rtx);
22001 else
22003 switch (mode)
22005 case DImode:
22007 rtx operands[2];
22008 split_di (&operand, 1, operands, operands + 1);
22009 emit_insn (
22010 gen_rtx_SET (VOIDmode,
22011 gen_rtx_MEM (SImode,
22012 gen_rtx_PRE_DEC (Pmode,
22013 stack_pointer_rtx)),
22014 operands[1]));
22015 emit_insn (
22016 gen_rtx_SET (VOIDmode,
22017 gen_rtx_MEM (SImode,
22018 gen_rtx_PRE_DEC (Pmode,
22019 stack_pointer_rtx)),
22020 operands[0]));
22022 break;
22023 case HImode:
22024 /* Store HImodes as SImodes. */
22025 operand = gen_lowpart (SImode, operand);
22026 /* FALLTHRU */
22027 case SImode:
22028 emit_insn (
22029 gen_rtx_SET (VOIDmode,
22030 gen_rtx_MEM (GET_MODE (operand),
22031 gen_rtx_PRE_DEC (SImode,
22032 stack_pointer_rtx)),
22033 operand));
22034 break;
22035 default:
22036 gcc_unreachable ();
22038 result = gen_rtx_MEM (mode, stack_pointer_rtx);
22040 return result;
22043 /* Free operand from the memory. */
22044 void
22045 ix86_free_from_memory (enum machine_mode mode)
22047 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
22049 int size;
22051 if (mode == DImode || TARGET_64BIT)
22052 size = 8;
22053 else
22054 size = 4;
22055 /* Use LEA to deallocate stack space. In peephole2 it will be converted
22056 to a pop or add instruction if registers are available. */
22057 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22058 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
22059 GEN_INT (size))));
22063 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
22064 QImode must go into class Q_REGS.
22065 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
22066 movdf to do mem-to-mem moves through integer regs. */
22067 enum reg_class
22068 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
22070 enum machine_mode mode = GET_MODE (x);
22072 /* We're only allowed to return a subclass of CLASS. Many of the
22073 following checks fail for NO_REGS, so eliminate that early. */
22074 if (regclass == NO_REGS)
22075 return NO_REGS;
22077 /* All classes can load zeros. */
22078 if (x == CONST0_RTX (mode))
22079 return regclass;
22081 /* Force constants into memory if we are loading a (nonzero) constant into
22082 an MMX or SSE register. This is because there are no MMX/SSE instructions
22083 to load from a constant. */
22084 if (CONSTANT_P (x)
22085 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
22086 return NO_REGS;
22088 /* Prefer SSE regs only, if we can use them for math. */
22089 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
22090 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
22092 /* Floating-point constants need more complex checks. */
22093 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
22095 /* General regs can load everything. */
22096 if (reg_class_subset_p (regclass, GENERAL_REGS))
22097 return regclass;
22099 /* Floats can load 0 and 1 plus some others. Note that we eliminated
22100 zero above. We only want to wind up preferring 80387 registers if
22101 we plan on doing computation with them. */
22102 if (TARGET_80387
22103 && standard_80387_constant_p (x))
22105 /* Limit class to non-sse. */
22106 if (regclass == FLOAT_SSE_REGS)
22107 return FLOAT_REGS;
22108 if (regclass == FP_TOP_SSE_REGS)
22109 return FP_TOP_REG;
22110 if (regclass == FP_SECOND_SSE_REGS)
22111 return FP_SECOND_REG;
22112 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
22113 return regclass;
22116 return NO_REGS;
22119 /* Generally when we see PLUS here, it's the function invariant
22120 (plus soft-fp const_int), which can only be computed into general
22121 regs. */
22122 if (GET_CODE (x) == PLUS)
22123 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
22125 /* QImode constants are easy to load, but non-constant QImode data
22126 must go into Q_REGS. */
22127 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
22129 if (reg_class_subset_p (regclass, Q_REGS))
22130 return regclass;
22131 if (reg_class_subset_p (Q_REGS, regclass))
22132 return Q_REGS;
22133 return NO_REGS;
22136 return regclass;
22139 /* Discourage putting floating-point values in SSE registers unless
22140 SSE math is being used, and likewise for the 387 registers. */
22141 enum reg_class
22142 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22144 enum machine_mode mode = GET_MODE (x);
22146 /* Restrict the output reload class to the register bank that we are doing
22147 math on. If we would like not to return a subset of CLASS, reject this
22148 alternative: if reload cannot do this, it will still use its choice. */
22149 mode = GET_MODE (x);
22150 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22151 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
22153 if (X87_FLOAT_MODE_P (mode))
22155 if (regclass == FP_TOP_SSE_REGS)
22156 return FP_TOP_REG;
22157 else if (regclass == FP_SECOND_SSE_REGS)
22158 return FP_SECOND_REG;
22159 else
22160 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
22163 return regclass;
22166 static enum reg_class
22167 ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
22168 enum machine_mode mode,
22169 secondary_reload_info *sri ATTRIBUTE_UNUSED)
22171 /* QImode spills from non-QI registers require
22172 intermediate register on 32bit targets. */
22173 if (!in_p && mode == QImode && !TARGET_64BIT
22174 && (class == GENERAL_REGS
22175 || class == LEGACY_REGS
22176 || class == INDEX_REGS))
22178 int regno;
22180 if (REG_P (x))
22181 regno = REGNO (x);
22182 else
22183 regno = -1;
22185 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
22186 regno = true_regnum (x);
22188 /* Return Q_REGS if the operand is in memory. */
22189 if (regno == -1)
22190 return Q_REGS;
22193 return NO_REGS;
22196 /* If we are copying between general and FP registers, we need a memory
22197 location. The same is true for SSE and MMX registers.
22199 To optimize register_move_cost performance, allow inline variant.
22201 The macro can't work reliably when one of the CLASSES is a class containing
22202 registers from multiple units (SSE, MMX, integer). We avoid this by never
22203 combining those units in a single alternative in the machine description.
22204 Ensure that this constraint holds to avoid unexpected surprises.
22206 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22207 enforce these sanity checks. */
22209 static inline int
22210 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22211 enum machine_mode mode, int strict)
22213 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22214 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22215 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22216 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22217 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22218 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22220 gcc_assert (!strict);
22221 return true;
22224 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22225 return true;
22227 /* ??? This is a lie. We do have moves between mmx/general, and for
22228 mmx/sse2. But by saying we need secondary memory we discourage the
22229 register allocator from using the mmx registers unless needed. */
22230 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22231 return true;
22233 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22235 /* SSE1 doesn't have any direct moves from other classes. */
22236 if (!TARGET_SSE2)
22237 return true;
22239 /* If the target says that inter-unit moves are more expensive
22240 than moving through memory, then don't generate them. */
22241 if (!TARGET_INTER_UNIT_MOVES)
22242 return true;
22244 /* Between SSE and general, we have moves no larger than word size. */
22245 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22246 return true;
22249 return false;
22253 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22254 enum machine_mode mode, int strict)
22256 return inline_secondary_memory_needed (class1, class2, mode, strict);
22259 /* Return true if the registers in CLASS cannot represent the change from
22260 modes FROM to TO. */
22262 bool
22263 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22264 enum reg_class regclass)
22266 if (from == to)
22267 return false;
22269 /* x87 registers can't do subreg at all, as all values are reformatted
22270 to extended precision. */
22271 if (MAYBE_FLOAT_CLASS_P (regclass))
22272 return true;
22274 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22276 /* Vector registers do not support QI or HImode loads. If we don't
22277 disallow a change to these modes, reload will assume it's ok to
22278 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22279 the vec_dupv4hi pattern. */
22280 if (GET_MODE_SIZE (from) < 4)
22281 return true;
22283 /* Vector registers do not support subreg with nonzero offsets, which
22284 are otherwise valid for integer registers. Since we can't see
22285 whether we have a nonzero offset from here, prohibit all
22286 nonparadoxical subregs changing size. */
22287 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22288 return true;
22291 return false;
22294 /* Return the cost of moving data of mode M between a
22295 register and memory. A value of 2 is the default; this cost is
22296 relative to those in `REGISTER_MOVE_COST'.
22298 This function is used extensively by register_move_cost, which is used to
22299 build tables at startup. Make it inline in this case.
22300 When IN is 2, return the maximum of the in and out move costs.
22302 If moving between registers and memory is more expensive than
22303 between two registers, you should define this macro to express the
22304 relative cost.
22306 Also model the increased cost of moving QImode values in non-Q_REGS
22307 classes.
22309 static inline int
22310 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22311 int in)
22313 int cost;
22314 if (FLOAT_CLASS_P (regclass))
22316 int index;
22317 switch (mode)
22319 case SFmode:
22320 index = 0;
22321 break;
22322 case DFmode:
22323 index = 1;
22324 break;
22325 case XFmode:
22326 index = 2;
22327 break;
22328 default:
22329 return 100;
22331 if (in == 2)
22332 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22333 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22335 if (SSE_CLASS_P (regclass))
22337 int index;
22338 switch (GET_MODE_SIZE (mode))
22340 case 4:
22341 index = 0;
22342 break;
22343 case 8:
22344 index = 1;
22345 break;
22346 case 16:
22347 index = 2;
22348 break;
22349 default:
22350 return 100;
22352 if (in == 2)
22353 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22354 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22356 if (MMX_CLASS_P (regclass))
22358 int index;
22359 switch (GET_MODE_SIZE (mode))
22361 case 4:
22362 index = 0;
22363 break;
22364 case 8:
22365 index = 1;
22366 break;
22367 default:
22368 return 100;
22370 if (in == 2)
22371 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22372 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22374 switch (GET_MODE_SIZE (mode))
22376 case 1:
22377 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22379 if (!in)
22380 return ix86_cost->int_store[0];
22381 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22382 cost = ix86_cost->movzbl_load;
22383 else
22384 cost = ix86_cost->int_load[0];
22385 if (in == 2)
22386 return MAX (cost, ix86_cost->int_store[0]);
22387 return cost;
22389 else
22391 if (in == 2)
22392 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22393 if (in)
22394 return ix86_cost->movzbl_load;
22395 else
22396 return ix86_cost->int_store[0] + 4;
22398 break;
22399 case 2:
22400 if (in == 2)
22401 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22402 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22403 default:
22404 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
22405 if (mode == TFmode)
22406 mode = XFmode;
22407 if (in == 2)
22408 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22409 else if (in)
22410 cost = ix86_cost->int_load[2];
22411 else
22412 cost = ix86_cost->int_store[2];
22413 return (cost * (((int) GET_MODE_SIZE (mode)
22414 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
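/* Worked example for the default (integer) case above: on a 32-bit target
   with UNITS_PER_WORD == 4, an XFmode value of size 12 costs int_load[2]
   (or int_store[2]) times (12 + 4 - 1) / 4 == 3 word moves; TFmode is
   first mapped to XFmode and costed the same way.  */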
22419 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22421 return inline_memory_move_cost (mode, regclass, in);
22425 /* Return the cost of moving data from a register in class CLASS1 to
22426 one in class CLASS2.
22428 It is not required that the cost always equal 2 when FROM is the same as TO;
22429 on some machines it is expensive to move between registers if they are not
22430 general registers. */
22433 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22434 enum reg_class class2)
22436 /* In case we require secondary memory, compute cost of the store followed
22437 by load. In order to avoid bad register allocation choices, we need
22438 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22440 if (inline_secondary_memory_needed (class1, class2, mode, 0))
22442 int cost = 1;
22444 cost += inline_memory_move_cost (mode, class1, 2);
22445 cost += inline_memory_move_cost (mode, class2, 2);
22447 /* When copying from a general-purpose register we may emit multiple
22448 stores followed by a single load, causing a memory size mismatch stall.
22449 Count this as an arbitrarily high cost of 20. */
22450 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22451 cost += 20;
22453 /* In the case of FP/MMX moves, the registers actually overlap, and we
22454 have to switch modes in order to treat them differently. */
22455 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22456 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22457 cost += 20;
22459 return cost;
22462 /* Moves between SSE/MMX and integer unit are expensive. */
22463 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22464 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22466 /* ??? By keeping the returned value relatively high, we limit the number
22467 of moves between integer and MMX/SSE registers for all targets.
22468 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
22469 where integer modes in MMX/SSE registers are not tieable
22470 because of missing QImode and HImode moves to, from or between
22471 MMX/SSE registers. */
22472 return MAX (8, ix86_cost->mmxsse_to_integer);
22474 if (MAYBE_FLOAT_CLASS_P (class1))
22475 return ix86_cost->fp_move;
22476 if (MAYBE_SSE_CLASS_P (class1))
22477 return ix86_cost->sse_move;
22478 if (MAYBE_MMX_CLASS_P (class1))
22479 return ix86_cost->mmx_move;
22480 return 2;
22483 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22485 bool
22486 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22488 /* Flags, and only flags, can hold CCmode values. */
22489 if (CC_REGNO_P (regno))
22490 return GET_MODE_CLASS (mode) == MODE_CC;
22491 if (GET_MODE_CLASS (mode) == MODE_CC
22492 || GET_MODE_CLASS (mode) == MODE_RANDOM
22493 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22494 return 0;
22495 if (FP_REGNO_P (regno))
22496 return VALID_FP_MODE_P (mode);
22497 if (SSE_REGNO_P (regno))
22499 /* We implement the move patterns for all vector modes into and
22500 out of SSE registers, even when no operation instructions
22501 are available. */
22502 return (VALID_SSE_REG_MODE (mode)
22503 || VALID_SSE2_REG_MODE (mode)
22504 || VALID_MMX_REG_MODE (mode)
22505 || VALID_MMX_REG_MODE_3DNOW (mode));
22507 if (MMX_REGNO_P (regno))
22509 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22510 so if the register is available at all, then we can move data of
22511 the given mode into or out of it. */
22512 return (VALID_MMX_REG_MODE (mode)
22513 || VALID_MMX_REG_MODE_3DNOW (mode));
22516 if (mode == QImode)
22518 /* Take care with QImode values: they can live in non-QI regs,
22519 but then they cause partial register stalls. */
22520 if (regno < 4 || TARGET_64BIT)
22521 return 1;
22522 if (!TARGET_PARTIAL_REG_STALL)
22523 return 1;
22524 return reload_in_progress || reload_completed;
22526 /* We handle both integers and floats in the general purpose registers. */
22527 else if (VALID_INT_MODE_P (mode))
22528 return 1;
22529 else if (VALID_FP_MODE_P (mode))
22530 return 1;
22531 else if (VALID_DFP_MODE_P (mode))
22532 return 1;
22533 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22534 on to use that value in smaller contexts, this can easily force a
22535 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22536 supporting DImode, allow it. */
22537 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22538 return 1;
22540 return 0;
22543 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22544 tieable integer mode. */
22546 static bool
22547 ix86_tieable_integer_mode_p (enum machine_mode mode)
22549 switch (mode)
22551 case HImode:
22552 case SImode:
22553 return true;
22555 case QImode:
22556 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22558 case DImode:
22559 return TARGET_64BIT;
22561 default:
22562 return false;
22566 /* Return true if MODE1 is accessible in a register that can hold MODE2
22567 without copying. That is, all register classes that can hold MODE2
22568 can also hold MODE1. */
22570 bool
22571 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22573 if (mode1 == mode2)
22574 return true;
22576 if (ix86_tieable_integer_mode_p (mode1)
22577 && ix86_tieable_integer_mode_p (mode2))
22578 return true;
22580 /* MODE2 being XFmode implies fp stack or general regs, which means we
22581 can tie any smaller floating point modes to it. Note that we do not
22582 tie this with TFmode. */
22583 if (mode2 == XFmode)
22584 return mode1 == SFmode || mode1 == DFmode;
22586 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22587 that we can tie it with SFmode. */
22588 if (mode2 == DFmode)
22589 return mode1 == SFmode;
22591 /* If MODE2 is only appropriate for an SSE register, then tie with
22592 any other mode acceptable to SSE registers. */
22593 if (GET_MODE_SIZE (mode2) == 16
22594 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22595 return (GET_MODE_SIZE (mode1) == 16
22596 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22598 /* If MODE2 is appropriate for an MMX register, then tie
22599 with any other mode acceptable to MMX registers. */
22600 if (GET_MODE_SIZE (mode2) == 8
22601 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22602 return (GET_MODE_SIZE (mode1) == 8
22603 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22605 return false;
22608 /* Compute a (partial) cost for rtx X. Return true if the complete
22609 cost has been computed, and false if subexpressions should be
22610 scanned. In either case, *TOTAL contains the cost result. */
22612 static bool
22613 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22615 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22616 enum machine_mode mode = GET_MODE (x);
22618 switch (code)
22620 case CONST_INT:
22621 case CONST:
22622 case LABEL_REF:
22623 case SYMBOL_REF:
22624 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22625 *total = 3;
22626 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22627 *total = 2;
22628 else if (flag_pic && SYMBOLIC_CONST (x)
22629 && (!TARGET_64BIT
22630 || (GET_CODE (x) != LABEL_REF
22631 && (GET_CODE (x) != SYMBOL_REF
22632 || !SYMBOL_REF_LOCAL_P (x)))))
22633 *total = 1;
22634 else
22635 *total = 0;
22636 return true;
22638 case CONST_DOUBLE:
22639 if (mode == VOIDmode)
22640 *total = 0;
22641 else
22642 switch (standard_80387_constant_p (x))
22644 case 1: /* 0.0 */
22645 *total = 1;
22646 break;
22647 default: /* Other constants */
22648 *total = 2;
22649 break;
22650 case 0:
22651 case -1:
22652 /* Start with (MEM (SYMBOL_REF)), since that's where
22653 it'll probably end up. Add a penalty for size. */
22654 *total = (COSTS_N_INSNS (1)
22655 + (flag_pic != 0 && !TARGET_64BIT)
22656 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22657 break;
22659 return true;
22661 case ZERO_EXTEND:
22662 /* The zero extension is often completely free on x86_64, so make
22663 it as cheap as possible. */
22664 if (TARGET_64BIT && mode == DImode
22665 && GET_MODE (XEXP (x, 0)) == SImode)
22666 *total = 1;
22667 else if (TARGET_ZERO_EXTEND_WITH_AND)
22668 *total = ix86_cost->add;
22669 else
22670 *total = ix86_cost->movzx;
22671 return false;
22673 case SIGN_EXTEND:
22674 *total = ix86_cost->movsx;
22675 return false;
22677 case ASHIFT:
22678 if (CONST_INT_P (XEXP (x, 1))
22679 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22681 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22682 if (value == 1)
22684 *total = ix86_cost->add;
22685 return false;
22687 if ((value == 2 || value == 3)
22688 && ix86_cost->lea <= ix86_cost->shift_const)
22690 *total = ix86_cost->lea;
22691 return false;
22694 /* FALLTHRU */
22696 case ROTATE:
22697 case ASHIFTRT:
22698 case LSHIFTRT:
22699 case ROTATERT:
22700 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22702 if (CONST_INT_P (XEXP (x, 1)))
22704 if (INTVAL (XEXP (x, 1)) > 32)
22705 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22706 else
22707 *total = ix86_cost->shift_const * 2;
22709 else
22711 if (GET_CODE (XEXP (x, 1)) == AND)
22712 *total = ix86_cost->shift_var * 2;
22713 else
22714 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22717 else
22719 if (CONST_INT_P (XEXP (x, 1)))
22720 *total = ix86_cost->shift_const;
22721 else
22722 *total = ix86_cost->shift_var;
22724 return false;
22726 case MULT:
22727 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22729 /* ??? SSE scalar cost should be used here. */
22730 *total = ix86_cost->fmul;
22731 return false;
22733 else if (X87_FLOAT_MODE_P (mode))
22735 *total = ix86_cost->fmul;
22736 return false;
22738 else if (FLOAT_MODE_P (mode))
22740 /* ??? SSE vector cost should be used here. */
22741 *total = ix86_cost->fmul;
22742 return false;
22744 else
22746 rtx op0 = XEXP (x, 0);
22747 rtx op1 = XEXP (x, 1);
22748 int nbits;
22749 if (CONST_INT_P (XEXP (x, 1)))
22751 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22752 for (nbits = 0; value != 0; value &= value - 1)
22753 nbits++;
22755 else
22756 /* This is arbitrary. */
22757 nbits = 7;
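/* The loop above clears the lowest set bit of VALUE on every
   iteration, so NBITS is the population count of the constant
   multiplier; e.g. multiplying by 10 (binary 1010) gives nbits == 2,
   and the cost computed below is mult_init[] + 2 * mult_bit plus the
   costs of the two operands.  */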
22759 /* Compute costs correctly for widening multiplication. */
22760 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22761 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22762 == GET_MODE_SIZE (mode))
22764 int is_mulwiden = 0;
22765 enum machine_mode inner_mode = GET_MODE (op0);
22767 if (GET_CODE (op0) == GET_CODE (op1))
22768 is_mulwiden = 1, op1 = XEXP (op1, 0);
22769 else if (CONST_INT_P (op1))
22771 if (GET_CODE (op0) == SIGN_EXTEND)
22772 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22773 == INTVAL (op1);
22774 else
22775 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22778 if (is_mulwiden)
22779 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22782 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22783 + nbits * ix86_cost->mult_bit
22784 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22786 return true;
22789 case DIV:
22790 case UDIV:
22791 case MOD:
22792 case UMOD:
22793 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22794 /* ??? SSE cost should be used here. */
22795 *total = ix86_cost->fdiv;
22796 else if (X87_FLOAT_MODE_P (mode))
22797 *total = ix86_cost->fdiv;
22798 else if (FLOAT_MODE_P (mode))
22799 /* ??? SSE vector cost should be used here. */
22800 *total = ix86_cost->fdiv;
22801 else
22802 *total = ix86_cost->divide[MODE_INDEX (mode)];
22803 return false;
22805 case PLUS:
22806 if (GET_MODE_CLASS (mode) == MODE_INT
22807 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22809 if (GET_CODE (XEXP (x, 0)) == PLUS
22810 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22811 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22812 && CONSTANT_P (XEXP (x, 1)))
22814 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22815 if (val == 2 || val == 4 || val == 8)
22817 *total = ix86_cost->lea;
22818 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22819 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22820 outer_code);
22821 *total += rtx_cost (XEXP (x, 1), outer_code);
22822 return true;
22825 else if (GET_CODE (XEXP (x, 0)) == MULT
22826 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22828 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22829 if (val == 2 || val == 4 || val == 8)
22831 *total = ix86_cost->lea;
22832 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22833 *total += rtx_cost (XEXP (x, 1), outer_code);
22834 return true;
22837 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22839 *total = ix86_cost->lea;
22840 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22841 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22842 *total += rtx_cost (XEXP (x, 1), outer_code);
22843 return true;
22846 /* FALLTHRU */
22848 case MINUS:
22849 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22851 /* ??? SSE cost should be used here. */
22852 *total = ix86_cost->fadd;
22853 return false;
22855 else if (X87_FLOAT_MODE_P (mode))
22857 *total = ix86_cost->fadd;
22858 return false;
22860 else if (FLOAT_MODE_P (mode))
22862 /* ??? SSE vector cost should be used here. */
22863 *total = ix86_cost->fadd;
22864 return false;
22866 /* FALLTHRU */
22868 case AND:
22869 case IOR:
22870 case XOR:
22871 if (!TARGET_64BIT && mode == DImode)
22873 *total = (ix86_cost->add * 2
22874 + (rtx_cost (XEXP (x, 0), outer_code)
22875 << (GET_MODE (XEXP (x, 0)) != DImode))
22876 + (rtx_cost (XEXP (x, 1), outer_code)
22877 << (GET_MODE (XEXP (x, 1)) != DImode)));
22878 return true;
22880 /* FALLTHRU */
22882 case NEG:
22883 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22885 /* ??? SSE cost should be used here. */
22886 *total = ix86_cost->fchs;
22887 return false;
22889 else if (X87_FLOAT_MODE_P (mode))
22891 *total = ix86_cost->fchs;
22892 return false;
22894 else if (FLOAT_MODE_P (mode))
22896 /* ??? SSE vector cost should be used here. */
22897 *total = ix86_cost->fchs;
22898 return false;
22900 /* FALLTHRU */
22902 case NOT:
22903 if (!TARGET_64BIT && mode == DImode)
22904 *total = ix86_cost->add * 2;
22905 else
22906 *total = ix86_cost->add;
22907 return false;
22909 case COMPARE:
22910 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22911 && XEXP (XEXP (x, 0), 1) == const1_rtx
22912 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22913 && XEXP (x, 1) == const0_rtx)
22915 /* This kind of construct is implemented using test[bwl].
22916 Treat it as if we had an AND. */
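/* For instance, a test of bit 3, "x & (1 << 3)" compared against
   zero, reaches us as (compare (zero_extract x 1 3) (const_int 0))
   and is eventually emitted as something like "testb $8, ...", so an
   AND-like cost is appropriate.  */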
22917 *total = (ix86_cost->add
22918 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22919 + rtx_cost (const1_rtx, outer_code));
22920 return true;
22922 return false;
22924 case FLOAT_EXTEND:
22925 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22926 *total = 0;
22927 return false;
22929 case ABS:
22930 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22931 /* ??? SSE cost should be used here. */
22932 *total = ix86_cost->fabs;
22933 else if (X87_FLOAT_MODE_P (mode))
22934 *total = ix86_cost->fabs;
22935 else if (FLOAT_MODE_P (mode))
22936 /* ??? SSE vector cost should be used here. */
22937 *total = ix86_cost->fabs;
22938 return false;
22940 case SQRT:
22941 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22942 /* ??? SSE cost should be used here. */
22943 *total = ix86_cost->fsqrt;
22944 else if (X87_FLOAT_MODE_P (mode))
22945 *total = ix86_cost->fsqrt;
22946 else if (FLOAT_MODE_P (mode))
22947 /* ??? SSE vector cost should be used here. */
22948 *total = ix86_cost->fsqrt;
22949 return false;
22951 case UNSPEC:
22952 if (XINT (x, 1) == UNSPEC_TP)
22953 *total = 0;
22954 return false;
22956 default:
22957 return false;
22961 #if TARGET_MACHO
22963 static int current_machopic_label_num;
22965 /* Given a symbol name and its associated stub, write out the
22966 definition of the stub. */
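/* As a rough illustration, for a symbol _foo the lazy (non-pure)
   variant written below looks like this; the stub, binder and
   lazy-pointer label names are illustrative and really come from the
   STUB argument, GEN_BINDER_NAME_FOR_STUB and the L<n>$lz counter:

	L_foo$stub:
		.indirect_symbol _foo
		jmp	*L1$lz
	L_foo$stub_binder:
		pushl	$L1$lz
		jmp	dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol _foo
		.long	L_foo$stub_binder  */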
22968 void
22969 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22971 unsigned int length;
22972 char *binder_name, *symbol_name, lazy_ptr_name[32];
22973 int label = ++current_machopic_label_num;
22975 /* For 64-bit we shouldn't get here. */
22976 gcc_assert (!TARGET_64BIT);
22978 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22979 symb = (*targetm.strip_name_encoding) (symb);
22981 length = strlen (stub);
22982 binder_name = XALLOCAVEC (char, length + 32);
22983 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22985 length = strlen (symb);
22986 symbol_name = XALLOCAVEC (char, length + 32);
22987 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22989 sprintf (lazy_ptr_name, "L%d$lz", label);
22991 if (MACHOPIC_PURE)
22992 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22993 else
22994 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22996 fprintf (file, "%s:\n", stub);
22997 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22999 if (MACHOPIC_PURE)
23001 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
23002 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
23003 fprintf (file, "\tjmp\t*%%edx\n");
23005 else
23006 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
23008 fprintf (file, "%s:\n", binder_name);
23010 if (MACHOPIC_PURE)
23012 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
23013 fprintf (file, "\tpushl\t%%eax\n");
23015 else
23016 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
23018 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
23020 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
23021 fprintf (file, "%s:\n", lazy_ptr_name);
23022 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23023 fprintf (file, "\t.long %s\n", binder_name);
23026 void
23027 darwin_x86_file_end (void)
23029 darwin_file_end ();
23030 ix86_file_end ();
23032 #endif /* TARGET_MACHO */
23034 /* Order the registers for register allocator. */
23036 void
23037 x86_order_regs_for_local_alloc (void)
23039 int pos = 0;
23040 int i;
23042 /* First allocate the local general purpose registers. */
23043 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23044 if (GENERAL_REGNO_P (i) && call_used_regs[i])
23045 reg_alloc_order [pos++] = i;
23047 /* Global general purpose registers. */
23048 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23049 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
23050 reg_alloc_order [pos++] = i;
23052 /* x87 registers come first in case we are doing FP math
23053 using them. */
23054 if (!TARGET_SSE_MATH)
23055 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23056 reg_alloc_order [pos++] = i;
23058 /* SSE registers. */
23059 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23060 reg_alloc_order [pos++] = i;
23061 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23062 reg_alloc_order [pos++] = i;
23064 /* x87 registers. */
23065 if (TARGET_SSE_MATH)
23066 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23067 reg_alloc_order [pos++] = i;
23069 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23070 reg_alloc_order [pos++] = i;
23072 /* Initialize the rest of the array, as we do not allocate some registers
23073 at all. */
23074 while (pos < FIRST_PSEUDO_REGISTER)
23075 reg_alloc_order [pos++] = 0;
23078 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
23079 struct attribute_spec.handler. */
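/* Usage example: "struct __attribute__ ((ms_struct)) S { ... };"
   requests the Microsoft bit-field layout for S, and "gcc_struct"
   switches back to the native layout; the checks below warn about and
   ignore an attribute that conflicts with one already present.  */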
23080 static tree
23081 ix86_handle_struct_attribute (tree *node, tree name,
23082 tree args ATTRIBUTE_UNUSED,
23083 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
23085 tree *type = NULL;
23086 if (DECL_P (*node))
23088 if (TREE_CODE (*node) == TYPE_DECL)
23089 type = &TREE_TYPE (*node);
23091 else
23092 type = node;
23094 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
23095 || TREE_CODE (*type) == UNION_TYPE)))
23097 warning (OPT_Wattributes, "%qs attribute ignored",
23098 IDENTIFIER_POINTER (name));
23099 *no_add_attrs = true;
23102 else if ((is_attribute_p ("ms_struct", name)
23103 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
23104 || ((is_attribute_p ("gcc_struct", name)
23105 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
23107 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
23108 IDENTIFIER_POINTER (name));
23109 *no_add_attrs = true;
23112 return NULL_TREE;
23115 static bool
23116 ix86_ms_bitfield_layout_p (const_tree record_type)
23118 return (TARGET_MS_BITFIELD_LAYOUT &&
23119 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23120 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
23123 /* Returns an expression indicating where the this parameter is
23124 located on entry to the FUNCTION. */
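/* For example, on 64-bit SysV targets `this' arrives in %rdi, or in
   %rsi when the function returns an aggregate in memory (the hidden
   return-slot pointer occupies %rdi).  On 32-bit targets it normally
   lives at 4(%esp), or 8(%esp) below a hidden return-slot pointer,
   unless regparm or fastcall conventions place it in %eax, %ecx or
   %edx.  */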
23126 static rtx
23127 x86_this_parameter (tree function)
23129 tree type = TREE_TYPE (function);
23130 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23131 int nregs;
23133 if (TARGET_64BIT)
23135 const int *parm_regs;
23137 if (ix86_function_type_abi (type) == MS_ABI)
23138 parm_regs = x86_64_ms_abi_int_parameter_registers;
23139 else
23140 parm_regs = x86_64_int_parameter_registers;
23141 return gen_rtx_REG (DImode, parm_regs[aggr]);
23144 nregs = ix86_function_regparm (type, function);
23146 if (nregs > 0 && !stdarg_p (type))
23148 int regno;
23150 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
23151 regno = aggr ? DX_REG : CX_REG;
23152 else
23154 regno = AX_REG;
23155 if (aggr)
23157 regno = DX_REG;
23158 if (nregs == 1)
23159 return gen_rtx_MEM (SImode,
23160 plus_constant (stack_pointer_rtx, 4));
23163 return gen_rtx_REG (SImode, regno);
23166 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
23169 /* Determine whether x86_output_mi_thunk can succeed. */
23171 static bool
23172 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23173 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23174 HOST_WIDE_INT vcall_offset, const_tree function)
23176 /* 64-bit can handle anything. */
23177 if (TARGET_64BIT)
23178 return true;
23180 /* For 32-bit, everything's fine if we have one free register. */
23181 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23182 return true;
23184 /* Need a free register for vcall_offset. */
23185 if (vcall_offset)
23186 return false;
23188 /* Need a free register for GOT references. */
23189 if (flag_pic && !(*targetm.binds_local_p) (function))
23190 return false;
23192 /* Otherwise ok. */
23193 return true;
23196 /* Output the assembler code for a thunk function. THUNK_DECL is the
23197 declaration for the thunk function itself, FUNCTION is the decl for
23198 the target function. DELTA is an immediate constant offset to be
23199 added to THIS. If VCALL_OFFSET is nonzero, the word at
23200 *(*this + vcall_offset) should be added to THIS. */
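/* A sketch of the usual 32-bit, non-PIC output, assuming `this' is
   passed on the stack, DELTA == 8 and no VCALL_OFFSET (the target
   name is illustrative):

	addl	$8, 4(%esp)
	jmp	_ZN1D1fEv

   With a nonzero VCALL_OFFSET, `this' is first pulled into a scratch
   register so the vtable slot can be loaded and added.  */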
23202 static void
23203 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23204 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23205 HOST_WIDE_INT vcall_offset, tree function)
23207 rtx xops[3];
23208 rtx this_param = x86_this_parameter (function);
23209 rtx this_reg, tmp;
23211 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23212 pull it in now and let DELTA benefit. */
23213 if (REG_P (this_param))
23214 this_reg = this_param;
23215 else if (vcall_offset)
23217 /* Put the this parameter into %eax. */
23218 xops[0] = this_param;
23219 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23220 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23222 else
23223 this_reg = NULL_RTX;
23225 /* Adjust the this parameter by a fixed constant. */
23226 if (delta)
23228 xops[0] = GEN_INT (delta);
23229 xops[1] = this_reg ? this_reg : this_param;
23230 if (TARGET_64BIT)
23232 if (!x86_64_general_operand (xops[0], DImode))
23234 tmp = gen_rtx_REG (DImode, R10_REG);
23235 xops[1] = tmp;
23236 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23237 xops[0] = tmp;
23238 xops[1] = this_param;
23240 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23242 else
23243 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23246 /* Adjust the this parameter by a value stored in the vtable. */
23247 if (vcall_offset)
23249 if (TARGET_64BIT)
23250 tmp = gen_rtx_REG (DImode, R10_REG);
23251 else
23253 int tmp_regno = CX_REG;
23254 if (lookup_attribute ("fastcall",
23255 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23256 tmp_regno = AX_REG;
23257 tmp = gen_rtx_REG (SImode, tmp_regno);
23260 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23261 xops[1] = tmp;
23262 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23264 /* Adjust the this parameter. */
23265 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23266 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23268 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23269 xops[0] = GEN_INT (vcall_offset);
23270 xops[1] = tmp2;
23271 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23272 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23274 xops[1] = this_reg;
23275 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
23278 /* If necessary, drop THIS back to its stack slot. */
23279 if (this_reg && this_reg != this_param)
23281 xops[0] = this_reg;
23282 xops[1] = this_param;
23283 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23286 xops[0] = XEXP (DECL_RTL (function), 0);
23287 if (TARGET_64BIT)
23289 if (!flag_pic || (*targetm.binds_local_p) (function))
23290 output_asm_insn ("jmp\t%P0", xops);
23291 /* All thunks should be in the same object as their target,
23292 and thus binds_local_p should be true. */
23293 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
23294 gcc_unreachable ();
23295 else
23297 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23298 tmp = gen_rtx_CONST (Pmode, tmp);
23299 tmp = gen_rtx_MEM (QImode, tmp);
23300 xops[0] = tmp;
23301 output_asm_insn ("jmp\t%A0", xops);
23304 else
23306 if (!flag_pic || (*targetm.binds_local_p) (function))
23307 output_asm_insn ("jmp\t%P0", xops);
23308 else
23309 #if TARGET_MACHO
23310 if (TARGET_MACHO)
23312 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23313 tmp = (gen_rtx_SYMBOL_REF
23314 (Pmode,
23315 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23316 tmp = gen_rtx_MEM (QImode, tmp);
23317 xops[0] = tmp;
23318 output_asm_insn ("jmp\t%0", xops);
23320 else
23321 #endif /* TARGET_MACHO */
23323 tmp = gen_rtx_REG (SImode, CX_REG);
23324 output_set_got (tmp, NULL_RTX);
23326 xops[1] = tmp;
23327 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23328 output_asm_insn ("jmp\t{*}%1", xops);
23333 static void
23334 x86_file_start (void)
23336 default_file_start ();
23337 #if TARGET_MACHO
23338 darwin_file_start ();
23339 #endif
23340 if (X86_FILE_START_VERSION_DIRECTIVE)
23341 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23342 if (X86_FILE_START_FLTUSED)
23343 fputs ("\t.global\t__fltused\n", asm_out_file);
23344 if (ix86_asm_dialect == ASM_INTEL)
23345 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23348 int
23349 x86_field_alignment (tree field, int computed)
23351 enum machine_mode mode;
23352 tree type = TREE_TYPE (field);
23354 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23355 return computed;
23356 mode = TYPE_MODE (strip_array_types (type));
23357 if (mode == DFmode || mode == DCmode
23358 || GET_MODE_CLASS (mode) == MODE_INT
23359 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23360 return MIN (32, computed);
23361 return computed;
23364 /* Output assembler code to FILE to increment profiler label # LABELNO
23365 for profiling a function entry. */
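/* For example, a 32-bit, non-PIC compilation typically emits

	movl	$LP0, %edx
	call	mcount

   where the label prefix, the counter register and the mcount symbol
   actually come from LPREFIX, PROFILE_COUNT_REGISTER and MCOUNT_NAME,
   and the movl is omitted when NO_PROFILE_COUNTERS is defined.  */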
23366 void
23367 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23369 if (TARGET_64BIT)
23371 #ifndef NO_PROFILE_COUNTERS
23372 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23373 #endif
23375 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
23376 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23377 else
23378 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23380 else if (flag_pic)
23382 #ifndef NO_PROFILE_COUNTERS
23383 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23384 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23385 #endif
23386 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23388 else
23390 #ifndef NO_PROFILE_COUNTERS
23391 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23392 PROFILE_COUNT_REGISTER);
23393 #endif
23394 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23398 /* We don't have exact information about the insn sizes, but we may assume
23399 quite safely that we are informed about all 1 byte insns and memory
23400 address sizes. This is enough to eliminate unnecessary padding in
23401 99% of cases. */
23403 static int
23404 min_insn_size (rtx insn)
23406 int l = 0;
23408 if (!INSN_P (insn) || !active_insn_p (insn))
23409 return 0;
23411 /* Discard the alignment insns we have emitted, and jump tables. */
23412 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23413 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23414 return 0;
23415 if (JUMP_P (insn)
23416 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23417 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23418 return 0;
23420 /* Important case - calls are always 5 bytes.
23421 It is common to have many calls in a row. */
23422 if (CALL_P (insn)
23423 && symbolic_reference_mentioned_p (PATTERN (insn))
23424 && !SIBLING_CALL_P (insn))
23425 return 5;
23426 if (get_attr_length (insn) <= 1)
23427 return 1;
23429 /* For normal instructions we may rely on the sizes of addresses
23430 and the presence of a symbol to require 4 bytes of encoding.
23431 This is not the case for jumps, whose references are PC relative. */
23432 if (!JUMP_P (insn))
23434 l = get_attr_length_address (insn);
23435 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23436 l = 4;
23438 if (l)
23439 return 1+l;
23440 else
23441 return 2;
23444 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
23445 window. */
23447 static void
23448 ix86_avoid_jump_misspredicts (void)
23450 rtx insn, start = get_insns ();
23451 int nbytes = 0, njumps = 0;
23452 int isjump = 0;
23454 /* Look for all minimal intervals of instructions containing 4 jumps.
23455 The intervals are bounded by START and INSN. NBYTES is the total
23456 size of the instructions in the interval, including INSN and not including
23457 START. When NBYTES is smaller than 16, it is possible
23458 that the ends of START and INSN fall into the same 16-byte window.
23460 The smallest offset in the window at which INSN can start is the case where
23461 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
23462 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
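/* For example, with NBYTES == 12 and a 2-byte INSN, the loop below
   requests an alignment of 15 - 12 + 2 == 5 bytes in front of INSN,
   which keeps the fourth jump out of the 16-byte window it would
   otherwise share with START.  */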
23464 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23467 nbytes += min_insn_size (insn);
23468 if (dump_file)
23469 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23470 INSN_UID (insn), min_insn_size (insn));
23471 if ((JUMP_P (insn)
23472 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23473 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
23474 || CALL_P (insn))
23475 njumps++;
23476 else
23477 continue;
23479 while (njumps > 3)
23481 start = NEXT_INSN (start);
23482 if ((JUMP_P (start)
23483 && GET_CODE (PATTERN (start)) != ADDR_VEC
23484 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23485 || CALL_P (start))
23486 njumps--, isjump = 1;
23487 else
23488 isjump = 0;
23489 nbytes -= min_insn_size (start);
23491 gcc_assert (njumps >= 0);
23492 if (dump_file)
23493 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23494 INSN_UID (start), INSN_UID (insn), nbytes);
23496 if (njumps == 3 && isjump && nbytes < 16)
23498 int padsize = 15 - nbytes + min_insn_size (insn);
23500 if (dump_file)
23501 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23502 INSN_UID (insn), padsize);
23503 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23508 /* AMD Athlon works faster
23509 when RET is not the destination of a conditional jump or directly preceded
23510 by another jump instruction. We avoid the penalty by inserting a NOP just
23511 before the RET instruction in such cases. */
23512 static void
23513 ix86_pad_returns (void)
23515 edge e;
23516 edge_iterator ei;
23518 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23520 basic_block bb = e->src;
23521 rtx ret = BB_END (bb);
23522 rtx prev;
23523 bool replace = false;
23525 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23526 || !maybe_hot_bb_p (bb))
23527 continue;
23528 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23529 if (active_insn_p (prev) || LABEL_P (prev))
23530 break;
23531 if (prev && LABEL_P (prev))
23533 edge e;
23534 edge_iterator ei;
23536 FOR_EACH_EDGE (e, ei, bb->preds)
23537 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23538 && !(e->flags & EDGE_FALLTHRU))
23539 replace = true;
23541 if (!replace)
23543 prev = prev_active_insn (ret);
23544 if (prev
23545 && ((JUMP_P (prev) && any_condjump_p (prev))
23546 || CALL_P (prev)))
23547 replace = true;
23548 /* Empty functions get a branch mispredict even when the jump destination
23549 is not visible to us. */
23550 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23551 replace = true;
23553 if (replace)
23555 emit_insn_before (gen_return_internal_long (), ret);
23556 delete_insn (ret);
23561 /* Implement machine specific optimizations. We implement padding of returns
23562 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23563 static void
23564 ix86_reorg (void)
23566 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23567 ix86_pad_returns ();
23568 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23569 ix86_avoid_jump_misspredicts ();
23572 /* Return nonzero when a QImode register that must be represented via a REX
23573 prefix is used. */
23574 bool
23575 x86_extended_QIreg_mentioned_p (rtx insn)
23577 int i;
23578 extract_insn_cached (insn);
23579 for (i = 0; i < recog_data.n_operands; i++)
23580 if (REG_P (recog_data.operand[i])
23581 && REGNO (recog_data.operand[i]) >= 4)
23582 return true;
23583 return false;
23586 /* Return nonzero when P points to a register encoded via a REX prefix.
23587 Called via for_each_rtx. */
23588 static int
23589 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23591 unsigned int regno;
23592 if (!REG_P (*p))
23593 return 0;
23594 regno = REGNO (*p);
23595 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23598 /* Return true when INSN mentions a register that must be encoded using a
23599 REX prefix. */
23600 bool
23601 x86_extended_reg_mentioned_p (rtx insn)
23603 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23606 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23607 optabs would emit if we didn't have TFmode patterns. */
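/* In C terms the expansion below is roughly the following; the shift
   is logical and the low bit is folded back in so that rounding is
   preserved:

	if ((signed) in >= 0)
	  out = (FLOAT_TYPE) in;
	else
	  {
	    in2 = (in >> 1) | (in & 1);
	    out = (FLOAT_TYPE) in2;
	    out = out + out;
	  }  */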
23609 void
23610 x86_emit_floatuns (rtx operands[2])
23612 rtx neglab, donelab, i0, i1, f0, in, out;
23613 enum machine_mode mode, inmode;
23615 inmode = GET_MODE (operands[1]);
23616 gcc_assert (inmode == SImode || inmode == DImode);
23618 out = operands[0];
23619 in = force_reg (inmode, operands[1]);
23620 mode = GET_MODE (out);
23621 neglab = gen_label_rtx ();
23622 donelab = gen_label_rtx ();
23623 f0 = gen_reg_rtx (mode);
23625 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23627 expand_float (out, in, 0);
23629 emit_jump_insn (gen_jump (donelab));
23630 emit_barrier ();
23632 emit_label (neglab);
23634 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23635 1, OPTAB_DIRECT);
23636 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23637 1, OPTAB_DIRECT);
23638 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23640 expand_float (f0, i0, 0);
23642 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23644 emit_label (donelab);
23647 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23648 with all elements equal to VAR. Return true if successful. */
23650 static bool
23651 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23652 rtx target, rtx val)
23654 enum machine_mode smode, wsmode, wvmode;
23655 rtx x;
23657 switch (mode)
23659 case V2SImode:
23660 case V2SFmode:
23661 if (!mmx_ok)
23662 return false;
23663 /* FALLTHRU */
23665 case V2DFmode:
23666 case V2DImode:
23667 case V4SFmode:
23668 case V4SImode:
23669 val = force_reg (GET_MODE_INNER (mode), val);
23670 x = gen_rtx_VEC_DUPLICATE (mode, val);
23671 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23672 return true;
23674 case V4HImode:
23675 if (!mmx_ok)
23676 return false;
23677 if (TARGET_SSE || TARGET_3DNOW_A)
23679 val = gen_lowpart (SImode, val);
23680 x = gen_rtx_TRUNCATE (HImode, val);
23681 x = gen_rtx_VEC_DUPLICATE (mode, x);
23682 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23683 return true;
23685 else
23687 smode = HImode;
23688 wsmode = SImode;
23689 wvmode = V2SImode;
23690 goto widen;
23693 case V8QImode:
23694 if (!mmx_ok)
23695 return false;
23696 smode = QImode;
23697 wsmode = HImode;
23698 wvmode = V4HImode;
23699 goto widen;
23700 case V8HImode:
23701 if (TARGET_SSE2)
23703 rtx tmp1, tmp2;
23704 /* Extend HImode to SImode using a paradoxical SUBREG. */
23705 tmp1 = gen_reg_rtx (SImode);
23706 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23707 /* Insert the SImode value as low element of V4SImode vector. */
23708 tmp2 = gen_reg_rtx (V4SImode);
23709 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23710 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23711 CONST0_RTX (V4SImode),
23712 const1_rtx);
23713 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23714 /* Cast the V4SImode vector back to a V8HImode vector. */
23715 tmp1 = gen_reg_rtx (V8HImode);
23716 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23717 /* Duplicate the low short through the whole low SImode word. */
23718 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23719 /* Cast the V8HImode vector back to a V4SImode vector. */
23720 tmp2 = gen_reg_rtx (V4SImode);
23721 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23722 /* Replicate the low element of the V4SImode vector. */
23723 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23724 /* Cast the V4SImode vector back to V8HImode, and store in target. */
23725 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23726 return true;
23728 smode = HImode;
23729 wsmode = SImode;
23730 wvmode = V4SImode;
23731 goto widen;
23732 case V16QImode:
23733 if (TARGET_SSE2)
23735 rtx tmp1, tmp2;
23736 /* Extend QImode to SImode using a paradoxical SUBREG. */
23737 tmp1 = gen_reg_rtx (SImode);
23738 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23739 /* Insert the SImode value as low element of V4SImode vector. */
23740 tmp2 = gen_reg_rtx (V4SImode);
23741 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23742 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23743 CONST0_RTX (V4SImode),
23744 const1_rtx);
23745 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23746 /* Cast the V4SImode vector back to a V16QImode vector. */
23747 tmp1 = gen_reg_rtx (V16QImode);
23748 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23749 /* Duplicate the low byte through the whole low SImode word. */
23750 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23751 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23752 /* Cast the V16QImode vector back to a V4SImode vector. */
23753 tmp2 = gen_reg_rtx (V4SImode);
23754 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23755 /* Replicate the low element of the V4SImode vector. */
23756 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23757 /* Cast the V4SImode vector back to V16QImode, and store in target. */
23758 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23759 return true;
23761 smode = QImode;
23762 wsmode = HImode;
23763 wvmode = V8HImode;
23764 goto widen;
23765 widen:
23766 /* Replicate the value once into the next wider mode and recurse. */
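/* E.g. a QImode value 0x12 first becomes the HImode value 0x1212
   ((0x12 << 8) | 0x12); the recursive call broadcasts that into the
   wider vector mode, and the result is then reinterpreted in the
   original mode.  */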
23767 val = convert_modes (wsmode, smode, val, true);
23768 x = expand_simple_binop (wsmode, ASHIFT, val,
23769 GEN_INT (GET_MODE_BITSIZE (smode)),
23770 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23771 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23773 x = gen_reg_rtx (wvmode);
23774 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23775 gcc_unreachable ();
23776 emit_move_insn (target, gen_lowpart (mode, x));
23777 return true;
23779 default:
23780 return false;
23784 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23785 whose ONE_VAR element is VAR, and other elements are zero. Return true
23786 if successful. */
23788 static bool
23789 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23790 rtx target, rtx var, int one_var)
23792 enum machine_mode vsimode;
23793 rtx new_target;
23794 rtx x, tmp;
23795 bool use_vector_set = false;
23797 switch (mode)
23799 case V2DImode:
23800 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
23801 break;
23802 case V16QImode:
23803 case V4SImode:
23804 case V4SFmode:
23805 use_vector_set = TARGET_SSE4_1;
23806 break;
23807 case V8HImode:
23808 use_vector_set = TARGET_SSE2;
23809 break;
23810 case V4HImode:
23811 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
23812 break;
23813 default:
23814 break;
23817 if (use_vector_set)
23819 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
23820 var = force_reg (GET_MODE_INNER (mode), var);
23821 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23822 return true;
23825 switch (mode)
23827 case V2SFmode:
23828 case V2SImode:
23829 if (!mmx_ok)
23830 return false;
23831 /* FALLTHRU */
23833 case V2DFmode:
23834 case V2DImode:
23835 if (one_var != 0)
23836 return false;
23837 var = force_reg (GET_MODE_INNER (mode), var);
23838 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23839 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23840 return true;
23842 case V4SFmode:
23843 case V4SImode:
23844 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23845 new_target = gen_reg_rtx (mode);
23846 else
23847 new_target = target;
23848 var = force_reg (GET_MODE_INNER (mode), var);
23849 x = gen_rtx_VEC_DUPLICATE (mode, var);
23850 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23851 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23852 if (one_var != 0)
23854 /* We need to shuffle the value to the correct position, so
23855 create a new pseudo to store the intermediate result. */
23857 /* With SSE2, we can use the integer shuffle insns. */
23858 if (mode != V4SFmode && TARGET_SSE2)
23860 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23861 GEN_INT (1),
23862 GEN_INT (one_var == 1 ? 0 : 1),
23863 GEN_INT (one_var == 2 ? 0 : 1),
23864 GEN_INT (one_var == 3 ? 0 : 1)));
23865 if (target != new_target)
23866 emit_move_insn (target, new_target);
23867 return true;
23870 /* Otherwise convert the intermediate result to V4SFmode and
23871 use the SSE1 shuffle instructions. */
23872 if (mode != V4SFmode)
23874 tmp = gen_reg_rtx (V4SFmode);
23875 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23877 else
23878 tmp = new_target;
23880 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23881 GEN_INT (1),
23882 GEN_INT (one_var == 1 ? 0 : 1),
23883 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23884 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23886 if (mode != V4SFmode)
23887 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23888 else if (tmp != target)
23889 emit_move_insn (target, tmp);
23891 else if (target != new_target)
23892 emit_move_insn (target, new_target);
23893 return true;
23895 case V8HImode:
23896 case V16QImode:
23897 vsimode = V4SImode;
23898 goto widen;
23899 case V4HImode:
23900 case V8QImode:
23901 if (!mmx_ok)
23902 return false;
23903 vsimode = V2SImode;
23904 goto widen;
23905 widen:
23906 if (one_var != 0)
23907 return false;
23909 /* Zero extend the variable element to SImode and recurse. */
23910 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23912 x = gen_reg_rtx (vsimode);
23913 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23914 var, one_var))
23915 gcc_unreachable ();
23917 emit_move_insn (target, gen_lowpart (mode, x));
23918 return true;
23920 default:
23921 return false;
23925 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23926 consisting of the values in VALS. It is known that all elements
23927 except ONE_VAR are constants. Return true if successful. */
23929 static bool
23930 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23931 rtx target, rtx vals, int one_var)
23933 rtx var = XVECEXP (vals, 0, one_var);
23934 enum machine_mode wmode;
23935 rtx const_vec, x;
23937 const_vec = copy_rtx (vals);
23938 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23939 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23941 switch (mode)
23943 case V2DFmode:
23944 case V2DImode:
23945 case V2SFmode:
23946 case V2SImode:
23947 /* For the two element vectors, it's just as easy to use
23948 the general case. */
23949 return false;
23951 case V4SFmode:
23952 case V4SImode:
23953 case V8HImode:
23954 case V4HImode:
23955 break;
23957 case V16QImode:
23958 if (TARGET_SSE4_1)
23959 break;
23960 wmode = V8HImode;
23961 goto widen;
23962 case V8QImode:
23963 wmode = V4HImode;
23964 goto widen;
23965 widen:
23966 /* There's no way to set one QImode entry easily. Combine
23967 the variable value with its adjacent constant value, and
23968 promote to an HImode set. */
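/* For instance, if element 5 of a V16QImode vector is the variable
   one, its partner is the constant in element 4 (5 ^ 1); the variable
   byte is shifted into the high half of the combined HImode value and
   that value is stored into element 2 (5 >> 1) of the V8HImode view
   built below.  */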
23969 x = XVECEXP (vals, 0, one_var ^ 1);
23970 if (one_var & 1)
23972 var = convert_modes (HImode, QImode, var, true);
23973 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23974 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23975 x = GEN_INT (INTVAL (x) & 0xff);
23977 else
23979 var = convert_modes (HImode, QImode, var, true);
23980 x = gen_int_mode (INTVAL (x) << 8, HImode);
23982 if (x != const0_rtx)
23983 var = expand_simple_binop (HImode, IOR, var, x, var,
23984 1, OPTAB_LIB_WIDEN);
23986 x = gen_reg_rtx (wmode);
23987 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23988 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23990 emit_move_insn (target, gen_lowpart (mode, x));
23991 return true;
23993 default:
23994 return false;
23997 emit_move_insn (target, const_vec);
23998 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23999 return true;
24002 /* A subroutine of ix86_expand_vector_init_general. Use vector
24003 concatenate to handle the most general case: all values variable,
24004 and none identical. */
24006 static void
24007 ix86_expand_vector_init_concat (enum machine_mode mode,
24008 rtx target, rtx *ops, int n)
24010 enum machine_mode cmode, hmode = VOIDmode;
24011 rtx first[4], second[2];
24012 rtvec v;
24013 int i, j;
24015 switch (n)
24017 case 2:
24018 switch (mode)
24020 case V4SImode:
24021 cmode = V2SImode;
24022 break;
24023 case V4SFmode:
24024 cmode = V2SFmode;
24025 break;
24026 case V2DImode:
24027 cmode = DImode;
24028 break;
24029 case V2SImode:
24030 cmode = SImode;
24031 break;
24032 case V2DFmode:
24033 cmode = DFmode;
24034 break;
24035 case V2SFmode:
24036 cmode = SFmode;
24037 break;
24038 default:
24039 gcc_unreachable ();
24042 if (!register_operand (ops[1], cmode))
24043 ops[1] = force_reg (cmode, ops[1]);
24044 if (!register_operand (ops[0], cmode))
24045 ops[0] = force_reg (cmode, ops[0]);
24046 emit_insn (gen_rtx_SET (VOIDmode, target,
24047 gen_rtx_VEC_CONCAT (mode, ops[0],
24048 ops[1])));
24049 break;
24051 case 4:
24052 switch (mode)
24054 case V4SImode:
24055 cmode = V2SImode;
24056 break;
24057 case V4SFmode:
24058 cmode = V2SFmode;
24059 break;
24060 default:
24061 gcc_unreachable ();
24063 goto half;
24065 half:
24066 /* FIXME: We process inputs backward to help RA. PR 36222. */
24067 i = n - 1;
24068 j = (n >> 1) - 1;
24069 for (; i > 0; i -= 2, j--)
24071 first[j] = gen_reg_rtx (cmode);
24072 v = gen_rtvec (2, ops[i - 1], ops[i]);
24073 ix86_expand_vector_init (false, first[j],
24074 gen_rtx_PARALLEL (cmode, v));
24077 n >>= 1;
24078 if (n > 2)
24080 gcc_assert (hmode != VOIDmode);
24081 for (i = j = 0; i < n; i += 2, j++)
24083 second[j] = gen_reg_rtx (hmode);
24084 ix86_expand_vector_init_concat (hmode, second [j],
24085 &first [i], 2);
24087 n >>= 1;
24088 ix86_expand_vector_init_concat (mode, target, second, n);
24090 else
24091 ix86_expand_vector_init_concat (mode, target, first, n);
24092 break;
24094 default:
24095 gcc_unreachable ();
24099 /* A subroutine of ix86_expand_vector_init_general. Use vector
24100 interleave to handle the most general case: all values variable,
24101 and none identical. */
24103 static void
24104 ix86_expand_vector_init_interleave (enum machine_mode mode,
24105 rtx target, rtx *ops, int n)
24107 enum machine_mode first_imode, second_imode, third_imode;
24108 int i, j;
24109 rtx op0, op1;
24110 rtx (*gen_load_even) (rtx, rtx, rtx);
24111 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
24112 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
24114 switch (mode)
24116 case V8HImode:
24117 gen_load_even = gen_vec_setv8hi;
24118 gen_interleave_first_low = gen_vec_interleave_lowv4si;
24119 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24120 first_imode = V4SImode;
24121 second_imode = V2DImode;
24122 third_imode = VOIDmode;
24123 break;
24124 case V16QImode:
24125 gen_load_even = gen_vec_setv16qi;
24126 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
24127 gen_interleave_second_low = gen_vec_interleave_lowv4si;
24128 first_imode = V8HImode;
24129 second_imode = V4SImode;
24130 third_imode = V2DImode;
24131 break;
24132 default:
24133 gcc_unreachable ();
24136 for (i = 0; i < n; i++)
24138 /* Extend the odd element to SImode using a paradoxical SUBREG. */
24139 op0 = gen_reg_rtx (SImode);
24140 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
24142 /* Insert the SImode value as low element of V4SImode vector. */
24143 op1 = gen_reg_rtx (V4SImode);
24144 op0 = gen_rtx_VEC_MERGE (V4SImode,
24145 gen_rtx_VEC_DUPLICATE (V4SImode,
24146 op0),
24147 CONST0_RTX (V4SImode),
24148 const1_rtx);
24149 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
24151 /* Cast the V4SImode vector back to a vector in the original mode. */
24152 op0 = gen_reg_rtx (mode);
24153 emit_move_insn (op0, gen_lowpart (mode, op1));
24155 /* Load the even elements into the second position. */
24156 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
24157 const1_rtx));
24159 /* Cast vector to FIRST_IMODE vector. */
24160 ops[i] = gen_reg_rtx (first_imode);
24161 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
24164 /* Interleave low FIRST_IMODE vectors. */
24165 for (i = j = 0; i < n; i += 2, j++)
24167 op0 = gen_reg_rtx (first_imode);
24168 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
24170 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
24171 ops[j] = gen_reg_rtx (second_imode);
24172 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
24175 /* Interleave low SECOND_IMODE vectors. */
24176 switch (second_imode)
24178 case V4SImode:
24179 for (i = j = 0; i < n / 2; i += 2, j++)
24181 op0 = gen_reg_rtx (second_imode);
24182 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
24183 ops[i + 1]));
24185 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
24186 vector. */
24187 ops[j] = gen_reg_rtx (third_imode);
24188 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
24190 second_imode = V2DImode;
24191 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24192 /* FALLTHRU */
24194 case V2DImode:
24195 op0 = gen_reg_rtx (second_imode);
24196 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
24197 ops[1]));
24199 /* Cast the SECOND_IMODE vector back to a vector in the original
24200 mode. */
24201 emit_insn (gen_rtx_SET (VOIDmode, target,
24202 gen_lowpart (mode, op0)));
24203 break;
24205 default:
24206 gcc_unreachable ();
24210 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
24211 all values variable, and none identical. */
24213 static void
24214 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
24215 rtx target, rtx vals)
24217 rtx ops[16];
24218 int n, i;
24220 switch (mode)
24222 case V2SFmode:
24223 case V2SImode:
24224 if (!mmx_ok && !TARGET_SSE)
24225 break;
24226 /* FALLTHRU */
24228 case V4SFmode:
24229 case V4SImode:
24230 case V2DFmode:
24231 case V2DImode:
24232 n = GET_MODE_NUNITS (mode);
24233 for (i = 0; i < n; i++)
24234 ops[i] = XVECEXP (vals, 0, i);
24235 ix86_expand_vector_init_concat (mode, target, ops, n);
24236 return;
24238 case V16QImode:
24239 if (!TARGET_SSE4_1)
24240 break;
24241 /* FALLTHRU */
24243 case V8HImode:
24244 if (!TARGET_SSE2)
24245 break;
24247 n = GET_MODE_NUNITS (mode);
24248 for (i = 0; i < n; i++)
24249 ops[i] = XVECEXP (vals, 0, i);
24250 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
24251 return;
24253 case V4HImode:
24254 case V8QImode:
24255 break;
24257 default:
24258 gcc_unreachable ();
24262 int i, j, n_elts, n_words, n_elt_per_word;
24263 enum machine_mode inner_mode;
24264 rtx words[4], shift;
24266 inner_mode = GET_MODE_INNER (mode);
24267 n_elts = GET_MODE_NUNITS (mode);
24268 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
24269 n_elt_per_word = n_elts / n_words;
24270 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
24272 for (i = 0; i < n_words; ++i)
24274 rtx word = NULL_RTX;
24276 for (j = 0; j < n_elt_per_word; ++j)
24278 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
24279 elt = convert_modes (word_mode, inner_mode, elt, true);
24281 if (j == 0)
24282 word = elt;
24283 else
24285 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
24286 word, 1, OPTAB_LIB_WIDEN);
24287 word = expand_simple_binop (word_mode, IOR, word, elt,
24288 word, 1, OPTAB_LIB_WIDEN);
24292 words[i] = word;
24295 if (n_words == 1)
24296 emit_move_insn (target, gen_lowpart (mode, words[0]));
24297 else if (n_words == 2)
24299 rtx tmp = gen_reg_rtx (mode);
24300 emit_clobber (tmp);
24301 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
24302 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
24303 emit_move_insn (target, tmp);
24305 else if (n_words == 4)
24307 rtx tmp = gen_reg_rtx (V4SImode);
24308 gcc_assert (word_mode == SImode);
24309 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
24310 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
24311 emit_move_insn (target, gen_lowpart (mode, tmp));
24313 else
24314 gcc_unreachable ();
24318 /* Initialize vector TARGET via VALS. Suppress the use of MMX
24319 instructions unless MMX_OK is true. */
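/* For example, with V4SImode: { 1, 2, 3, 4 } is loaded from the
   constant pool, { x, x, x, x } is broadcast by
   ix86_expand_vector_init_duplicate, { x, 0, 0, 0 } is handled by
   ix86_expand_vector_init_one_nonzero, and four unrelated variable
   elements fall through to the general expander.  */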
24321 void
24322 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
24324 enum machine_mode mode = GET_MODE (target);
24325 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24326 int n_elts = GET_MODE_NUNITS (mode);
24327 int n_var = 0, one_var = -1;
24328 bool all_same = true, all_const_zero = true;
24329 int i;
24330 rtx x;
24332 for (i = 0; i < n_elts; ++i)
24334 x = XVECEXP (vals, 0, i);
24335 if (!(CONST_INT_P (x)
24336 || GET_CODE (x) == CONST_DOUBLE
24337 || GET_CODE (x) == CONST_FIXED))
24338 n_var++, one_var = i;
24339 else if (x != CONST0_RTX (inner_mode))
24340 all_const_zero = false;
24341 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
24342 all_same = false;
24345 /* Constants are best loaded from the constant pool. */
24346 if (n_var == 0)
24348 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
24349 return;
24352 /* If all values are identical, broadcast the value. */
24353 if (all_same
24354 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
24355 XVECEXP (vals, 0, 0)))
24356 return;
24358 /* Values where only one field is non-constant are best loaded from
24359 the pool and overwritten via move later. */
24360 if (n_var == 1)
24362 if (all_const_zero
24363 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24364 XVECEXP (vals, 0, one_var),
24365 one_var))
24366 return;
24368 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
24369 return;
24372 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
24375 void
24376 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24378 enum machine_mode mode = GET_MODE (target);
24379 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24380 bool use_vec_merge = false;
24381 rtx tmp;
24383 switch (mode)
24385 case V2SFmode:
24386 case V2SImode:
24387 if (mmx_ok)
24389 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24390 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24391 if (elt == 0)
24392 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24393 else
24394 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24395 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24396 return;
24398 break;
24400 case V2DImode:
24401 use_vec_merge = TARGET_SSE4_1;
24402 if (use_vec_merge)
24403 break;
24405 case V2DFmode:
24407 rtx op0, op1;
24409 /* For the two element vectors, we implement a VEC_CONCAT with
24410 the extraction of the other element. */
24412 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24413 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24415 if (elt == 0)
24416 op0 = val, op1 = tmp;
24417 else
24418 op0 = tmp, op1 = val;
24420 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24421 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24423 return;
24425 case V4SFmode:
24426 use_vec_merge = TARGET_SSE4_1;
24427 if (use_vec_merge)
24428 break;
24430 switch (elt)
24432 case 0:
24433 use_vec_merge = true;
24434 break;
24436 case 1:
24437 /* tmp = target = A B C D */
24438 tmp = copy_to_reg (target);
24439 /* target = A A B B */
24440 emit_insn (gen_sse_unpcklps (target, target, target));
24441 /* target = X A B B */
24442 ix86_expand_vector_set (false, target, val, 0);
24443 /* target = A X C D */
24444 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24445 GEN_INT (1), GEN_INT (0),
24446 GEN_INT (2+4), GEN_INT (3+4)));
24447 return;
24449 case 2:
24450 /* tmp = target = A B C D */
24451 tmp = copy_to_reg (target);
24452 /* tmp = X B C D */
24453 ix86_expand_vector_set (false, tmp, val, 0);
24454 /* target = A B X D */
24455 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24456 GEN_INT (0), GEN_INT (1),
24457 GEN_INT (0+4), GEN_INT (3+4)));
24458 return;
24460 case 3:
24461 /* tmp = target = A B C D */
24462 tmp = copy_to_reg (target);
24463 /* tmp = X B C D */
24464 ix86_expand_vector_set (false, tmp, val, 0);
24465 /* target = A B C X */
24466 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24467 GEN_INT (0), GEN_INT (1),
24468 GEN_INT (2+4), GEN_INT (0+4)));
24469 return;
24471 default:
24472 gcc_unreachable ();
24474 break;
24476 case V4SImode:
24477 use_vec_merge = TARGET_SSE4_1;
24478 if (use_vec_merge)
24479 break;
24481 /* Element 0 handled by vec_merge below. */
24482 if (elt == 0)
24484 use_vec_merge = true;
24485 break;
24488 if (TARGET_SSE2)
24490 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24491 store into element 0, then shuffle them back. */
24493 rtx order[4];
24495 order[0] = GEN_INT (elt);
24496 order[1] = const1_rtx;
24497 order[2] = const2_rtx;
24498 order[3] = GEN_INT (3);
24499 order[elt] = const0_rtx;
24501 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24502 order[1], order[2], order[3]));
24504 ix86_expand_vector_set (false, target, val, 0);
24506 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24507 order[1], order[2], order[3]));
24509 else
24511 /* For SSE1, we have to reuse the V4SF code. */
24512 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24513 gen_lowpart (SFmode, val), elt);
24515 return;
24517 case V8HImode:
24518 use_vec_merge = TARGET_SSE2;
24519 break;
24520 case V4HImode:
24521 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24522 break;
24524 case V16QImode:
24525 use_vec_merge = TARGET_SSE4_1;
24526 break;
24528 case V8QImode:
24529 default:
24530 break;
24533 if (use_vec_merge)
24535 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24536 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24537 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24539 else
24541 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24543 emit_move_insn (mem, target);
24545 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24546 emit_move_insn (tmp, val);
24548 emit_move_insn (target, mem);
24552 void
24553 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24555 enum machine_mode mode = GET_MODE (vec);
24556 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24557 bool use_vec_extr = false;
24558 rtx tmp;
24560 switch (mode)
24562 case V2SImode:
24563 case V2SFmode:
24564 if (!mmx_ok)
24565 break;
24566 /* FALLTHRU */
24568 case V2DFmode:
24569 case V2DImode:
24570 use_vec_extr = true;
24571 break;
24573 case V4SFmode:
24574 use_vec_extr = TARGET_SSE4_1;
24575 if (use_vec_extr)
24576 break;
24578 switch (elt)
24580 case 0:
24581 tmp = vec;
24582 break;
24584 case 1:
24585 case 3:
24586 tmp = gen_reg_rtx (mode);
24587 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24588 GEN_INT (elt), GEN_INT (elt),
24589 GEN_INT (elt+4), GEN_INT (elt+4)));
24590 break;
24592 case 2:
24593 tmp = gen_reg_rtx (mode);
24594 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24595 break;
24597 default:
24598 gcc_unreachable ();
24600 vec = tmp;
24601 use_vec_extr = true;
24602 elt = 0;
24603 break;
24605 case V4SImode:
24606 use_vec_extr = TARGET_SSE4_1;
24607 if (use_vec_extr)
24608 break;
24610 if (TARGET_SSE2)
24612 switch (elt)
24614 case 0:
24615 tmp = vec;
24616 break;
24618 case 1:
24619 case 3:
24620 tmp = gen_reg_rtx (mode);
24621 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24622 GEN_INT (elt), GEN_INT (elt),
24623 GEN_INT (elt), GEN_INT (elt)));
24624 break;
24626 case 2:
24627 tmp = gen_reg_rtx (mode);
24628 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24629 break;
24631 default:
24632 gcc_unreachable ();
24634 vec = tmp;
24635 use_vec_extr = true;
24636 elt = 0;
24638 else
24640 /* For SSE1, we have to reuse the V4SF code. */
24641 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24642 gen_lowpart (V4SFmode, vec), elt);
24643 return;
24645 break;
24647 case V8HImode:
24648 use_vec_extr = TARGET_SSE2;
24649 break;
24650 case V4HImode:
24651 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24652 break;
24654 case V16QImode:
24655 use_vec_extr = TARGET_SSE4_1;
24656 break;
24658 case V8QImode:
24659 /* ??? Could extract the appropriate HImode element and shift. */
24660 default:
24661 break;
24664 if (use_vec_extr)
24666 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24667 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24669 /* Let the rtl optimizers know about the zero extension performed. */
24670 if (inner_mode == QImode || inner_mode == HImode)
24672 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24673 target = gen_lowpart (SImode, target);
24676 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24678 else
24680 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24682 emit_move_insn (mem, vec);
24684 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24685 emit_move_insn (target, tmp);
24689 /* Expand a horizontal vector reduction on V4SFmode for SSE1. FN generates
24690 the binary operation to reduce with; DEST is the destination; IN is the input vector. */
24692 void
24693 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24695 rtx tmp1, tmp2, tmp3;
24697 tmp1 = gen_reg_rtx (V4SFmode);
24698 tmp2 = gen_reg_rtx (V4SFmode);
24699 tmp3 = gen_reg_rtx (V4SFmode);
24701 emit_insn (gen_sse_movhlps (tmp1, in, in));
24702 emit_insn (fn (tmp2, tmp1, in));
24704 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24705 GEN_INT (1), GEN_INT (1),
24706 GEN_INT (1+4), GEN_INT (1+4)));
24707 emit_insn (fn (dest, tmp2, tmp3));
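/* A rough scalar model of the SSE1 reduction sequence above (an illustrative
   sketch; reduce_v4sf_model is a hypothetical name).  movhlps pairs the high
   lanes with the low lanes, shufps then broadcasts lane 1, and the result of
   reducing all four elements ends up in lane 0 of DEST:

     static float reduce_v4sf_model (float (*fn) (float, float), const float in[4])
     {
       float a = fn (in[2], in[0]);   // lane 0 after the first FN (tmp2[0])
       float b = fn (in[3], in[1]);   // lane 1 after the first FN (tmp2[1])
       return fn (a, b);              // second FN combines tmp2[0] with tmp2[1]
     }
*/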
24710 /* Target hook for scalar_mode_supported_p. */
24711 static bool
24712 ix86_scalar_mode_supported_p (enum machine_mode mode)
24714 if (DECIMAL_FLOAT_MODE_P (mode))
24715 return true;
24716 else if (mode == TFmode)
24717 return true;
24718 else
24719 return default_scalar_mode_supported_p (mode);
24722 /* Implements target hook vector_mode_supported_p. */
24723 static bool
24724 ix86_vector_mode_supported_p (enum machine_mode mode)
24726 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24727 return true;
24728 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24729 return true;
24730 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24731 return true;
24732 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24733 return true;
24734 return false;
24737 /* Target hook for c_mode_for_suffix. */
24738 static enum machine_mode
24739 ix86_c_mode_for_suffix (char suffix)
24741 if (suffix == 'q')
24742 return TFmode;
24743 if (suffix == 'w')
24744 return XFmode;
24746 return VOIDmode;
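/* Usage sketch for the literal suffixes mapped above (assuming a target and
   front end where __float128 and __float80 are available):

     __float128 q = 1.5q;   // 'q' suffix selects TFmode (__float128)
     __float80  w = 1.5w;   // 'w' suffix selects XFmode (__float80)
*/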
24749 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24751 We do this in the new i386 backend to maintain source compatibility
24752 with the old cc0-based compiler. */
24754 static tree
24755 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24756 tree inputs ATTRIBUTE_UNUSED,
24757 tree clobbers)
24759 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24760 clobbers);
24761 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24762 clobbers);
24763 return clobbers;
24766 /* Implements the target hook targetm.encode_section_info. This
24767 is not used by NetWare. */
24769 static void ATTRIBUTE_UNUSED
24770 ix86_encode_section_info (tree decl, rtx rtl, int first)
24772 default_encode_section_info (decl, rtl, first);
24774 if (TREE_CODE (decl) == VAR_DECL
24775 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24776 && ix86_in_large_data_p (decl))
24777 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24780 /* Worker function for REVERSE_CONDITION. */
24782 enum rtx_code
24783 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24785 return (mode != CCFPmode && mode != CCFPUmode
24786 ? reverse_condition (code)
24787 : reverse_condition_maybe_unordered (code));
24790 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24791 to OPERANDS[0]. */
24793 const char *
24794 output_387_reg_move (rtx insn, rtx *operands)
24796 if (REG_P (operands[0]))
24798 if (REG_P (operands[1])
24799 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24801 if (REGNO (operands[0]) == FIRST_STACK_REG)
24802 return output_387_ffreep (operands, 0);
24803 return "fstp\t%y0";
24805 if (STACK_TOP_P (operands[0]))
24806 return "fld%z1\t%y1";
24807 return "fst\t%y0";
24809 else if (MEM_P (operands[0]))
24811 gcc_assert (REG_P (operands[1]));
24812 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24813 return "fstp%z0\t%y0";
24814 else
24816 /* There is no non-popping store to memory for XFmode.
24817 So if we need one, follow the store with a load. */
24818 if (GET_MODE (operands[0]) == XFmode)
24819 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24820 else
24821 return "fst%z0\t%y0";
24824 else
24825 gcc_unreachable();
24828 /* Output code to perform a conditional jump to LABEL if the C2 flag in
24829 the x87 FP status register is set. */
24831 void
24832 ix86_emit_fp_unordered_jump (rtx label)
24834 rtx reg = gen_reg_rtx (HImode);
24835 rtx temp;
24837 emit_insn (gen_x86_fnstsw_1 (reg));
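/* A note on the status-word test below: fnstsw leaves the x87 status word in
   AX, and the C2 condition flag is bit 10, i.e. bit 2 of AH.  The SAHF path
   copies AH into EFLAGS (C2 lands in PF, which the UNORDERED test checks);
   the fallback tests $0x04 against the high byte directly.  */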
24839 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24841 emit_insn (gen_x86_sahf_1 (reg));
24843 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24844 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24846 else
24848 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24850 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24851 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24854 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24855 gen_rtx_LABEL_REF (VOIDmode, label),
24856 pc_rtx);
24857 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24859 emit_jump_insn (temp);
24860 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24863 /* Output code to perform a log1p XFmode calculation. */
24865 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24867 rtx label1 = gen_label_rtx ();
24868 rtx label2 = gen_label_rtx ();
24870 rtx tmp = gen_reg_rtx (XFmode);
24871 rtx tmp2 = gen_reg_rtx (XFmode);
24873 emit_insn (gen_absxf2 (tmp, op1));
24874 emit_insn (gen_cmpxf (tmp,
24875 CONST_DOUBLE_FROM_REAL_VALUE (
24876 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24877 XFmode)));
24878 emit_jump_insn (gen_bge (label1));
24880 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24881 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24882 emit_jump (label2);
24884 emit_label (label1);
24885 emit_move_insn (tmp, CONST1_RTX (XFmode));
24886 emit_insn (gen_addxf3 (tmp, op1, tmp));
24887 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24888 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24890 emit_label (label2);
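/* A rough scalar model of the branch above (an illustrative sketch;
   i387_log1p_model is a hypothetical name, and log1pl/logl stand in for the
   fyl2xp1/fyl2x instructions with ln(2) preloaded by fldln2):

     #include <math.h>

     static long double i387_log1p_model (long double x)
     {
       // 0.2928... is 1 - sqrt(2)/2; fyl2xp1 is only specified for arguments
       // in roughly that range around zero, where it is accurate.
       if (fabsl (x) < 0.29289321881345247561810596348408353L)
         return log1pl (x);       // fyl2xp1: ln(2) * log2(1 + x), accurate near 0
       return logl (1.0L + x);    // fyl2x on the explicitly formed 1 + x
     }
*/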
24893 /* Output code to perform a Newton-Raphson approximation of a single precision
24894 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24896 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24898 rtx x0, x1, e0, e1, two;
24900 x0 = gen_reg_rtx (mode);
24901 e0 = gen_reg_rtx (mode);
24902 e1 = gen_reg_rtx (mode);
24903 x1 = gen_reg_rtx (mode);
24905 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24907 if (VECTOR_MODE_P (mode))
24908 two = ix86_build_const_vector (SFmode, true, two);
24910 two = force_reg (mode, two);
24912 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24914 /* x0 = rcp(b) estimate */
24915 emit_insn (gen_rtx_SET (VOIDmode, x0,
24916 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24917 UNSPEC_RCP)));
24918 /* e0 = x0 * b */
24919 emit_insn (gen_rtx_SET (VOIDmode, e0,
24920 gen_rtx_MULT (mode, x0, b)));
24921 /* e1 = 2. - e0 */
24922 emit_insn (gen_rtx_SET (VOIDmode, e1,
24923 gen_rtx_MINUS (mode, two, e0)));
24924 /* x1 = x0 * e1 */
24925 emit_insn (gen_rtx_SET (VOIDmode, x1,
24926 gen_rtx_MULT (mode, x0, e1)));
24927 /* res = a * x1 */
24928 emit_insn (gen_rtx_SET (VOIDmode, res,
24929 gen_rtx_MULT (mode, a, x1)));
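/* A rough scalar model of the sequence above (an illustrative sketch;
   swdiv_model is a hypothetical name, and the exact 1.0f/b stands in for the
   ~12-bit rcpss estimate the real code starts from):

     static float swdiv_model (float a, float b)
     {
       float x0 = 1.0f / b;        // rcp(b) estimate
       float e0 = x0 * b;
       float e1 = 2.0f - e0;
       float x1 = x0 * e1;         // one Newton-Raphson step refines the estimate
       return a * x1;              // a / b to roughly single-precision accuracy
     }
*/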
24932 /* Output code to perform a Newton-Raphson approximation of a
24933 single precision floating point [reciprocal] square root. */
24935 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24936 bool recip)
24938 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24939 REAL_VALUE_TYPE r;
24941 x0 = gen_reg_rtx (mode);
24942 e0 = gen_reg_rtx (mode);
24943 e1 = gen_reg_rtx (mode);
24944 e2 = gen_reg_rtx (mode);
24945 e3 = gen_reg_rtx (mode);
24947 real_from_integer (&r, VOIDmode, -3, -1, 0);
24948 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24950 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24951 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24953 if (VECTOR_MODE_P (mode))
24955 mthree = ix86_build_const_vector (SFmode, true, mthree);
24956 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24959 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24960 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24962 /* x0 = rsqrt(a) estimate */
24963 emit_insn (gen_rtx_SET (VOIDmode, x0,
24964 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24965 UNSPEC_RSQRT)));
24967 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent sqrt(0.0) from producing a NaN. */
24968 if (!recip)
24970 rtx zero, mask;
24972 zero = gen_reg_rtx (mode);
24973 mask = gen_reg_rtx (mode);
24975 zero = force_reg (mode, CONST0_RTX(mode));
24976 emit_insn (gen_rtx_SET (VOIDmode, mask,
24977 gen_rtx_NE (mode, zero, a)));
24979 emit_insn (gen_rtx_SET (VOIDmode, x0,
24980 gen_rtx_AND (mode, x0, mask)));
24983 /* e0 = x0 * a */
24984 emit_insn (gen_rtx_SET (VOIDmode, e0,
24985 gen_rtx_MULT (mode, x0, a)));
24986 /* e1 = e0 * x0 */
24987 emit_insn (gen_rtx_SET (VOIDmode, e1,
24988 gen_rtx_MULT (mode, e0, x0)));
24990 /* e2 = e1 - 3. */
24991 mthree = force_reg (mode, mthree);
24992 emit_insn (gen_rtx_SET (VOIDmode, e2,
24993 gen_rtx_PLUS (mode, e1, mthree)));
24995 mhalf = force_reg (mode, mhalf);
24996 if (recip)
24997 /* e3 = -.5 * x0 */
24998 emit_insn (gen_rtx_SET (VOIDmode, e3,
24999 gen_rtx_MULT (mode, x0, mhalf)));
25000 else
25001 /* e3 = -.5 * e0 */
25002 emit_insn (gen_rtx_SET (VOIDmode, e3,
25003 gen_rtx_MULT (mode, e0, mhalf)));
25004 /* ret = e2 * e3 */
25005 emit_insn (gen_rtx_SET (VOIDmode, res,
25006 gen_rtx_MULT (mode, e2, e3)));
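/* A rough scalar model of the sequence above (an illustrative sketch;
   swsqrt_model is a hypothetical name, and 1.0f/sqrtf(a) stands in for the
   ~12-bit rsqrtss estimate):

     #include <math.h>
     #include <stdbool.h>

     static float swsqrt_model (float a, bool recip)
     {
       float x0 = 1.0f / sqrtf (a);      // rsqrt(a) estimate
       if (!recip && a == 0.0f)
         x0 = 0.0f;                      // the NE-mask/AND step: avoid 0 * inf = NaN
       float e0 = x0 * a;
       float e1 = e0 * x0;               // a * x0 * x0
       float e2 = e1 - 3.0f;             // formed above by adding mthree (= -3.0)
       float e3 = (recip ? x0 : e0) * -0.5f;
       return e2 * e3;                   // -0.5 * {x0 | a*x0} * (a*x0*x0 - 3.0)
     }
*/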
25009 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
25011 static void ATTRIBUTE_UNUSED
25012 i386_solaris_elf_named_section (const char *name, unsigned int flags,
25013 tree decl)
25015 /* With Binutils 2.15, the "@unwind" marker must be specified on
25016 every occurrence of the ".eh_frame" section, not just the first
25017 one. */
25018 if (TARGET_64BIT
25019 && strcmp (name, ".eh_frame") == 0)
25021 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
25022 flags & SECTION_WRITE ? "aw" : "a");
25023 return;
25025 default_elf_asm_named_section (name, flags, decl);
25028 /* Return the mangling of TYPE if it is an extended fundamental type. */
25030 static const char *
25031 ix86_mangle_type (const_tree type)
25033 type = TYPE_MAIN_VARIANT (type);
25035 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25036 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25037 return NULL;
25039 switch (TYPE_MODE (type))
25041 case TFmode:
25042 /* __float128 is "g". */
25043 return "g";
25044 case XFmode:
25045 /* "long double" or __float80 is "e". */
25046 return "e";
25047 default:
25048 return NULL;
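/* Mangling example for the codes above (Itanium C++ ABI): a function
   "void f (__float128)" mangles as _Z1fg, and "void f (__float80)" as _Z1fe,
   which is how the "g" and "e" strings returned here end up in symbol names.  */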
25052 /* For 32-bit code we can save PIC register setup by using
25053 __stack_chk_fail_local hidden function instead of calling
25054 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25055 register, so it is better to call __stack_chk_fail directly. */
25057 static tree
25058 ix86_stack_protect_fail (void)
25060 return TARGET_64BIT
25061 ? default_external_stack_protect_fail ()
25062 : default_hidden_stack_protect_fail ();
25065 /* Select a format to encode pointers in exception handling data. CODE
25066 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25067 true if the symbol may be affected by dynamic relocations.
25069 ??? All x86 object file formats are capable of representing this.
25070 After all, the relocation needed is the same as for the call insn.
25071 Whether or not a particular assembler allows us to enter such, I
25072 guess we'll have to see. */
25073 int
25074 asm_preferred_eh_data_format (int code, int global)
25076 if (flag_pic)
25078 int type = DW_EH_PE_sdata8;
25079 if (!TARGET_64BIT
25080 || ix86_cmodel == CM_SMALL_PIC
25081 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25082 type = DW_EH_PE_sdata4;
25083 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25085 if (ix86_cmodel == CM_SMALL
25086 || (ix86_cmodel == CM_MEDIUM && code))
25087 return DW_EH_PE_udata4;
25088 return DW_EH_PE_absptr;
25091 /* Expand copysign from SIGN to the positive value ABS_VALUE
25092 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
25093 the sign-bit. */
25094 static void
25095 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
25097 enum machine_mode mode = GET_MODE (sign);
25098 rtx sgn = gen_reg_rtx (mode);
25099 if (mask == NULL_RTX)
25101 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
25102 if (!VECTOR_MODE_P (mode))
25104 /* We need to generate a scalar mode mask in this case. */
25105 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25106 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25107 mask = gen_reg_rtx (mode);
25108 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25111 else
25112 mask = gen_rtx_NOT (mode, mask);
25113 emit_insn (gen_rtx_SET (VOIDmode, sgn,
25114 gen_rtx_AND (mode, mask, sign)));
25115 emit_insn (gen_rtx_SET (VOIDmode, result,
25116 gen_rtx_IOR (mode, abs_value, sgn)));
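/* A rough bit-level model of the AND/IOR sequence above (an illustrative
   sketch; copysign_to_positive_model is a hypothetical name).  ABS_VALUE is
   assumed to be non-negative, so OR-ing in the masked sign bit implements
   copysign:

     #include <stdint.h>
     #include <string.h>

     static double copysign_to_positive_model (double abs_value, double sign)
     {
       uint64_t a, s, r;
       const uint64_t signbit = (uint64_t) 1 << 63;   // the mask built above
       double result;
       memcpy (&a, &abs_value, sizeof a);
       memcpy (&s, &sign, sizeof s);
       r = a | (s & signbit);                         // AND then IOR, as emitted
       memcpy (&result, &r, sizeof result);
       return result;
     }
*/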
25119 /* Expand fabs (OP0) and return a new rtx that holds the result. The
25120 mask for masking out the sign-bit is stored in *SMASK, if that is
25121 non-null. */
25122 static rtx
25123 ix86_expand_sse_fabs (rtx op0, rtx *smask)
25125 enum machine_mode mode = GET_MODE (op0);
25126 rtx xa, mask;
25128 xa = gen_reg_rtx (mode);
25129 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
25130 if (!VECTOR_MODE_P (mode))
25132 /* We need to generate a scalar mode mask in this case. */
25133 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25134 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25135 mask = gen_reg_rtx (mode);
25136 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25138 emit_insn (gen_rtx_SET (VOIDmode, xa,
25139 gen_rtx_AND (mode, op0, mask)));
25141 if (smask)
25142 *smask = mask;
25144 return xa;
25147 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
25148 swapping the operands if SWAP_OPERANDS is true. The expanded
25149 code is a forward jump to a newly created label in case the
25150 comparison is true. The generated label rtx is returned. */
25151 static rtx
25152 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
25153 bool swap_operands)
25155 rtx label, tmp;
25157 if (swap_operands)
25159 tmp = op0;
25160 op0 = op1;
25161 op1 = tmp;
25164 label = gen_label_rtx ();
25165 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
25166 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25167 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
25168 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
25169 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25170 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
25171 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25172 JUMP_LABEL (tmp) = label;
25174 return label;
25177 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
25178 using comparison code CODE. Operands are swapped for the comparison if
25179 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
25180 static rtx
25181 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
25182 bool swap_operands)
25184 enum machine_mode mode = GET_MODE (op0);
25185 rtx mask = gen_reg_rtx (mode);
25187 if (swap_operands)
25189 rtx tmp = op0;
25190 op0 = op1;
25191 op1 = tmp;
25194 if (mode == DFmode)
25195 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
25196 gen_rtx_fmt_ee (code, mode, op0, op1)));
25197 else
25198 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
25199 gen_rtx_fmt_ee (code, mode, op0, op1)));
25201 return mask;
25204 /* Generate and return a rtx of mode MODE for 2**n where n is the number
25205 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
25206 static rtx
25207 ix86_gen_TWO52 (enum machine_mode mode)
25209 REAL_VALUE_TYPE TWO52r;
25210 rtx TWO52;
25212 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
25213 TWO52 = const_double_from_real_value (TWO52r, mode);
25214 TWO52 = force_reg (mode, TWO52);
25216 return TWO52;
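/* Why 2**52: adding and then subtracting it rounds a non-negative double
   (below 2**52) to an integer in the current rounding mode, because the
   addition pushes all fraction bits out of the 52-bit mantissa (for SFmode
   the analogous constant is 2**23).  A rough C model (an illustrative sketch;
   assumes round-to-nearest and no x87 excess precision):

     static double round_via_two52 (double xa)    // requires 0 <= xa < 2**52
     {
       const double two52 = 4503599627370496.0;   // 2**52
       double t = xa + two52;                     // fraction bits are rounded away
       return t - two52;                          // exact: the result is an integer
     }
*/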
25219 /* Expand SSE sequence for computing lround from OP1 storing
25220 into OP0. */
25221 void
25222 ix86_expand_lround (rtx op0, rtx op1)
25224 /* C code for the stuff we're doing below:
25225 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
25226 return (long)tmp;
25228 enum machine_mode mode = GET_MODE (op1);
25229 const struct real_format *fmt;
25230 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25231 rtx adj;
25233 /* load nextafter (0.5, 0.0) */
25234 fmt = REAL_MODE_FORMAT (mode);
25235 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25236 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25238 /* adj = copysign (0.5, op1) */
25239 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
25240 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
25242 /* adj = op1 + adj */
25243 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
25245 /* op0 = (imode)adj */
25246 expand_fix (op0, adj, 0);
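/* A rough C model of the lround expansion above (an illustrative sketch;
   lround_model is a hypothetical name).  nextafter (0.5, 0.0) is used instead
   of 0.5 so that values like 0.49999999999999994, whose sum with 0.5 rounds
   up to exactly 1.0, are still truncated to 0:

     #include <math.h>

     static long lround_model (double x)
     {
       double adj = copysign (nextafter (0.5, 0.0), x);  // 0.5 minus one ulp
       return (long) (x + adj);   // truncation now rounds halfway cases away from zero
     }
*/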
25249 /* Expand SSE2 sequence for computing lfloor or lceil (selected by
25250 DO_FLOOR) from OPERAND1 storing into OPERAND0. */
25251 void
25252 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
25254 /* C code for the stuff we're doing below (for do_floor):
25255 xi = (long)op1;
25256 xi -= (double)xi > op1 ? 1 : 0;
25257 return xi;
25259 enum machine_mode fmode = GET_MODE (op1);
25260 enum machine_mode imode = GET_MODE (op0);
25261 rtx ireg, freg, label, tmp;
25263 /* reg = (long)op1 */
25264 ireg = gen_reg_rtx (imode);
25265 expand_fix (ireg, op1, 0);
25267 /* freg = (double)reg */
25268 freg = gen_reg_rtx (fmode);
25269 expand_float (freg, ireg, 0);
25271 /* ireg = (freg > op1) ? ireg - 1 : ireg */
25272 label = ix86_expand_sse_compare_and_jump (UNLE,
25273 freg, op1, !do_floor);
25274 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
25275 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
25276 emit_move_insn (ireg, tmp);
25278 emit_label (label);
25279 LABEL_NUSES (label) = 1;
25281 emit_move_insn (op0, ireg);
25284 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
25285 result in OPERAND0. */
25286 void
25287 ix86_expand_rint (rtx operand0, rtx operand1)
25289 /* C code for the stuff we're doing below:
25290 xa = fabs (operand1);
25291 if (!isless (xa, 2**52))
25292 return operand1;
25293 xa = xa + 2**52 - 2**52;
25294 return copysign (xa, operand1);
25296 enum machine_mode mode = GET_MODE (operand0);
25297 rtx res, xa, label, TWO52, mask;
25299 res = gen_reg_rtx (mode);
25300 emit_move_insn (res, operand1);
25302 /* xa = abs (operand1) */
25303 xa = ix86_expand_sse_fabs (res, &mask);
25305 /* if (!isless (xa, TWO52)) goto label; */
25306 TWO52 = ix86_gen_TWO52 (mode);
25307 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25309 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25310 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25312 ix86_sse_copysign_to_positive (res, xa, res, mask);
25314 emit_label (label);
25315 LABEL_NUSES (label) = 1;
25317 emit_move_insn (operand0, res);
25320 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25321 into OPERAND0. */
25322 void
25323 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
25325 /* C code for the stuff we expand below.
25326 double xa = fabs (x), x2;
25327 if (!isless (xa, TWO52))
25328 return x;
25329 xa = xa + TWO52 - TWO52;
25330 x2 = copysign (xa, x);
25331 Compensate. Floor:
25332 if (x2 > x)
25333 x2 -= 1;
25334 Compensate. Ceil:
25335 if (x2 < x)
25336 x2 -= -1;
25337 return x2;
25339 enum machine_mode mode = GET_MODE (operand0);
25340 rtx xa, TWO52, tmp, label, one, res, mask;
25342 TWO52 = ix86_gen_TWO52 (mode);
25344 /* Temporary for holding the result, initialized to the input
25345 operand to ease control flow. */
25346 res = gen_reg_rtx (mode);
25347 emit_move_insn (res, operand1);
25349 /* xa = abs (operand1) */
25350 xa = ix86_expand_sse_fabs (res, &mask);
25352 /* if (!isless (xa, TWO52)) goto label; */
25353 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25355 /* xa = xa + TWO52 - TWO52; */
25356 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25357 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25359 /* xa = copysign (xa, operand1) */
25360 ix86_sse_copysign_to_positive (xa, xa, res, mask);
25362 /* generate 1.0 or -1.0 */
25363 one = force_reg (mode,
25364 const_double_from_real_value (do_floor
25365 ? dconst1 : dconstm1, mode));
25367 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25368 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25369 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25370 gen_rtx_AND (mode, one, tmp)));
25371 /* We always need to subtract here to preserve signed zero. */
25372 tmp = expand_simple_binop (mode, MINUS,
25373 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25374 emit_move_insn (res, tmp);
25376 emit_label (label);
25377 LABEL_NUSES (label) = 1;
25379 emit_move_insn (operand0, res);
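/* A rough bit-level model of the UNGT-mask / AND / SUB compensation used
   above (an illustrative sketch; floor_step_model is a hypothetical name).
   The compare produces an all-ones or all-zeros lane, the AND turns that into
   1.0 or +0.0, and the unconditional subtract keeps -0.0 intact:

     #include <stdint.h>
     #include <string.h>

     static double floor_step_model (double x2, double x)   // x2 = x rounded to nearest
     {
       uint64_t m = (x2 > x) ? UINT64_MAX : 0;   // cmpsd-style all-ones mask
       double one = 1.0, step;
       uint64_t o;
       memcpy (&o, &one, sizeof o);
       o &= m;                                   // 1.0 where we overshot, else +0.0
       memcpy (&step, &o, sizeof step);
       return x2 - step;                         // floor; ceil uses -1.0 and a swapped compare
     }
*/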
25382 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25383 into OPERAND0. */
25384 void
25385 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25387 /* C code for the stuff we expand below.
25388 double xa = fabs (x), x2;
25389 if (!isless (xa, TWO52))
25390 return x;
25391 x2 = (double)(long)x;
25392 Compensate. Floor:
25393 if (x2 > x)
25394 x2 -= 1;
25395 Compensate. Ceil:
25396 if (x2 < x)
25397 x2 += 1;
25398 if (HONOR_SIGNED_ZEROS (mode))
25399 return copysign (x2, x);
25400 return x2;
25402 enum machine_mode mode = GET_MODE (operand0);
25403 rtx xa, xi, TWO52, tmp, label, one, res, mask;
25405 TWO52 = ix86_gen_TWO52 (mode);
25407 /* Temporary for holding the result, initialized to the input
25408 operand to ease control flow. */
25409 res = gen_reg_rtx (mode);
25410 emit_move_insn (res, operand1);
25412 /* xa = abs (operand1) */
25413 xa = ix86_expand_sse_fabs (res, &mask);
25415 /* if (!isless (xa, TWO52)) goto label; */
25416 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25418 /* xa = (double)(long)x */
25419 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25420 expand_fix (xi, res, 0);
25421 expand_float (xa, xi, 0);
25423 /* generate 1.0 */
25424 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25426 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25427 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25428 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25429 gen_rtx_AND (mode, one, tmp)));
25430 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25431 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25432 emit_move_insn (res, tmp);
25434 if (HONOR_SIGNED_ZEROS (mode))
25435 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25437 emit_label (label);
25438 LABEL_NUSES (label) = 1;
25440 emit_move_insn (operand0, res);
25443 /* Expand SSE sequence for computing round from OPERAND1 storing
25444 into OPERAND0, using a sequence that avoids the DImode truncation
25445 via cvttsd2siq, which is only available on 64-bit targets. */
25446 void
25447 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25449 /* C code for the stuff we expand below.
25450 double xa = fabs (x), xa2, x2;
25451 if (!isless (xa, TWO52))
25452 return x;
25453 Using the absolute value and copying back sign makes
25454 -0.0 -> -0.0 correct.
25455 xa2 = xa + TWO52 - TWO52;
25456 Compensate.
25457 dxa = xa2 - xa;
25458 if (dxa <= -0.5)
25459 xa2 += 1;
25460 else if (dxa > 0.5)
25461 xa2 -= 1;
25462 x2 = copysign (xa2, x);
25463 return x2;
25465 enum machine_mode mode = GET_MODE (operand0);
25466 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25468 TWO52 = ix86_gen_TWO52 (mode);
25470 /* Temporary for holding the result, initialized to the input
25471 operand to ease control flow. */
25472 res = gen_reg_rtx (mode);
25473 emit_move_insn (res, operand1);
25475 /* xa = abs (operand1) */
25476 xa = ix86_expand_sse_fabs (res, &mask);
25478 /* if (!isless (xa, TWO52)) goto label; */
25479 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25481 /* xa2 = xa + TWO52 - TWO52; */
25482 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25483 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25485 /* dxa = xa2 - xa; */
25486 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25488 /* generate 0.5, 1.0 and -0.5 */
25489 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25490 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25491 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25492 0, OPTAB_DIRECT);
25494 /* Compensate. */
25495 tmp = gen_reg_rtx (mode);
25496 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25497 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25498 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25499 gen_rtx_AND (mode, one, tmp)));
25500 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25501 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25502 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25503 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25504 gen_rtx_AND (mode, one, tmp)));
25505 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25507 /* res = copysign (xa2, operand1) */
25508 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25510 emit_label (label);
25511 LABEL_NUSES (label) = 1;
25513 emit_move_insn (operand0, res);
25516 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25517 into OPERAND0. */
25518 void
25519 ix86_expand_trunc (rtx operand0, rtx operand1)
25521 /* C code for SSE variant we expand below.
25522 double xa = fabs (x), x2;
25523 if (!isless (xa, TWO52))
25524 return x;
25525 x2 = (double)(long)x;
25526 if (HONOR_SIGNED_ZEROS (mode))
25527 return copysign (x2, x);
25528 return x2;
25530 enum machine_mode mode = GET_MODE (operand0);
25531 rtx xa, xi, TWO52, label, res, mask;
25533 TWO52 = ix86_gen_TWO52 (mode);
25535 /* Temporary for holding the result, initialized to the input
25536 operand to ease control flow. */
25537 res = gen_reg_rtx (mode);
25538 emit_move_insn (res, operand1);
25540 /* xa = abs (operand1) */
25541 xa = ix86_expand_sse_fabs (res, &mask);
25543 /* if (!isless (xa, TWO52)) goto label; */
25544 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25546 /* x = (double)(long)x */
25547 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25548 expand_fix (xi, res, 0);
25549 expand_float (res, xi, 0);
25551 if (HONOR_SIGNED_ZEROS (mode))
25552 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25554 emit_label (label);
25555 LABEL_NUSES (label) = 1;
25557 emit_move_insn (operand0, res);
25560 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25561 into OPERAND0, without relying on the 64-bit-only cvttsd2siq truncation. */
25562 void
25563 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25565 enum machine_mode mode = GET_MODE (operand0);
25566 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25568 /* C code for SSE variant we expand below.
25569 double xa = fabs (x), x2;
25570 if (!isless (xa, TWO52))
25571 return x;
25572 xa2 = xa + TWO52 - TWO52;
25573 Compensate:
25574 if (xa2 > xa)
25575 xa2 -= 1.0;
25576 x2 = copysign (xa2, x);
25577 return x2;
25580 TWO52 = ix86_gen_TWO52 (mode);
25582 /* Temporary for holding the result, initialized to the input
25583 operand to ease control flow. */
25584 res = gen_reg_rtx (mode);
25585 emit_move_insn (res, operand1);
25587 /* xa = abs (operand1) */
25588 xa = ix86_expand_sse_fabs (res, &smask);
25590 /* if (!isless (xa, TWO52)) goto label; */
25591 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25593 /* res = xa + TWO52 - TWO52; */
25594 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25595 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25596 emit_move_insn (res, tmp);
25598 /* generate 1.0 */
25599 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25601 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
25602 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25603 emit_insn (gen_rtx_SET (VOIDmode, mask,
25604 gen_rtx_AND (mode, mask, one)));
25605 tmp = expand_simple_binop (mode, MINUS,
25606 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25607 emit_move_insn (res, tmp);
25609 /* res = copysign (res, operand1) */
25610 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25612 emit_label (label);
25613 LABEL_NUSES (label) = 1;
25615 emit_move_insn (operand0, res);
25618 /* Expand SSE sequence for computing round from OPERAND1 storing
25619 into OPERAND0. */
25620 void
25621 ix86_expand_round (rtx operand0, rtx operand1)
25623 /* C code for the stuff we're doing below:
25624 double xa = fabs (x);
25625 if (!isless (xa, TWO52))
25626 return x;
25627 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25628 return copysign (xa, x);
25630 enum machine_mode mode = GET_MODE (operand0);
25631 rtx res, TWO52, xa, label, xi, half, mask;
25632 const struct real_format *fmt;
25633 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25635 /* Temporary for holding the result, initialized to the input
25636 operand to ease control flow. */
25637 res = gen_reg_rtx (mode);
25638 emit_move_insn (res, operand1);
25640 TWO52 = ix86_gen_TWO52 (mode);
25641 xa = ix86_expand_sse_fabs (res, &mask);
25642 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25644 /* load nextafter (0.5, 0.0) */
25645 fmt = REAL_MODE_FORMAT (mode);
25646 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25647 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25649 /* xa = xa + 0.5 */
25650 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25651 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25653 /* xa = (double)(int64_t)xa */
25654 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25655 expand_fix (xi, xa, 0);
25656 expand_float (xa, xi, 0);
25658 /* res = copysign (xa, operand1) */
25659 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25661 emit_label (label);
25662 LABEL_NUSES (label) = 1;
25664 emit_move_insn (operand0, res);
25668 /* Check whether an SSE5 instruction with the given operands is valid.
25669 OPERANDS is the array of operands.
25670 NUM is the number of operands.
25671 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25672 NUM_MEMORY is the maximum number of memory operands to accept. */
25674 bool
25675 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
25676 bool uses_oc0, int num_memory)
25678 int mem_mask;
25679 int mem_count;
25680 int i;
25682 /* Count the number of memory arguments */
25683 mem_mask = 0;
25684 mem_count = 0;
25685 for (i = 0; i < num; i++)
25687 enum machine_mode mode = GET_MODE (operands[i]);
25688 if (register_operand (operands[i], mode))
25691 else if (memory_operand (operands[i], mode))
25693 mem_mask |= (1 << i);
25694 mem_count++;
25697 else
25699 rtx pattern = PATTERN (insn);
25701 /* allow 0 for pcmov */
25702 if (GET_CODE (pattern) != SET
25703 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25704 || i < 2
25705 || operands[i] != CONST0_RTX (mode))
25706 return false;
25710 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
25711 a memory operation. */
25712 if (num_memory < 0)
25714 num_memory = -num_memory;
25715 if ((mem_mask & (1 << (num-1))) != 0)
25717 mem_mask &= ~(1 << (num-1));
25718 mem_count--;
25722 /* If there were no memory operations, allow the insn */
25723 if (mem_mask == 0)
25724 return true;
25726 /* Do not allow the destination register to be a memory operand. */
25727 else if (mem_mask & (1 << 0))
25728 return false;
25730 /* If there are too many memory operations, disallow the instruction. While
25731 the hardware only allows one memory reference, before register allocation
25732 we sometimes allow two memory operands for certain insns so that code
25733 like the following can be optimized:
25735 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25737 or similar cases that are vectorized into using the fmaddss
25738 instruction. */
25739 else if (mem_count > num_memory)
25740 return false;
25742 /* Don't allow more than one memory operation if not optimizing. */
25743 else if (mem_count > 1 && !optimize)
25744 return false;
25746 else if (num == 4 && mem_count == 1)
25748 /* formats (destination is the first argument), example fmaddss:
25749 xmm1, xmm1, xmm2, xmm3/mem
25750 xmm1, xmm1, xmm2/mem, xmm3
25751 xmm1, xmm2, xmm3/mem, xmm1
25752 xmm1, xmm2/mem, xmm3, xmm1 */
25753 if (uses_oc0)
25754 return ((mem_mask == (1 << 1))
25755 || (mem_mask == (1 << 2))
25756 || (mem_mask == (1 << 3)));
25758 /* format, example pmacsdd:
25759 xmm1, xmm2, xmm3/mem, xmm1 */
25760 else
25761 return (mem_mask == (1 << 2));
25764 else if (num == 4 && num_memory == 2)
25766 /* If there are two memory operations, we can load one of the memory ops
25767 into the destination register. This is for optimizing the
25768 multiply/add ops, for which the combiner has given both the multiply
25769 and the add insns a memory operand. We have to be careful
25770 that the destination doesn't overlap with the inputs. */
25771 rtx op0 = operands[0];
25773 if (reg_mentioned_p (op0, operands[1])
25774 || reg_mentioned_p (op0, operands[2])
25775 || reg_mentioned_p (op0, operands[3]))
25776 return false;
25778 /* formats (destination is the first argument), example fmaddss:
25779 xmm1, xmm1, xmm2, xmm3/mem
25780 xmm1, xmm1, xmm2/mem, xmm3
25781 xmm1, xmm2, xmm3/mem, xmm1
25782 xmm1, xmm2/mem, xmm3, xmm1
25784 For the oc0 case, we will load either operands[1] or operands[3] into
25785 operands[0], so any combination of 2 memory operands is ok. */
25786 if (uses_oc0)
25787 return true;
25789 /* format, example pmacsdd:
25790 xmm1, xmm2, xmm3/mem, xmm1
25792 For the integer multiply/add instructions be more restrictive and
25793 require operands[2] and operands[3] to be the memory operands. */
25794 else
25795 return (mem_mask == ((1 << 2) | (1 << 3)));
25798 else if (num == 3 && num_memory == 1)
25800 /* formats, example protb:
25801 xmm1, xmm2, xmm3/mem
25802 xmm1, xmm2/mem, xmm3 */
25803 if (uses_oc0)
25804 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25806 /* format, example comeq:
25807 xmm1, xmm2, xmm3/mem */
25808 else
25809 return (mem_mask == (1 << 2));
25812 else
25813 gcc_unreachable ();
25815 return false;
25819 /* Fix up an SSE5 instruction that has 2 memory input references into a form the
25820 hardware will allow by using the destination register to load one of the
25821 memory operations. Presently this is used by the multiply/add routines to
25822 allow 2 memory references. */
25824 void
25825 ix86_expand_sse5_multiple_memory (rtx operands[],
25826 int num,
25827 enum machine_mode mode)
25829 rtx op0 = operands[0];
25830 if (num != 4
25831 || memory_operand (op0, mode)
25832 || reg_mentioned_p (op0, operands[1])
25833 || reg_mentioned_p (op0, operands[2])
25834 || reg_mentioned_p (op0, operands[3]))
25835 gcc_unreachable ();
25837 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25838 the destination register. */
25839 if (memory_operand (operands[1], mode))
25841 emit_move_insn (op0, operands[1]);
25842 operands[1] = op0;
25844 else if (memory_operand (operands[3], mode))
25846 emit_move_insn (op0, operands[3]);
25847 operands[3] = op0;
25849 else
25850 gcc_unreachable ();
25852 return;
25856 /* Table of valid machine attributes. */
25857 static const struct attribute_spec ix86_attribute_table[] =
25859 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25860 /* Stdcall attribute says callee is responsible for popping arguments
25861 if they are not variable. */
25862 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25863 /* Fastcall attribute says callee is responsible for popping arguments
25864 if they are not variable. */
25865 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25866 /* Cdecl attribute says the callee is a normal C declaration */
25867 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25868 /* Regparm attribute specifies how many integer arguments are to be
25869 passed in registers. */
25870 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25871 /* Sseregparm attribute says we are using x86_64 calling conventions
25872 for FP arguments. */
25873 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25874 /* force_align_arg_pointer says this function realigns the stack at entry. */
25875 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25876 false, true, true, ix86_handle_cconv_attribute },
25877 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25878 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25879 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25880 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25881 #endif
25882 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25883 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25884 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25885 SUBTARGET_ATTRIBUTE_TABLE,
25886 #endif
25887 { NULL, 0, 0, false, false, false, NULL }
25890 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25891 static int
25892 x86_builtin_vectorization_cost (bool runtime_test)
25894 /* If the branch of the runtime test is taken - i.e. the vectorized
25895 version is skipped - this incurs a misprediction cost (because the
25896 vectorized version is expected to be the fall-through). So we subtract
25897 the latency of a mispredicted branch from the costs that are incurred
25898 when the vectorized version is executed.
25900 TODO: The values in individual target tables have to be tuned or new
25901 fields may be needed. For example, on K8 the default branch path is the
25902 not-taken path. If the taken path is predicted correctly, the minimum
25903 penalty of going down the taken-path is 1 cycle. If the taken-path is
25904 not predicted correctly, then the minimum penalty is 10 cycles. */
25906 if (runtime_test)
25908 return (-(ix86_cost->cond_taken_branch_cost));
25910 else
25911 return 0;
25914 /* Initialize the GCC target structure. */
25915 #undef TARGET_RETURN_IN_MEMORY
25916 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
25918 #undef TARGET_ATTRIBUTE_TABLE
25919 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25920 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25921 # undef TARGET_MERGE_DECL_ATTRIBUTES
25922 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25923 #endif
25925 #undef TARGET_COMP_TYPE_ATTRIBUTES
25926 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25928 #undef TARGET_INIT_BUILTINS
25929 #define TARGET_INIT_BUILTINS ix86_init_builtins
25930 #undef TARGET_EXPAND_BUILTIN
25931 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25933 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25934 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25935 ix86_builtin_vectorized_function
25937 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25938 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25940 #undef TARGET_BUILTIN_RECIPROCAL
25941 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25943 #undef TARGET_ASM_FUNCTION_EPILOGUE
25944 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25946 #undef TARGET_ENCODE_SECTION_INFO
25947 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25948 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25949 #else
25950 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25951 #endif
25953 #undef TARGET_ASM_OPEN_PAREN
25954 #define TARGET_ASM_OPEN_PAREN ""
25955 #undef TARGET_ASM_CLOSE_PAREN
25956 #define TARGET_ASM_CLOSE_PAREN ""
25958 #undef TARGET_ASM_ALIGNED_HI_OP
25959 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25960 #undef TARGET_ASM_ALIGNED_SI_OP
25961 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25962 #ifdef ASM_QUAD
25963 #undef TARGET_ASM_ALIGNED_DI_OP
25964 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25965 #endif
25967 #undef TARGET_ASM_UNALIGNED_HI_OP
25968 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25969 #undef TARGET_ASM_UNALIGNED_SI_OP
25970 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25971 #undef TARGET_ASM_UNALIGNED_DI_OP
25972 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25974 #undef TARGET_SCHED_ADJUST_COST
25975 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25976 #undef TARGET_SCHED_ISSUE_RATE
25977 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25978 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25979 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25980 ia32_multipass_dfa_lookahead
25982 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25983 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25985 #ifdef HAVE_AS_TLS
25986 #undef TARGET_HAVE_TLS
25987 #define TARGET_HAVE_TLS true
25988 #endif
25989 #undef TARGET_CANNOT_FORCE_CONST_MEM
25990 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25991 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25992 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25994 #undef TARGET_DELEGITIMIZE_ADDRESS
25995 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25997 #undef TARGET_MS_BITFIELD_LAYOUT_P
25998 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
26000 #if TARGET_MACHO
26001 #undef TARGET_BINDS_LOCAL_P
26002 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
26003 #endif
26004 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26005 #undef TARGET_BINDS_LOCAL_P
26006 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
26007 #endif
26009 #undef TARGET_ASM_OUTPUT_MI_THUNK
26010 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26011 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26012 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26014 #undef TARGET_ASM_FILE_START
26015 #define TARGET_ASM_FILE_START x86_file_start
26017 #undef TARGET_DEFAULT_TARGET_FLAGS
26018 #define TARGET_DEFAULT_TARGET_FLAGS \
26019 (TARGET_DEFAULT \
26020 | TARGET_SUBTARGET_DEFAULT \
26021 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
26023 #undef TARGET_HANDLE_OPTION
26024 #define TARGET_HANDLE_OPTION ix86_handle_option
26026 #undef TARGET_RTX_COSTS
26027 #define TARGET_RTX_COSTS ix86_rtx_costs
26028 #undef TARGET_ADDRESS_COST
26029 #define TARGET_ADDRESS_COST ix86_address_cost
26031 #undef TARGET_FIXED_CONDITION_CODE_REGS
26032 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26033 #undef TARGET_CC_MODES_COMPATIBLE
26034 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26036 #undef TARGET_MACHINE_DEPENDENT_REORG
26037 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26039 #undef TARGET_BUILD_BUILTIN_VA_LIST
26040 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26042 #undef TARGET_EXPAND_BUILTIN_VA_START
26043 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26045 #undef TARGET_MD_ASM_CLOBBERS
26046 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
26048 #undef TARGET_PROMOTE_PROTOTYPES
26049 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26050 #undef TARGET_STRUCT_VALUE_RTX
26051 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
26052 #undef TARGET_SETUP_INCOMING_VARARGS
26053 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26054 #undef TARGET_MUST_PASS_IN_STACK
26055 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26056 #undef TARGET_PASS_BY_REFERENCE
26057 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26058 #undef TARGET_INTERNAL_ARG_POINTER
26059 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26060 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
26061 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
26062 #undef TARGET_STRICT_ARGUMENT_NAMING
26063 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26065 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
26066 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26068 #undef TARGET_SCALAR_MODE_SUPPORTED_P
26069 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26071 #undef TARGET_VECTOR_MODE_SUPPORTED_P
26072 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26074 #undef TARGET_C_MODE_FOR_SUFFIX
26075 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26077 #ifdef HAVE_AS_TLS
26078 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26079 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26080 #endif
26082 #ifdef SUBTARGET_INSERT_ATTRIBUTES
26083 #undef TARGET_INSERT_ATTRIBUTES
26084 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26085 #endif
26087 #undef TARGET_MANGLE_TYPE
26088 #define TARGET_MANGLE_TYPE ix86_mangle_type
26090 #undef TARGET_STACK_PROTECT_FAIL
26091 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26093 #undef TARGET_FUNCTION_VALUE
26094 #define TARGET_FUNCTION_VALUE ix86_function_value
26096 #undef TARGET_SECONDARY_RELOAD
26097 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26099 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26100 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
26102 struct gcc_target targetm = TARGET_INITIALIZER;
26104 #include "gt-i386.h"