Revert 137452.
[official-gcc.git] / gcc / config / i386 / i386.c
bloba0edacaaaba212594e1bbd288f65f798f0f040a0
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "c-common.h"
39 #include "except.h"
40 #include "function.h"
41 #include "recog.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "basic-block.h"
46 #include "ggc.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "tree-gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
62 #endif
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
70 : 4)
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
78 static const
79 struct processor_costs size_cost = { /* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
97 0, /* "large" insn */
98 2, /* MOVE_RATIO */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 2, /* Branch cost */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
149 static const
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
169 3, /* MOVE_RATIO */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
195 1, /* Branch cost */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
219 static const
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
239 3, /* MOVE_RATIO */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
267 1, /* Branch cost */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
291 static const
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
311 6, /* MOVE_RATIO */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
337 2, /* Branch cost */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
361 static const
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
381 6, /* MOVE_RATIO */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
438 static const
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
458 4, /* MOVE_RATIO */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
485 1, /* Branch cost */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
509 static const
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
529 4, /* MOVE_RATIO */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
558 1, /* Branch cost */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
582 static const
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
602 9, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
628 5, /* Branch cost */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
655 static const
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
675 9, /* MOVE_RATIO */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
704 time). */
705 100, /* number of parallel prefetches */
706 3, /* Branch cost */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
753 9, /* MOVE_RATIO */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
775 /* On K8
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
778 On AMDFAM10
779 MOVD reg64, xmmreg Double FADD 3
780 1/1 1/1
781 MOVD reg32, xmmreg Double FADD 3
782 1/1 1/1 */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
790 time). */
791 100, /* number of parallel prefetches */
792 2, /* Branch cost */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
821 static const
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
841 6, /* MOVE_RATIO */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
867 2, /* Branch cost */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
877 {-1, libcall}}},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
892 static const
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
912 17, /* MOVE_RATIO */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
938 1, /* Branch cost */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
949 {-1, libcall}}},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
965 static const
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
985 16, /* MOVE_RATIO */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of loading integer registers */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1038 static const
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1115 static const
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling for Generic64 seems like good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro base chips. */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1220 /* X86_TUNE_PUSH_MEMORY */
1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1225 m_486 | m_PENT,
1227 /* X86_TUNE_USE_BIT_TEST */
1228 m_386,
1230 /* X86_TUNE_UNROLL_STRLEN */
1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation result. But after P4 was made, no performance benefit
1238 was observed with branch hints. It also increases the code size.
1239 As a result, icc never generates branch hints. */
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1243 ~m_386,
1245 /* X86_TUNE_USE_SAHF */
1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1247 | m_NOCONA | m_CORE2 | m_GENERIC,
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1250 partial dependencies. */
1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on Generic32 compilation setting as well. However
1256 in current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and is in conflict with partial reg
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1262 m_PPRO,
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1270 /* X86_TUNE_USE_SIMODE_FIOP */
1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1273 /* X86_TUNE_USE_MOV0 */
1274 m_K6,
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1280 m_PENT4,
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1283 m_PPRO,
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1286 ~m_PENT,
1288 /* X86_TUNE_READ_MODIFY */
1289 ~(m_PENT | m_PPRO),
1291 /* X86_TUNE_PROMOTE_QIMODE */
1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1293 | m_GENERIC /* | m_PENT4 ? */,
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
1301 /* X86_TUNE_QIMODE_MATH */
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1308 ~m_PPRO,
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1314 m_PPRO,
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319 /* X86_TUNE_ADD_ESP_8 */
1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_4 */
1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_8 */
1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1333 | m_GENERIC | m_GEODE),
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here in between PPro/Pentium4 based chips that thread 128bit
1340 SSE registers as single units versus K8 based chips that divide SSE
1341 registers to two 64bit halves. This knob promotes all store destinations
1342 to be 128bit to allow register renaming on 128bit SSE units, but usually
1343 results in one extra microop on 64bit SSE units. Experimental results
1344 shows that disabling this option on P4 brings over 20% SPECfp regression,
1345 while enabling it on K8 brings roughly 2.4% regression that can be partly
1346 masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1350 m_AMDFAM10,
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just lower part of scalar values in proper format leaving the
1355 upper part undefined. */
1356 m_ATHLON_K8,
1358 /* X86_TUNE_SSE_TYPELESS_STORES */
1359 m_AMD_MULTIPLE,
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_SHIFT1 */
1374 ~m_486,
1376 /* X86_TUNE_USE_FFREEP */
1377 m_AMD_MULTIPLE,
1379 /* X86_TUNE_INTER_UNIT_MOVES */
1380 ~(m_AMD_MULTIPLE | m_GENERIC),
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1383 ~(m_AMDFAM10),
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in the 16 byte window. */
1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_SCHEDULE */
1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_USE_BT */
1393 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1395 /* X86_TUNE_USE_INCDEC */
1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1398 /* X86_TUNE_PAD_RETURNS */
1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1405 ~m_K8,
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1408 m_K8 | m_GENERIC64,
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1412 ~(m_386 | m_486),
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415 vector path on AMD machines. */
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1419 machines. */
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1423 than a MOV. */
1424 m_PENT,
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427 but one byte longer. */
1428 m_PENT,
1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1431 operand that cannot be represented using a modRM byte. The XOR
1432 replacement is long decoded, so this split helps here as well. */
1433 m_K6,
1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1436 from integer to FP. */
1437 m_AMDFAM10,
1439 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1440 with a subsequent conditional jump instruction into a single
1441 compare-and-branch uop. */
1442 m_CORE2,
1445 /* Feature tests against the various architecture variations. */
1446 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1447 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1448 ~(m_386 | m_486 | m_PENT | m_K6),
1450 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1451 ~m_386,
1453 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1454 ~(m_386 | m_486),
1456 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1457 ~m_386,
1459 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1460 ~m_386,
1463 static const unsigned int x86_accumulate_outgoing_args
1464 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1466 static const unsigned int x86_arch_always_fancy_math_387
1467 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1468 | m_NOCONA | m_CORE2 | m_GENERIC;
1470 static enum stringop_alg stringop_alg = no_stringop;
1472 /* In case the average insn count for single function invocation is
1473 lower than this constant, emit fast (but longer) prologue and
1474 epilogue code. */
1475 #define FAST_PROLOGUE_INSN_COUNT 20
1477 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1478 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1479 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1480 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1482 /* Array of the smallest class containing reg number REGNO, indexed by
1483 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1485 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1487 /* ax, dx, cx, bx */
1488 AREG, DREG, CREG, BREG,
1489 /* si, di, bp, sp */
1490 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1491 /* FP registers */
1492 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1493 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1494 /* arg pointer */
1495 NON_Q_REGS,
1496 /* flags, fpsr, fpcr, frame */
1497 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1498 /* SSE registers */
1499 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1500 SSE_REGS, SSE_REGS,
1501 /* MMX registers */
1502 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1503 MMX_REGS, MMX_REGS,
1504 /* REX registers */
1505 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1506 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1507 /* SSE REX registers */
1508 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1509 SSE_REGS, SSE_REGS,
1512 /* The "default" register map used in 32bit mode. */
1514 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1516 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1517 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1519 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1520 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1521 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1522 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1525 static int const x86_64_int_parameter_registers[6] =
1527 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1528 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1531 static int const x86_64_ms_abi_int_parameter_registers[4] =
1533 2 /*RCX*/, 1 /*RDX*/,
1534 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1537 static int const x86_64_int_return_registers[4] =
1539 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1542 /* The "default" register map used in 64bit mode. */
1543 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1545 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1546 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1547 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1548 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1549 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1550 8,9,10,11,12,13,14,15, /* extended integer registers */
1551 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1554 /* Define the register numbers to be used in Dwarf debugging information.
1555 The SVR4 reference port C compiler uses the following register numbers
1556 in its Dwarf output code:
1557 0 for %eax (gcc regno = 0)
1558 1 for %ecx (gcc regno = 2)
1559 2 for %edx (gcc regno = 1)
1560 3 for %ebx (gcc regno = 3)
1561 4 for %esp (gcc regno = 7)
1562 5 for %ebp (gcc regno = 6)
1563 6 for %esi (gcc regno = 4)
1564 7 for %edi (gcc regno = 5)
1565 The following three DWARF register numbers are never generated by
1566 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1567 believes these numbers have these meanings.
1568 8 for %eip (no gcc equivalent)
1569 9 for %eflags (gcc regno = 17)
1570 10 for %trapno (no gcc equivalent)
1571 It is not at all clear how we should number the FP stack registers
1572 for the x86 architecture. If the version of SDB on x86/svr4 were
1573 a bit less brain dead with respect to floating-point then we would
1574 have a precedent to follow with respect to DWARF register numbers
1575 for x86 FP registers, but the SDB on x86/svr4 is so completely
1576 broken with respect to FP registers that it is hardly worth thinking
1577 of it as something to strive for compatibility with.
1578 The version of x86/svr4 SDB I have at the moment does (partially)
1579 seem to believe that DWARF register number 11 is associated with
1580 the x86 register %st(0), but that's about all. Higher DWARF
1581 register numbers don't seem to be associated with anything in
1582 particular, and even for DWARF regno 11, SDB only seems to under-
1583 stand that it should say that a variable lives in %st(0) (when
1584 asked via an `=' command) if we said it was in DWARF regno 11,
1585 but SDB still prints garbage when asked for the value of the
1586 variable in question (via a `/' command).
1587 (Also note that the labels SDB prints for various FP stack regs
1588 when doing an `x' command are all wrong.)
1589 Note that these problems generally don't affect the native SVR4
1590 C compiler because it doesn't allow the use of -O with -g and
1591 because when it is *not* optimizing, it allocates a memory
1592 location for each floating-point variable, and the memory
1593 location is what gets described in the DWARF AT_location
1594 attribute for the variable in question.
1595 Regardless of the severe mental illness of the x86/svr4 SDB, we
1596 do something sensible here and we use the following DWARF
1597 register numbers. Note that these are all stack-top-relative
1598 numbers.
1599 11 for %st(0) (gcc regno = 8)
1600 12 for %st(1) (gcc regno = 9)
1601 13 for %st(2) (gcc regno = 10)
1602 14 for %st(3) (gcc regno = 11)
1603 15 for %st(4) (gcc regno = 12)
1604 16 for %st(5) (gcc regno = 13)
1605 17 for %st(6) (gcc regno = 14)
1606 18 for %st(7) (gcc regno = 15)
1608 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1610 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1611 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1612 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1613 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1614 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1615 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1616 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1619 /* Test and compare insns in i386.md store the information needed to
1620 generate branch and scc insns here. */
1622 rtx ix86_compare_op0 = NULL_RTX;
1623 rtx ix86_compare_op1 = NULL_RTX;
1624 rtx ix86_compare_emitted = NULL_RTX;
1626 /* Size of the register save area. */
1627 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
1629 /* Define the structure for the machine field in struct function. */
1631 struct stack_local_entry GTY(())
1633 unsigned short mode;
1634 unsigned short n;
1635 rtx rtl;
1636 struct stack_local_entry *next;
1639 /* Structure describing stack frame layout.
1640 Stack grows downward:
1642 [arguments]
1643 <- ARG_POINTER
1644 saved pc
1646 saved frame pointer if frame_pointer_needed
1647 <- HARD_FRAME_POINTER
1648 [saved regs]
1650 [padding1] \
1652 [va_arg registers] (
1653 > to_allocate <- FRAME_POINTER
1654 [frame] (
1656 [padding2] /
1658 struct ix86_frame
1660 int nregs;
1661 int padding1;
1662 int va_arg_size;
1663 HOST_WIDE_INT frame;
1664 int padding2;
1665 int outgoing_arguments_size;
1666 int red_zone_size;
1668 HOST_WIDE_INT to_allocate;
1669 /* The offsets relative to ARG_POINTER. */
1670 HOST_WIDE_INT frame_pointer_offset;
1671 HOST_WIDE_INT hard_frame_pointer_offset;
1672 HOST_WIDE_INT stack_pointer_offset;
1674 /* When save_regs_using_mov is set, emit prologue using
1675 move instead of push instructions. */
1676 bool save_regs_using_mov;
1679 /* Code model option. */
1680 enum cmodel ix86_cmodel;
1681 /* Asm dialect. */
1682 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1683 /* TLS dialects. */
1684 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1686 /* Which unit we are generating floating point math for. */
1687 enum fpmath_unit ix86_fpmath;
1689 /* Which cpu are we scheduling for. */
1690 enum processor_type ix86_tune;
1692 /* Which instruction set architecture to use. */
1693 enum processor_type ix86_arch;
1695 /* true if sse prefetch instruction is not NOOP. */
1696 int x86_prefetch_sse;
1698 /* ix86_regparm_string as a number */
1699 static int ix86_regparm;
1701 /* -mstackrealign option */
1702 extern int ix86_force_align_arg_pointer;
1703 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1705 static rtx (*ix86_gen_leave) (void);
1706 static rtx (*ix86_gen_pop1) (rtx);
1707 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1708 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1709 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1710 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1711 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1713 /* Preferred alignment for stack boundary in bits. */
1714 unsigned int ix86_preferred_stack_boundary;
1716 /* Values 1-5: see jump.c */
1717 int ix86_branch_cost;
1719 /* Variables which are this size or smaller are put in the data/bss
1720 or ldata/lbss sections. */
1722 int ix86_section_threshold = 65536;
1724 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1725 char internal_label_prefix[16];
1726 int internal_label_prefix_len;
1728 /* Fence to use after loop using movnt. */
1729 tree x86_mfence;
1731 /* Register class used for passing given 64bit part of the argument.
1732 These represent classes as documented by the PS ABI, with the exception
1733 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1734 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1736 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1737 whenever possible (upper half does contain padding). */
1738 enum x86_64_reg_class
1740 X86_64_NO_CLASS,
1741 X86_64_INTEGER_CLASS,
1742 X86_64_INTEGERSI_CLASS,
1743 X86_64_SSE_CLASS,
1744 X86_64_SSESF_CLASS,
1745 X86_64_SSEDF_CLASS,
1746 X86_64_SSEUP_CLASS,
1747 X86_64_X87_CLASS,
1748 X86_64_X87UP_CLASS,
1749 X86_64_COMPLEX_X87_CLASS,
1750 X86_64_MEMORY_CLASS
1752 static const char * const x86_64_reg_class_name[] =
1754 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1755 "sseup", "x87", "x87up", "cplx87", "no"
1758 #define MAX_CLASSES 4
1760 /* Table of constants used by fldpi, fldln2, etc.... */
1761 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1762 static bool ext_80387_constants_init = 0;
1765 static struct machine_function * ix86_init_machine_status (void);
1766 static rtx ix86_function_value (const_tree, const_tree, bool);
1767 static int ix86_function_regparm (const_tree, const_tree);
1768 static void ix86_compute_frame_layout (struct ix86_frame *);
1769 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1770 rtx, rtx, int);
1773 /* The svr4 ABI for the i386 says that records and unions are returned
1774 in memory. */
1775 #ifndef DEFAULT_PCC_STRUCT_RETURN
1776 #define DEFAULT_PCC_STRUCT_RETURN 1
1777 #endif
1779 /* Bit flags that specify the ISA we are compiling for. */
1780 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1782 /* A mask of ix86_isa_flags that includes bit X if X
1783 was set or cleared on the command line. */
1784 static int ix86_isa_flags_explicit;
1786 /* Define a set of ISAs which are available when a given ISA is
1787 enabled. MMX and SSE ISAs are handled separately. */
1789 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1790 #define OPTION_MASK_ISA_3DNOW_SET \
1791 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1793 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1794 #define OPTION_MASK_ISA_SSE2_SET \
1795 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1796 #define OPTION_MASK_ISA_SSE3_SET \
1797 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1798 #define OPTION_MASK_ISA_SSSE3_SET \
1799 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1800 #define OPTION_MASK_ISA_SSE4_1_SET \
1801 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1802 #define OPTION_MASK_ISA_SSE4_2_SET \
1803 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1805 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1806 as -msse4.2. */
1807 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1809 #define OPTION_MASK_ISA_SSE4A_SET \
1810 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1811 #define OPTION_MASK_ISA_SSE5_SET \
1812 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1814 /* Define a set of ISAs which aren't available when a given ISA is
1815 disabled. MMX and SSE ISAs are handled separately. */
1817 #define OPTION_MASK_ISA_MMX_UNSET \
1818 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1819 #define OPTION_MASK_ISA_3DNOW_UNSET \
1820 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1821 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1823 #define OPTION_MASK_ISA_SSE_UNSET \
1824 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1825 #define OPTION_MASK_ISA_SSE2_UNSET \
1826 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1827 #define OPTION_MASK_ISA_SSE3_UNSET \
1828 (OPTION_MASK_ISA_SSE3 \
1829 | OPTION_MASK_ISA_SSSE3_UNSET \
1830 | OPTION_MASK_ISA_SSE4A_UNSET )
1831 #define OPTION_MASK_ISA_SSSE3_UNSET \
1832 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1833 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1834 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1835 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1837 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1838 as -mno-sse4.1. */
1839 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1841 #define OPTION_MASK_ISA_SSE4A_UNSET \
1842 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1844 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1846 /* Vectorization library interface and handlers. */
1847 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1848 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1849 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1851 /* Implement TARGET_HANDLE_OPTION. */
1853 static bool
1854 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1856 switch (code)
1858 case OPT_mmmx:
1859 if (value)
1861 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1864 else
1866 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1869 return true;
1871 case OPT_m3dnow:
1872 if (value)
1874 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1875 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1877 else
1879 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1880 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1882 return true;
1884 case OPT_m3dnowa:
1885 return false;
1887 case OPT_msse:
1888 if (value)
1890 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1891 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1893 else
1895 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1896 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1898 return true;
1900 case OPT_msse2:
1901 if (value)
1903 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1904 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1906 else
1908 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1909 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1911 return true;
1913 case OPT_msse3:
1914 if (value)
1916 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1917 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1919 else
1921 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1922 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1924 return true;
1926 case OPT_mssse3:
1927 if (value)
1929 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1930 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1932 else
1934 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1935 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1937 return true;
1939 case OPT_msse4_1:
1940 if (value)
1942 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1943 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1945 else
1947 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1948 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1950 return true;
1952 case OPT_msse4_2:
1953 if (value)
1955 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1956 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1958 else
1960 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1961 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1963 return true;
1965 case OPT_msse4:
1966 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1967 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1968 return true;
1970 case OPT_mno_sse4:
1971 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1972 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1973 return true;
1975 case OPT_msse4a:
1976 if (value)
1978 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1979 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1981 else
1983 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1984 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1986 return true;
1988 case OPT_msse5:
1989 if (value)
1991 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1992 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1994 else
1996 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1997 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1999 return true;
2001 default:
2002 return true;
2006 /* Sometimes certain combinations of command options do not make
2007 sense on a particular target machine. You can define a macro
2008 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2009 defined, is executed once just after all the command options have
2010 been parsed.
2012 Don't use this macro to turn on various extra optimizations for
2013 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2015 void
2016 override_options (void)
2018 int i;
2019 int ix86_tune_defaulted = 0;
2020 int ix86_arch_specified = 0;
2021 unsigned int ix86_arch_mask, ix86_tune_mask;
2023 /* Comes from final.c -- no real reason to change it. */
2024 #define MAX_CODE_ALIGN 16
2026 static struct ptt
2028 const struct processor_costs *cost; /* Processor costs */
2029 const int align_loop; /* Default alignments. */
2030 const int align_loop_max_skip;
2031 const int align_jump;
2032 const int align_jump_max_skip;
2033 const int align_func;
2035 const processor_target_table[PROCESSOR_max] =
2037 {&i386_cost, 4, 3, 4, 3, 4},
2038 {&i486_cost, 16, 15, 16, 15, 16},
2039 {&pentium_cost, 16, 7, 16, 7, 16},
2040 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2041 {&geode_cost, 0, 0, 0, 0, 0},
2042 {&k6_cost, 32, 7, 32, 7, 32},
2043 {&athlon_cost, 16, 7, 16, 7, 16},
2044 {&pentium4_cost, 0, 0, 0, 0, 0},
2045 {&k8_cost, 16, 7, 16, 7, 16},
2046 {&nocona_cost, 0, 0, 0, 0, 0},
2047 {&core2_cost, 16, 10, 16, 10, 16},
2048 {&generic32_cost, 16, 7, 16, 7, 16},
2049 {&generic64_cost, 16, 10, 16, 10, 16},
2050 {&amdfam10_cost, 32, 24, 32, 7, 32}
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2055 "generic",
2056 "i386",
2057 "i486",
2058 "pentium",
2059 "pentium-mmx",
2060 "pentiumpro",
2061 "pentium2",
2062 "pentium3",
2063 "pentium4",
2064 "pentium-m",
2065 "prescott",
2066 "nocona",
2067 "core2",
2068 "geode",
2069 "k6",
2070 "k6-2",
2071 "k6-3",
2072 "athlon",
2073 "athlon-4",
2074 "k8",
2075 "amdfam10"
2078 enum pta_flags
2080 PTA_SSE = 1 << 0,
2081 PTA_SSE2 = 1 << 1,
2082 PTA_SSE3 = 1 << 2,
2083 PTA_MMX = 1 << 3,
2084 PTA_PREFETCH_SSE = 1 << 4,
2085 PTA_3DNOW = 1 << 5,
2086 PTA_3DNOW_A = 1 << 6,
2087 PTA_64BIT = 1 << 7,
2088 PTA_SSSE3 = 1 << 8,
2089 PTA_CX16 = 1 << 9,
2090 PTA_POPCNT = 1 << 10,
2091 PTA_ABM = 1 << 11,
2092 PTA_SSE4A = 1 << 12,
2093 PTA_NO_SAHF = 1 << 13,
2094 PTA_SSE4_1 = 1 << 14,
2095 PTA_SSE4_2 = 1 << 15,
2096 PTA_SSE5 = 1 << 16,
2097 PTA_AES = 1 << 17,
2098 PTA_PCLMUL = 1 << 18
2101 static struct pta
2103 const char *const name; /* processor name or nickname. */
2104 const enum processor_type processor;
2105 const unsigned /*enum pta_flags*/ flags;
2107 const processor_alias_table[] =
2109 {"i386", PROCESSOR_I386, 0},
2110 {"i486", PROCESSOR_I486, 0},
2111 {"i586", PROCESSOR_PENTIUM, 0},
2112 {"pentium", PROCESSOR_PENTIUM, 0},
2113 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2114 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2115 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2116 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2117 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2118 {"i686", PROCESSOR_PENTIUMPRO, 0},
2119 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2120 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2121 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2122 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2123 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2124 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2125 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2126 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2127 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2128 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2129 | PTA_CX16 | PTA_NO_SAHF)},
2130 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2131 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2132 | PTA_SSSE3
2133 | PTA_CX16)},
2134 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2135 |PTA_PREFETCH_SSE)},
2136 {"k6", PROCESSOR_K6, PTA_MMX},
2137 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2138 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2139 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2140 | PTA_PREFETCH_SSE)},
2141 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2142 | PTA_PREFETCH_SSE)},
2143 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2144 | PTA_SSE)},
2145 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2146 | PTA_SSE)},
2147 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2148 | PTA_SSE)},
2149 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2150 | PTA_MMX | PTA_SSE | PTA_SSE2
2151 | PTA_NO_SAHF)},
2152 {"k8", PROCESSOR_K8, (PTA_64BIT
2153 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2154 | PTA_SSE | PTA_SSE2
2155 | PTA_NO_SAHF)},
2156 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2157 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2158 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2159 | PTA_NO_SAHF)},
2160 {"opteron", PROCESSOR_K8, (PTA_64BIT
2161 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2162 | PTA_SSE | PTA_SSE2
2163 | PTA_NO_SAHF)},
2164 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2165 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2166 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2167 | PTA_NO_SAHF)},
2168 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2169 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2170 | PTA_SSE | PTA_SSE2
2171 | PTA_NO_SAHF)},
2172 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2175 | PTA_NO_SAHF)},
2176 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2177 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2178 | PTA_SSE | PTA_SSE2
2179 | PTA_NO_SAHF)},
2180 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2181 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2182 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2183 | PTA_SSE4A
2184 | PTA_CX16 | PTA_ABM)},
2185 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2186 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2187 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2188 | PTA_SSE4A
2189 | PTA_CX16 | PTA_ABM)},
2190 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2191 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2194 int const pta_size = ARRAY_SIZE (processor_alias_table);
2196 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2197 SUBTARGET_OVERRIDE_OPTIONS;
2198 #endif
2200 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2201 SUBSUBTARGET_OVERRIDE_OPTIONS;
2202 #endif
2204 /* -fPIC is the default for x86_64. */
2205 if (TARGET_MACHO && TARGET_64BIT)
2206 flag_pic = 2;
2208 /* Set the default values for switches whose default depends on TARGET_64BIT
2209 in case they weren't overwritten by command line options. */
2210 if (TARGET_64BIT)
2212 /* Mach-O doesn't support omitting the frame pointer for now. */
2213 if (flag_omit_frame_pointer == 2)
2214 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2215 if (flag_asynchronous_unwind_tables == 2)
2216 flag_asynchronous_unwind_tables = 1;
2217 if (flag_pcc_struct_return == 2)
2218 flag_pcc_struct_return = 0;
2220 else
2222 if (flag_omit_frame_pointer == 2)
2223 flag_omit_frame_pointer = 0;
2224 if (flag_asynchronous_unwind_tables == 2)
2225 flag_asynchronous_unwind_tables = 0;
2226 if (flag_pcc_struct_return == 2)
2227 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2230 /* Need to check -mtune=generic first. */
2231 if (ix86_tune_string)
2233 if (!strcmp (ix86_tune_string, "generic")
2234 || !strcmp (ix86_tune_string, "i686")
2235 /* As special support for cross compilers we read -mtune=native
2236 as -mtune=generic. With native compilers we won't see the
2237 -mtune=native, as it was changed by the driver. */
2238 || !strcmp (ix86_tune_string, "native"))
2240 if (TARGET_64BIT)
2241 ix86_tune_string = "generic64";
2242 else
2243 ix86_tune_string = "generic32";
2245 else if (!strncmp (ix86_tune_string, "generic", 7))
2246 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2248 else
2250 if (ix86_arch_string)
2251 ix86_tune_string = ix86_arch_string;
2252 if (!ix86_tune_string)
2254 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2255 ix86_tune_defaulted = 1;
2258 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2259 need to use a sensible tune option. */
2260 if (!strcmp (ix86_tune_string, "generic")
2261 || !strcmp (ix86_tune_string, "x86-64")
2262 || !strcmp (ix86_tune_string, "i686"))
2264 if (TARGET_64BIT)
2265 ix86_tune_string = "generic64";
2266 else
2267 ix86_tune_string = "generic32";
2270 if (ix86_stringop_string)
2272 if (!strcmp (ix86_stringop_string, "rep_byte"))
2273 stringop_alg = rep_prefix_1_byte;
2274 else if (!strcmp (ix86_stringop_string, "libcall"))
2275 stringop_alg = libcall;
2276 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2277 stringop_alg = rep_prefix_4_byte;
2278 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2279 stringop_alg = rep_prefix_8_byte;
2280 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2281 stringop_alg = loop_1_byte;
2282 else if (!strcmp (ix86_stringop_string, "loop"))
2283 stringop_alg = loop;
2284 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2285 stringop_alg = unrolled_loop;
2286 else
2287 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2289 if (!strcmp (ix86_tune_string, "x86-64"))
2290 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2291 "-mtune=generic instead as appropriate.");
2293 if (!ix86_arch_string)
2294 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2295 else
2296 ix86_arch_specified = 1;
2298 if (!strcmp (ix86_arch_string, "generic"))
2299 error ("generic CPU can be used only for -mtune= switch");
2300 if (!strncmp (ix86_arch_string, "generic", 7))
2301 error ("bad value (%s) for -march= switch", ix86_arch_string);
2303 if (ix86_cmodel_string != 0)
2305 if (!strcmp (ix86_cmodel_string, "small"))
2306 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2307 else if (!strcmp (ix86_cmodel_string, "medium"))
2308 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2309 else if (!strcmp (ix86_cmodel_string, "large"))
2310 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2311 else if (flag_pic)
2312 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2313 else if (!strcmp (ix86_cmodel_string, "32"))
2314 ix86_cmodel = CM_32;
2315 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2316 ix86_cmodel = CM_KERNEL;
2317 else
2318 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2320 else
2322 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2323 use of rip-relative addressing. This eliminates fixups that
2324 would otherwise be needed if this object is to be placed in a
2325 DLL, and is essentially just as efficient as direct addressing. */
2326 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2327 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2328 else if (TARGET_64BIT)
2329 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2330 else
2331 ix86_cmodel = CM_32;
2333 if (ix86_asm_string != 0)
2335 if (! TARGET_MACHO
2336 && !strcmp (ix86_asm_string, "intel"))
2337 ix86_asm_dialect = ASM_INTEL;
2338 else if (!strcmp (ix86_asm_string, "att"))
2339 ix86_asm_dialect = ASM_ATT;
2340 else
2341 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2343 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2344 error ("code model %qs not supported in the %s bit mode",
2345 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2346 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2347 sorry ("%i-bit mode not compiled in",
2348 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2350 for (i = 0; i < pta_size; i++)
2351 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2353 ix86_arch = processor_alias_table[i].processor;
2354 /* Default cpu tuning to the architecture. */
2355 ix86_tune = ix86_arch;
2357 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2358 error ("CPU you selected does not support x86-64 "
2359 "instruction set");
2361 if (processor_alias_table[i].flags & PTA_MMX
2362 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2363 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2364 if (processor_alias_table[i].flags & PTA_3DNOW
2365 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2366 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2367 if (processor_alias_table[i].flags & PTA_3DNOW_A
2368 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2369 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2370 if (processor_alias_table[i].flags & PTA_SSE
2371 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2372 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2373 if (processor_alias_table[i].flags & PTA_SSE2
2374 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2375 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2376 if (processor_alias_table[i].flags & PTA_SSE3
2377 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2378 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2379 if (processor_alias_table[i].flags & PTA_SSSE3
2380 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2381 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2382 if (processor_alias_table[i].flags & PTA_SSE4_1
2383 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2384 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2385 if (processor_alias_table[i].flags & PTA_SSE4_2
2386 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2387 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2388 if (processor_alias_table[i].flags & PTA_SSE4A
2389 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2390 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2391 if (processor_alias_table[i].flags & PTA_SSE5
2392 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2393 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2395 if (processor_alias_table[i].flags & PTA_ABM)
2396 x86_abm = true;
2397 if (processor_alias_table[i].flags & PTA_CX16)
2398 x86_cmpxchg16b = true;
2399 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2400 x86_popcnt = true;
2401 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2402 x86_prefetch_sse = true;
2403 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2404 x86_sahf = true;
2405 if (processor_alias_table[i].flags & PTA_AES)
2406 x86_aes = true;
2407 if (processor_alias_table[i].flags & PTA_PCLMUL)
2408 x86_pclmul = true;
2410 break;
2413 if (i == pta_size)
2414 error ("bad value (%s) for -march= switch", ix86_arch_string);
2416 ix86_arch_mask = 1u << ix86_arch;
2417 for (i = 0; i < X86_ARCH_LAST; ++i)
2418 ix86_arch_features[i] &= ix86_arch_mask;
2420 for (i = 0; i < pta_size; i++)
2421 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2423 ix86_tune = processor_alias_table[i].processor;
2424 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2426 if (ix86_tune_defaulted)
2428 ix86_tune_string = "x86-64";
2429 for (i = 0; i < pta_size; i++)
2430 if (! strcmp (ix86_tune_string,
2431 processor_alias_table[i].name))
2432 break;
2433 ix86_tune = processor_alias_table[i].processor;
2435 else
2436 error ("CPU you selected does not support x86-64 "
2437 "instruction set");
2439 /* Intel CPUs have always interpreted SSE prefetch instructions as
2440 NOPs; so, we can enable SSE prefetch instructions even when
2441 -mtune (rather than -march) points us to a processor that has them.
2442 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2443 higher processors. */
2444 if (TARGET_CMOVE
2445 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2446 x86_prefetch_sse = true;
2447 break;
2449 if (i == pta_size)
2450 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2452 /* Enable SSE2 if AES or PCLMUL is enabled. */
2453 if ((x86_aes || x86_pclmul)
2454 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2456 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2457 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2460 ix86_tune_mask = 1u << ix86_tune;
2461 for (i = 0; i < X86_TUNE_LAST; ++i)
2462 ix86_tune_features[i] &= ix86_tune_mask;
2464 if (optimize_size)
2465 ix86_cost = &size_cost;
2466 else
2467 ix86_cost = processor_target_table[ix86_tune].cost;
2469 /* Arrange to set up i386_stack_locals for all functions. */
2470 init_machine_status = ix86_init_machine_status;
2472 /* Validate -mregparm= value. */
2473 if (ix86_regparm_string)
2475 if (TARGET_64BIT)
2476 warning (0, "-mregparm is ignored in 64-bit mode");
2477 i = atoi (ix86_regparm_string);
2478 if (i < 0 || i > REGPARM_MAX)
2479 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2480 else
2481 ix86_regparm = i;
2483 if (TARGET_64BIT)
2484 ix86_regparm = REGPARM_MAX;
2486 /* If the user has provided any of the -malign-* options,
2487 warn and use that value only if -falign-* is not set.
2488 Remove this code in GCC 3.2 or later. */
2489 if (ix86_align_loops_string)
2491 warning (0, "-malign-loops is obsolete, use -falign-loops");
2492 if (align_loops == 0)
2494 i = atoi (ix86_align_loops_string);
2495 if (i < 0 || i > MAX_CODE_ALIGN)
2496 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2497 else
2498 align_loops = 1 << i;
2502 if (ix86_align_jumps_string)
2504 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2505 if (align_jumps == 0)
2507 i = atoi (ix86_align_jumps_string);
2508 if (i < 0 || i > MAX_CODE_ALIGN)
2509 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2510 else
2511 align_jumps = 1 << i;
2515 if (ix86_align_funcs_string)
2517 warning (0, "-malign-functions is obsolete, use -falign-functions");
2518 if (align_functions == 0)
2520 i = atoi (ix86_align_funcs_string);
2521 if (i < 0 || i > MAX_CODE_ALIGN)
2522 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2523 else
2524 align_functions = 1 << i;
2528 /* Default align_* from the processor table. */
2529 if (align_loops == 0)
2531 align_loops = processor_target_table[ix86_tune].align_loop;
2532 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2534 if (align_jumps == 0)
2536 align_jumps = processor_target_table[ix86_tune].align_jump;
2537 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2539 if (align_functions == 0)
2541 align_functions = processor_target_table[ix86_tune].align_func;
2544 /* Validate -mbranch-cost= value, or provide default. */
2545 ix86_branch_cost = ix86_cost->branch_cost;
2546 if (ix86_branch_cost_string)
2548 i = atoi (ix86_branch_cost_string);
2549 if (i < 0 || i > 5)
2550 error ("-mbranch-cost=%d is not between 0 and 5", i);
2551 else
2552 ix86_branch_cost = i;
2554 if (ix86_section_threshold_string)
2556 i = atoi (ix86_section_threshold_string);
2557 if (i < 0)
2558 error ("-mlarge-data-threshold=%d is negative", i);
2559 else
2560 ix86_section_threshold = i;
2563 if (ix86_tls_dialect_string)
2565 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2566 ix86_tls_dialect = TLS_DIALECT_GNU;
2567 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2568 ix86_tls_dialect = TLS_DIALECT_GNU2;
2569 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2570 ix86_tls_dialect = TLS_DIALECT_SUN;
2571 else
2572 error ("bad value (%s) for -mtls-dialect= switch",
2573 ix86_tls_dialect_string);
2576 if (ix87_precision_string)
2578 i = atoi (ix87_precision_string);
2579 if (i != 32 && i != 64 && i != 80)
2580 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2583 if (TARGET_64BIT)
2585 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2587 /* Enable by default the SSE and MMX builtins. Do allow the user to
2588 explicitly disable any of these. In particular, disabling SSE and
2589 MMX for kernel code is extremely useful. */
2590 if (!ix86_arch_specified)
2591 ix86_isa_flags
2592 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2593 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2595 if (TARGET_RTD)
2596 warning (0, "-mrtd is ignored in 64bit mode");
2598 else
2600 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2602 if (!ix86_arch_specified)
2603 ix86_isa_flags
2604 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2606 /* i386 ABI does not specify red zone. It still makes sense to use it
2607 when programmer takes care to stack from being destroyed. */
2608 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2609 target_flags |= MASK_NO_RED_ZONE;
2612 /* Keep nonleaf frame pointers. */
2613 if (flag_omit_frame_pointer)
2614 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2615 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2616 flag_omit_frame_pointer = 1;
2618 /* If we're doing fast math, we don't care about comparison order
2619 wrt NaNs. This lets us use a shorter comparison sequence. */
2620 if (flag_finite_math_only)
2621 target_flags &= ~MASK_IEEE_FP;
2623 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2624 since the insns won't need emulation. */
2625 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2626 target_flags &= ~MASK_NO_FANCY_MATH_387;
2628 /* Likewise, if the target doesn't have a 387, or we've specified
2629 software floating point, don't use 387 inline intrinsics. */
2630 if (!TARGET_80387)
2631 target_flags |= MASK_NO_FANCY_MATH_387;
2633 /* Turn on MMX builtins for -msse. */
2634 if (TARGET_SSE)
2636 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2637 x86_prefetch_sse = true;
2640 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2641 if (TARGET_SSE4_2 || TARGET_ABM)
2642 x86_popcnt = true;
2644 /* Validate -mpreferred-stack-boundary= value, or provide default.
2645 The default of 128 bits is for Pentium III's SSE __m128. We can't
2646 change it because of optimize_size. Otherwise, we can't mix object
2647 files compiled with -Os and -On. */
2648 ix86_preferred_stack_boundary = 128;
2649 if (ix86_preferred_stack_boundary_string)
2651 i = atoi (ix86_preferred_stack_boundary_string);
2652 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2653 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2654 TARGET_64BIT ? 4 : 2);
2655 else
2656 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2659 /* Accept -msseregparm only if at least SSE support is enabled. */
2660 if (TARGET_SSEREGPARM
2661 && ! TARGET_SSE)
2662 error ("-msseregparm used without SSE enabled");
2664 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2665 if (ix86_fpmath_string != 0)
2667 if (! strcmp (ix86_fpmath_string, "387"))
2668 ix86_fpmath = FPMATH_387;
2669 else if (! strcmp (ix86_fpmath_string, "sse"))
2671 if (!TARGET_SSE)
2673 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2674 ix86_fpmath = FPMATH_387;
2676 else
2677 ix86_fpmath = FPMATH_SSE;
2679 else if (! strcmp (ix86_fpmath_string, "387,sse")
2680 || ! strcmp (ix86_fpmath_string, "sse,387"))
2682 if (!TARGET_SSE)
2684 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2685 ix86_fpmath = FPMATH_387;
2687 else if (!TARGET_80387)
2689 warning (0, "387 instruction set disabled, using SSE arithmetics");
2690 ix86_fpmath = FPMATH_SSE;
2692 else
2693 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2695 else
2696 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2699 /* If the i387 is disabled, then do not return values in it. */
2700 if (!TARGET_80387)
2701 target_flags &= ~MASK_FLOAT_RETURNS;
2703 /* Use external vectorized library in vectorizing intrinsics. */
2704 if (ix86_veclibabi_string)
2706 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2707 ix86_veclib_handler = ix86_veclibabi_svml;
2708 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2709 ix86_veclib_handler = ix86_veclibabi_acml;
2710 else
2711 error ("unknown vectorization library ABI type (%s) for "
2712 "-mveclibabi= switch", ix86_veclibabi_string);
2715 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2716 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2717 && !optimize_size)
2718 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2720 /* ??? Unwind info is not correct around the CFG unless either a frame
2721 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2722 unwind info generation to be aware of the CFG and propagating states
2723 around edges. */
2724 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2725 || flag_exceptions || flag_non_call_exceptions)
2726 && flag_omit_frame_pointer
2727 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2729 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2730 warning (0, "unwind tables currently require either a frame pointer "
2731 "or -maccumulate-outgoing-args for correctness");
2732 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2735 /* If stack probes are required, the space used for large function
2736 arguments on the stack must also be probed, so enable
2737 -maccumulate-outgoing-args so this happens in the prologue. */
2738 if (TARGET_STACK_PROBE
2739 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2741 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2742 warning (0, "stack probing requires -maccumulate-outgoing-args "
2743 "for correctness");
2744 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2747 /* For sane SSE instruction set generation we need fcomi instruction.
2748 It is safe to enable all CMOVE instructions. */
2749 if (TARGET_SSE)
2750 TARGET_CMOVE = 1;
2752 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2754 char *p;
2755 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2756 p = strchr (internal_label_prefix, 'X');
2757 internal_label_prefix_len = p - internal_label_prefix;
2758 *p = '\0';
2761 /* When scheduling description is not available, disable scheduler pass
2762 so it won't slow down the compilation and make x87 code slower. */
2763 if (!TARGET_SCHEDULE)
2764 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2766 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2767 set_param_value ("simultaneous-prefetches",
2768 ix86_cost->simultaneous_prefetches);
2769 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2770 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2771 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2772 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2773 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2774 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2776 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2777 can be optimized to ap = __builtin_next_arg (0).
2778 For abi switching it should be corrected. */
2779 if (!TARGET_64BIT || DEFAULT_ABI == MS_ABI)
2780 targetm.expand_builtin_va_start = NULL;
2782 if (TARGET_64BIT)
2784 ix86_gen_leave = gen_leave_rex64;
2785 ix86_gen_pop1 = gen_popdi1;
2786 ix86_gen_add3 = gen_adddi3;
2787 ix86_gen_sub3 = gen_subdi3;
2788 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
2789 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
2790 ix86_gen_monitor = gen_sse3_monitor64;
2792 else
2794 ix86_gen_leave = gen_leave;
2795 ix86_gen_pop1 = gen_popsi1;
2796 ix86_gen_add3 = gen_addsi3;
2797 ix86_gen_sub3 = gen_subsi3;
2798 ix86_gen_sub3_carry = gen_subsi3_carry;
2799 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
2800 ix86_gen_monitor = gen_sse3_monitor;
2803 #ifdef USE_IX86_CLD
2804 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
2805 if (!TARGET_64BIT)
2806 target_flags |= MASK_CLD & ~target_flags_explicit;
2807 #endif
2810 /* Return true if this goes in large data/bss. */
2812 static bool
2813 ix86_in_large_data_p (tree exp)
2815 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2816 return false;
2818 /* Functions are never large data. */
2819 if (TREE_CODE (exp) == FUNCTION_DECL)
2820 return false;
2822 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2824 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2825 if (strcmp (section, ".ldata") == 0
2826 || strcmp (section, ".lbss") == 0)
2827 return true;
2828 return false;
2830 else
2832 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2834 /* If this is an incomplete type with size 0, then we can't put it
2835 in data because it might be too big when completed. */
2836 if (!size || size > ix86_section_threshold)
2837 return true;
2840 return false;
2843 /* Switch to the appropriate section for output of DECL.
2844 DECL is either a `VAR_DECL' node or a constant of some sort.
2845 RELOC indicates whether forming the initial value of DECL requires
2846 link-time relocations. */
2848 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2849 ATTRIBUTE_UNUSED;
2851 static section *
2852 x86_64_elf_select_section (tree decl, int reloc,
2853 unsigned HOST_WIDE_INT align)
2855 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2856 && ix86_in_large_data_p (decl))
2858 const char *sname = NULL;
2859 unsigned int flags = SECTION_WRITE;
2860 switch (categorize_decl_for_section (decl, reloc))
2862 case SECCAT_DATA:
2863 sname = ".ldata";
2864 break;
2865 case SECCAT_DATA_REL:
2866 sname = ".ldata.rel";
2867 break;
2868 case SECCAT_DATA_REL_LOCAL:
2869 sname = ".ldata.rel.local";
2870 break;
2871 case SECCAT_DATA_REL_RO:
2872 sname = ".ldata.rel.ro";
2873 break;
2874 case SECCAT_DATA_REL_RO_LOCAL:
2875 sname = ".ldata.rel.ro.local";
2876 break;
2877 case SECCAT_BSS:
2878 sname = ".lbss";
2879 flags |= SECTION_BSS;
2880 break;
2881 case SECCAT_RODATA:
2882 case SECCAT_RODATA_MERGE_STR:
2883 case SECCAT_RODATA_MERGE_STR_INIT:
2884 case SECCAT_RODATA_MERGE_CONST:
2885 sname = ".lrodata";
2886 flags = 0;
2887 break;
2888 case SECCAT_SRODATA:
2889 case SECCAT_SDATA:
2890 case SECCAT_SBSS:
2891 gcc_unreachable ();
2892 case SECCAT_TEXT:
2893 case SECCAT_TDATA:
2894 case SECCAT_TBSS:
2895 /* We don't split these for medium model. Place them into
2896 default sections and hope for best. */
2897 break;
2898 case SECCAT_EMUTLS_VAR:
2899 case SECCAT_EMUTLS_TMPL:
2900 gcc_unreachable ();
2902 if (sname)
2904 /* We might get called with string constants, but get_named_section
2905 doesn't like them as they are not DECLs. Also, we need to set
2906 flags in that case. */
2907 if (!DECL_P (decl))
2908 return get_section (sname, flags, NULL);
2909 return get_named_section (decl, sname, reloc);
2912 return default_elf_select_section (decl, reloc, align);
2915 /* Build up a unique section name, expressed as a
2916 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2917 RELOC indicates whether the initial value of EXP requires
2918 link-time relocations. */
2920 static void ATTRIBUTE_UNUSED
2921 x86_64_elf_unique_section (tree decl, int reloc)
2923 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2924 && ix86_in_large_data_p (decl))
2926 const char *prefix = NULL;
2927 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2928 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2930 switch (categorize_decl_for_section (decl, reloc))
2932 case SECCAT_DATA:
2933 case SECCAT_DATA_REL:
2934 case SECCAT_DATA_REL_LOCAL:
2935 case SECCAT_DATA_REL_RO:
2936 case SECCAT_DATA_REL_RO_LOCAL:
2937 prefix = one_only ? ".ld" : ".ldata";
2938 break;
2939 case SECCAT_BSS:
2940 prefix = one_only ? ".lb" : ".lbss";
2941 break;
2942 case SECCAT_RODATA:
2943 case SECCAT_RODATA_MERGE_STR:
2944 case SECCAT_RODATA_MERGE_STR_INIT:
2945 case SECCAT_RODATA_MERGE_CONST:
2946 prefix = one_only ? ".lr" : ".lrodata";
2947 break;
2948 case SECCAT_SRODATA:
2949 case SECCAT_SDATA:
2950 case SECCAT_SBSS:
2951 gcc_unreachable ();
2952 case SECCAT_TEXT:
2953 case SECCAT_TDATA:
2954 case SECCAT_TBSS:
2955 /* We don't split these for medium model. Place them into
2956 default sections and hope for best. */
2957 break;
2958 case SECCAT_EMUTLS_VAR:
2959 prefix = targetm.emutls.var_section;
2960 break;
2961 case SECCAT_EMUTLS_TMPL:
2962 prefix = targetm.emutls.tmpl_section;
2963 break;
2965 if (prefix)
2967 const char *name, *linkonce;
2968 char *string;
2970 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2971 name = targetm.strip_name_encoding (name);
2973 /* If we're using one_only, then there needs to be a .gnu.linkonce
2974 prefix to the section name. */
2975 linkonce = one_only ? ".gnu.linkonce" : "";
2977 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2979 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
2980 return;
2983 default_unique_section (decl, reloc);
2986 #ifdef COMMON_ASM_OP
2987 /* This says how to output assembler code to declare an
2988 uninitialized external linkage data object.
2990 For medium model x86-64 we need to use .largecomm opcode for
2991 large objects. */
2992 void
2993 x86_elf_aligned_common (FILE *file,
2994 const char *name, unsigned HOST_WIDE_INT size,
2995 int align)
2997 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2998 && size > (unsigned int)ix86_section_threshold)
2999 fprintf (file, ".largecomm\t");
3000 else
3001 fprintf (file, "%s", COMMON_ASM_OP);
3002 assemble_name (file, name);
3003 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3004 size, align / BITS_PER_UNIT);
3006 #endif
3008 /* Utility function for targets to use in implementing
3009 ASM_OUTPUT_ALIGNED_BSS. */
3011 void
3012 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3013 const char *name, unsigned HOST_WIDE_INT size,
3014 int align)
3016 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3017 && size > (unsigned int)ix86_section_threshold)
3018 switch_to_section (get_named_section (decl, ".lbss", 0));
3019 else
3020 switch_to_section (bss_section);
3021 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3022 #ifdef ASM_DECLARE_OBJECT_NAME
3023 last_assemble_variable_decl = decl;
3024 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3025 #else
3026 /* Standard thing is just output label for the object. */
3027 ASM_OUTPUT_LABEL (file, name);
3028 #endif /* ASM_DECLARE_OBJECT_NAME */
3029 ASM_OUTPUT_SKIP (file, size ? size : 1);
3032 void
3033 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3035 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3036 make the problem with not enough registers even worse. */
3037 #ifdef INSN_SCHEDULING
3038 if (level > 1)
3039 flag_schedule_insns = 0;
3040 #endif
3042 if (TARGET_MACHO)
3043 /* The Darwin libraries never set errno, so we might as well
3044 avoid calling them when that's the only reason we would. */
3045 flag_errno_math = 0;
3047 /* The default values of these switches depend on the TARGET_64BIT
3048 that is not known at this moment. Mark these values with 2 and
3049 let user the to override these. In case there is no command line option
3050 specifying them, we will set the defaults in override_options. */
3051 if (optimize >= 1)
3052 flag_omit_frame_pointer = 2;
3053 flag_pcc_struct_return = 2;
3054 flag_asynchronous_unwind_tables = 2;
3055 flag_vect_cost_model = 1;
3056 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3057 SUBTARGET_OPTIMIZATION_OPTIONS;
3058 #endif
3061 /* Decide whether we can make a sibling call to a function. DECL is the
3062 declaration of the function being targeted by the call and EXP is the
3063 CALL_EXPR representing the call. */
3065 static bool
3066 ix86_function_ok_for_sibcall (tree decl, tree exp)
3068 tree func;
3069 rtx a, b;
3071 /* If we are generating position-independent code, we cannot sibcall
3072 optimize any indirect call, or a direct call to a global function,
3073 as the PLT requires %ebx be live. */
3074 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3075 return false;
3077 if (decl)
3078 func = decl;
3079 else
3081 func = TREE_TYPE (CALL_EXPR_FN (exp));
3082 if (POINTER_TYPE_P (func))
3083 func = TREE_TYPE (func);
3086 /* Check that the return value locations are the same. Like
3087 if we are returning floats on the 80387 register stack, we cannot
3088 make a sibcall from a function that doesn't return a float to a
3089 function that does or, conversely, from a function that does return
3090 a float to a function that doesn't; the necessary stack adjustment
3091 would not be executed. This is also the place we notice
3092 differences in the return value ABI. Note that it is ok for one
3093 of the functions to have void return type as long as the return
3094 value of the other is passed in a register. */
3095 a = ix86_function_value (TREE_TYPE (exp), func, false);
3096 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3097 cfun->decl, false);
3098 if (STACK_REG_P (a) || STACK_REG_P (b))
3100 if (!rtx_equal_p (a, b))
3101 return false;
3103 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3105 else if (!rtx_equal_p (a, b))
3106 return false;
3108 /* If this call is indirect, we'll need to be able to use a call-clobbered
3109 register for the address of the target function. Make sure that all
3110 such registers are not used for passing parameters. */
3111 if (!decl && !TARGET_64BIT)
3113 tree type;
3115 /* We're looking at the CALL_EXPR, we need the type of the function. */
3116 type = CALL_EXPR_FN (exp); /* pointer expression */
3117 type = TREE_TYPE (type); /* pointer type */
3118 type = TREE_TYPE (type); /* function type */
3120 if (ix86_function_regparm (type, NULL) >= 3)
3122 /* ??? Need to count the actual number of registers to be used,
3123 not the possible number of registers. Fix later. */
3124 return false;
3128 /* Dllimport'd functions are also called indirectly. */
3129 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3130 && decl && DECL_DLLIMPORT_P (decl)
3131 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3132 return false;
3134 /* If we forced aligned the stack, then sibcalling would unalign the
3135 stack, which may break the called function. */
3136 if (cfun->machine->force_align_arg_pointer)
3137 return false;
3139 /* Otherwise okay. That also includes certain types of indirect calls. */
3140 return true;
3143 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3144 calling convention attributes;
3145 arguments as in struct attribute_spec.handler. */
3147 static tree
3148 ix86_handle_cconv_attribute (tree *node, tree name,
3149 tree args,
3150 int flags ATTRIBUTE_UNUSED,
3151 bool *no_add_attrs)
3153 if (TREE_CODE (*node) != FUNCTION_TYPE
3154 && TREE_CODE (*node) != METHOD_TYPE
3155 && TREE_CODE (*node) != FIELD_DECL
3156 && TREE_CODE (*node) != TYPE_DECL)
3158 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3159 IDENTIFIER_POINTER (name));
3160 *no_add_attrs = true;
3161 return NULL_TREE;
3164 /* Can combine regparm with all attributes but fastcall. */
3165 if (is_attribute_p ("regparm", name))
3167 tree cst;
3169 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3171 error ("fastcall and regparm attributes are not compatible");
3174 cst = TREE_VALUE (args);
3175 if (TREE_CODE (cst) != INTEGER_CST)
3177 warning (OPT_Wattributes,
3178 "%qs attribute requires an integer constant argument",
3179 IDENTIFIER_POINTER (name));
3180 *no_add_attrs = true;
3182 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3184 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3185 IDENTIFIER_POINTER (name), REGPARM_MAX);
3186 *no_add_attrs = true;
3189 if (!TARGET_64BIT
3190 && lookup_attribute (ix86_force_align_arg_pointer_string,
3191 TYPE_ATTRIBUTES (*node))
3192 && compare_tree_int (cst, REGPARM_MAX-1))
3194 error ("%s functions limited to %d register parameters",
3195 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3198 return NULL_TREE;
3201 if (TARGET_64BIT)
3203 /* Do not warn when emulating the MS ABI. */
3204 if (TREE_CODE (*node) != FUNCTION_TYPE || !ix86_function_type_abi (*node))
3205 warning (OPT_Wattributes, "%qs attribute ignored",
3206 IDENTIFIER_POINTER (name));
3207 *no_add_attrs = true;
3208 return NULL_TREE;
3211 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3212 if (is_attribute_p ("fastcall", name))
3214 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3216 error ("fastcall and cdecl attributes are not compatible");
3218 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3220 error ("fastcall and stdcall attributes are not compatible");
3222 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3224 error ("fastcall and regparm attributes are not compatible");
3228 /* Can combine stdcall with fastcall (redundant), regparm and
3229 sseregparm. */
3230 else if (is_attribute_p ("stdcall", name))
3232 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3234 error ("stdcall and cdecl attributes are not compatible");
3236 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3238 error ("stdcall and fastcall attributes are not compatible");
3242 /* Can combine cdecl with regparm and sseregparm. */
3243 else if (is_attribute_p ("cdecl", name))
3245 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3247 error ("stdcall and cdecl attributes are not compatible");
3249 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3251 error ("fastcall and cdecl attributes are not compatible");
3255 /* Can combine sseregparm with all attributes. */
3257 return NULL_TREE;
3260 /* Return 0 if the attributes for two types are incompatible, 1 if they
3261 are compatible, and 2 if they are nearly compatible (which causes a
3262 warning to be generated). */
3264 static int
3265 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3267 /* Check for mismatch of non-default calling convention. */
3268 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3270 if (TREE_CODE (type1) != FUNCTION_TYPE
3271 && TREE_CODE (type1) != METHOD_TYPE)
3272 return 1;
3274 /* Check for mismatched fastcall/regparm types. */
3275 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3276 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3277 || (ix86_function_regparm (type1, NULL)
3278 != ix86_function_regparm (type2, NULL)))
3279 return 0;
3281 /* Check for mismatched sseregparm types. */
3282 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3283 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3284 return 0;
3286 /* Check for mismatched return types (cdecl vs stdcall). */
3287 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3288 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3289 return 0;
3291 return 1;
3294 /* Return the regparm value for a function with the indicated TYPE and DECL.
3295 DECL may be NULL when calling function indirectly
3296 or considering a libcall. */
3298 static int
3299 ix86_function_regparm (const_tree type, const_tree decl)
3301 tree attr;
3302 int regparm = ix86_regparm;
3304 static bool error_issued;
3306 if (TARGET_64BIT)
3308 if (ix86_function_type_abi (type) == DEFAULT_ABI)
3309 return regparm;
3310 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
3313 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3314 if (attr)
3316 regparm
3317 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3319 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3321 /* We can't use regparm(3) for nested functions because
3322 these pass static chain pointer in %ecx register. */
3323 if (!error_issued && regparm == 3
3324 && decl_function_context (decl)
3325 && !DECL_NO_STATIC_CHAIN (decl))
3327 error ("nested functions are limited to 2 register parameters");
3328 error_issued = true;
3329 return 0;
3333 return regparm;
3336 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3337 return 2;
3339 /* Use register calling convention for local functions when possible. */
3340 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3341 && flag_unit_at_a_time && !profile_flag)
3343 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3344 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3345 if (i && i->local)
3347 int local_regparm, globals = 0, regno;
3348 struct function *f;
3350 /* Make sure no regparm register is taken by a
3351 fixed register variable. */
3352 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3353 if (fixed_regs[local_regparm])
3354 break;
3356 /* We can't use regparm(3) for nested functions as these use
3357 static chain pointer in third argument. */
3358 if (local_regparm == 3
3359 && (decl_function_context (decl)
3360 || ix86_force_align_arg_pointer)
3361 && !DECL_NO_STATIC_CHAIN (decl))
3362 local_regparm = 2;
3364 /* If the function realigns its stackpointer, the prologue will
3365 clobber %ecx. If we've already generated code for the callee,
3366 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3367 scanning the attributes for the self-realigning property. */
3368 f = DECL_STRUCT_FUNCTION (decl);
3369 if (local_regparm == 3
3370 && (f ? !!f->machine->force_align_arg_pointer
3371 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3372 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3373 local_regparm = 2;
3375 /* Each fixed register usage increases register pressure,
3376 so less registers should be used for argument passing.
3377 This functionality can be overriden by an explicit
3378 regparm value. */
3379 for (regno = 0; regno <= DI_REG; regno++)
3380 if (fixed_regs[regno])
3381 globals++;
3383 local_regparm
3384 = globals < local_regparm ? local_regparm - globals : 0;
3386 if (local_regparm > regparm)
3387 regparm = local_regparm;
3391 return regparm;
3394 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3395 DFmode (2) arguments in SSE registers for a function with the
3396 indicated TYPE and DECL. DECL may be NULL when calling function
3397 indirectly or considering a libcall. Otherwise return 0. */
3399 static int
3400 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3402 gcc_assert (!TARGET_64BIT);
3404 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3405 by the sseregparm attribute. */
3406 if (TARGET_SSEREGPARM
3407 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3409 if (!TARGET_SSE)
3411 if (warn)
3413 if (decl)
3414 error ("Calling %qD with attribute sseregparm without "
3415 "SSE/SSE2 enabled", decl);
3416 else
3417 error ("Calling %qT with attribute sseregparm without "
3418 "SSE/SSE2 enabled", type);
3420 return 0;
3423 return 2;
3426 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3427 (and DFmode for SSE2) arguments in SSE registers. */
3428 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3430 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3431 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3432 if (i && i->local)
3433 return TARGET_SSE2 ? 2 : 1;
3436 return 0;
3439 /* Return true if EAX is live at the start of the function. Used by
3440 ix86_expand_prologue to determine if we need special help before
3441 calling allocate_stack_worker. */
3443 static bool
3444 ix86_eax_live_at_start_p (void)
3446 /* Cheat. Don't bother working forward from ix86_function_regparm
3447 to the function type to whether an actual argument is located in
3448 eax. Instead just look at cfg info, which is still close enough
3449 to correct at this point. This gives false positives for broken
3450 functions that might use uninitialized data that happens to be
3451 allocated in eax, but who cares? */
3452 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3455 /* Value is the number of bytes of arguments automatically
3456 popped when returning from a subroutine call.
3457 FUNDECL is the declaration node of the function (as a tree),
3458 FUNTYPE is the data type of the function (as a tree),
3459 or for a library call it is an identifier node for the subroutine name.
3460 SIZE is the number of bytes of arguments passed on the stack.
3462 On the 80386, the RTD insn may be used to pop them if the number
3463 of args is fixed, but if the number is variable then the caller
3464 must pop them all. RTD can't be used for library calls now
3465 because the library is compiled with the Unix compiler.
3466 Use of RTD is a selectable option, since it is incompatible with
3467 standard Unix calling sequences. If the option is not selected,
3468 the caller must always pop the args.
3470 The attribute stdcall is equivalent to RTD on a per module basis. */
3473 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3475 int rtd;
3477 /* None of the 64-bit ABIs pop arguments. */
3478 if (TARGET_64BIT)
3479 return 0;
3481 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3483 /* Cdecl functions override -mrtd, and never pop the stack. */
3484 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3486 /* Stdcall and fastcall functions will pop the stack if not
3487 variable args. */
3488 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3489 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3490 rtd = 1;
3492 if (rtd && ! stdarg_p (funtype))
3493 return size;
3496 /* Lose any fake structure return argument if it is passed on the stack. */
3497 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3498 && !KEEP_AGGREGATE_RETURN_POINTER)
3500 int nregs = ix86_function_regparm (funtype, fundecl);
3501 if (nregs == 0)
3502 return GET_MODE_SIZE (Pmode);
3505 return 0;
3508 /* Argument support functions. */
3510 /* Return true when register may be used to pass function parameters. */
3511 bool
3512 ix86_function_arg_regno_p (int regno)
3514 int i;
3515 const int *parm_regs;
3517 if (!TARGET_64BIT)
3519 if (TARGET_MACHO)
3520 return (regno < REGPARM_MAX
3521 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3522 else
3523 return (regno < REGPARM_MAX
3524 || (TARGET_MMX && MMX_REGNO_P (regno)
3525 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3526 || (TARGET_SSE && SSE_REGNO_P (regno)
3527 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3530 if (TARGET_MACHO)
3532 if (SSE_REGNO_P (regno) && TARGET_SSE)
3533 return true;
3535 else
3537 if (TARGET_SSE && SSE_REGNO_P (regno)
3538 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3539 return true;
3542 /* TODO: The function should depend on current function ABI but
3543 builtins.c would need updating then. Therefore we use the
3544 default ABI. */
3546 /* RAX is used as hidden argument to va_arg functions. */
3547 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
3548 return true;
3550 if (DEFAULT_ABI == MS_ABI)
3551 parm_regs = x86_64_ms_abi_int_parameter_registers;
3552 else
3553 parm_regs = x86_64_int_parameter_registers;
3554 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
3555 : X86_64_REGPARM_MAX); i++)
3556 if (regno == parm_regs[i])
3557 return true;
3558 return false;
3561 /* Return if we do not know how to pass TYPE solely in registers. */
3563 static bool
3564 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3566 if (must_pass_in_stack_var_size_or_pad (mode, type))
3567 return true;
3569 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3570 The layout_type routine is crafty and tries to trick us into passing
3571 currently unsupported vector types on the stack by using TImode. */
3572 return (!TARGET_64BIT && mode == TImode
3573 && type && TREE_CODE (type) != VECTOR_TYPE);
3576 /* It returns the size, in bytes, of the area reserved for arguments passed
3577 in registers for the function represented by fndecl dependent to the used
3578 abi format. */
3580 ix86_reg_parm_stack_space (const_tree fndecl)
3582 int call_abi = 0;
3583 /* For libcalls it is possible that there is no fndecl at hand.
3584 Therefore assume for this case the default abi of the target. */
3585 if (!fndecl)
3586 call_abi = DEFAULT_ABI;
3587 else
3588 call_abi = ix86_function_abi (fndecl);
3589 if (call_abi == 1)
3590 return 32;
3591 return 0;
3594 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
3595 call abi used. */
3597 ix86_function_type_abi (const_tree fntype)
3599 if (TARGET_64BIT && fntype != NULL)
3601 int abi;
3602 if (DEFAULT_ABI == SYSV_ABI)
3603 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
3604 else
3605 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
3607 if (DEFAULT_ABI == MS_ABI && abi == SYSV_ABI)
3608 sorry ("using sysv calling convention on target w64 is not supported");
3610 return abi;
3612 return DEFAULT_ABI;
3616 ix86_function_abi (const_tree fndecl)
3618 if (! fndecl)
3619 return DEFAULT_ABI;
3620 return ix86_function_type_abi (TREE_TYPE (fndecl));
3623 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
3624 call abi used. */
3626 ix86_cfun_abi (void)
3628 if (! cfun || ! TARGET_64BIT)
3629 return DEFAULT_ABI;
3630 return cfun->machine->call_abi;
3633 /* regclass.c */
3634 extern void init_regs (void);
3636 /* Implementation of call abi switching target hook. Specific to FNDECL
3637 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
3638 for more details.
3639 To prevent redudant calls of costy function init_regs (), it checks not to
3640 reset register usage for default abi. */
3641 void
3642 ix86_call_abi_override (const_tree fndecl)
3644 if (fndecl == NULL_TREE)
3645 cfun->machine->call_abi = DEFAULT_ABI;
3646 else
3647 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
3648 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
3650 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
3652 call_used_regs[4 /*RSI*/] = 0;
3653 call_used_regs[5 /*RDI*/] = 0;
3654 init_regs ();
3657 else if (TARGET_64BIT)
3659 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
3661 call_used_regs[4 /*RSI*/] = 1;
3662 call_used_regs[5 /*RDI*/] = 1;
3663 init_regs ();
3668 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3669 for a call to a function whose data type is FNTYPE.
3670 For a library call, FNTYPE is 0. */
3672 void
3673 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3674 tree fntype, /* tree ptr for function decl */
3675 rtx libname, /* SYMBOL_REF of library name or 0 */
3676 tree fndecl)
3678 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3679 memset (cum, 0, sizeof (*cum));
3681 cum->call_abi = ix86_function_type_abi (fntype);
3682 /* Set up the number of registers to use for passing arguments. */
3683 cum->nregs = ix86_regparm;
3684 if (TARGET_64BIT)
3686 if (cum->call_abi != DEFAULT_ABI)
3687 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
3688 : X64_REGPARM_MAX;
3690 if (TARGET_SSE)
3692 cum->sse_nregs = SSE_REGPARM_MAX;
3693 if (TARGET_64BIT)
3695 if (cum->call_abi != DEFAULT_ABI)
3696 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
3697 : X64_SSE_REGPARM_MAX;
3700 if (TARGET_MMX)
3701 cum->mmx_nregs = MMX_REGPARM_MAX;
3702 cum->warn_sse = true;
3703 cum->warn_mmx = true;
3705 /* Because type might mismatch in between caller and callee, we need to
3706 use actual type of function for local calls.
3707 FIXME: cgraph_analyze can be told to actually record if function uses
3708 va_start so for local functions maybe_vaarg can be made aggressive
3709 helping K&R code.
3710 FIXME: once typesytem is fixed, we won't need this code anymore. */
3711 if (i && i->local)
3712 fntype = TREE_TYPE (fndecl);
3713 cum->maybe_vaarg = (fntype
3714 ? (!prototype_p (fntype) || stdarg_p (fntype))
3715 : !libname);
3717 if (!TARGET_64BIT)
3719 /* If there are variable arguments, then we won't pass anything
3720 in registers in 32-bit mode. */
3721 if (stdarg_p (fntype))
3723 cum->nregs = 0;
3724 cum->sse_nregs = 0;
3725 cum->mmx_nregs = 0;
3726 cum->warn_sse = 0;
3727 cum->warn_mmx = 0;
3728 return;
3731 /* Use ecx and edx registers if function has fastcall attribute,
3732 else look for regparm information. */
3733 if (fntype)
3735 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3737 cum->nregs = 2;
3738 cum->fastcall = 1;
3740 else
3741 cum->nregs = ix86_function_regparm (fntype, fndecl);
3744 /* Set up the number of SSE registers used for passing SFmode
3745 and DFmode arguments. Warn for mismatching ABI. */
3746 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3750 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3751 But in the case of vector types, it is some vector mode.
3753 When we have only some of our vector isa extensions enabled, then there
3754 are some modes for which vector_mode_supported_p is false. For these
3755 modes, the generic vector support in gcc will choose some non-vector mode
3756 in order to implement the type. By computing the natural mode, we'll
3757 select the proper ABI location for the operand and not depend on whatever
3758 the middle-end decides to do with these vector types. */
3760 static enum machine_mode
3761 type_natural_mode (const_tree type)
3763 enum machine_mode mode = TYPE_MODE (type);
3765 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3767 HOST_WIDE_INT size = int_size_in_bytes (type);
3768 if ((size == 8 || size == 16)
3769 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3770 && TYPE_VECTOR_SUBPARTS (type) > 1)
3772 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3774 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3775 mode = MIN_MODE_VECTOR_FLOAT;
3776 else
3777 mode = MIN_MODE_VECTOR_INT;
3779 /* Get the mode which has this inner mode and number of units. */
3780 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3781 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3782 && GET_MODE_INNER (mode) == innermode)
3783 return mode;
3785 gcc_unreachable ();
3789 return mode;
3792 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3793 this may not agree with the mode that the type system has chosen for the
3794 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3795 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3797 static rtx
3798 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3799 unsigned int regno)
3801 rtx tmp;
3803 if (orig_mode != BLKmode)
3804 tmp = gen_rtx_REG (orig_mode, regno);
3805 else
3807 tmp = gen_rtx_REG (mode, regno);
3808 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3809 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3812 return tmp;
3815 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3816 of this code is to classify each 8bytes of incoming argument by the register
3817 class and assign registers accordingly. */
3819 /* Return the union class of CLASS1 and CLASS2.
3820 See the x86-64 PS ABI for details. */
3822 static enum x86_64_reg_class
3823 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3825 /* Rule #1: If both classes are equal, this is the resulting class. */
3826 if (class1 == class2)
3827 return class1;
3829 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3830 the other class. */
3831 if (class1 == X86_64_NO_CLASS)
3832 return class2;
3833 if (class2 == X86_64_NO_CLASS)
3834 return class1;
3836 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3837 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3838 return X86_64_MEMORY_CLASS;
3840 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3841 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3842 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3843 return X86_64_INTEGERSI_CLASS;
3844 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3845 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3846 return X86_64_INTEGER_CLASS;
3848 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3849 MEMORY is used. */
3850 if (class1 == X86_64_X87_CLASS
3851 || class1 == X86_64_X87UP_CLASS
3852 || class1 == X86_64_COMPLEX_X87_CLASS
3853 || class2 == X86_64_X87_CLASS
3854 || class2 == X86_64_X87UP_CLASS
3855 || class2 == X86_64_COMPLEX_X87_CLASS)
3856 return X86_64_MEMORY_CLASS;
3858 /* Rule #6: Otherwise class SSE is used. */
3859 return X86_64_SSE_CLASS;
3862 /* Classify the argument of type TYPE and mode MODE.
3863 CLASSES will be filled by the register class used to pass each word
3864 of the operand. The number of words is returned. In case the parameter
3865 should be passed in memory, 0 is returned. As a special case for zero
3866 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3868 BIT_OFFSET is used internally for handling records and specifies offset
3869 of the offset in bits modulo 256 to avoid overflow cases.
3871 See the x86-64 PS ABI for details.
3874 static int
3875 classify_argument (enum machine_mode mode, const_tree type,
3876 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3878 HOST_WIDE_INT bytes =
3879 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3880 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3882 /* Variable sized entities are always passed/returned in memory. */
3883 if (bytes < 0)
3884 return 0;
3886 if (mode != VOIDmode
3887 && targetm.calls.must_pass_in_stack (mode, type))
3888 return 0;
3890 if (type && AGGREGATE_TYPE_P (type))
3892 int i;
3893 tree field;
3894 enum x86_64_reg_class subclasses[MAX_CLASSES];
3896 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3897 if (bytes > 16)
3898 return 0;
3900 for (i = 0; i < words; i++)
3901 classes[i] = X86_64_NO_CLASS;
3903 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3904 signalize memory class, so handle it as special case. */
3905 if (!words)
3907 classes[0] = X86_64_NO_CLASS;
3908 return 1;
3911 /* Classify each field of record and merge classes. */
3912 switch (TREE_CODE (type))
3914 case RECORD_TYPE:
3915 /* And now merge the fields of structure. */
3916 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3918 if (TREE_CODE (field) == FIELD_DECL)
3920 int num;
3922 if (TREE_TYPE (field) == error_mark_node)
3923 continue;
3925 /* Bitfields are always classified as integer. Handle them
3926 early, since later code would consider them to be
3927 misaligned integers. */
3928 if (DECL_BIT_FIELD (field))
3930 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3931 i < ((int_bit_position (field) + (bit_offset % 64))
3932 + tree_low_cst (DECL_SIZE (field), 0)
3933 + 63) / 8 / 8; i++)
3934 classes[i] =
3935 merge_classes (X86_64_INTEGER_CLASS,
3936 classes[i]);
3938 else
3940 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3941 TREE_TYPE (field), subclasses,
3942 (int_bit_position (field)
3943 + bit_offset) % 256);
3944 if (!num)
3945 return 0;
3946 for (i = 0; i < num; i++)
3948 int pos =
3949 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3950 classes[i + pos] =
3951 merge_classes (subclasses[i], classes[i + pos]);
3956 break;
3958 case ARRAY_TYPE:
3959 /* Arrays are handled as small records. */
3961 int num;
3962 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3963 TREE_TYPE (type), subclasses, bit_offset);
3964 if (!num)
3965 return 0;
3967 /* The partial classes are now full classes. */
3968 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3969 subclasses[0] = X86_64_SSE_CLASS;
3970 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3971 subclasses[0] = X86_64_INTEGER_CLASS;
3973 for (i = 0; i < words; i++)
3974 classes[i] = subclasses[i % num];
3976 break;
3978 case UNION_TYPE:
3979 case QUAL_UNION_TYPE:
3980 /* Unions are similar to RECORD_TYPE but offset is always 0.
3982 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3984 if (TREE_CODE (field) == FIELD_DECL)
3986 int num;
3988 if (TREE_TYPE (field) == error_mark_node)
3989 continue;
3991 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3992 TREE_TYPE (field), subclasses,
3993 bit_offset);
3994 if (!num)
3995 return 0;
3996 for (i = 0; i < num; i++)
3997 classes[i] = merge_classes (subclasses[i], classes[i]);
4000 break;
4002 default:
4003 gcc_unreachable ();
4006 /* Final merger cleanup. */
4007 for (i = 0; i < words; i++)
4009 /* If one class is MEMORY, everything should be passed in
4010 memory. */
4011 if (classes[i] == X86_64_MEMORY_CLASS)
4012 return 0;
4014 /* The X86_64_SSEUP_CLASS should be always preceded by
4015 X86_64_SSE_CLASS. */
4016 if (classes[i] == X86_64_SSEUP_CLASS
4017 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4018 classes[i] = X86_64_SSE_CLASS;
4020 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4021 if (classes[i] == X86_64_X87UP_CLASS
4022 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4023 classes[i] = X86_64_SSE_CLASS;
4025 return words;
4028 /* Compute alignment needed. We align all types to natural boundaries with
4029 exception of XFmode that is aligned to 64bits. */
4030 if (mode != VOIDmode && mode != BLKmode)
4032 int mode_alignment = GET_MODE_BITSIZE (mode);
4034 if (mode == XFmode)
4035 mode_alignment = 128;
4036 else if (mode == XCmode)
4037 mode_alignment = 256;
4038 if (COMPLEX_MODE_P (mode))
4039 mode_alignment /= 2;
4040 /* Misaligned fields are always returned in memory. */
4041 if (bit_offset % mode_alignment)
4042 return 0;
4045 /* for V1xx modes, just use the base mode */
4046 if (VECTOR_MODE_P (mode) && mode != V1DImode
4047 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4048 mode = GET_MODE_INNER (mode);
4050 /* Classification of atomic types. */
4051 switch (mode)
4053 case SDmode:
4054 case DDmode:
4055 classes[0] = X86_64_SSE_CLASS;
4056 return 1;
4057 case TDmode:
4058 classes[0] = X86_64_SSE_CLASS;
4059 classes[1] = X86_64_SSEUP_CLASS;
4060 return 2;
4061 case DImode:
4062 case SImode:
4063 case HImode:
4064 case QImode:
4065 case CSImode:
4066 case CHImode:
4067 case CQImode:
4068 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4069 classes[0] = X86_64_INTEGERSI_CLASS;
4070 else
4071 classes[0] = X86_64_INTEGER_CLASS;
4072 return 1;
4073 case CDImode:
4074 case TImode:
4075 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4076 return 2;
4077 case CTImode:
4078 return 0;
4079 case SFmode:
4080 if (!(bit_offset % 64))
4081 classes[0] = X86_64_SSESF_CLASS;
4082 else
4083 classes[0] = X86_64_SSE_CLASS;
4084 return 1;
4085 case DFmode:
4086 classes[0] = X86_64_SSEDF_CLASS;
4087 return 1;
4088 case XFmode:
4089 classes[0] = X86_64_X87_CLASS;
4090 classes[1] = X86_64_X87UP_CLASS;
4091 return 2;
4092 case TFmode:
4093 classes[0] = X86_64_SSE_CLASS;
4094 classes[1] = X86_64_SSEUP_CLASS;
4095 return 2;
4096 case SCmode:
4097 classes[0] = X86_64_SSE_CLASS;
4098 return 1;
4099 case DCmode:
4100 classes[0] = X86_64_SSEDF_CLASS;
4101 classes[1] = X86_64_SSEDF_CLASS;
4102 return 2;
4103 case XCmode:
4104 classes[0] = X86_64_COMPLEX_X87_CLASS;
4105 return 1;
4106 case TCmode:
4107 /* This modes is larger than 16 bytes. */
4108 return 0;
4109 case V4SFmode:
4110 case V4SImode:
4111 case V16QImode:
4112 case V8HImode:
4113 case V2DFmode:
4114 case V2DImode:
4115 classes[0] = X86_64_SSE_CLASS;
4116 classes[1] = X86_64_SSEUP_CLASS;
4117 return 2;
4118 case V1DImode:
4119 case V2SFmode:
4120 case V2SImode:
4121 case V4HImode:
4122 case V8QImode:
4123 classes[0] = X86_64_SSE_CLASS;
4124 return 1;
4125 case BLKmode:
4126 case VOIDmode:
4127 return 0;
4128 default:
4129 gcc_assert (VECTOR_MODE_P (mode));
4131 if (bytes > 16)
4132 return 0;
4134 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
4136 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4137 classes[0] = X86_64_INTEGERSI_CLASS;
4138 else
4139 classes[0] = X86_64_INTEGER_CLASS;
4140 classes[1] = X86_64_INTEGER_CLASS;
4141 return 1 + (bytes > 8);
4145 /* Examine the argument and return set number of register required in each
4146 class. Return 0 iff parameter should be passed in memory. */
4147 static int
4148 examine_argument (enum machine_mode mode, const_tree type, int in_return,
4149 int *int_nregs, int *sse_nregs)
4151 enum x86_64_reg_class regclass[MAX_CLASSES];
4152 int n = classify_argument (mode, type, regclass, 0);
4154 *int_nregs = 0;
4155 *sse_nregs = 0;
4156 if (!n)
4157 return 0;
4158 for (n--; n >= 0; n--)
4159 switch (regclass[n])
4161 case X86_64_INTEGER_CLASS:
4162 case X86_64_INTEGERSI_CLASS:
4163 (*int_nregs)++;
4164 break;
4165 case X86_64_SSE_CLASS:
4166 case X86_64_SSESF_CLASS:
4167 case X86_64_SSEDF_CLASS:
4168 (*sse_nregs)++;
4169 break;
4170 case X86_64_NO_CLASS:
4171 case X86_64_SSEUP_CLASS:
4172 break;
4173 case X86_64_X87_CLASS:
4174 case X86_64_X87UP_CLASS:
4175 if (!in_return)
4176 return 0;
4177 break;
4178 case X86_64_COMPLEX_X87_CLASS:
4179 return in_return ? 2 : 0;
4180 case X86_64_MEMORY_CLASS:
4181 gcc_unreachable ();
4183 return 1;
4186 /* Construct container for the argument used by GCC interface. See
4187 FUNCTION_ARG for the detailed description. */
4189 static rtx
4190 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4191 const_tree type, int in_return, int nintregs, int nsseregs,
4192 const int *intreg, int sse_regno)
4194 /* The following variables hold the static issued_error state. */
4195 static bool issued_sse_arg_error;
4196 static bool issued_sse_ret_error;
4197 static bool issued_x87_ret_error;
4199 enum machine_mode tmpmode;
4200 int bytes =
4201 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4202 enum x86_64_reg_class regclass[MAX_CLASSES];
4203 int n;
4204 int i;
4205 int nexps = 0;
4206 int needed_sseregs, needed_intregs;
4207 rtx exp[MAX_CLASSES];
4208 rtx ret;
4210 n = classify_argument (mode, type, regclass, 0);
4211 if (!n)
4212 return NULL;
4213 if (!examine_argument (mode, type, in_return, &needed_intregs,
4214 &needed_sseregs))
4215 return NULL;
4216 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4217 return NULL;
4219 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4220 some less clueful developer tries to use floating-point anyway. */
4221 if (needed_sseregs && !TARGET_SSE)
4223 if (in_return)
4225 if (!issued_sse_ret_error)
4227 error ("SSE register return with SSE disabled");
4228 issued_sse_ret_error = true;
4231 else if (!issued_sse_arg_error)
4233 error ("SSE register argument with SSE disabled");
4234 issued_sse_arg_error = true;
4236 return NULL;
4239 /* Likewise, error if the ABI requires us to return values in the
4240 x87 registers and the user specified -mno-80387. */
4241 if (!TARGET_80387 && in_return)
4242 for (i = 0; i < n; i++)
4243 if (regclass[i] == X86_64_X87_CLASS
4244 || regclass[i] == X86_64_X87UP_CLASS
4245 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4247 if (!issued_x87_ret_error)
4249 error ("x87 register return with x87 disabled");
4250 issued_x87_ret_error = true;
4252 return NULL;
4255 /* First construct simple cases. Avoid SCmode, since we want to use
4256 single register to pass this type. */
4257 if (n == 1 && mode != SCmode)
4258 switch (regclass[0])
4260 case X86_64_INTEGER_CLASS:
4261 case X86_64_INTEGERSI_CLASS:
4262 return gen_rtx_REG (mode, intreg[0]);
4263 case X86_64_SSE_CLASS:
4264 case X86_64_SSESF_CLASS:
4265 case X86_64_SSEDF_CLASS:
4266 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4267 case X86_64_X87_CLASS:
4268 case X86_64_COMPLEX_X87_CLASS:
4269 return gen_rtx_REG (mode, FIRST_STACK_REG);
4270 case X86_64_NO_CLASS:
4271 /* Zero sized array, struct or class. */
4272 return NULL;
4273 default:
4274 gcc_unreachable ();
4276 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4277 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4278 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4280 if (n == 2
4281 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4282 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4283 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4284 && regclass[1] == X86_64_INTEGER_CLASS
4285 && (mode == CDImode || mode == TImode || mode == TFmode)
4286 && intreg[0] + 1 == intreg[1])
4287 return gen_rtx_REG (mode, intreg[0]);
4289 /* Otherwise figure out the entries of the PARALLEL. */
4290 for (i = 0; i < n; i++)
4292 switch (regclass[i])
4294 case X86_64_NO_CLASS:
4295 break;
4296 case X86_64_INTEGER_CLASS:
4297 case X86_64_INTEGERSI_CLASS:
4298 /* Merge TImodes on aligned occasions here too. */
4299 if (i * 8 + 8 > bytes)
4300 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4301 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4302 tmpmode = SImode;
4303 else
4304 tmpmode = DImode;
4305 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4306 if (tmpmode == BLKmode)
4307 tmpmode = DImode;
4308 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4309 gen_rtx_REG (tmpmode, *intreg),
4310 GEN_INT (i*8));
4311 intreg++;
4312 break;
4313 case X86_64_SSESF_CLASS:
4314 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4315 gen_rtx_REG (SFmode,
4316 SSE_REGNO (sse_regno)),
4317 GEN_INT (i*8));
4318 sse_regno++;
4319 break;
4320 case X86_64_SSEDF_CLASS:
4321 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4322 gen_rtx_REG (DFmode,
4323 SSE_REGNO (sse_regno)),
4324 GEN_INT (i*8));
4325 sse_regno++;
4326 break;
4327 case X86_64_SSE_CLASS:
4328 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4329 tmpmode = TImode;
4330 else
4331 tmpmode = DImode;
4332 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4333 gen_rtx_REG (tmpmode,
4334 SSE_REGNO (sse_regno)),
4335 GEN_INT (i*8));
4336 if (tmpmode == TImode)
4337 i++;
4338 sse_regno++;
4339 break;
4340 default:
4341 gcc_unreachable ();
4345 /* Empty aligned struct, union or class. */
4346 if (nexps == 0)
4347 return NULL;
4349 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4350 for (i = 0; i < nexps; i++)
4351 XVECEXP (ret, 0, i) = exp [i];
4352 return ret;
4355 /* Update the data in CUM to advance over an argument of mode MODE
4356 and data type TYPE. (TYPE is null for libcalls where that information
4357 may not be available.) */
4359 static void
4360 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4361 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4363 switch (mode)
4365 default:
4366 break;
4368 case BLKmode:
4369 if (bytes < 0)
4370 break;
4371 /* FALLTHRU */
4373 case DImode:
4374 case SImode:
4375 case HImode:
4376 case QImode:
4377 cum->words += words;
4378 cum->nregs -= words;
4379 cum->regno += words;
4381 if (cum->nregs <= 0)
4383 cum->nregs = 0;
4384 cum->regno = 0;
4386 break;
4388 case DFmode:
4389 if (cum->float_in_sse < 2)
4390 break;
4391 case SFmode:
4392 if (cum->float_in_sse < 1)
4393 break;
4394 /* FALLTHRU */
4396 case TImode:
4397 case V16QImode:
4398 case V8HImode:
4399 case V4SImode:
4400 case V2DImode:
4401 case V4SFmode:
4402 case V2DFmode:
4403 if (!type || !AGGREGATE_TYPE_P (type))
4405 cum->sse_words += words;
4406 cum->sse_nregs -= 1;
4407 cum->sse_regno += 1;
4408 if (cum->sse_nregs <= 0)
4410 cum->sse_nregs = 0;
4411 cum->sse_regno = 0;
4414 break;
4416 case V8QImode:
4417 case V4HImode:
4418 case V2SImode:
4419 case V2SFmode:
4420 case V1DImode:
4421 if (!type || !AGGREGATE_TYPE_P (type))
4423 cum->mmx_words += words;
4424 cum->mmx_nregs -= 1;
4425 cum->mmx_regno += 1;
4426 if (cum->mmx_nregs <= 0)
4428 cum->mmx_nregs = 0;
4429 cum->mmx_regno = 0;
4432 break;
4436 static void
4437 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4438 tree type, HOST_WIDE_INT words)
4440 int int_nregs, sse_nregs;
4442 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4443 cum->words += words;
4444 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4446 cum->nregs -= int_nregs;
4447 cum->sse_nregs -= sse_nregs;
4448 cum->regno += int_nregs;
4449 cum->sse_regno += sse_nregs;
4451 else
4452 cum->words += words;
4455 static void
4456 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4457 HOST_WIDE_INT words)
4459 /* Otherwise, this should be passed indirect. */
4460 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4462 cum->words += words;
4463 if (cum->nregs > 0)
4465 cum->nregs -= 1;
4466 cum->regno += 1;
4470 void
4471 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4472 tree type, int named ATTRIBUTE_UNUSED)
4474 HOST_WIDE_INT bytes, words;
4476 if (mode == BLKmode)
4477 bytes = int_size_in_bytes (type);
4478 else
4479 bytes = GET_MODE_SIZE (mode);
4480 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4482 if (type)
4483 mode = type_natural_mode (type);
4485 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4486 function_arg_advance_ms_64 (cum, bytes, words);
4487 else if (TARGET_64BIT)
4488 function_arg_advance_64 (cum, mode, type, words);
4489 else
4490 function_arg_advance_32 (cum, mode, type, bytes, words);
4493 /* Define where to put the arguments to a function.
4494 Value is zero to push the argument on the stack,
4495 or a hard register in which to store the argument.
4497 MODE is the argument's machine mode.
4498 TYPE is the data type of the argument (as a tree).
4499 This is null for libcalls where that information may
4500 not be available.
4501 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4502 the preceding args and about the function being called.
4503 NAMED is nonzero if this argument is a named parameter
4504 (otherwise it is an extra parameter matching an ellipsis). */
4506 static rtx
4507 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4508 enum machine_mode orig_mode, tree type,
4509 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4511 static bool warnedsse, warnedmmx;
4513 /* Avoid the AL settings for the Unix64 ABI. */
4514 if (mode == VOIDmode)
4515 return constm1_rtx;
4517 switch (mode)
4519 default:
4520 break;
4522 case BLKmode:
4523 if (bytes < 0)
4524 break;
4525 /* FALLTHRU */
4526 case DImode:
4527 case SImode:
4528 case HImode:
4529 case QImode:
4530 if (words <= cum->nregs)
4532 int regno = cum->regno;
4534 /* Fastcall allocates the first two DWORD (SImode) or
4535 smaller arguments to ECX and EDX if it isn't an
4536 aggregate type . */
4537 if (cum->fastcall)
4539 if (mode == BLKmode
4540 || mode == DImode
4541 || (type && AGGREGATE_TYPE_P (type)))
4542 break;
4544 /* ECX not EAX is the first allocated register. */
4545 if (regno == AX_REG)
4546 regno = CX_REG;
4548 return gen_rtx_REG (mode, regno);
4550 break;
4552 case DFmode:
4553 if (cum->float_in_sse < 2)
4554 break;
4555 case SFmode:
4556 if (cum->float_in_sse < 1)
4557 break;
4558 /* FALLTHRU */
4559 case TImode:
4560 case V16QImode:
4561 case V8HImode:
4562 case V4SImode:
4563 case V2DImode:
4564 case V4SFmode:
4565 case V2DFmode:
4566 if (!type || !AGGREGATE_TYPE_P (type))
4568 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4570 warnedsse = true;
4571 warning (0, "SSE vector argument without SSE enabled "
4572 "changes the ABI");
4574 if (cum->sse_nregs)
4575 return gen_reg_or_parallel (mode, orig_mode,
4576 cum->sse_regno + FIRST_SSE_REG);
4578 break;
4580 case V8QImode:
4581 case V4HImode:
4582 case V2SImode:
4583 case V2SFmode:
4584 case V1DImode:
4585 if (!type || !AGGREGATE_TYPE_P (type))
4587 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4589 warnedmmx = true;
4590 warning (0, "MMX vector argument without MMX enabled "
4591 "changes the ABI");
4593 if (cum->mmx_nregs)
4594 return gen_reg_or_parallel (mode, orig_mode,
4595 cum->mmx_regno + FIRST_MMX_REG);
4597 break;
4600 return NULL_RTX;
4603 static rtx
4604 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4605 enum machine_mode orig_mode, tree type)
4607 /* Handle a hidden AL argument containing number of registers
4608 for varargs x86-64 functions. */
4609 if (mode == VOIDmode)
4610 return GEN_INT (cum->maybe_vaarg
4611 ? (cum->sse_nregs < 0
4612 ? (cum->call_abi == DEFAULT_ABI
4613 ? SSE_REGPARM_MAX
4614 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4615 : X64_SSE_REGPARM_MAX))
4616 : cum->sse_regno)
4617 : -1);
4619 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4620 cum->sse_nregs,
4621 &x86_64_int_parameter_registers [cum->regno],
4622 cum->sse_regno);
4625 static rtx
4626 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4627 enum machine_mode orig_mode, int named,
4628 HOST_WIDE_INT bytes)
4630 unsigned int regno;
4632 /* Avoid the AL settings for the Unix64 ABI. */
4633 if (mode == VOIDmode)
4634 return constm1_rtx;
4636 /* If we've run out of registers, it goes on the stack. */
4637 if (cum->nregs == 0)
4638 return NULL_RTX;
4640 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4642 /* Only floating point modes are passed in anything but integer regs. */
4643 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4645 if (named)
4646 regno = cum->regno + FIRST_SSE_REG;
4647 else
4649 rtx t1, t2;
4651 /* Unnamed floating parameters are passed in both the
4652 SSE and integer registers. */
4653 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4654 t2 = gen_rtx_REG (mode, regno);
4655 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4656 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4657 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4660 /* Handle aggregated types passed in register. */
4661 if (orig_mode == BLKmode)
4663 if (bytes > 0 && bytes <= 8)
4664 mode = (bytes > 4 ? DImode : SImode);
4665 if (mode == BLKmode)
4666 mode = DImode;
4669 return gen_reg_or_parallel (mode, orig_mode, regno);
4673 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4674 tree type, int named)
4676 enum machine_mode mode = omode;
4677 HOST_WIDE_INT bytes, words;
4679 if (mode == BLKmode)
4680 bytes = int_size_in_bytes (type);
4681 else
4682 bytes = GET_MODE_SIZE (mode);
4683 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4685 /* To simplify the code below, represent vector types with a vector mode
4686 even if MMX/SSE are not active. */
4687 if (type && TREE_CODE (type) == VECTOR_TYPE)
4688 mode = type_natural_mode (type);
4690 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4691 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4692 else if (TARGET_64BIT)
4693 return function_arg_64 (cum, mode, omode, type);
4694 else
4695 return function_arg_32 (cum, mode, omode, type, bytes, words);
4698 /* A C expression that indicates when an argument must be passed by
4699 reference. If nonzero for an argument, a copy of that argument is
4700 made in memory and a pointer to the argument is passed instead of
4701 the argument itself. The pointer is passed in whatever way is
4702 appropriate for passing a pointer to that type. */
4704 static bool
4705 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4706 enum machine_mode mode ATTRIBUTE_UNUSED,
4707 const_tree type, bool named ATTRIBUTE_UNUSED)
4709 /* See Windows x64 Software Convention. */
4710 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4712 int msize = (int) GET_MODE_SIZE (mode);
4713 if (type)
4715 /* Arrays are passed by reference. */
4716 if (TREE_CODE (type) == ARRAY_TYPE)
4717 return true;
4719 if (AGGREGATE_TYPE_P (type))
4721 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4722 are passed by reference. */
4723 msize = int_size_in_bytes (type);
4727 /* __m128 is passed by reference. */
4728 switch (msize) {
4729 case 1: case 2: case 4: case 8:
4730 break;
4731 default:
4732 return true;
4735 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4736 return 1;
4738 return 0;
4741 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4742 ABI. */
4743 static bool
4744 contains_aligned_value_p (tree type)
4746 enum machine_mode mode = TYPE_MODE (type);
4747 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
4748 || mode == TDmode
4749 || mode == TFmode
4750 || mode == TCmode)
4751 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4752 return true;
4753 if (TYPE_ALIGN (type) < 128)
4754 return false;
4756 if (AGGREGATE_TYPE_P (type))
4758 /* Walk the aggregates recursively. */
4759 switch (TREE_CODE (type))
4761 case RECORD_TYPE:
4762 case UNION_TYPE:
4763 case QUAL_UNION_TYPE:
4765 tree field;
4767 /* Walk all the structure fields. */
4768 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4770 if (TREE_CODE (field) == FIELD_DECL
4771 && contains_aligned_value_p (TREE_TYPE (field)))
4772 return true;
4774 break;
4777 case ARRAY_TYPE:
4778 /* Just for use if some languages passes arrays by value. */
4779 if (contains_aligned_value_p (TREE_TYPE (type)))
4780 return true;
4781 break;
4783 default:
4784 gcc_unreachable ();
4787 return false;
4790 /* Gives the alignment boundary, in bits, of an argument with the
4791 specified mode and type. */
4794 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4796 int align;
4797 if (type)
4799 /* Since canonical type is used for call, we convert it to
4800 canonical type if needed. */
4801 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
4802 type = TYPE_CANONICAL (type);
4803 align = TYPE_ALIGN (type);
4805 else
4806 align = GET_MODE_ALIGNMENT (mode);
4807 if (align < PARM_BOUNDARY)
4808 align = PARM_BOUNDARY;
4809 /* In 32bit, only _Decimal128 and __float128 are aligned to their
4810 natural boundaries. */
4811 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
4813 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4814 make an exception for SSE modes since these require 128bit
4815 alignment.
4817 The handling here differs from field_alignment. ICC aligns MMX
4818 arguments to 4 byte boundaries, while structure fields are aligned
4819 to 8 byte boundaries. */
4820 if (!type)
4822 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
4823 align = PARM_BOUNDARY;
4825 else
4827 if (!contains_aligned_value_p (type))
4828 align = PARM_BOUNDARY;
4831 if (align > BIGGEST_ALIGNMENT)
4832 align = BIGGEST_ALIGNMENT;
4833 return align;
4836 /* Return true if N is a possible register number of function value. */
4838 bool
4839 ix86_function_value_regno_p (int regno)
4841 switch (regno)
4843 case 0:
4844 return true;
4846 case FIRST_FLOAT_REG:
4847 /* TODO: The function should depend on current function ABI but
4848 builtins.c would need updating then. Therefore we use the
4849 default ABI. */
4850 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
4851 return false;
4852 return TARGET_FLOAT_RETURNS_IN_80387;
4854 case FIRST_SSE_REG:
4855 return TARGET_SSE;
4857 case FIRST_MMX_REG:
4858 if (TARGET_MACHO || TARGET_64BIT)
4859 return false;
4860 return TARGET_MMX;
4863 return false;
4866 /* Define how to find the value returned by a function.
4867 VALTYPE is the data type of the value (as a tree).
4868 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4869 otherwise, FUNC is 0. */
4871 static rtx
4872 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4873 const_tree fntype, const_tree fn)
4875 unsigned int regno;
4877 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4878 we normally prevent this case when mmx is not available. However
4879 some ABIs may require the result to be returned like DImode. */
4880 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4881 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4883 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4884 we prevent this case when sse is not available. However some ABIs
4885 may require the result to be returned like integer TImode. */
4886 else if (mode == TImode
4887 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4888 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4890 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4891 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4892 regno = FIRST_FLOAT_REG;
4893 else
4894 /* Most things go in %eax. */
4895 regno = AX_REG;
4897 /* Override FP return register with %xmm0 for local functions when
4898 SSE math is enabled or for functions with sseregparm attribute. */
4899 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4901 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4902 if ((sse_level >= 1 && mode == SFmode)
4903 || (sse_level == 2 && mode == DFmode))
4904 regno = FIRST_SSE_REG;
4907 return gen_rtx_REG (orig_mode, regno);
4910 static rtx
4911 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4912 const_tree valtype)
4914 rtx ret;
4916 /* Handle libcalls, which don't provide a type node. */
4917 if (valtype == NULL)
4919 switch (mode)
4921 case SFmode:
4922 case SCmode:
4923 case DFmode:
4924 case DCmode:
4925 case TFmode:
4926 case SDmode:
4927 case DDmode:
4928 case TDmode:
4929 return gen_rtx_REG (mode, FIRST_SSE_REG);
4930 case XFmode:
4931 case XCmode:
4932 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4933 case TCmode:
4934 return NULL;
4935 default:
4936 return gen_rtx_REG (mode, AX_REG);
4940 ret = construct_container (mode, orig_mode, valtype, 1,
4941 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4942 x86_64_int_return_registers, 0);
4944 /* For zero sized structures, construct_container returns NULL, but we
4945 need to keep rest of compiler happy by returning meaningful value. */
4946 if (!ret)
4947 ret = gen_rtx_REG (orig_mode, AX_REG);
4949 return ret;
4952 static rtx
4953 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4955 unsigned int regno = AX_REG;
4957 if (TARGET_SSE)
4959 switch (GET_MODE_SIZE (mode))
4961 case 16:
4962 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4963 && !COMPLEX_MODE_P (mode))
4964 regno = FIRST_SSE_REG;
4965 break;
4966 case 8:
4967 case 4:
4968 if (mode == SFmode || mode == DFmode)
4969 regno = FIRST_SSE_REG;
4970 break;
4971 default:
4972 break;
4975 return gen_rtx_REG (orig_mode, regno);
4978 static rtx
4979 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4980 enum machine_mode orig_mode, enum machine_mode mode)
4982 const_tree fn, fntype;
4984 fn = NULL_TREE;
4985 if (fntype_or_decl && DECL_P (fntype_or_decl))
4986 fn = fntype_or_decl;
4987 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4989 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
4990 return function_value_ms_64 (orig_mode, mode);
4991 else if (TARGET_64BIT)
4992 return function_value_64 (orig_mode, mode, valtype);
4993 else
4994 return function_value_32 (orig_mode, mode, fntype, fn);
4997 static rtx
4998 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4999 bool outgoing ATTRIBUTE_UNUSED)
5001 enum machine_mode mode, orig_mode;
5003 orig_mode = TYPE_MODE (valtype);
5004 mode = type_natural_mode (valtype);
5005 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
5009 ix86_libcall_value (enum machine_mode mode)
5011 return ix86_function_value_1 (NULL, NULL, mode, mode);
5014 /* Return true iff type is returned in memory. */
5016 static int ATTRIBUTE_UNUSED
5017 return_in_memory_32 (const_tree type, enum machine_mode mode)
5019 HOST_WIDE_INT size;
5021 if (mode == BLKmode)
5022 return 1;
5024 size = int_size_in_bytes (type);
5026 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
5027 return 0;
5029 if (VECTOR_MODE_P (mode) || mode == TImode)
5031 /* User-created vectors small enough to fit in EAX. */
5032 if (size < 8)
5033 return 0;
5035 /* MMX/3dNow values are returned in MM0,
5036 except when it doesn't exits. */
5037 if (size == 8)
5038 return (TARGET_MMX ? 0 : 1);
5040 /* SSE values are returned in XMM0, except when it doesn't exist. */
5041 if (size == 16)
5042 return (TARGET_SSE ? 0 : 1);
5045 if (mode == XFmode)
5046 return 0;
5048 if (size > 12)
5049 return 1;
5050 return 0;
5053 static int ATTRIBUTE_UNUSED
5054 return_in_memory_64 (const_tree type, enum machine_mode mode)
5056 int needed_intregs, needed_sseregs;
5057 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
5060 static int ATTRIBUTE_UNUSED
5061 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
5063 HOST_WIDE_INT size = int_size_in_bytes (type);
5065 /* __m128 is returned in xmm0. */
5066 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5067 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
5068 return 0;
5070 /* Otherwise, the size must be exactly in [1248]. */
5071 return (size != 1 && size != 2 && size != 4 && size != 8);
5074 static bool
5075 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5077 #ifdef SUBTARGET_RETURN_IN_MEMORY
5078 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
5079 #else
5080 const enum machine_mode mode = type_natural_mode (type);
5082 if (TARGET_64BIT_MS_ABI)
5083 return return_in_memory_ms_64 (type, mode);
5084 else if (TARGET_64BIT)
5085 return return_in_memory_64 (type, mode);
5086 else
5087 return return_in_memory_32 (type, mode);
5088 #endif
5091 /* Return false iff TYPE is returned in memory. This version is used
5092 on Solaris 10. It is similar to the generic ix86_return_in_memory,
5093 but differs notably in that when MMX is available, 8-byte vectors
5094 are returned in memory, rather than in MMX registers. */
5096 bool
5097 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5099 int size;
5100 enum machine_mode mode = type_natural_mode (type);
5102 if (TARGET_64BIT)
5103 return return_in_memory_64 (type, mode);
5105 if (mode == BLKmode)
5106 return 1;
5108 size = int_size_in_bytes (type);
5110 if (VECTOR_MODE_P (mode))
5112 /* Return in memory only if MMX registers *are* available. This
5113 seems backwards, but it is consistent with the existing
5114 Solaris x86 ABI. */
5115 if (size == 8)
5116 return TARGET_MMX;
5117 if (size == 16)
5118 return !TARGET_SSE;
5120 else if (mode == TImode)
5121 return !TARGET_SSE;
5122 else if (mode == XFmode)
5123 return 0;
5125 return size > 12;
5128 /* When returning SSE vector types, we have a choice of either
5129 (1) being abi incompatible with a -march switch, or
5130 (2) generating an error.
5131 Given no good solution, I think the safest thing is one warning.
5132 The user won't be able to use -Werror, but....
5134 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
5135 called in response to actually generating a caller or callee that
5136 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
5137 via aggregate_value_p for general type probing from tree-ssa. */
5139 static rtx
5140 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
5142 static bool warnedsse, warnedmmx;
5144 if (!TARGET_64BIT && type)
5146 /* Look at the return type of the function, not the function type. */
5147 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
5149 if (!TARGET_SSE && !warnedsse)
5151 if (mode == TImode
5152 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5154 warnedsse = true;
5155 warning (0, "SSE vector return without SSE enabled "
5156 "changes the ABI");
5160 if (!TARGET_MMX && !warnedmmx)
5162 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5164 warnedmmx = true;
5165 warning (0, "MMX vector return without MMX enabled "
5166 "changes the ABI");
5171 return NULL;
5175 /* Create the va_list data type. */
5177 static tree
5178 ix86_build_builtin_va_list (void)
5180 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5182 /* For i386 we use plain pointer to argument area. */
5183 if (!TARGET_64BIT || ix86_cfun_abi () == MS_ABI)
5184 return build_pointer_type (char_type_node);
5186 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5187 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5189 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5190 unsigned_type_node);
5191 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5192 unsigned_type_node);
5193 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5194 ptr_type_node);
5195 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5196 ptr_type_node);
5198 va_list_gpr_counter_field = f_gpr;
5199 va_list_fpr_counter_field = f_fpr;
5201 DECL_FIELD_CONTEXT (f_gpr) = record;
5202 DECL_FIELD_CONTEXT (f_fpr) = record;
5203 DECL_FIELD_CONTEXT (f_ovf) = record;
5204 DECL_FIELD_CONTEXT (f_sav) = record;
5206 TREE_CHAIN (record) = type_decl;
5207 TYPE_NAME (record) = type_decl;
5208 TYPE_FIELDS (record) = f_gpr;
5209 TREE_CHAIN (f_gpr) = f_fpr;
5210 TREE_CHAIN (f_fpr) = f_ovf;
5211 TREE_CHAIN (f_ovf) = f_sav;
5213 layout_type (record);
5215 /* The correct type is an array type of one element. */
5216 return build_array_type (record, build_index_type (size_zero_node));
5219 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
5221 static void
5222 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5224 rtx save_area, mem;
5225 rtx label;
5226 rtx label_ref;
5227 rtx tmp_reg;
5228 rtx nsse_reg;
5229 alias_set_type set;
5230 int i;
5231 int regparm = ix86_regparm;
5233 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
5234 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
5236 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5237 return;
5239 /* Indicate to allocate space on the stack for varargs save area. */
5240 ix86_save_varrargs_registers = 1;
5241 /* We need 16-byte stack alignment to save SSE registers. If user
5242 asked for lower preferred_stack_boundary, lets just hope that he knows
5243 what he is doing and won't varargs SSE values.
5245 We also may end up assuming that only 64bit values are stored in SSE
5246 register let some floating point program work. */
5247 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5248 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5250 save_area = frame_pointer_rtx;
5251 set = get_varargs_alias_set ();
5253 for (i = cum->regno;
5254 i < regparm
5255 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5256 i++)
5258 mem = gen_rtx_MEM (Pmode,
5259 plus_constant (save_area, i * UNITS_PER_WORD));
5260 MEM_NOTRAP_P (mem) = 1;
5261 set_mem_alias_set (mem, set);
5262 emit_move_insn (mem, gen_rtx_REG (Pmode,
5263 x86_64_int_parameter_registers[i]));
5266 if (cum->sse_nregs && cfun->va_list_fpr_size)
5268 /* Now emit code to save SSE registers. The AX parameter contains number
5269 of SSE parameter registers used to call this function. We use
5270 sse_prologue_save insn template that produces computed jump across
5271 SSE saves. We need some preparation work to get this working. */
5273 label = gen_label_rtx ();
5274 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5276 /* Compute address to jump to :
5277 label - eax*4 + nnamed_sse_arguments*4 */
5278 tmp_reg = gen_reg_rtx (Pmode);
5279 nsse_reg = gen_reg_rtx (Pmode);
5280 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5281 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5282 gen_rtx_MULT (Pmode, nsse_reg,
5283 GEN_INT (4))));
5284 if (cum->sse_regno)
5285 emit_move_insn
5286 (nsse_reg,
5287 gen_rtx_CONST (DImode,
5288 gen_rtx_PLUS (DImode,
5289 label_ref,
5290 GEN_INT (cum->sse_regno * 4))));
5291 else
5292 emit_move_insn (nsse_reg, label_ref);
5293 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5295 /* Compute address of memory block we save into. We always use pointer
5296 pointing 127 bytes after first byte to store - this is needed to keep
5297 instruction size limited by 4 bytes. */
5298 tmp_reg = gen_reg_rtx (Pmode);
5299 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5300 plus_constant (save_area,
5301 8 * X86_64_REGPARM_MAX + 127)));
5302 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5303 MEM_NOTRAP_P (mem) = 1;
5304 set_mem_alias_set (mem, set);
5305 set_mem_align (mem, BITS_PER_WORD);
5307 /* And finally do the dirty job! */
5308 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5309 GEN_INT (cum->sse_regno), label));
5313 static void
5314 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5316 alias_set_type set = get_varargs_alias_set ();
5317 int i;
5319 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
5321 rtx reg, mem;
5323 mem = gen_rtx_MEM (Pmode,
5324 plus_constant (virtual_incoming_args_rtx,
5325 i * UNITS_PER_WORD));
5326 MEM_NOTRAP_P (mem) = 1;
5327 set_mem_alias_set (mem, set);
5329 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5330 emit_move_insn (mem, reg);
5334 static void
5335 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5336 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5337 int no_rtl)
5339 CUMULATIVE_ARGS next_cum;
5340 tree fntype;
5342 /* This argument doesn't appear to be used anymore. Which is good,
5343 because the old code here didn't suppress rtl generation. */
5344 gcc_assert (!no_rtl);
5346 if (!TARGET_64BIT)
5347 return;
5349 fntype = TREE_TYPE (current_function_decl);
5351 /* For varargs, we do not want to skip the dummy va_dcl argument.
5352 For stdargs, we do want to skip the last named argument. */
5353 next_cum = *cum;
5354 if (stdarg_p (fntype))
5355 function_arg_advance (&next_cum, mode, type, 1);
5357 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5358 setup_incoming_varargs_ms_64 (&next_cum);
5359 else
5360 setup_incoming_varargs_64 (&next_cum);
5363 /* Implement va_start. */
5365 static void
5366 ix86_va_start (tree valist, rtx nextarg)
5368 HOST_WIDE_INT words, n_gpr, n_fpr;
5369 tree f_gpr, f_fpr, f_ovf, f_sav;
5370 tree gpr, fpr, ovf, sav, t;
5371 tree type;
5373 /* Only 64bit target needs something special. */
5374 if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
5376 std_expand_builtin_va_start (valist, nextarg);
5377 return;
5380 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5381 f_fpr = TREE_CHAIN (f_gpr);
5382 f_ovf = TREE_CHAIN (f_fpr);
5383 f_sav = TREE_CHAIN (f_ovf);
5385 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5386 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5387 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5388 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5389 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5391 /* Count number of gp and fp argument registers used. */
5392 words = crtl->args.info.words;
5393 n_gpr = crtl->args.info.regno;
5394 n_fpr = crtl->args.info.sse_regno;
5396 if (cfun->va_list_gpr_size)
5398 type = TREE_TYPE (gpr);
5399 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5400 build_int_cst (type, n_gpr * 8));
5401 TREE_SIDE_EFFECTS (t) = 1;
5402 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5405 if (cfun->va_list_fpr_size)
5407 type = TREE_TYPE (fpr);
5408 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5409 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
5410 TREE_SIDE_EFFECTS (t) = 1;
5411 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5414 /* Find the overflow area. */
5415 type = TREE_TYPE (ovf);
5416 t = make_tree (type, virtual_incoming_args_rtx);
5417 if (words != 0)
5418 t = build2 (POINTER_PLUS_EXPR, type, t,
5419 size_int (words * UNITS_PER_WORD));
5420 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5421 TREE_SIDE_EFFECTS (t) = 1;
5422 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5424 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5426 /* Find the register save area.
5427 Prologue of the function save it right above stack frame. */
5428 type = TREE_TYPE (sav);
5429 t = make_tree (type, frame_pointer_rtx);
5430 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5431 TREE_SIDE_EFFECTS (t) = 1;
5432 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5436 /* Implement va_arg. */
5438 static tree
5439 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5441 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5442 tree f_gpr, f_fpr, f_ovf, f_sav;
5443 tree gpr, fpr, ovf, sav, t;
5444 int size, rsize;
5445 tree lab_false, lab_over = NULL_TREE;
5446 tree addr, t2;
5447 rtx container;
5448 int indirect_p = 0;
5449 tree ptrtype;
5450 enum machine_mode nat_mode;
5452 /* Only 64bit target needs something special. */
5453 if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
5454 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5456 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5457 f_fpr = TREE_CHAIN (f_gpr);
5458 f_ovf = TREE_CHAIN (f_fpr);
5459 f_sav = TREE_CHAIN (f_ovf);
5461 valist = build_va_arg_indirect_ref (valist);
5462 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5463 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5464 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5465 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5467 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5468 if (indirect_p)
5469 type = build_pointer_type (type);
5470 size = int_size_in_bytes (type);
5471 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5473 nat_mode = type_natural_mode (type);
5474 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5475 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5476 intreg, 0);
5478 /* Pull the value out of the saved registers. */
5480 addr = create_tmp_var (ptr_type_node, "addr");
5481 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5483 if (container)
5485 int needed_intregs, needed_sseregs;
5486 bool need_temp;
5487 tree int_addr, sse_addr;
5489 lab_false = create_artificial_label ();
5490 lab_over = create_artificial_label ();
5492 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5494 need_temp = (!REG_P (container)
5495 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5496 || TYPE_ALIGN (type) > 128));
5498 /* In case we are passing structure, verify that it is consecutive block
5499 on the register save area. If not we need to do moves. */
5500 if (!need_temp && !REG_P (container))
5502 /* Verify that all registers are strictly consecutive */
5503 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5505 int i;
5507 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5509 rtx slot = XVECEXP (container, 0, i);
5510 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5511 || INTVAL (XEXP (slot, 1)) != i * 16)
5512 need_temp = 1;
5515 else
5517 int i;
5519 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5521 rtx slot = XVECEXP (container, 0, i);
5522 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5523 || INTVAL (XEXP (slot, 1)) != i * 8)
5524 need_temp = 1;
5528 if (!need_temp)
5530 int_addr = addr;
5531 sse_addr = addr;
5533 else
5535 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5536 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5537 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5538 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5541 /* First ensure that we fit completely in registers. */
5542 if (needed_intregs)
5544 t = build_int_cst (TREE_TYPE (gpr),
5545 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
5546 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5547 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5548 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5549 gimplify_and_add (t, pre_p);
5551 if (needed_sseregs)
5553 t = build_int_cst (TREE_TYPE (fpr),
5554 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5555 + X86_64_REGPARM_MAX * 8);
5556 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5557 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5558 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5559 gimplify_and_add (t, pre_p);
5562 /* Compute index to start of area used for integer regs. */
5563 if (needed_intregs)
5565 /* int_addr = gpr + sav; */
5566 t = fold_convert (sizetype, gpr);
5567 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5568 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5569 gimplify_and_add (t, pre_p);
5571 if (needed_sseregs)
5573 /* sse_addr = fpr + sav; */
5574 t = fold_convert (sizetype, fpr);
5575 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5576 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5577 gimplify_and_add (t, pre_p);
5579 if (need_temp)
5581 int i;
5582 tree temp = create_tmp_var (type, "va_arg_tmp");
5584 /* addr = &temp; */
5585 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5586 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5587 gimplify_and_add (t, pre_p);
5589 for (i = 0; i < XVECLEN (container, 0); i++)
5591 rtx slot = XVECEXP (container, 0, i);
5592 rtx reg = XEXP (slot, 0);
5593 enum machine_mode mode = GET_MODE (reg);
5594 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5595 tree addr_type = build_pointer_type (piece_type);
5596 tree src_addr, src;
5597 int src_offset;
5598 tree dest_addr, dest;
5600 if (SSE_REGNO_P (REGNO (reg)))
5602 src_addr = sse_addr;
5603 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5605 else
5607 src_addr = int_addr;
5608 src_offset = REGNO (reg) * 8;
5610 src_addr = fold_convert (addr_type, src_addr);
5611 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5612 size_int (src_offset));
5613 src = build_va_arg_indirect_ref (src_addr);
5615 dest_addr = fold_convert (addr_type, addr);
5616 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5617 size_int (INTVAL (XEXP (slot, 1))));
5618 dest = build_va_arg_indirect_ref (dest_addr);
5620 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5621 gimplify_and_add (t, pre_p);
5625 if (needed_intregs)
5627 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5628 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5629 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5630 gimplify_and_add (t, pre_p);
5632 if (needed_sseregs)
5634 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5635 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5636 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5637 gimplify_and_add (t, pre_p);
5640 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5641 gimplify_and_add (t, pre_p);
5643 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5644 append_to_statement_list (t, pre_p);
5647 /* ... otherwise out of the overflow area. */
5649 /* Care for on-stack alignment if needed. */
5650 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5651 || integer_zerop (TYPE_SIZE (type)))
5652 t = ovf;
5653 else
5655 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5656 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5657 size_int (align - 1));
5658 t = fold_convert (sizetype, t);
5659 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5660 size_int (-align));
5661 t = fold_convert (TREE_TYPE (ovf), t);
5663 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5665 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5666 gimplify_and_add (t2, pre_p);
5668 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5669 size_int (rsize * UNITS_PER_WORD));
5670 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5671 gimplify_and_add (t, pre_p);
5673 if (container)
5675 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5676 append_to_statement_list (t, pre_p);
5679 ptrtype = build_pointer_type (type);
5680 addr = fold_convert (ptrtype, addr);
5682 if (indirect_p)
5683 addr = build_va_arg_indirect_ref (addr);
5684 return build_va_arg_indirect_ref (addr);
5687 /* Return nonzero if OPNUM's MEM should be matched
5688 in movabs* patterns. */
5691 ix86_check_movabs (rtx insn, int opnum)
5693 rtx set, mem;
5695 set = PATTERN (insn);
5696 if (GET_CODE (set) == PARALLEL)
5697 set = XVECEXP (set, 0, 0);
5698 gcc_assert (GET_CODE (set) == SET);
5699 mem = XEXP (set, opnum);
5700 while (GET_CODE (mem) == SUBREG)
5701 mem = SUBREG_REG (mem);
5702 gcc_assert (MEM_P (mem));
5703 return (volatile_ok || !MEM_VOLATILE_P (mem));
5706 /* Initialize the table of extra 80387 mathematical constants. */
5708 static void
5709 init_ext_80387_constants (void)
5711 static const char * cst[5] =
5713 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5714 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5715 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5716 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5717 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5719 int i;
5721 for (i = 0; i < 5; i++)
5723 real_from_string (&ext_80387_constants_table[i], cst[i]);
5724 /* Ensure each constant is rounded to XFmode precision. */
5725 real_convert (&ext_80387_constants_table[i],
5726 XFmode, &ext_80387_constants_table[i]);
5729 ext_80387_constants_init = 1;
5732 /* Return true if the constant is something that can be loaded with
5733 a special instruction. */
5736 standard_80387_constant_p (rtx x)
5738 enum machine_mode mode = GET_MODE (x);
5740 REAL_VALUE_TYPE r;
5742 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5743 return -1;
5745 if (x == CONST0_RTX (mode))
5746 return 1;
5747 if (x == CONST1_RTX (mode))
5748 return 2;
5750 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5752 /* For XFmode constants, try to find a special 80387 instruction when
5753 optimizing for size or on those CPUs that benefit from them. */
5754 if (mode == XFmode
5755 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5757 int i;
5759 if (! ext_80387_constants_init)
5760 init_ext_80387_constants ();
5762 for (i = 0; i < 5; i++)
5763 if (real_identical (&r, &ext_80387_constants_table[i]))
5764 return i + 3;
5767 /* Load of the constant -0.0 or -1.0 will be split as
5768 fldz;fchs or fld1;fchs sequence. */
5769 if (real_isnegzero (&r))
5770 return 8;
5771 if (real_identical (&r, &dconstm1))
5772 return 9;
5774 return 0;
5777 /* Return the opcode of the special instruction to be used to load
5778 the constant X. */
5780 const char *
5781 standard_80387_constant_opcode (rtx x)
5783 switch (standard_80387_constant_p (x))
5785 case 1:
5786 return "fldz";
5787 case 2:
5788 return "fld1";
5789 case 3:
5790 return "fldlg2";
5791 case 4:
5792 return "fldln2";
5793 case 5:
5794 return "fldl2e";
5795 case 6:
5796 return "fldl2t";
5797 case 7:
5798 return "fldpi";
5799 case 8:
5800 case 9:
5801 return "#";
5802 default:
5803 gcc_unreachable ();
5807 /* Return the CONST_DOUBLE representing the 80387 constant that is
5808 loaded by the specified special instruction. The argument IDX
5809 matches the return value from standard_80387_constant_p. */
5812 standard_80387_constant_rtx (int idx)
5814 int i;
5816 if (! ext_80387_constants_init)
5817 init_ext_80387_constants ();
5819 switch (idx)
5821 case 3:
5822 case 4:
5823 case 5:
5824 case 6:
5825 case 7:
5826 i = idx - 3;
5827 break;
5829 default:
5830 gcc_unreachable ();
5833 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5834 XFmode);
5837 /* Return 1 if mode is a valid mode for sse. */
5838 static int
5839 standard_sse_mode_p (enum machine_mode mode)
5841 switch (mode)
5843 case V16QImode:
5844 case V8HImode:
5845 case V4SImode:
5846 case V2DImode:
5847 case V4SFmode:
5848 case V2DFmode:
5849 return 1;
5851 default:
5852 return 0;
5856 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5859 standard_sse_constant_p (rtx x)
5861 enum machine_mode mode = GET_MODE (x);
5863 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5864 return 1;
5865 if (vector_all_ones_operand (x, mode)
5866 && standard_sse_mode_p (mode))
5867 return TARGET_SSE2 ? 2 : -1;
5869 return 0;
5872 /* Return the opcode of the special instruction to be used to load
5873 the constant X. */
5875 const char *
5876 standard_sse_constant_opcode (rtx insn, rtx x)
5878 switch (standard_sse_constant_p (x))
5880 case 1:
5881 if (get_attr_mode (insn) == MODE_V4SF)
5882 return "xorps\t%0, %0";
5883 else if (get_attr_mode (insn) == MODE_V2DF)
5884 return "xorpd\t%0, %0";
5885 else
5886 return "pxor\t%0, %0";
5887 case 2:
5888 return "pcmpeqd\t%0, %0";
5890 gcc_unreachable ();
5893 /* Returns 1 if OP contains a symbol reference */
5896 symbolic_reference_mentioned_p (rtx op)
5898 const char *fmt;
5899 int i;
5901 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5902 return 1;
5904 fmt = GET_RTX_FORMAT (GET_CODE (op));
5905 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5907 if (fmt[i] == 'E')
5909 int j;
5911 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5912 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5913 return 1;
5916 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5917 return 1;
5920 return 0;
5923 /* Return 1 if it is appropriate to emit `ret' instructions in the
5924 body of a function. Do this only if the epilogue is simple, needing a
5925 couple of insns. Prior to reloading, we can't tell how many registers
5926 must be saved, so return 0 then. Return 0 if there is no frame
5927 marker to de-allocate. */
5930 ix86_can_use_return_insn_p (void)
5932 struct ix86_frame frame;
5934 if (! reload_completed || frame_pointer_needed)
5935 return 0;
5937 /* Don't allow more than 32 pop, since that's all we can do
5938 with one instruction. */
5939 if (crtl->args.pops_args
5940 && crtl->args.size >= 32768)
5941 return 0;
5943 ix86_compute_frame_layout (&frame);
5944 return frame.to_allocate == 0 && frame.nregs == 0;
5947 /* Value should be nonzero if functions must have frame pointers.
5948 Zero means the frame pointer need not be set up (and parms may
5949 be accessed via the stack pointer) in functions that seem suitable. */
5952 ix86_frame_pointer_required (void)
5954 /* If we accessed previous frames, then the generated code expects
5955 to be able to access the saved ebp value in our frame. */
5956 if (cfun->machine->accesses_prev_frame)
5957 return 1;
5959 /* Several x86 os'es need a frame pointer for other reasons,
5960 usually pertaining to setjmp. */
5961 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5962 return 1;
5964 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5965 the frame pointer by default. Turn it back on now if we've not
5966 got a leaf function. */
5967 if (TARGET_OMIT_LEAF_FRAME_POINTER
5968 && (!current_function_is_leaf
5969 || ix86_current_function_calls_tls_descriptor))
5970 return 1;
5972 if (crtl->profile)
5973 return 1;
5975 return 0;
5978 /* Record that the current function accesses previous call frames. */
5980 void
5981 ix86_setup_frame_addresses (void)
5983 cfun->machine->accesses_prev_frame = 1;
5986 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5987 # define USE_HIDDEN_LINKONCE 1
5988 #else
5989 # define USE_HIDDEN_LINKONCE 0
5990 #endif
5992 static int pic_labels_used;
5994 /* Fills in the label name that should be used for a pc thunk for
5995 the given register. */
5997 static void
5998 get_pc_thunk_name (char name[32], unsigned int regno)
6000 gcc_assert (!TARGET_64BIT);
6002 if (USE_HIDDEN_LINKONCE)
6003 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
6004 else
6005 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6009 /* This function generates code for -fpic that loads %ebx with
6010 the return address of the caller and then returns. */
6012 void
6013 ix86_file_end (void)
6015 rtx xops[2];
6016 int regno;
6018 for (regno = 0; regno < 8; ++regno)
6020 char name[32];
6022 if (! ((pic_labels_used >> regno) & 1))
6023 continue;
6025 get_pc_thunk_name (name, regno);
6027 #if TARGET_MACHO
6028 if (TARGET_MACHO)
6030 switch_to_section (darwin_sections[text_coal_section]);
6031 fputs ("\t.weak_definition\t", asm_out_file);
6032 assemble_name (asm_out_file, name);
6033 fputs ("\n\t.private_extern\t", asm_out_file);
6034 assemble_name (asm_out_file, name);
6035 fputs ("\n", asm_out_file);
6036 ASM_OUTPUT_LABEL (asm_out_file, name);
6038 else
6039 #endif
6040 if (USE_HIDDEN_LINKONCE)
6042 tree decl;
6044 decl = build_decl (FUNCTION_DECL, get_identifier (name),
6045 error_mark_node);
6046 TREE_PUBLIC (decl) = 1;
6047 TREE_STATIC (decl) = 1;
6048 DECL_ONE_ONLY (decl) = 1;
6050 (*targetm.asm_out.unique_section) (decl, 0);
6051 switch_to_section (get_named_section (decl, NULL, 0));
6053 (*targetm.asm_out.globalize_label) (asm_out_file, name);
6054 fputs ("\t.hidden\t", asm_out_file);
6055 assemble_name (asm_out_file, name);
6056 fputc ('\n', asm_out_file);
6057 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6059 else
6061 switch_to_section (text_section);
6062 ASM_OUTPUT_LABEL (asm_out_file, name);
6065 xops[0] = gen_rtx_REG (Pmode, regno);
6066 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6067 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6068 output_asm_insn ("ret", xops);
6071 if (NEED_INDICATE_EXEC_STACK)
6072 file_end_indicate_exec_stack ();
6075 /* Emit code for the SET_GOT patterns. */
6077 const char *
6078 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
6080 rtx xops[3];
6082 xops[0] = dest;
6084 if (TARGET_VXWORKS_RTP && flag_pic)
6086 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6087 xops[2] = gen_rtx_MEM (Pmode,
6088 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6089 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6091 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6092 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6093 an unadorned address. */
6094 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6095 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6096 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6097 return "";
6100 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6102 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
6104 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6106 if (!flag_pic)
6107 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6108 else
6109 output_asm_insn ("call\t%a2", xops);
6111 #if TARGET_MACHO
6112 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6113 is what will be referenced by the Mach-O PIC subsystem. */
6114 if (!label)
6115 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6116 #endif
6118 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6119 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6121 if (flag_pic)
6122 output_asm_insn ("pop%z0\t%0", xops);
6124 else
6126 char name[32];
6127 get_pc_thunk_name (name, REGNO (dest));
6128 pic_labels_used |= 1 << REGNO (dest);
6130 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6131 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6132 output_asm_insn ("call\t%X2", xops);
6133 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6134 is what will be referenced by the Mach-O PIC subsystem. */
6135 #if TARGET_MACHO
6136 if (!label)
6137 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6138 else
6139 targetm.asm_out.internal_label (asm_out_file, "L",
6140 CODE_LABEL_NUMBER (label));
6141 #endif
6144 if (TARGET_MACHO)
6145 return "";
6147 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
6148 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6149 else
6150 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6152 return "";
6155 /* Generate an "push" pattern for input ARG. */
6157 static rtx
6158 gen_push (rtx arg)
6160 return gen_rtx_SET (VOIDmode,
6161 gen_rtx_MEM (Pmode,
6162 gen_rtx_PRE_DEC (Pmode,
6163 stack_pointer_rtx)),
6164 arg);
6167 /* Return >= 0 if there is an unused call-clobbered register available
6168 for the entire function. */
6170 static unsigned int
6171 ix86_select_alt_pic_regnum (void)
6173 if (current_function_is_leaf && !crtl->profile
6174 && !ix86_current_function_calls_tls_descriptor)
6176 int i;
6177 for (i = 2; i >= 0; --i)
6178 if (!df_regs_ever_live_p (i))
6179 return i;
6182 return INVALID_REGNUM;
6185 /* Return 1 if we need to save REGNO. */
6186 static int
6187 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6189 if (pic_offset_table_rtx
6190 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6191 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6192 || crtl->profile
6193 || crtl->calls_eh_return
6194 || crtl->uses_const_pool))
6196 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6197 return 0;
6198 return 1;
6201 if (crtl->calls_eh_return && maybe_eh_return)
6203 unsigned i;
6204 for (i = 0; ; i++)
6206 unsigned test = EH_RETURN_DATA_REGNO (i);
6207 if (test == INVALID_REGNUM)
6208 break;
6209 if (test == regno)
6210 return 1;
6214 if (cfun->machine->force_align_arg_pointer
6215 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6216 return 1;
6218 return (df_regs_ever_live_p (regno)
6219 && !call_used_regs[regno]
6220 && !fixed_regs[regno]
6221 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6224 /* Return number of registers to be saved on the stack. */
6226 static int
6227 ix86_nsaved_regs (void)
6229 int nregs = 0;
6230 int regno;
6232 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6233 if (ix86_save_reg (regno, true))
6234 nregs++;
6235 return nregs;
6238 /* Return the offset between two registers, one to be eliminated, and the other
6239 its replacement, at the start of a routine. */
6241 HOST_WIDE_INT
6242 ix86_initial_elimination_offset (int from, int to)
6244 struct ix86_frame frame;
6245 ix86_compute_frame_layout (&frame);
6247 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6248 return frame.hard_frame_pointer_offset;
6249 else if (from == FRAME_POINTER_REGNUM
6250 && to == HARD_FRAME_POINTER_REGNUM)
6251 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6252 else
6254 gcc_assert (to == STACK_POINTER_REGNUM);
6256 if (from == ARG_POINTER_REGNUM)
6257 return frame.stack_pointer_offset;
6259 gcc_assert (from == FRAME_POINTER_REGNUM);
6260 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6264 /* Fill structure ix86_frame about frame of currently computed function. */
6266 static void
6267 ix86_compute_frame_layout (struct ix86_frame *frame)
6269 HOST_WIDE_INT total_size;
6270 unsigned int stack_alignment_needed;
6271 HOST_WIDE_INT offset;
6272 unsigned int preferred_alignment;
6273 HOST_WIDE_INT size = get_frame_size ();
6275 frame->nregs = ix86_nsaved_regs ();
6276 total_size = size;
6278 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6279 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6281 /* During reload iteration the amount of registers saved can change.
6282 Recompute the value as needed. Do not recompute when amount of registers
6283 didn't change as reload does multiple calls to the function and does not
6284 expect the decision to change within single iteration. */
6285 if (!optimize_size
6286 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6288 int count = frame->nregs;
6290 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6291 /* The fast prologue uses move instead of push to save registers. This
6292 is significantly longer, but also executes faster as modern hardware
6293 can execute the moves in parallel, but can't do that for push/pop.
6295 Be careful about choosing what prologue to emit: When function takes
6296 many instructions to execute we may use slow version as well as in
6297 case function is known to be outside hot spot (this is known with
6298 feedback only). Weight the size of function by number of registers
6299 to save as it is cheap to use one or two push instructions but very
6300 slow to use many of them. */
6301 if (count)
6302 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6303 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6304 || (flag_branch_probabilities
6305 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6306 cfun->machine->use_fast_prologue_epilogue = false;
6307 else
6308 cfun->machine->use_fast_prologue_epilogue
6309 = !expensive_function_p (count);
6311 if (TARGET_PROLOGUE_USING_MOVE
6312 && cfun->machine->use_fast_prologue_epilogue)
6313 frame->save_regs_using_mov = true;
6314 else
6315 frame->save_regs_using_mov = false;
6318 /* Skip return address and saved base pointer. */
6319 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6321 frame->hard_frame_pointer_offset = offset;
6323 /* Do some sanity checking of stack_alignment_needed and
6324 preferred_alignment, since i386 port is the only using those features
6325 that may break easily. */
6327 gcc_assert (!size || stack_alignment_needed);
6328 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6329 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6330 gcc_assert (stack_alignment_needed
6331 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6333 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6334 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6336 /* Register save area */
6337 offset += frame->nregs * UNITS_PER_WORD;
6339 /* Va-arg area */
6340 if (ix86_save_varrargs_registers)
6342 offset += X86_64_VARARGS_SIZE;
6343 frame->va_arg_size = X86_64_VARARGS_SIZE;
6345 else
6346 frame->va_arg_size = 0;
6348 /* Align start of frame for local function. */
6349 frame->padding1 = ((offset + stack_alignment_needed - 1)
6350 & -stack_alignment_needed) - offset;
6352 offset += frame->padding1;
6354 /* Frame pointer points here. */
6355 frame->frame_pointer_offset = offset;
6357 offset += size;
6359 /* Add outgoing arguments area. Can be skipped if we eliminated
6360 all the function calls as dead code.
6361 Skipping is however impossible when function calls alloca. Alloca
6362 expander assumes that last crtl->outgoing_args_size
6363 of stack frame are unused. */
6364 if (ACCUMULATE_OUTGOING_ARGS
6365 && (!current_function_is_leaf || cfun->calls_alloca
6366 || ix86_current_function_calls_tls_descriptor))
6368 offset += crtl->outgoing_args_size;
6369 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6371 else
6372 frame->outgoing_arguments_size = 0;
6374 /* Align stack boundary. Only needed if we're calling another function
6375 or using alloca. */
6376 if (!current_function_is_leaf || cfun->calls_alloca
6377 || ix86_current_function_calls_tls_descriptor)
6378 frame->padding2 = ((offset + preferred_alignment - 1)
6379 & -preferred_alignment) - offset;
6380 else
6381 frame->padding2 = 0;
6383 offset += frame->padding2;
6385 /* We've reached end of stack frame. */
6386 frame->stack_pointer_offset = offset;
6388 /* Size prologue needs to allocate. */
6389 frame->to_allocate =
6390 (size + frame->padding1 + frame->padding2
6391 + frame->outgoing_arguments_size + frame->va_arg_size);
6393 if ((!frame->to_allocate && frame->nregs <= 1)
6394 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6395 frame->save_regs_using_mov = false;
6397 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
6398 && current_function_is_leaf
6399 && !ix86_current_function_calls_tls_descriptor)
6401 frame->red_zone_size = frame->to_allocate;
6402 if (frame->save_regs_using_mov)
6403 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6404 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6405 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6407 else
6408 frame->red_zone_size = 0;
6409 frame->to_allocate -= frame->red_zone_size;
6410 frame->stack_pointer_offset -= frame->red_zone_size;
6411 #if 0
6412 fprintf (stderr, "\n");
6413 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6414 fprintf (stderr, "size: %ld\n", (long)size);
6415 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6416 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6417 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6418 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6419 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6420 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6421 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6422 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6423 (long)frame->hard_frame_pointer_offset);
6424 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6425 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6426 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6427 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6428 #endif
6431 /* Emit code to save registers in the prologue. */
6433 static void
6434 ix86_emit_save_regs (void)
6436 unsigned int regno;
6437 rtx insn;
6439 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6440 if (ix86_save_reg (regno, true))
6442 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6443 RTX_FRAME_RELATED_P (insn) = 1;
6447 /* Emit code to save registers using MOV insns. First register
6448 is restored from POINTER + OFFSET. */
6449 static void
6450 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6452 unsigned int regno;
6453 rtx insn;
6455 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6456 if (ix86_save_reg (regno, true))
6458 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6459 Pmode, offset),
6460 gen_rtx_REG (Pmode, regno));
6461 RTX_FRAME_RELATED_P (insn) = 1;
6462 offset += UNITS_PER_WORD;
6466 /* Expand prologue or epilogue stack adjustment.
6467 The pattern exist to put a dependency on all ebp-based memory accesses.
6468 STYLE should be negative if instructions should be marked as frame related,
6469 zero if %r11 register is live and cannot be freely used and positive
6470 otherwise. */
6472 static void
6473 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6475 rtx insn;
6477 if (! TARGET_64BIT)
6478 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6479 else if (x86_64_immediate_operand (offset, DImode))
6480 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6481 else
6483 rtx r11;
6484 /* r11 is used by indirect sibcall return as well, set before the
6485 epilogue and used after the epilogue. ATM indirect sibcall
6486 shouldn't be used together with huge frame sizes in one
6487 function because of the frame_size check in sibcall.c. */
6488 gcc_assert (style);
6489 r11 = gen_rtx_REG (DImode, R11_REG);
6490 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6491 if (style < 0)
6492 RTX_FRAME_RELATED_P (insn) = 1;
6493 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6494 offset));
6496 if (style < 0)
6497 RTX_FRAME_RELATED_P (insn) = 1;
6500 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6502 static rtx
6503 ix86_internal_arg_pointer (void)
6505 bool has_force_align_arg_pointer =
6506 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6507 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6508 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6509 && DECL_NAME (current_function_decl)
6510 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6511 && DECL_FILE_SCOPE_P (current_function_decl))
6512 || ix86_force_align_arg_pointer
6513 || has_force_align_arg_pointer)
6515 /* Nested functions can't realign the stack due to a register
6516 conflict. */
6517 if (DECL_CONTEXT (current_function_decl)
6518 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6520 if (ix86_force_align_arg_pointer)
6521 warning (0, "-mstackrealign ignored for nested functions");
6522 if (has_force_align_arg_pointer)
6523 error ("%s not supported for nested functions",
6524 ix86_force_align_arg_pointer_string);
6525 return virtual_incoming_args_rtx;
6527 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6528 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6530 else
6531 return virtual_incoming_args_rtx;
6534 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6535 This is called from dwarf2out.c to emit call frame instructions
6536 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6537 static void
6538 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6540 rtx unspec = SET_SRC (pattern);
6541 gcc_assert (GET_CODE (unspec) == UNSPEC);
6543 switch (index)
6545 case UNSPEC_REG_SAVE:
6546 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6547 SET_DEST (pattern));
6548 break;
6549 case UNSPEC_DEF_CFA:
6550 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6551 INTVAL (XVECEXP (unspec, 0, 0)));
6552 break;
6553 default:
6554 gcc_unreachable ();
6558 /* Expand the prologue into a bunch of separate insns. */
6560 void
6561 ix86_expand_prologue (void)
6563 rtx insn;
6564 bool pic_reg_used;
6565 struct ix86_frame frame;
6566 HOST_WIDE_INT allocate;
6568 ix86_compute_frame_layout (&frame);
6570 if (cfun->machine->force_align_arg_pointer)
6572 rtx x, y;
6574 /* Grab the argument pointer. */
6575 x = plus_constant (stack_pointer_rtx, 4);
6576 y = cfun->machine->force_align_arg_pointer;
6577 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6578 RTX_FRAME_RELATED_P (insn) = 1;
6580 /* The unwind info consists of two parts: install the fafp as the cfa,
6581 and record the fafp as the "save register" of the stack pointer.
6582 The later is there in order that the unwinder can see where it
6583 should restore the stack pointer across the and insn. */
6584 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6585 x = gen_rtx_SET (VOIDmode, y, x);
6586 RTX_FRAME_RELATED_P (x) = 1;
6587 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6588 UNSPEC_REG_SAVE);
6589 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6590 RTX_FRAME_RELATED_P (y) = 1;
6591 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6592 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6593 REG_NOTES (insn) = x;
6595 /* Align the stack. */
6596 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6597 GEN_INT (-16)));
6599 /* And here we cheat like madmen with the unwind info. We force the
6600 cfa register back to sp+4, which is exactly what it was at the
6601 start of the function. Re-pushing the return address results in
6602 the return at the same spot relative to the cfa, and thus is
6603 correct wrt the unwind info. */
6604 x = cfun->machine->force_align_arg_pointer;
6605 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6606 insn = emit_insn (gen_push (x));
6607 RTX_FRAME_RELATED_P (insn) = 1;
6609 x = GEN_INT (4);
6610 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6611 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6612 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6613 REG_NOTES (insn) = x;
6616 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6617 slower on all targets. Also sdb doesn't like it. */
6619 if (frame_pointer_needed)
6621 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6622 RTX_FRAME_RELATED_P (insn) = 1;
6624 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6625 RTX_FRAME_RELATED_P (insn) = 1;
6628 allocate = frame.to_allocate;
6630 if (!frame.save_regs_using_mov)
6631 ix86_emit_save_regs ();
6632 else
6633 allocate += frame.nregs * UNITS_PER_WORD;
6635 /* When using red zone we may start register saving before allocating
6636 the stack frame saving one cycle of the prologue. However I will
6637 avoid doing this if I am going to have to probe the stack since
6638 at least on x86_64 the stack probe can turn into a call that clobbers
6639 a red zone location */
6640 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
6641 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6642 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6643 : stack_pointer_rtx,
6644 -frame.nregs * UNITS_PER_WORD);
6646 if (allocate == 0)
6648 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6649 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6650 GEN_INT (-allocate), -1);
6651 else
6653 /* Only valid for Win32. */
6654 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6655 bool eax_live;
6656 rtx t;
6658 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
6660 if (cfun->machine->call_abi == MS_ABI)
6661 eax_live = false;
6662 else
6663 eax_live = ix86_eax_live_at_start_p ();
6665 if (eax_live)
6667 emit_insn (gen_push (eax));
6668 allocate -= UNITS_PER_WORD;
6671 emit_move_insn (eax, GEN_INT (allocate));
6673 if (TARGET_64BIT)
6674 insn = gen_allocate_stack_worker_64 (eax);
6675 else
6676 insn = gen_allocate_stack_worker_32 (eax);
6677 insn = emit_insn (insn);
6678 RTX_FRAME_RELATED_P (insn) = 1;
6679 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6680 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6681 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6682 t, REG_NOTES (insn));
6684 if (eax_live)
6686 if (frame_pointer_needed)
6687 t = plus_constant (hard_frame_pointer_rtx,
6688 allocate
6689 - frame.to_allocate
6690 - frame.nregs * UNITS_PER_WORD);
6691 else
6692 t = plus_constant (stack_pointer_rtx, allocate);
6693 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6697 if (frame.save_regs_using_mov
6698 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
6699 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6701 if (!frame_pointer_needed || !frame.to_allocate)
6702 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6703 else
6704 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6705 -frame.nregs * UNITS_PER_WORD);
6708 pic_reg_used = false;
6709 if (pic_offset_table_rtx
6710 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6711 || crtl->profile))
6713 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6715 if (alt_pic_reg_used != INVALID_REGNUM)
6716 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6718 pic_reg_used = true;
6721 if (pic_reg_used)
6723 if (TARGET_64BIT)
6725 if (ix86_cmodel == CM_LARGE_PIC)
6727 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6728 rtx label = gen_label_rtx ();
6729 emit_label (label);
6730 LABEL_PRESERVE_P (label) = 1;
6731 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6732 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6733 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6734 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6735 pic_offset_table_rtx, tmp_reg));
6737 else
6738 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6740 else
6741 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6744 /* Prevent function calls from being scheduled before the call to mcount.
6745 In the pic_reg_used case, make sure that the got load isn't deleted. */
6746 if (crtl->profile)
6748 if (pic_reg_used)
6749 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6750 emit_insn (gen_blockage ());
6753 /* Emit cld instruction if stringops are used in the function. */
6754 if (TARGET_CLD && ix86_current_function_needs_cld)
6755 emit_insn (gen_cld ());
6758 /* Emit code to restore saved registers using MOV insns. First register
6759 is restored from POINTER + OFFSET. */
6760 static void
6761 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6762 int maybe_eh_return)
6764 int regno;
6765 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6767 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6768 if (ix86_save_reg (regno, maybe_eh_return))
6770 /* Ensure that adjust_address won't be forced to produce pointer
6771 out of range allowed by x86-64 instruction set. */
6772 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6774 rtx r11;
6776 r11 = gen_rtx_REG (DImode, R11_REG);
6777 emit_move_insn (r11, GEN_INT (offset));
6778 emit_insn (gen_adddi3 (r11, r11, pointer));
6779 base_address = gen_rtx_MEM (Pmode, r11);
6780 offset = 0;
6782 emit_move_insn (gen_rtx_REG (Pmode, regno),
6783 adjust_address (base_address, Pmode, offset));
6784 offset += UNITS_PER_WORD;
6788 /* Restore function stack, frame, and registers. */
6790 void
6791 ix86_expand_epilogue (int style)
6793 int regno;
6794 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6795 struct ix86_frame frame;
6796 HOST_WIDE_INT offset;
6798 ix86_compute_frame_layout (&frame);
6800 /* Calculate start of saved registers relative to ebp. Special care
6801 must be taken for the normal return case of a function using
6802 eh_return: the eax and edx registers are marked as saved, but not
6803 restored along this path. */
6804 offset = frame.nregs;
6805 if (crtl->calls_eh_return && style != 2)
6806 offset -= 2;
6807 offset *= -UNITS_PER_WORD;
6809 /* If we're only restoring one register and sp is not valid then
6810 using a move instruction to restore the register since it's
6811 less work than reloading sp and popping the register.
6813 The default code result in stack adjustment using add/lea instruction,
6814 while this code results in LEAVE instruction (or discrete equivalent),
6815 so it is profitable in some other cases as well. Especially when there
6816 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6817 and there is exactly one register to pop. This heuristic may need some
6818 tuning in future. */
6819 if ((!sp_valid && frame.nregs <= 1)
6820 || (TARGET_EPILOGUE_USING_MOVE
6821 && cfun->machine->use_fast_prologue_epilogue
6822 && (frame.nregs > 1 || frame.to_allocate))
6823 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6824 || (frame_pointer_needed && TARGET_USE_LEAVE
6825 && cfun->machine->use_fast_prologue_epilogue
6826 && frame.nregs == 1)
6827 || crtl->calls_eh_return)
6829 /* Restore registers. We can use ebp or esp to address the memory
6830 locations. If both are available, default to ebp, since offsets
6831 are known to be small. Only exception is esp pointing directly to the
6832 end of block of saved registers, where we may simplify addressing
6833 mode. */
6835 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6836 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6837 frame.to_allocate, style == 2);
6838 else
6839 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6840 offset, style == 2);
6842 /* eh_return epilogues need %ecx added to the stack pointer. */
6843 if (style == 2)
6845 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6847 if (frame_pointer_needed)
6849 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6850 tmp = plus_constant (tmp, UNITS_PER_WORD);
6851 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6853 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6854 emit_move_insn (hard_frame_pointer_rtx, tmp);
6856 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6857 const0_rtx, style);
6859 else
6861 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6862 tmp = plus_constant (tmp, (frame.to_allocate
6863 + frame.nregs * UNITS_PER_WORD));
6864 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6867 else if (!frame_pointer_needed)
6868 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6869 GEN_INT (frame.to_allocate
6870 + frame.nregs * UNITS_PER_WORD),
6871 style);
6872 /* If not an i386, mov & pop is faster than "leave". */
6873 else if (TARGET_USE_LEAVE || optimize_size
6874 || !cfun->machine->use_fast_prologue_epilogue)
6875 emit_insn ((*ix86_gen_leave) ());
6876 else
6878 pro_epilogue_adjust_stack (stack_pointer_rtx,
6879 hard_frame_pointer_rtx,
6880 const0_rtx, style);
6882 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
6885 else
6887 /* First step is to deallocate the stack frame so that we can
6888 pop the registers. */
6889 if (!sp_valid)
6891 gcc_assert (frame_pointer_needed);
6892 pro_epilogue_adjust_stack (stack_pointer_rtx,
6893 hard_frame_pointer_rtx,
6894 GEN_INT (offset), style);
6896 else if (frame.to_allocate)
6897 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6898 GEN_INT (frame.to_allocate), style);
6900 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6901 if (ix86_save_reg (regno, false))
6902 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
6903 if (frame_pointer_needed)
6905 /* Leave results in shorter dependency chains on CPUs that are
6906 able to grok it fast. */
6907 if (TARGET_USE_LEAVE)
6908 emit_insn ((*ix86_gen_leave) ());
6909 else
6910 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
6914 if (cfun->machine->force_align_arg_pointer)
6916 emit_insn (gen_addsi3 (stack_pointer_rtx,
6917 cfun->machine->force_align_arg_pointer,
6918 GEN_INT (-4)));
6921 /* Sibcall epilogues don't want a return instruction. */
6922 if (style == 0)
6923 return;
6925 if (crtl->args.pops_args && crtl->args.size)
6927 rtx popc = GEN_INT (crtl->args.pops_args);
6929 /* i386 can only pop 64K bytes. If asked to pop more, pop
6930 return address, do explicit add, and jump indirectly to the
6931 caller. */
6933 if (crtl->args.pops_args >= 65536)
6935 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6937 /* There is no "pascal" calling convention in any 64bit ABI. */
6938 gcc_assert (!TARGET_64BIT);
6940 emit_insn (gen_popsi1 (ecx));
6941 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6942 emit_jump_insn (gen_return_indirect_internal (ecx));
6944 else
6945 emit_jump_insn (gen_return_pop_internal (popc));
6947 else
6948 emit_jump_insn (gen_return_internal ());
6951 /* Reset from the function's potential modifications. */
6953 static void
6954 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6955 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6957 if (pic_offset_table_rtx)
6958 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6959 #if TARGET_MACHO
6960 /* Mach-O doesn't support labels at the end of objects, so if
6961 it looks like we might want one, insert a NOP. */
6963 rtx insn = get_last_insn ();
6964 while (insn
6965 && NOTE_P (insn)
6966 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6967 insn = PREV_INSN (insn);
6968 if (insn
6969 && (LABEL_P (insn)
6970 || (NOTE_P (insn)
6971 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6972 fputs ("\tnop\n", file);
6974 #endif
6978 /* Extract the parts of an RTL expression that is a valid memory address
6979 for an instruction. Return 0 if the structure of the address is
6980 grossly off. Return -1 if the address contains ASHIFT, so it is not
6981 strictly valid, but still used for computing length of lea instruction. */
6984 ix86_decompose_address (rtx addr, struct ix86_address *out)
6986 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6987 rtx base_reg, index_reg;
6988 HOST_WIDE_INT scale = 1;
6989 rtx scale_rtx = NULL_RTX;
6990 int retval = 1;
6991 enum ix86_address_seg seg = SEG_DEFAULT;
6993 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6994 base = addr;
6995 else if (GET_CODE (addr) == PLUS)
6997 rtx addends[4], op;
6998 int n = 0, i;
7000 op = addr;
7003 if (n >= 4)
7004 return 0;
7005 addends[n++] = XEXP (op, 1);
7006 op = XEXP (op, 0);
7008 while (GET_CODE (op) == PLUS);
7009 if (n >= 4)
7010 return 0;
7011 addends[n] = op;
7013 for (i = n; i >= 0; --i)
7015 op = addends[i];
7016 switch (GET_CODE (op))
7018 case MULT:
7019 if (index)
7020 return 0;
7021 index = XEXP (op, 0);
7022 scale_rtx = XEXP (op, 1);
7023 break;
7025 case UNSPEC:
7026 if (XINT (op, 1) == UNSPEC_TP
7027 && TARGET_TLS_DIRECT_SEG_REFS
7028 && seg == SEG_DEFAULT)
7029 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
7030 else
7031 return 0;
7032 break;
7034 case REG:
7035 case SUBREG:
7036 if (!base)
7037 base = op;
7038 else if (!index)
7039 index = op;
7040 else
7041 return 0;
7042 break;
7044 case CONST:
7045 case CONST_INT:
7046 case SYMBOL_REF:
7047 case LABEL_REF:
7048 if (disp)
7049 return 0;
7050 disp = op;
7051 break;
7053 default:
7054 return 0;
7058 else if (GET_CODE (addr) == MULT)
7060 index = XEXP (addr, 0); /* index*scale */
7061 scale_rtx = XEXP (addr, 1);
7063 else if (GET_CODE (addr) == ASHIFT)
7065 rtx tmp;
7067 /* We're called for lea too, which implements ashift on occasion. */
7068 index = XEXP (addr, 0);
7069 tmp = XEXP (addr, 1);
7070 if (!CONST_INT_P (tmp))
7071 return 0;
7072 scale = INTVAL (tmp);
7073 if ((unsigned HOST_WIDE_INT) scale > 3)
7074 return 0;
7075 scale = 1 << scale;
7076 retval = -1;
7078 else
7079 disp = addr; /* displacement */
7081 /* Extract the integral value of scale. */
7082 if (scale_rtx)
7084 if (!CONST_INT_P (scale_rtx))
7085 return 0;
7086 scale = INTVAL (scale_rtx);
7089 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
7090 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
7092 /* Allow arg pointer and stack pointer as index if there is not scaling. */
7093 if (base_reg && index_reg && scale == 1
7094 && (index_reg == arg_pointer_rtx
7095 || index_reg == frame_pointer_rtx
7096 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
7098 rtx tmp;
7099 tmp = base, base = index, index = tmp;
7100 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
7103 /* Special case: %ebp cannot be encoded as a base without a displacement. */
7104 if ((base_reg == hard_frame_pointer_rtx
7105 || base_reg == frame_pointer_rtx
7106 || base_reg == arg_pointer_rtx) && !disp)
7107 disp = const0_rtx;
7109 /* Special case: on K6, [%esi] makes the instruction vector decoded.
7110 Avoid this by transforming to [%esi+0]. */
7111 if (TARGET_K6 && !optimize_size
7112 && base_reg && !index_reg && !disp
7113 && REG_P (base_reg)
7114 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
7115 disp = const0_rtx;
7117 /* Special case: encode reg+reg instead of reg*2. */
7118 if (!base && index && scale && scale == 2)
7119 base = index, base_reg = index_reg, scale = 1;
7121 /* Special case: scaling cannot be encoded without base or displacement. */
7122 if (!base && !disp && index && scale != 1)
7123 disp = const0_rtx;
7125 out->base = base;
7126 out->index = index;
7127 out->disp = disp;
7128 out->scale = scale;
7129 out->seg = seg;
7131 return retval;
7134 /* Return cost of the memory address x.
7135 For i386, it is better to use a complex address than let gcc copy
7136 the address into a reg and make a new pseudo. But not if the address
7137 requires to two regs - that would mean more pseudos with longer
7138 lifetimes. */
7139 static int
7140 ix86_address_cost (rtx x)
7142 struct ix86_address parts;
7143 int cost = 1;
7144 int ok = ix86_decompose_address (x, &parts);
7146 gcc_assert (ok);
7148 if (parts.base && GET_CODE (parts.base) == SUBREG)
7149 parts.base = SUBREG_REG (parts.base);
7150 if (parts.index && GET_CODE (parts.index) == SUBREG)
7151 parts.index = SUBREG_REG (parts.index);
7153 /* Attempt to minimize number of registers in the address. */
7154 if ((parts.base
7155 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
7156 || (parts.index
7157 && (!REG_P (parts.index)
7158 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7159 cost++;
7161 if (parts.base
7162 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7163 && parts.index
7164 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7165 && parts.base != parts.index)
7166 cost++;
7168 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
7169 since it's predecode logic can't detect the length of instructions
7170 and it degenerates to vector decoded. Increase cost of such
7171 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
7172 to split such addresses or even refuse such addresses at all.
7174 Following addressing modes are affected:
7175 [base+scale*index]
7176 [scale*index+disp]
7177 [base+index]
7179 The first and last case may be avoidable by explicitly coding the zero in
7180 memory address, but I don't have AMD-K6 machine handy to check this
7181 theory. */
7183 if (TARGET_K6
7184 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7185 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7186 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7187 cost += 10;
7189 return cost;
7192 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7193 this is used for to form addresses to local data when -fPIC is in
7194 use. */
7196 static bool
7197 darwin_local_data_pic (rtx disp)
7199 if (GET_CODE (disp) == MINUS)
7201 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7202 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7203 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7205 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7206 if (! strcmp (sym_name, "<pic base>"))
7207 return true;
7211 return false;
7214 /* Determine if a given RTX is a valid constant. We already know this
7215 satisfies CONSTANT_P. */
7217 bool
7218 legitimate_constant_p (rtx x)
7220 switch (GET_CODE (x))
7222 case CONST:
7223 x = XEXP (x, 0);
7225 if (GET_CODE (x) == PLUS)
7227 if (!CONST_INT_P (XEXP (x, 1)))
7228 return false;
7229 x = XEXP (x, 0);
7232 if (TARGET_MACHO && darwin_local_data_pic (x))
7233 return true;
7235 /* Only some unspecs are valid as "constants". */
7236 if (GET_CODE (x) == UNSPEC)
7237 switch (XINT (x, 1))
7239 case UNSPEC_GOT:
7240 case UNSPEC_GOTOFF:
7241 case UNSPEC_PLTOFF:
7242 return TARGET_64BIT;
7243 case UNSPEC_TPOFF:
7244 case UNSPEC_NTPOFF:
7245 x = XVECEXP (x, 0, 0);
7246 return (GET_CODE (x) == SYMBOL_REF
7247 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7248 case UNSPEC_DTPOFF:
7249 x = XVECEXP (x, 0, 0);
7250 return (GET_CODE (x) == SYMBOL_REF
7251 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7252 default:
7253 return false;
7256 /* We must have drilled down to a symbol. */
7257 if (GET_CODE (x) == LABEL_REF)
7258 return true;
7259 if (GET_CODE (x) != SYMBOL_REF)
7260 return false;
7261 /* FALLTHRU */
7263 case SYMBOL_REF:
7264 /* TLS symbols are never valid. */
7265 if (SYMBOL_REF_TLS_MODEL (x))
7266 return false;
7268 /* DLLIMPORT symbols are never valid. */
7269 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7270 && SYMBOL_REF_DLLIMPORT_P (x))
7271 return false;
7272 break;
7274 case CONST_DOUBLE:
7275 if (GET_MODE (x) == TImode
7276 && x != CONST0_RTX (TImode)
7277 && !TARGET_64BIT)
7278 return false;
7279 break;
7281 case CONST_VECTOR:
7282 if (x == CONST0_RTX (GET_MODE (x)))
7283 return true;
7284 return false;
7286 default:
7287 break;
7290 /* Otherwise we handle everything else in the move patterns. */
7291 return true;
7294 /* Determine if it's legal to put X into the constant pool. This
7295 is not possible for the address of thread-local symbols, which
7296 is checked above. */
7298 static bool
7299 ix86_cannot_force_const_mem (rtx x)
7301 /* We can always put integral constants and vectors in memory. */
7302 switch (GET_CODE (x))
7304 case CONST_INT:
7305 case CONST_DOUBLE:
7306 case CONST_VECTOR:
7307 return false;
7309 default:
7310 break;
7312 return !legitimate_constant_p (x);
7315 /* Determine if a given RTX is a valid constant address. */
7317 bool
7318 constant_address_p (rtx x)
7320 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7323 /* Nonzero if the constant value X is a legitimate general operand
7324 when generating PIC code. It is given that flag_pic is on and
7325 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7327 bool
7328 legitimate_pic_operand_p (rtx x)
7330 rtx inner;
7332 switch (GET_CODE (x))
7334 case CONST:
7335 inner = XEXP (x, 0);
7336 if (GET_CODE (inner) == PLUS
7337 && CONST_INT_P (XEXP (inner, 1)))
7338 inner = XEXP (inner, 0);
7340 /* Only some unspecs are valid as "constants". */
7341 if (GET_CODE (inner) == UNSPEC)
7342 switch (XINT (inner, 1))
7344 case UNSPEC_GOT:
7345 case UNSPEC_GOTOFF:
7346 case UNSPEC_PLTOFF:
7347 return TARGET_64BIT;
7348 case UNSPEC_TPOFF:
7349 x = XVECEXP (inner, 0, 0);
7350 return (GET_CODE (x) == SYMBOL_REF
7351 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7352 default:
7353 return false;
7355 /* FALLTHRU */
7357 case SYMBOL_REF:
7358 case LABEL_REF:
7359 return legitimate_pic_address_disp_p (x);
7361 default:
7362 return true;
7366 /* Determine if a given CONST RTX is a valid memory displacement
7367 in PIC mode. */
7370 legitimate_pic_address_disp_p (rtx disp)
7372 bool saw_plus;
7374 /* In 64bit mode we can allow direct addresses of symbols and labels
7375 when they are not dynamic symbols. */
7376 if (TARGET_64BIT)
7378 rtx op0 = disp, op1;
7380 switch (GET_CODE (disp))
7382 case LABEL_REF:
7383 return true;
7385 case CONST:
7386 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7387 break;
7388 op0 = XEXP (XEXP (disp, 0), 0);
7389 op1 = XEXP (XEXP (disp, 0), 1);
7390 if (!CONST_INT_P (op1)
7391 || INTVAL (op1) >= 16*1024*1024
7392 || INTVAL (op1) < -16*1024*1024)
7393 break;
7394 if (GET_CODE (op0) == LABEL_REF)
7395 return true;
7396 if (GET_CODE (op0) != SYMBOL_REF)
7397 break;
7398 /* FALLTHRU */
7400 case SYMBOL_REF:
7401 /* TLS references should always be enclosed in UNSPEC. */
7402 if (SYMBOL_REF_TLS_MODEL (op0))
7403 return false;
7404 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7405 && ix86_cmodel != CM_LARGE_PIC)
7406 return true;
7407 break;
7409 default:
7410 break;
7413 if (GET_CODE (disp) != CONST)
7414 return 0;
7415 disp = XEXP (disp, 0);
7417 if (TARGET_64BIT)
7419 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7420 of GOT tables. We should not need these anyway. */
7421 if (GET_CODE (disp) != UNSPEC
7422 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7423 && XINT (disp, 1) != UNSPEC_GOTOFF
7424 && XINT (disp, 1) != UNSPEC_PLTOFF))
7425 return 0;
7427 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7428 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7429 return 0;
7430 return 1;
7433 saw_plus = false;
7434 if (GET_CODE (disp) == PLUS)
7436 if (!CONST_INT_P (XEXP (disp, 1)))
7437 return 0;
7438 disp = XEXP (disp, 0);
7439 saw_plus = true;
7442 if (TARGET_MACHO && darwin_local_data_pic (disp))
7443 return 1;
7445 if (GET_CODE (disp) != UNSPEC)
7446 return 0;
7448 switch (XINT (disp, 1))
7450 case UNSPEC_GOT:
7451 if (saw_plus)
7452 return false;
7453 /* We need to check for both symbols and labels because VxWorks loads
7454 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7455 details. */
7456 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7457 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7458 case UNSPEC_GOTOFF:
7459 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7460 While ABI specify also 32bit relocation but we don't produce it in
7461 small PIC model at all. */
7462 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7463 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7464 && !TARGET_64BIT)
7465 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7466 return false;
7467 case UNSPEC_GOTTPOFF:
7468 case UNSPEC_GOTNTPOFF:
7469 case UNSPEC_INDNTPOFF:
7470 if (saw_plus)
7471 return false;
7472 disp = XVECEXP (disp, 0, 0);
7473 return (GET_CODE (disp) == SYMBOL_REF
7474 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7475 case UNSPEC_NTPOFF:
7476 disp = XVECEXP (disp, 0, 0);
7477 return (GET_CODE (disp) == SYMBOL_REF
7478 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7479 case UNSPEC_DTPOFF:
7480 disp = XVECEXP (disp, 0, 0);
7481 return (GET_CODE (disp) == SYMBOL_REF
7482 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7485 return 0;
7488 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7489 memory address for an instruction. The MODE argument is the machine mode
7490 for the MEM expression that wants to use this address.
7492 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7493 convert common non-canonical forms to canonical form so that they will
7494 be recognized. */
7497 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7498 rtx addr, int strict)
7500 struct ix86_address parts;
7501 rtx base, index, disp;
7502 HOST_WIDE_INT scale;
7503 const char *reason = NULL;
7504 rtx reason_rtx = NULL_RTX;
7506 if (ix86_decompose_address (addr, &parts) <= 0)
7508 reason = "decomposition failed";
7509 goto report_error;
7512 base = parts.base;
7513 index = parts.index;
7514 disp = parts.disp;
7515 scale = parts.scale;
7517 /* Validate base register.
7519 Don't allow SUBREG's that span more than a word here. It can lead to spill
7520 failures when the base is one word out of a two word structure, which is
7521 represented internally as a DImode int. */
7523 if (base)
7525 rtx reg;
7526 reason_rtx = base;
7528 if (REG_P (base))
7529 reg = base;
7530 else if (GET_CODE (base) == SUBREG
7531 && REG_P (SUBREG_REG (base))
7532 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7533 <= UNITS_PER_WORD)
7534 reg = SUBREG_REG (base);
7535 else
7537 reason = "base is not a register";
7538 goto report_error;
7541 if (GET_MODE (base) != Pmode)
7543 reason = "base is not in Pmode";
7544 goto report_error;
7547 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7548 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7550 reason = "base is not valid";
7551 goto report_error;
7555 /* Validate index register.
7557 Don't allow SUBREG's that span more than a word here -- same as above. */
7559 if (index)
7561 rtx reg;
7562 reason_rtx = index;
7564 if (REG_P (index))
7565 reg = index;
7566 else if (GET_CODE (index) == SUBREG
7567 && REG_P (SUBREG_REG (index))
7568 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7569 <= UNITS_PER_WORD)
7570 reg = SUBREG_REG (index);
7571 else
7573 reason = "index is not a register";
7574 goto report_error;
7577 if (GET_MODE (index) != Pmode)
7579 reason = "index is not in Pmode";
7580 goto report_error;
7583 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7584 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7586 reason = "index is not valid";
7587 goto report_error;
7591 /* Validate scale factor. */
7592 if (scale != 1)
7594 reason_rtx = GEN_INT (scale);
7595 if (!index)
7597 reason = "scale without index";
7598 goto report_error;
7601 if (scale != 2 && scale != 4 && scale != 8)
7603 reason = "scale is not a valid multiplier";
7604 goto report_error;
7608 /* Validate displacement. */
7609 if (disp)
7611 reason_rtx = disp;
7613 if (GET_CODE (disp) == CONST
7614 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7615 switch (XINT (XEXP (disp, 0), 1))
7617 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7618 used. While ABI specify also 32bit relocations, we don't produce
7619 them at all and use IP relative instead. */
7620 case UNSPEC_GOT:
7621 case UNSPEC_GOTOFF:
7622 gcc_assert (flag_pic);
7623 if (!TARGET_64BIT)
7624 goto is_legitimate_pic;
7625 reason = "64bit address unspec";
7626 goto report_error;
7628 case UNSPEC_GOTPCREL:
7629 gcc_assert (flag_pic);
7630 goto is_legitimate_pic;
7632 case UNSPEC_GOTTPOFF:
7633 case UNSPEC_GOTNTPOFF:
7634 case UNSPEC_INDNTPOFF:
7635 case UNSPEC_NTPOFF:
7636 case UNSPEC_DTPOFF:
7637 break;
7639 default:
7640 reason = "invalid address unspec";
7641 goto report_error;
7644 else if (SYMBOLIC_CONST (disp)
7645 && (flag_pic
7646 || (TARGET_MACHO
7647 #if TARGET_MACHO
7648 && MACHOPIC_INDIRECT
7649 && !machopic_operand_p (disp)
7650 #endif
7654 is_legitimate_pic:
7655 if (TARGET_64BIT && (index || base))
7657 /* foo@dtpoff(%rX) is ok. */
7658 if (GET_CODE (disp) != CONST
7659 || GET_CODE (XEXP (disp, 0)) != PLUS
7660 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7661 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7662 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7663 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7665 reason = "non-constant pic memory reference";
7666 goto report_error;
7669 else if (! legitimate_pic_address_disp_p (disp))
7671 reason = "displacement is an invalid pic construct";
7672 goto report_error;
7675 /* This code used to verify that a symbolic pic displacement
7676 includes the pic_offset_table_rtx register.
7678 While this is good idea, unfortunately these constructs may
7679 be created by "adds using lea" optimization for incorrect
7680 code like:
7682 int a;
7683 int foo(int i)
7685 return *(&a+i);
7688 This code is nonsensical, but results in addressing
7689 GOT table with pic_offset_table_rtx base. We can't
7690 just refuse it easily, since it gets matched by
7691 "addsi3" pattern, that later gets split to lea in the
7692 case output register differs from input. While this
7693 can be handled by separate addsi pattern for this case
7694 that never results in lea, this seems to be easier and
7695 correct fix for crash to disable this test. */
7697 else if (GET_CODE (disp) != LABEL_REF
7698 && !CONST_INT_P (disp)
7699 && (GET_CODE (disp) != CONST
7700 || !legitimate_constant_p (disp))
7701 && (GET_CODE (disp) != SYMBOL_REF
7702 || !legitimate_constant_p (disp)))
7704 reason = "displacement is not constant";
7705 goto report_error;
7707 else if (TARGET_64BIT
7708 && !x86_64_immediate_operand (disp, VOIDmode))
7710 reason = "displacement is out of range";
7711 goto report_error;
7715 /* Everything looks valid. */
7716 return TRUE;
7718 report_error:
7719 return FALSE;
7722 /* Return a unique alias set for the GOT. */
7724 static alias_set_type
7725 ix86_GOT_alias_set (void)
7727 static alias_set_type set = -1;
7728 if (set == -1)
7729 set = new_alias_set ();
7730 return set;
7733 /* Return a legitimate reference for ORIG (an address) using the
7734 register REG. If REG is 0, a new pseudo is generated.
7736 There are two types of references that must be handled:
7738 1. Global data references must load the address from the GOT, via
7739 the PIC reg. An insn is emitted to do this load, and the reg is
7740 returned.
7742 2. Static data references, constant pool addresses, and code labels
7743 compute the address as an offset from the GOT, whose base is in
7744 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7745 differentiate them from global data objects. The returned
7746 address is the PIC reg + an unspec constant.
7748 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7749 reg also appears in the address. */
7751 static rtx
7752 legitimize_pic_address (rtx orig, rtx reg)
7754 rtx addr = orig;
7755 rtx new_rtx = orig;
7756 rtx base;
7758 #if TARGET_MACHO
7759 if (TARGET_MACHO && !TARGET_64BIT)
7761 if (reg == 0)
7762 reg = gen_reg_rtx (Pmode);
7763 /* Use the generic Mach-O PIC machinery. */
7764 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7766 #endif
7768 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7769 new_rtx = addr;
7770 else if (TARGET_64BIT
7771 && ix86_cmodel != CM_SMALL_PIC
7772 && gotoff_operand (addr, Pmode))
7774 rtx tmpreg;
7775 /* This symbol may be referenced via a displacement from the PIC
7776 base address (@GOTOFF). */
7778 if (reload_in_progress)
7779 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7780 if (GET_CODE (addr) == CONST)
7781 addr = XEXP (addr, 0);
7782 if (GET_CODE (addr) == PLUS)
7784 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7785 UNSPEC_GOTOFF);
7786 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7788 else
7789 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7790 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7791 if (!reg)
7792 tmpreg = gen_reg_rtx (Pmode);
7793 else
7794 tmpreg = reg;
7795 emit_move_insn (tmpreg, new_rtx);
7797 if (reg != 0)
7799 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7800 tmpreg, 1, OPTAB_DIRECT);
7801 new_rtx = reg;
7803 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7805 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7807 /* This symbol may be referenced via a displacement from the PIC
7808 base address (@GOTOFF). */
7810 if (reload_in_progress)
7811 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7812 if (GET_CODE (addr) == CONST)
7813 addr = XEXP (addr, 0);
7814 if (GET_CODE (addr) == PLUS)
7816 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7817 UNSPEC_GOTOFF);
7818 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7820 else
7821 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7822 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7823 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7825 if (reg != 0)
7827 emit_move_insn (reg, new_rtx);
7828 new_rtx = reg;
7831 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7832 /* We can't use @GOTOFF for text labels on VxWorks;
7833 see gotoff_operand. */
7834 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7836 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7838 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7839 return legitimize_dllimport_symbol (addr, true);
7840 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7841 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7842 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7844 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7845 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7849 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7851 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7852 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7853 new_rtx = gen_const_mem (Pmode, new_rtx);
7854 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7856 if (reg == 0)
7857 reg = gen_reg_rtx (Pmode);
7858 /* Use directly gen_movsi, otherwise the address is loaded
7859 into register for CSE. We don't want to CSE this addresses,
7860 instead we CSE addresses from the GOT table, so skip this. */
7861 emit_insn (gen_movsi (reg, new_rtx));
7862 new_rtx = reg;
7864 else
7866 /* This symbol must be referenced via a load from the
7867 Global Offset Table (@GOT). */
7869 if (reload_in_progress)
7870 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7871 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7872 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7873 if (TARGET_64BIT)
7874 new_rtx = force_reg (Pmode, new_rtx);
7875 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7876 new_rtx = gen_const_mem (Pmode, new_rtx);
7877 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7879 if (reg == 0)
7880 reg = gen_reg_rtx (Pmode);
7881 emit_move_insn (reg, new_rtx);
7882 new_rtx = reg;
7885 else
7887 if (CONST_INT_P (addr)
7888 && !x86_64_immediate_operand (addr, VOIDmode))
7890 if (reg)
7892 emit_move_insn (reg, addr);
7893 new_rtx = reg;
7895 else
7896 new_rtx = force_reg (Pmode, addr);
7898 else if (GET_CODE (addr) == CONST)
7900 addr = XEXP (addr, 0);
7902 /* We must match stuff we generate before. Assume the only
7903 unspecs that can get here are ours. Not that we could do
7904 anything with them anyway.... */
7905 if (GET_CODE (addr) == UNSPEC
7906 || (GET_CODE (addr) == PLUS
7907 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7908 return orig;
7909 gcc_assert (GET_CODE (addr) == PLUS);
7911 if (GET_CODE (addr) == PLUS)
7913 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7915 /* Check first to see if this is a constant offset from a @GOTOFF
7916 symbol reference. */
7917 if (gotoff_operand (op0, Pmode)
7918 && CONST_INT_P (op1))
7920 if (!TARGET_64BIT)
7922 if (reload_in_progress)
7923 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7924 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7925 UNSPEC_GOTOFF);
7926 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7927 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7928 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7930 if (reg != 0)
7932 emit_move_insn (reg, new_rtx);
7933 new_rtx = reg;
7936 else
7938 if (INTVAL (op1) < -16*1024*1024
7939 || INTVAL (op1) >= 16*1024*1024)
7941 if (!x86_64_immediate_operand (op1, Pmode))
7942 op1 = force_reg (Pmode, op1);
7943 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7947 else
7949 base = legitimize_pic_address (XEXP (addr, 0), reg);
7950 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7951 base == reg ? NULL_RTX : reg);
7953 if (CONST_INT_P (new_rtx))
7954 new_rtx = plus_constant (base, INTVAL (new_rtx));
7955 else
7957 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7959 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7960 new_rtx = XEXP (new_rtx, 1);
7962 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7967 return new_rtx;
7970 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7972 static rtx
7973 get_thread_pointer (int to_reg)
7975 rtx tp, reg, insn;
7977 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7978 if (!to_reg)
7979 return tp;
7981 reg = gen_reg_rtx (Pmode);
7982 insn = gen_rtx_SET (VOIDmode, reg, tp);
7983 insn = emit_insn (insn);
7985 return reg;
7988 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7989 false if we expect this to be used for a memory address and true if
7990 we expect to load the address into a register. */
7992 static rtx
7993 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7995 rtx dest, base, off, pic, tp;
7996 int type;
7998 switch (model)
8000 case TLS_MODEL_GLOBAL_DYNAMIC:
8001 dest = gen_reg_rtx (Pmode);
8002 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8004 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8006 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
8008 start_sequence ();
8009 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
8010 insns = get_insns ();
8011 end_sequence ();
8013 RTL_CONST_CALL_P (insns) = 1;
8014 emit_libcall_block (insns, dest, rax, x);
8016 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8017 emit_insn (gen_tls_global_dynamic_64 (dest, x));
8018 else
8019 emit_insn (gen_tls_global_dynamic_32 (dest, x));
8021 if (TARGET_GNU2_TLS)
8023 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
8025 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8027 break;
8029 case TLS_MODEL_LOCAL_DYNAMIC:
8030 base = gen_reg_rtx (Pmode);
8031 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8033 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8035 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
8037 start_sequence ();
8038 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
8039 insns = get_insns ();
8040 end_sequence ();
8042 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
8043 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
8044 RTL_CONST_CALL_P (insns) = 1;
8045 emit_libcall_block (insns, base, rax, note);
8047 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8048 emit_insn (gen_tls_local_dynamic_base_64 (base));
8049 else
8050 emit_insn (gen_tls_local_dynamic_base_32 (base));
8052 if (TARGET_GNU2_TLS)
8054 rtx x = ix86_tls_module_base ();
8056 set_unique_reg_note (get_last_insn (), REG_EQUIV,
8057 gen_rtx_MINUS (Pmode, x, tp));
8060 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
8061 off = gen_rtx_CONST (Pmode, off);
8063 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
8065 if (TARGET_GNU2_TLS)
8067 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
8069 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8072 break;
8074 case TLS_MODEL_INITIAL_EXEC:
8075 if (TARGET_64BIT)
8077 pic = NULL;
8078 type = UNSPEC_GOTNTPOFF;
8080 else if (flag_pic)
8082 if (reload_in_progress)
8083 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8084 pic = pic_offset_table_rtx;
8085 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
8087 else if (!TARGET_ANY_GNU_TLS)
8089 pic = gen_reg_rtx (Pmode);
8090 emit_insn (gen_set_got (pic));
8091 type = UNSPEC_GOTTPOFF;
8093 else
8095 pic = NULL;
8096 type = UNSPEC_INDNTPOFF;
8099 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
8100 off = gen_rtx_CONST (Pmode, off);
8101 if (pic)
8102 off = gen_rtx_PLUS (Pmode, pic, off);
8103 off = gen_const_mem (Pmode, off);
8104 set_mem_alias_set (off, ix86_GOT_alias_set ());
8106 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8108 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8109 off = force_reg (Pmode, off);
8110 return gen_rtx_PLUS (Pmode, base, off);
8112 else
8114 base = get_thread_pointer (true);
8115 dest = gen_reg_rtx (Pmode);
8116 emit_insn (gen_subsi3 (dest, base, off));
8118 break;
8120 case TLS_MODEL_LOCAL_EXEC:
8121 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
8122 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8123 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
8124 off = gen_rtx_CONST (Pmode, off);
8126 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8128 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8129 return gen_rtx_PLUS (Pmode, base, off);
8131 else
8133 base = get_thread_pointer (true);
8134 dest = gen_reg_rtx (Pmode);
8135 emit_insn (gen_subsi3 (dest, base, off));
8137 break;
8139 default:
8140 gcc_unreachable ();
8143 return dest;
8146 /* Create or return the unique __imp_DECL dllimport symbol corresponding
8147 to symbol DECL. */
8149 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
8150 htab_t dllimport_map;
8152 static tree
8153 get_dllimport_decl (tree decl)
8155 struct tree_map *h, in;
8156 void **loc;
8157 const char *name;
8158 const char *prefix;
8159 size_t namelen, prefixlen;
8160 char *imp_name;
8161 tree to;
8162 rtx rtl;
8164 if (!dllimport_map)
8165 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8167 in.hash = htab_hash_pointer (decl);
8168 in.base.from = decl;
8169 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
8170 h = (struct tree_map *) *loc;
8171 if (h)
8172 return h->to;
8174 *loc = h = GGC_NEW (struct tree_map);
8175 h->hash = in.hash;
8176 h->base.from = decl;
8177 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8178 DECL_ARTIFICIAL (to) = 1;
8179 DECL_IGNORED_P (to) = 1;
8180 DECL_EXTERNAL (to) = 1;
8181 TREE_READONLY (to) = 1;
8183 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8184 name = targetm.strip_name_encoding (name);
8185 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8186 namelen = strlen (name);
8187 prefixlen = strlen (prefix);
8188 imp_name = (char *) alloca (namelen + prefixlen + 1);
8189 memcpy (imp_name, prefix, prefixlen);
8190 memcpy (imp_name + prefixlen, name, namelen + 1);
8192 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8193 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8194 SET_SYMBOL_REF_DECL (rtl, to);
8195 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8197 rtl = gen_const_mem (Pmode, rtl);
8198 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8200 SET_DECL_RTL (to, rtl);
8201 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8203 return to;
8206 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8207 true if we require the result be a register. */
8209 static rtx
8210 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8212 tree imp_decl;
8213 rtx x;
8215 gcc_assert (SYMBOL_REF_DECL (symbol));
8216 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8218 x = DECL_RTL (imp_decl);
8219 if (want_reg)
8220 x = force_reg (Pmode, x);
8221 return x;
8224 /* Try machine-dependent ways of modifying an illegitimate address
8225 to be legitimate. If we find one, return the new, valid address.
8226 This macro is used in only one place: `memory_address' in explow.c.
8228 OLDX is the address as it was before break_out_memory_refs was called.
8229 In some cases it is useful to look at this to decide what needs to be done.
8231 MODE and WIN are passed so that this macro can use
8232 GO_IF_LEGITIMATE_ADDRESS.
8234 It is always safe for this macro to do nothing. It exists to recognize
8235 opportunities to optimize the output.
8237 For the 80386, we handle X+REG by loading X into a register R and
8238 using R+REG. R will go in a general reg and indexing will be used.
8239 However, if REG is a broken-out memory address or multiplication,
8240 nothing needs to be done because REG can certainly go in a general reg.
8242 When -fpic is used, special handling is needed for symbolic references.
8243 See comments by legitimize_pic_address in i386.c for details. */
8246 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
8248 int changed = 0;
8249 unsigned log;
8251 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8252 if (log)
8253 return legitimize_tls_address (x, (enum tls_model) log, false);
8254 if (GET_CODE (x) == CONST
8255 && GET_CODE (XEXP (x, 0)) == PLUS
8256 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8257 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8259 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8260 (enum tls_model) log, false);
8261 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8264 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8266 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8267 return legitimize_dllimport_symbol (x, true);
8268 if (GET_CODE (x) == CONST
8269 && GET_CODE (XEXP (x, 0)) == PLUS
8270 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8271 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8273 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8274 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8278 if (flag_pic && SYMBOLIC_CONST (x))
8279 return legitimize_pic_address (x, 0);
8281 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8282 if (GET_CODE (x) == ASHIFT
8283 && CONST_INT_P (XEXP (x, 1))
8284 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8286 changed = 1;
8287 log = INTVAL (XEXP (x, 1));
8288 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8289 GEN_INT (1 << log));
8292 if (GET_CODE (x) == PLUS)
8294 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8296 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8297 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8298 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8300 changed = 1;
8301 log = INTVAL (XEXP (XEXP (x, 0), 1));
8302 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8303 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8304 GEN_INT (1 << log));
8307 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8308 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8309 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8311 changed = 1;
8312 log = INTVAL (XEXP (XEXP (x, 1), 1));
8313 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8314 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8315 GEN_INT (1 << log));
8318 /* Put multiply first if it isn't already. */
8319 if (GET_CODE (XEXP (x, 1)) == MULT)
8321 rtx tmp = XEXP (x, 0);
8322 XEXP (x, 0) = XEXP (x, 1);
8323 XEXP (x, 1) = tmp;
8324 changed = 1;
8327 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8328 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8329 created by virtual register instantiation, register elimination, and
8330 similar optimizations. */
8331 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8333 changed = 1;
8334 x = gen_rtx_PLUS (Pmode,
8335 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8336 XEXP (XEXP (x, 1), 0)),
8337 XEXP (XEXP (x, 1), 1));
8340 /* Canonicalize
8341 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8342 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8343 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8344 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8345 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8346 && CONSTANT_P (XEXP (x, 1)))
8348 rtx constant;
8349 rtx other = NULL_RTX;
8351 if (CONST_INT_P (XEXP (x, 1)))
8353 constant = XEXP (x, 1);
8354 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8356 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8358 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8359 other = XEXP (x, 1);
8361 else
8362 constant = 0;
8364 if (constant)
8366 changed = 1;
8367 x = gen_rtx_PLUS (Pmode,
8368 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8369 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8370 plus_constant (other, INTVAL (constant)));
8374 if (changed && legitimate_address_p (mode, x, FALSE))
8375 return x;
8377 if (GET_CODE (XEXP (x, 0)) == MULT)
8379 changed = 1;
8380 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8383 if (GET_CODE (XEXP (x, 1)) == MULT)
8385 changed = 1;
8386 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8389 if (changed
8390 && REG_P (XEXP (x, 1))
8391 && REG_P (XEXP (x, 0)))
8392 return x;
8394 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8396 changed = 1;
8397 x = legitimize_pic_address (x, 0);
8400 if (changed && legitimate_address_p (mode, x, FALSE))
8401 return x;
8403 if (REG_P (XEXP (x, 0)))
8405 rtx temp = gen_reg_rtx (Pmode);
8406 rtx val = force_operand (XEXP (x, 1), temp);
8407 if (val != temp)
8408 emit_move_insn (temp, val);
8410 XEXP (x, 1) = temp;
8411 return x;
8414 else if (REG_P (XEXP (x, 1)))
8416 rtx temp = gen_reg_rtx (Pmode);
8417 rtx val = force_operand (XEXP (x, 0), temp);
8418 if (val != temp)
8419 emit_move_insn (temp, val);
8421 XEXP (x, 0) = temp;
8422 return x;
8426 return x;
8429 /* Print an integer constant expression in assembler syntax. Addition
8430 and subtraction are the only arithmetic that may appear in these
8431 expressions. FILE is the stdio stream to write to, X is the rtx, and
8432 CODE is the operand print code from the output string. */
8434 static void
8435 output_pic_addr_const (FILE *file, rtx x, int code)
8437 char buf[256];
8439 switch (GET_CODE (x))
8441 case PC:
8442 gcc_assert (flag_pic);
8443 putc ('.', file);
8444 break;
8446 case SYMBOL_REF:
8447 if (! TARGET_MACHO || TARGET_64BIT)
8448 output_addr_const (file, x);
8449 else
8451 const char *name = XSTR (x, 0);
8453 /* Mark the decl as referenced so that cgraph will
8454 output the function. */
8455 if (SYMBOL_REF_DECL (x))
8456 mark_decl_referenced (SYMBOL_REF_DECL (x));
8458 #if TARGET_MACHO
8459 if (MACHOPIC_INDIRECT
8460 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8461 name = machopic_indirection_name (x, /*stub_p=*/true);
8462 #endif
8463 assemble_name (file, name);
8465 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
8466 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8467 fputs ("@PLT", file);
8468 break;
8470 case LABEL_REF:
8471 x = XEXP (x, 0);
8472 /* FALLTHRU */
8473 case CODE_LABEL:
8474 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8475 assemble_name (asm_out_file, buf);
8476 break;
8478 case CONST_INT:
8479 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8480 break;
8482 case CONST:
8483 /* This used to output parentheses around the expression,
8484 but that does not work on the 386 (either ATT or BSD assembler). */
8485 output_pic_addr_const (file, XEXP (x, 0), code);
8486 break;
8488 case CONST_DOUBLE:
8489 if (GET_MODE (x) == VOIDmode)
8491 /* We can use %d if the number is <32 bits and positive. */
8492 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8493 fprintf (file, "0x%lx%08lx",
8494 (unsigned long) CONST_DOUBLE_HIGH (x),
8495 (unsigned long) CONST_DOUBLE_LOW (x));
8496 else
8497 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8499 else
8500 /* We can't handle floating point constants;
8501 PRINT_OPERAND must handle them. */
8502 output_operand_lossage ("floating constant misused");
8503 break;
8505 case PLUS:
8506 /* Some assemblers need integer constants to appear first. */
8507 if (CONST_INT_P (XEXP (x, 0)))
8509 output_pic_addr_const (file, XEXP (x, 0), code);
8510 putc ('+', file);
8511 output_pic_addr_const (file, XEXP (x, 1), code);
8513 else
8515 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8516 output_pic_addr_const (file, XEXP (x, 1), code);
8517 putc ('+', file);
8518 output_pic_addr_const (file, XEXP (x, 0), code);
8520 break;
8522 case MINUS:
8523 if (!TARGET_MACHO)
8524 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8525 output_pic_addr_const (file, XEXP (x, 0), code);
8526 putc ('-', file);
8527 output_pic_addr_const (file, XEXP (x, 1), code);
8528 if (!TARGET_MACHO)
8529 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8530 break;
8532 case UNSPEC:
8533 gcc_assert (XVECLEN (x, 0) == 1);
8534 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8535 switch (XINT (x, 1))
8537 case UNSPEC_GOT:
8538 fputs ("@GOT", file);
8539 break;
8540 case UNSPEC_GOTOFF:
8541 fputs ("@GOTOFF", file);
8542 break;
8543 case UNSPEC_PLTOFF:
8544 fputs ("@PLTOFF", file);
8545 break;
8546 case UNSPEC_GOTPCREL:
8547 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8548 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8549 break;
8550 case UNSPEC_GOTTPOFF:
8551 /* FIXME: This might be @TPOFF in Sun ld too. */
8552 fputs ("@GOTTPOFF", file);
8553 break;
8554 case UNSPEC_TPOFF:
8555 fputs ("@TPOFF", file);
8556 break;
8557 case UNSPEC_NTPOFF:
8558 if (TARGET_64BIT)
8559 fputs ("@TPOFF", file);
8560 else
8561 fputs ("@NTPOFF", file);
8562 break;
8563 case UNSPEC_DTPOFF:
8564 fputs ("@DTPOFF", file);
8565 break;
8566 case UNSPEC_GOTNTPOFF:
8567 if (TARGET_64BIT)
8568 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8569 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8570 else
8571 fputs ("@GOTNTPOFF", file);
8572 break;
8573 case UNSPEC_INDNTPOFF:
8574 fputs ("@INDNTPOFF", file);
8575 break;
8576 default:
8577 output_operand_lossage ("invalid UNSPEC as operand");
8578 break;
8580 break;
8582 default:
8583 output_operand_lossage ("invalid expression as operand");
8587 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8588 We need to emit DTP-relative relocations. */
8590 static void ATTRIBUTE_UNUSED
8591 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8593 fputs (ASM_LONG, file);
8594 output_addr_const (file, x);
8595 fputs ("@DTPOFF", file);
8596 switch (size)
8598 case 4:
8599 break;
8600 case 8:
8601 fputs (", 0", file);
8602 break;
8603 default:
8604 gcc_unreachable ();
8608 /* In the name of slightly smaller debug output, and to cater to
8609 general assembler lossage, recognize PIC+GOTOFF and turn it back
8610 into a direct symbol reference.
8612 On Darwin, this is necessary to avoid a crash, because Darwin
8613 has a different PIC label for each routine but the DWARF debugging
8614 information is not associated with any particular routine, so it's
8615 necessary to remove references to the PIC label from RTL stored by
8616 the DWARF output code. */
8618 static rtx
8619 ix86_delegitimize_address (rtx orig_x)
8621 rtx x = orig_x;
8622 /* reg_addend is NULL or a multiple of some register. */
8623 rtx reg_addend = NULL_RTX;
8624 /* const_addend is NULL or a const_int. */
8625 rtx const_addend = NULL_RTX;
8626 /* This is the result, or NULL. */
8627 rtx result = NULL_RTX;
8629 if (MEM_P (x))
8630 x = XEXP (x, 0);
8632 if (TARGET_64BIT)
8634 if (GET_CODE (x) != CONST
8635 || GET_CODE (XEXP (x, 0)) != UNSPEC
8636 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8637 || !MEM_P (orig_x))
8638 return orig_x;
8639 return XVECEXP (XEXP (x, 0), 0, 0);
8642 if (GET_CODE (x) != PLUS
8643 || GET_CODE (XEXP (x, 1)) != CONST)
8644 return orig_x;
8646 if (REG_P (XEXP (x, 0))
8647 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8648 /* %ebx + GOT/GOTOFF */
8650 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8652 /* %ebx + %reg * scale + GOT/GOTOFF */
8653 reg_addend = XEXP (x, 0);
8654 if (REG_P (XEXP (reg_addend, 0))
8655 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8656 reg_addend = XEXP (reg_addend, 1);
8657 else if (REG_P (XEXP (reg_addend, 1))
8658 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8659 reg_addend = XEXP (reg_addend, 0);
8660 else
8661 return orig_x;
8662 if (!REG_P (reg_addend)
8663 && GET_CODE (reg_addend) != MULT
8664 && GET_CODE (reg_addend) != ASHIFT)
8665 return orig_x;
8667 else
8668 return orig_x;
8670 x = XEXP (XEXP (x, 1), 0);
8671 if (GET_CODE (x) == PLUS
8672 && CONST_INT_P (XEXP (x, 1)))
8674 const_addend = XEXP (x, 1);
8675 x = XEXP (x, 0);
8678 if (GET_CODE (x) == UNSPEC
8679 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8680 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8681 result = XVECEXP (x, 0, 0);
8683 if (TARGET_MACHO && darwin_local_data_pic (x)
8684 && !MEM_P (orig_x))
8685 result = XEXP (x, 0);
8687 if (! result)
8688 return orig_x;
8690 if (const_addend)
8691 result = gen_rtx_PLUS (Pmode, result, const_addend);
8692 if (reg_addend)
8693 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8694 return result;
8697 /* If X is a machine specific address (i.e. a symbol or label being
8698 referenced as a displacement from the GOT implemented using an
8699 UNSPEC), then return the base term. Otherwise return X. */
8702 ix86_find_base_term (rtx x)
8704 rtx term;
8706 if (TARGET_64BIT)
8708 if (GET_CODE (x) != CONST)
8709 return x;
8710 term = XEXP (x, 0);
8711 if (GET_CODE (term) == PLUS
8712 && (CONST_INT_P (XEXP (term, 1))
8713 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8714 term = XEXP (term, 0);
8715 if (GET_CODE (term) != UNSPEC
8716 || XINT (term, 1) != UNSPEC_GOTPCREL)
8717 return x;
8719 term = XVECEXP (term, 0, 0);
8721 if (GET_CODE (term) != SYMBOL_REF
8722 && GET_CODE (term) != LABEL_REF)
8723 return x;
8725 return term;
8728 term = ix86_delegitimize_address (x);
8730 if (GET_CODE (term) != SYMBOL_REF
8731 && GET_CODE (term) != LABEL_REF)
8732 return x;
8734 return term;
8737 static void
8738 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8739 int fp, FILE *file)
8741 const char *suffix;
8743 if (mode == CCFPmode || mode == CCFPUmode)
8745 enum rtx_code second_code, bypass_code;
8746 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8747 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8748 code = ix86_fp_compare_code_to_integer (code);
8749 mode = CCmode;
8751 if (reverse)
8752 code = reverse_condition (code);
8754 switch (code)
8756 case EQ:
8757 switch (mode)
8759 case CCAmode:
8760 suffix = "a";
8761 break;
8763 case CCCmode:
8764 suffix = "c";
8765 break;
8767 case CCOmode:
8768 suffix = "o";
8769 break;
8771 case CCSmode:
8772 suffix = "s";
8773 break;
8775 default:
8776 suffix = "e";
8778 break;
8779 case NE:
8780 switch (mode)
8782 case CCAmode:
8783 suffix = "na";
8784 break;
8786 case CCCmode:
8787 suffix = "nc";
8788 break;
8790 case CCOmode:
8791 suffix = "no";
8792 break;
8794 case CCSmode:
8795 suffix = "ns";
8796 break;
8798 default:
8799 suffix = "ne";
8801 break;
8802 case GT:
8803 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8804 suffix = "g";
8805 break;
8806 case GTU:
8807 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8808 Those same assemblers have the same but opposite lossage on cmov. */
8809 if (mode == CCmode)
8810 suffix = fp ? "nbe" : "a";
8811 else if (mode == CCCmode)
8812 suffix = "b";
8813 else
8814 gcc_unreachable ();
8815 break;
8816 case LT:
8817 switch (mode)
8819 case CCNOmode:
8820 case CCGOCmode:
8821 suffix = "s";
8822 break;
8824 case CCmode:
8825 case CCGCmode:
8826 suffix = "l";
8827 break;
8829 default:
8830 gcc_unreachable ();
8832 break;
8833 case LTU:
8834 gcc_assert (mode == CCmode || mode == CCCmode);
8835 suffix = "b";
8836 break;
8837 case GE:
8838 switch (mode)
8840 case CCNOmode:
8841 case CCGOCmode:
8842 suffix = "ns";
8843 break;
8845 case CCmode:
8846 case CCGCmode:
8847 suffix = "ge";
8848 break;
8850 default:
8851 gcc_unreachable ();
8853 break;
8854 case GEU:
8855 /* ??? As above. */
8856 gcc_assert (mode == CCmode || mode == CCCmode);
8857 suffix = fp ? "nb" : "ae";
8858 break;
8859 case LE:
8860 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8861 suffix = "le";
8862 break;
8863 case LEU:
8864 /* ??? As above. */
8865 if (mode == CCmode)
8866 suffix = "be";
8867 else if (mode == CCCmode)
8868 suffix = fp ? "nb" : "ae";
8869 else
8870 gcc_unreachable ();
8871 break;
8872 case UNORDERED:
8873 suffix = fp ? "u" : "p";
8874 break;
8875 case ORDERED:
8876 suffix = fp ? "nu" : "np";
8877 break;
8878 default:
8879 gcc_unreachable ();
8881 fputs (suffix, file);
8884 /* Print the name of register X to FILE based on its machine mode and number.
8885 If CODE is 'w', pretend the mode is HImode.
8886 If CODE is 'b', pretend the mode is QImode.
8887 If CODE is 'k', pretend the mode is SImode.
8888 If CODE is 'q', pretend the mode is DImode.
8889 If CODE is 'h', pretend the reg is the 'high' byte register.
8890 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8892 void
8893 print_reg (rtx x, int code, FILE *file)
8895 gcc_assert (x == pc_rtx
8896 || (REGNO (x) != ARG_POINTER_REGNUM
8897 && REGNO (x) != FRAME_POINTER_REGNUM
8898 && REGNO (x) != FLAGS_REG
8899 && REGNO (x) != FPSR_REG
8900 && REGNO (x) != FPCR_REG));
8902 if (ASSEMBLER_DIALECT == ASM_ATT)
8903 putc ('%', file);
8905 if (x == pc_rtx)
8907 gcc_assert (TARGET_64BIT);
8908 fputs ("rip", file);
8909 return;
8912 if (code == 'w' || MMX_REG_P (x))
8913 code = 2;
8914 else if (code == 'b')
8915 code = 1;
8916 else if (code == 'k')
8917 code = 4;
8918 else if (code == 'q')
8919 code = 8;
8920 else if (code == 'y')
8921 code = 3;
8922 else if (code == 'h')
8923 code = 0;
8924 else
8925 code = GET_MODE_SIZE (GET_MODE (x));
8927 /* Irritatingly, AMD extended registers use different naming convention
8928 from the normal registers. */
8929 if (REX_INT_REG_P (x))
8931 gcc_assert (TARGET_64BIT);
8932 switch (code)
8934 case 0:
8935 error ("extended registers have no high halves");
8936 break;
8937 case 1:
8938 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8939 break;
8940 case 2:
8941 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8942 break;
8943 case 4:
8944 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8945 break;
8946 case 8:
8947 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8948 break;
8949 default:
8950 error ("unsupported operand size for extended register");
8951 break;
8953 return;
8955 switch (code)
8957 case 3:
8958 if (STACK_TOP_P (x))
8960 fputs ("st(0)", file);
8961 break;
8963 /* FALLTHRU */
8964 case 8:
8965 case 4:
8966 case 12:
8967 if (! ANY_FP_REG_P (x))
8968 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8969 /* FALLTHRU */
8970 case 16:
8971 case 2:
8972 normal:
8973 fputs (hi_reg_name[REGNO (x)], file);
8974 break;
8975 case 1:
8976 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8977 goto normal;
8978 fputs (qi_reg_name[REGNO (x)], file);
8979 break;
8980 case 0:
8981 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8982 goto normal;
8983 fputs (qi_high_reg_name[REGNO (x)], file);
8984 break;
8985 default:
8986 gcc_unreachable ();
8990 /* Locate some local-dynamic symbol still in use by this function
8991 so that we can print its name in some tls_local_dynamic_base
8992 pattern. */
8994 static int
8995 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8997 rtx x = *px;
8999 if (GET_CODE (x) == SYMBOL_REF
9000 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9002 cfun->machine->some_ld_name = XSTR (x, 0);
9003 return 1;
9006 return 0;
9009 static const char *
9010 get_some_local_dynamic_name (void)
9012 rtx insn;
9014 if (cfun->machine->some_ld_name)
9015 return cfun->machine->some_ld_name;
9017 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9018 if (INSN_P (insn)
9019 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9020 return cfun->machine->some_ld_name;
9022 gcc_unreachable ();
9025 /* Meaning of CODE:
9026 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
9027 C -- print opcode suffix for set/cmov insn.
9028 c -- like C, but print reversed condition
9029 E,e -- likewise, but for compare-and-branch fused insn.
9030 F,f -- likewise, but for floating-point.
9031 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
9032 otherwise nothing
9033 R -- print the prefix for register names.
9034 z -- print the opcode suffix for the size of the current operand.
9035 * -- print a star (in certain assembler syntax)
9036 A -- print an absolute memory reference.
9037 w -- print the operand as if it's a "word" (HImode) even if it isn't.
9038 s -- print a shift double count, followed by the assemblers argument
9039 delimiter.
9040 b -- print the QImode name of the register for the indicated operand.
9041 %b0 would print %al if operands[0] is reg 0.
9042 w -- likewise, print the HImode name of the register.
9043 k -- likewise, print the SImode name of the register.
9044 q -- likewise, print the DImode name of the register.
9045 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
9046 y -- print "st(0)" instead of "st" as a register.
9047 D -- print condition for SSE cmp instruction.
9048 P -- if PIC, print an @PLT suffix.
9049 X -- don't print any sort of PIC '@' suffix for a symbol.
9050 & -- print some in-use local-dynamic symbol name.
9051 H -- print a memory address offset by 8; used for sse high-parts
9052 Y -- print condition for SSE5 com* instruction.
9053 + -- print a branch hint as 'cs' or 'ds' prefix
9054 ; -- print a semicolon (after prefixes due to bug in older gas).
9057 void
9058 print_operand (FILE *file, rtx x, int code)
9060 if (code)
9062 switch (code)
9064 case '*':
9065 if (ASSEMBLER_DIALECT == ASM_ATT)
9066 putc ('*', file);
9067 return;
9069 case '&':
9070 assemble_name (file, get_some_local_dynamic_name ());
9071 return;
9073 case 'A':
9074 switch (ASSEMBLER_DIALECT)
9076 case ASM_ATT:
9077 putc ('*', file);
9078 break;
9080 case ASM_INTEL:
9081 /* Intel syntax. For absolute addresses, registers should not
9082 be surrounded by braces. */
9083 if (!REG_P (x))
9085 putc ('[', file);
9086 PRINT_OPERAND (file, x, 0);
9087 putc (']', file);
9088 return;
9090 break;
9092 default:
9093 gcc_unreachable ();
9096 PRINT_OPERAND (file, x, 0);
9097 return;
9100 case 'L':
9101 if (ASSEMBLER_DIALECT == ASM_ATT)
9102 putc ('l', file);
9103 return;
9105 case 'W':
9106 if (ASSEMBLER_DIALECT == ASM_ATT)
9107 putc ('w', file);
9108 return;
9110 case 'B':
9111 if (ASSEMBLER_DIALECT == ASM_ATT)
9112 putc ('b', file);
9113 return;
9115 case 'Q':
9116 if (ASSEMBLER_DIALECT == ASM_ATT)
9117 putc ('l', file);
9118 return;
9120 case 'S':
9121 if (ASSEMBLER_DIALECT == ASM_ATT)
9122 putc ('s', file);
9123 return;
9125 case 'T':
9126 if (ASSEMBLER_DIALECT == ASM_ATT)
9127 putc ('t', file);
9128 return;
9130 case 'z':
9131 /* 387 opcodes don't get size suffixes if the operands are
9132 registers. */
9133 if (STACK_REG_P (x))
9134 return;
9136 /* Likewise if using Intel opcodes. */
9137 if (ASSEMBLER_DIALECT == ASM_INTEL)
9138 return;
9140 /* This is the size of op from size of operand. */
9141 switch (GET_MODE_SIZE (GET_MODE (x)))
9143 case 1:
9144 putc ('b', file);
9145 return;
9147 case 2:
9148 if (MEM_P (x))
9150 #ifdef HAVE_GAS_FILDS_FISTS
9151 putc ('s', file);
9152 #endif
9153 return;
9155 else
9156 putc ('w', file);
9157 return;
9159 case 4:
9160 if (GET_MODE (x) == SFmode)
9162 putc ('s', file);
9163 return;
9165 else
9166 putc ('l', file);
9167 return;
9169 case 12:
9170 case 16:
9171 putc ('t', file);
9172 return;
9174 case 8:
9175 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9177 if (MEM_P (x))
9179 #ifdef GAS_MNEMONICS
9180 putc ('q', file);
9181 #else
9182 putc ('l', file);
9183 putc ('l', file);
9184 #endif
9186 else
9187 putc ('q', file);
9189 else
9190 putc ('l', file);
9191 return;
9193 default:
9194 gcc_unreachable ();
9197 case 'b':
9198 case 'w':
9199 case 'k':
9200 case 'q':
9201 case 'h':
9202 case 'y':
9203 case 'X':
9204 case 'P':
9205 break;
9207 case 's':
9208 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9210 PRINT_OPERAND (file, x, 0);
9211 fputs (", ", file);
9213 return;
9215 case 'D':
9216 /* Little bit of braindamage here. The SSE compare instructions
9217 does use completely different names for the comparisons that the
9218 fp conditional moves. */
9219 switch (GET_CODE (x))
9221 case EQ:
9222 case UNEQ:
9223 fputs ("eq", file);
9224 break;
9225 case LT:
9226 case UNLT:
9227 fputs ("lt", file);
9228 break;
9229 case LE:
9230 case UNLE:
9231 fputs ("le", file);
9232 break;
9233 case UNORDERED:
9234 fputs ("unord", file);
9235 break;
9236 case NE:
9237 case LTGT:
9238 fputs ("neq", file);
9239 break;
9240 case UNGE:
9241 case GE:
9242 fputs ("nlt", file);
9243 break;
9244 case UNGT:
9245 case GT:
9246 fputs ("nle", file);
9247 break;
9248 case ORDERED:
9249 fputs ("ord", file);
9250 break;
9251 default:
9252 gcc_unreachable ();
9254 return;
9255 case 'O':
9256 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9257 if (ASSEMBLER_DIALECT == ASM_ATT)
9259 switch (GET_MODE (x))
9261 case HImode: putc ('w', file); break;
9262 case SImode:
9263 case SFmode: putc ('l', file); break;
9264 case DImode:
9265 case DFmode: putc ('q', file); break;
9266 default: gcc_unreachable ();
9268 putc ('.', file);
9270 #endif
9271 return;
9272 case 'C':
9273 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9274 return;
9275 case 'F':
9276 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9277 if (ASSEMBLER_DIALECT == ASM_ATT)
9278 putc ('.', file);
9279 #endif
9280 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9281 return;
9283 /* Like above, but reverse condition */
9284 case 'c':
9285 /* Check to see if argument to %c is really a constant
9286 and not a condition code which needs to be reversed. */
9287 if (!COMPARISON_P (x))
9289 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9290 return;
9292 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9293 return;
9294 case 'f':
9295 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9296 if (ASSEMBLER_DIALECT == ASM_ATT)
9297 putc ('.', file);
9298 #endif
9299 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9300 return;
9302 case 'E':
9303 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
9304 return;
9306 case 'e':
9307 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
9308 return;
9310 case 'H':
9311 /* It doesn't actually matter what mode we use here, as we're
9312 only going to use this for printing. */
9313 x = adjust_address_nv (x, DImode, 8);
9314 break;
9316 case '+':
9318 rtx x;
9320 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9321 return;
9323 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9324 if (x)
9326 int pred_val = INTVAL (XEXP (x, 0));
9328 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9329 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9331 int taken = pred_val > REG_BR_PROB_BASE / 2;
9332 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9334 /* Emit hints only in the case default branch prediction
9335 heuristics would fail. */
9336 if (taken != cputaken)
9338 /* We use 3e (DS) prefix for taken branches and
9339 2e (CS) prefix for not taken branches. */
9340 if (taken)
9341 fputs ("ds ; ", file);
9342 else
9343 fputs ("cs ; ", file);
9347 return;
9350 case 'Y':
9351 switch (GET_CODE (x))
9353 case NE:
9354 fputs ("neq", file);
9355 break;
9356 case EQ:
9357 fputs ("eq", file);
9358 break;
9359 case GE:
9360 case GEU:
9361 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9362 break;
9363 case GT:
9364 case GTU:
9365 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9366 break;
9367 case LE:
9368 case LEU:
9369 fputs ("le", file);
9370 break;
9371 case LT:
9372 case LTU:
9373 fputs ("lt", file);
9374 break;
9375 case UNORDERED:
9376 fputs ("unord", file);
9377 break;
9378 case ORDERED:
9379 fputs ("ord", file);
9380 break;
9381 case UNEQ:
9382 fputs ("ueq", file);
9383 break;
9384 case UNGE:
9385 fputs ("nlt", file);
9386 break;
9387 case UNGT:
9388 fputs ("nle", file);
9389 break;
9390 case UNLE:
9391 fputs ("ule", file);
9392 break;
9393 case UNLT:
9394 fputs ("ult", file);
9395 break;
9396 case LTGT:
9397 fputs ("une", file);
9398 break;
9399 default:
9400 gcc_unreachable ();
9402 return;
9404 case ';':
9405 #if TARGET_MACHO
9406 fputs (" ; ", file);
9407 #else
9408 fputc (' ', file);
9409 #endif
9410 return;
9412 default:
9413 output_operand_lossage ("invalid operand code '%c'", code);
9417 if (REG_P (x))
9418 print_reg (x, code, file);
9420 else if (MEM_P (x))
9422 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9423 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9424 && GET_MODE (x) != BLKmode)
9426 const char * size;
9427 switch (GET_MODE_SIZE (GET_MODE (x)))
9429 case 1: size = "BYTE"; break;
9430 case 2: size = "WORD"; break;
9431 case 4: size = "DWORD"; break;
9432 case 8: size = "QWORD"; break;
9433 case 12: size = "XWORD"; break;
9434 case 16:
9435 if (GET_MODE (x) == XFmode)
9436 size = "XWORD";
9437 else
9438 size = "XMMWORD";
9439 break;
9440 default:
9441 gcc_unreachable ();
9444 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9445 if (code == 'b')
9446 size = "BYTE";
9447 else if (code == 'w')
9448 size = "WORD";
9449 else if (code == 'k')
9450 size = "DWORD";
9452 fputs (size, file);
9453 fputs (" PTR ", file);
9456 x = XEXP (x, 0);
9457 /* Avoid (%rip) for call operands. */
9458 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9459 && !CONST_INT_P (x))
9460 output_addr_const (file, x);
9461 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9462 output_operand_lossage ("invalid constraints for operand");
9463 else
9464 output_address (x);
9467 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9469 REAL_VALUE_TYPE r;
9470 long l;
9472 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9473 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9475 if (ASSEMBLER_DIALECT == ASM_ATT)
9476 putc ('$', file);
9477 fprintf (file, "0x%08lx", (long unsigned int) l);
9480 /* These float cases don't actually occur as immediate operands. */
9481 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9483 char dstr[30];
9485 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9486 fprintf (file, "%s", dstr);
9489 else if (GET_CODE (x) == CONST_DOUBLE
9490 && GET_MODE (x) == XFmode)
9492 char dstr[30];
9494 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9495 fprintf (file, "%s", dstr);
9498 else
9500 /* We have patterns that allow zero sets of memory, for instance.
9501 In 64-bit mode, we should probably support all 8-byte vectors,
9502 since we can in fact encode that into an immediate. */
9503 if (GET_CODE (x) == CONST_VECTOR)
9505 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9506 x = const0_rtx;
9509 if (code != 'P')
9511 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9513 if (ASSEMBLER_DIALECT == ASM_ATT)
9514 putc ('$', file);
9516 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9517 || GET_CODE (x) == LABEL_REF)
9519 if (ASSEMBLER_DIALECT == ASM_ATT)
9520 putc ('$', file);
9521 else
9522 fputs ("OFFSET FLAT:", file);
9525 if (CONST_INT_P (x))
9526 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9527 else if (flag_pic)
9528 output_pic_addr_const (file, x, code);
9529 else
9530 output_addr_const (file, x);
9534 /* Print a memory operand whose address is ADDR. */
9536 void
9537 print_operand_address (FILE *file, rtx addr)
9539 struct ix86_address parts;
9540 rtx base, index, disp;
9541 int scale;
9542 int ok = ix86_decompose_address (addr, &parts);
9544 gcc_assert (ok);
9546 base = parts.base;
9547 index = parts.index;
9548 disp = parts.disp;
9549 scale = parts.scale;
9551 switch (parts.seg)
9553 case SEG_DEFAULT:
9554 break;
9555 case SEG_FS:
9556 case SEG_GS:
9557 if (ASSEMBLER_DIALECT == ASM_ATT)
9558 putc ('%', file);
9559 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9560 break;
9561 default:
9562 gcc_unreachable ();
9565 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9566 if (TARGET_64BIT && !base && !index)
9568 rtx symbol = disp;
9570 if (GET_CODE (disp) == CONST
9571 && GET_CODE (XEXP (disp, 0)) == PLUS
9572 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9573 symbol = XEXP (XEXP (disp, 0), 0);
9575 if (GET_CODE (symbol) == LABEL_REF
9576 || (GET_CODE (symbol) == SYMBOL_REF
9577 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9578 base = pc_rtx;
9580 if (!base && !index)
9582 /* Displacement only requires special attention. */
9584 if (CONST_INT_P (disp))
9586 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9587 fputs ("ds:", file);
9588 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9590 else if (flag_pic)
9591 output_pic_addr_const (file, disp, 0);
9592 else
9593 output_addr_const (file, disp);
9595 else
9597 if (ASSEMBLER_DIALECT == ASM_ATT)
9599 if (disp)
9601 if (flag_pic)
9602 output_pic_addr_const (file, disp, 0);
9603 else if (GET_CODE (disp) == LABEL_REF)
9604 output_asm_label (disp);
9605 else
9606 output_addr_const (file, disp);
9609 putc ('(', file);
9610 if (base)
9611 print_reg (base, 0, file);
9612 if (index)
9614 putc (',', file);
9615 print_reg (index, 0, file);
9616 if (scale != 1)
9617 fprintf (file, ",%d", scale);
9619 putc (')', file);
9621 else
9623 rtx offset = NULL_RTX;
9625 if (disp)
9627 /* Pull out the offset of a symbol; print any symbol itself. */
9628 if (GET_CODE (disp) == CONST
9629 && GET_CODE (XEXP (disp, 0)) == PLUS
9630 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9632 offset = XEXP (XEXP (disp, 0), 1);
9633 disp = gen_rtx_CONST (VOIDmode,
9634 XEXP (XEXP (disp, 0), 0));
9637 if (flag_pic)
9638 output_pic_addr_const (file, disp, 0);
9639 else if (GET_CODE (disp) == LABEL_REF)
9640 output_asm_label (disp);
9641 else if (CONST_INT_P (disp))
9642 offset = disp;
9643 else
9644 output_addr_const (file, disp);
9647 putc ('[', file);
9648 if (base)
9650 print_reg (base, 0, file);
9651 if (offset)
9653 if (INTVAL (offset) >= 0)
9654 putc ('+', file);
9655 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9658 else if (offset)
9659 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9660 else
9661 putc ('0', file);
9663 if (index)
9665 putc ('+', file);
9666 print_reg (index, 0, file);
9667 if (scale != 1)
9668 fprintf (file, "*%d", scale);
9670 putc (']', file);
9675 bool
9676 output_addr_const_extra (FILE *file, rtx x)
9678 rtx op;
9680 if (GET_CODE (x) != UNSPEC)
9681 return false;
9683 op = XVECEXP (x, 0, 0);
9684 switch (XINT (x, 1))
9686 case UNSPEC_GOTTPOFF:
9687 output_addr_const (file, op);
9688 /* FIXME: This might be @TPOFF in Sun ld. */
9689 fputs ("@GOTTPOFF", file);
9690 break;
9691 case UNSPEC_TPOFF:
9692 output_addr_const (file, op);
9693 fputs ("@TPOFF", file);
9694 break;
9695 case UNSPEC_NTPOFF:
9696 output_addr_const (file, op);
9697 if (TARGET_64BIT)
9698 fputs ("@TPOFF", file);
9699 else
9700 fputs ("@NTPOFF", file);
9701 break;
9702 case UNSPEC_DTPOFF:
9703 output_addr_const (file, op);
9704 fputs ("@DTPOFF", file);
9705 break;
9706 case UNSPEC_GOTNTPOFF:
9707 output_addr_const (file, op);
9708 if (TARGET_64BIT)
9709 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9710 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9711 else
9712 fputs ("@GOTNTPOFF", file);
9713 break;
9714 case UNSPEC_INDNTPOFF:
9715 output_addr_const (file, op);
9716 fputs ("@INDNTPOFF", file);
9717 break;
9719 default:
9720 return false;
9723 return true;
9726 /* Split one or more DImode RTL references into pairs of SImode
9727 references. The RTL can be REG, offsettable MEM, integer constant, or
9728 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9729 split and "num" is its length. lo_half and hi_half are output arrays
9730 that parallel "operands". */
9732 void
9733 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9735 while (num--)
9737 rtx op = operands[num];
9739 /* simplify_subreg refuse to split volatile memory addresses,
9740 but we still have to handle it. */
9741 if (MEM_P (op))
9743 lo_half[num] = adjust_address (op, SImode, 0);
9744 hi_half[num] = adjust_address (op, SImode, 4);
9746 else
9748 lo_half[num] = simplify_gen_subreg (SImode, op,
9749 GET_MODE (op) == VOIDmode
9750 ? DImode : GET_MODE (op), 0);
9751 hi_half[num] = simplify_gen_subreg (SImode, op,
9752 GET_MODE (op) == VOIDmode
9753 ? DImode : GET_MODE (op), 4);
9757 /* Split one or more TImode RTL references into pairs of DImode
9758 references. The RTL can be REG, offsettable MEM, integer constant, or
9759 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9760 split and "num" is its length. lo_half and hi_half are output arrays
9761 that parallel "operands". */
9763 void
9764 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9766 while (num--)
9768 rtx op = operands[num];
9770 /* simplify_subreg refuse to split volatile memory addresses, but we
9771 still have to handle it. */
9772 if (MEM_P (op))
9774 lo_half[num] = adjust_address (op, DImode, 0);
9775 hi_half[num] = adjust_address (op, DImode, 8);
9777 else
9779 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9780 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9785 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9786 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9787 is the expression of the binary operation. The output may either be
9788 emitted here, or returned to the caller, like all output_* functions.
9790 There is no guarantee that the operands are the same mode, as they
9791 might be within FLOAT or FLOAT_EXTEND expressions. */
9793 #ifndef SYSV386_COMPAT
9794 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9795 wants to fix the assemblers because that causes incompatibility
9796 with gcc. No-one wants to fix gcc because that causes
9797 incompatibility with assemblers... You can use the option of
9798 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9799 #define SYSV386_COMPAT 1
9800 #endif
9802 const char *
9803 output_387_binary_op (rtx insn, rtx *operands)
9805 static char buf[30];
9806 const char *p;
9807 const char *ssep;
9808 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9810 #ifdef ENABLE_CHECKING
9811 /* Even if we do not want to check the inputs, this documents input
9812 constraints. Which helps in understanding the following code. */
9813 if (STACK_REG_P (operands[0])
9814 && ((REG_P (operands[1])
9815 && REGNO (operands[0]) == REGNO (operands[1])
9816 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9817 || (REG_P (operands[2])
9818 && REGNO (operands[0]) == REGNO (operands[2])
9819 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9820 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9821 ; /* ok */
9822 else
9823 gcc_assert (is_sse);
9824 #endif
9826 switch (GET_CODE (operands[3]))
9828 case PLUS:
9829 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9830 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9831 p = "fiadd";
9832 else
9833 p = "fadd";
9834 ssep = "add";
9835 break;
9837 case MINUS:
9838 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9839 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9840 p = "fisub";
9841 else
9842 p = "fsub";
9843 ssep = "sub";
9844 break;
9846 case MULT:
9847 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9848 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9849 p = "fimul";
9850 else
9851 p = "fmul";
9852 ssep = "mul";
9853 break;
9855 case DIV:
9856 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9857 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9858 p = "fidiv";
9859 else
9860 p = "fdiv";
9861 ssep = "div";
9862 break;
9864 default:
9865 gcc_unreachable ();
9868 if (is_sse)
9870 strcpy (buf, ssep);
9871 if (GET_MODE (operands[0]) == SFmode)
9872 strcat (buf, "ss\t{%2, %0|%0, %2}");
9873 else
9874 strcat (buf, "sd\t{%2, %0|%0, %2}");
9875 return buf;
9877 strcpy (buf, p);
9879 switch (GET_CODE (operands[3]))
9881 case MULT:
9882 case PLUS:
9883 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9885 rtx temp = operands[2];
9886 operands[2] = operands[1];
9887 operands[1] = temp;
9890 /* know operands[0] == operands[1]. */
9892 if (MEM_P (operands[2]))
9894 p = "%z2\t%2";
9895 break;
9898 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9900 if (STACK_TOP_P (operands[0]))
9901 /* How is it that we are storing to a dead operand[2]?
9902 Well, presumably operands[1] is dead too. We can't
9903 store the result to st(0) as st(0) gets popped on this
9904 instruction. Instead store to operands[2] (which I
9905 think has to be st(1)). st(1) will be popped later.
9906 gcc <= 2.8.1 didn't have this check and generated
9907 assembly code that the Unixware assembler rejected. */
9908 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9909 else
9910 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9911 break;
9914 if (STACK_TOP_P (operands[0]))
9915 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9916 else
9917 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9918 break;
9920 case MINUS:
9921 case DIV:
9922 if (MEM_P (operands[1]))
9924 p = "r%z1\t%1";
9925 break;
9928 if (MEM_P (operands[2]))
9930 p = "%z2\t%2";
9931 break;
9934 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9936 #if SYSV386_COMPAT
9937 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9938 derived assemblers, confusingly reverse the direction of
9939 the operation for fsub{r} and fdiv{r} when the
9940 destination register is not st(0). The Intel assembler
9941 doesn't have this brain damage. Read !SYSV386_COMPAT to
9942 figure out what the hardware really does. */
9943 if (STACK_TOP_P (operands[0]))
9944 p = "{p\t%0, %2|rp\t%2, %0}";
9945 else
9946 p = "{rp\t%2, %0|p\t%0, %2}";
9947 #else
9948 if (STACK_TOP_P (operands[0]))
9949 /* As above for fmul/fadd, we can't store to st(0). */
9950 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9951 else
9952 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9953 #endif
9954 break;
9957 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9959 #if SYSV386_COMPAT
9960 if (STACK_TOP_P (operands[0]))
9961 p = "{rp\t%0, %1|p\t%1, %0}";
9962 else
9963 p = "{p\t%1, %0|rp\t%0, %1}";
9964 #else
9965 if (STACK_TOP_P (operands[0]))
9966 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9967 else
9968 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9969 #endif
9970 break;
9973 if (STACK_TOP_P (operands[0]))
9975 if (STACK_TOP_P (operands[1]))
9976 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9977 else
9978 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9979 break;
9981 else if (STACK_TOP_P (operands[1]))
9983 #if SYSV386_COMPAT
9984 p = "{\t%1, %0|r\t%0, %1}";
9985 #else
9986 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9987 #endif
9989 else
9991 #if SYSV386_COMPAT
9992 p = "{r\t%2, %0|\t%0, %2}";
9993 #else
9994 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9995 #endif
9997 break;
9999 default:
10000 gcc_unreachable ();
10003 strcat (buf, p);
10004 return buf;
10007 /* Return needed mode for entity in optimize_mode_switching pass. */
10010 ix86_mode_needed (int entity, rtx insn)
10012 enum attr_i387_cw mode;
10014 /* The mode UNINITIALIZED is used to store control word after a
10015 function call or ASM pattern. The mode ANY specify that function
10016 has no requirements on the control word and make no changes in the
10017 bits we are interested in. */
10019 if (CALL_P (insn)
10020 || (NONJUMP_INSN_P (insn)
10021 && (asm_noperands (PATTERN (insn)) >= 0
10022 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
10023 return I387_CW_UNINITIALIZED;
10025 if (recog_memoized (insn) < 0)
10026 return I387_CW_ANY;
10028 mode = get_attr_i387_cw (insn);
10030 switch (entity)
10032 case I387_TRUNC:
10033 if (mode == I387_CW_TRUNC)
10034 return mode;
10035 break;
10037 case I387_FLOOR:
10038 if (mode == I387_CW_FLOOR)
10039 return mode;
10040 break;
10042 case I387_CEIL:
10043 if (mode == I387_CW_CEIL)
10044 return mode;
10045 break;
10047 case I387_MASK_PM:
10048 if (mode == I387_CW_MASK_PM)
10049 return mode;
10050 break;
10052 default:
10053 gcc_unreachable ();
10056 return I387_CW_ANY;
10059 /* Output code to initialize control word copies used by trunc?f?i and
10060 rounding patterns. CURRENT_MODE is set to current control word,
10061 while NEW_MODE is set to new control word. */
10063 void
10064 emit_i387_cw_initialization (int mode)
10066 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
10067 rtx new_mode;
10069 enum ix86_stack_slot slot;
10071 rtx reg = gen_reg_rtx (HImode);
10073 emit_insn (gen_x86_fnstcw_1 (stored_mode));
10074 emit_move_insn (reg, copy_rtx (stored_mode));
10076 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
10078 switch (mode)
10080 case I387_CW_TRUNC:
10081 /* round toward zero (truncate) */
10082 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
10083 slot = SLOT_CW_TRUNC;
10084 break;
10086 case I387_CW_FLOOR:
10087 /* round down toward -oo */
10088 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10089 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
10090 slot = SLOT_CW_FLOOR;
10091 break;
10093 case I387_CW_CEIL:
10094 /* round up toward +oo */
10095 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10096 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
10097 slot = SLOT_CW_CEIL;
10098 break;
10100 case I387_CW_MASK_PM:
10101 /* mask precision exception for nearbyint() */
10102 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10103 slot = SLOT_CW_MASK_PM;
10104 break;
10106 default:
10107 gcc_unreachable ();
10110 else
10112 switch (mode)
10114 case I387_CW_TRUNC:
10115 /* round toward zero (truncate) */
10116 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
10117 slot = SLOT_CW_TRUNC;
10118 break;
10120 case I387_CW_FLOOR:
10121 /* round down toward -oo */
10122 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
10123 slot = SLOT_CW_FLOOR;
10124 break;
10126 case I387_CW_CEIL:
10127 /* round up toward +oo */
10128 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
10129 slot = SLOT_CW_CEIL;
10130 break;
10132 case I387_CW_MASK_PM:
10133 /* mask precision exception for nearbyint() */
10134 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10135 slot = SLOT_CW_MASK_PM;
10136 break;
10138 default:
10139 gcc_unreachable ();
10143 gcc_assert (slot < MAX_386_STACK_LOCALS);
10145 new_mode = assign_386_stack_local (HImode, slot);
10146 emit_move_insn (new_mode, reg);
10149 /* Output code for INSN to convert a float to a signed int. OPERANDS
10150 are the insn operands. The output may be [HSD]Imode and the input
10151 operand may be [SDX]Fmode. */
10153 const char *
10154 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
10156 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10157 int dimode_p = GET_MODE (operands[0]) == DImode;
10158 int round_mode = get_attr_i387_cw (insn);
10160 /* Jump through a hoop or two for DImode, since the hardware has no
10161 non-popping instruction. We used to do this a different way, but
10162 that was somewhat fragile and broke with post-reload splitters. */
10163 if ((dimode_p || fisttp) && !stack_top_dies)
10164 output_asm_insn ("fld\t%y1", operands);
10166 gcc_assert (STACK_TOP_P (operands[1]));
10167 gcc_assert (MEM_P (operands[0]));
10168 gcc_assert (GET_MODE (operands[1]) != TFmode);
10170 if (fisttp)
10171 output_asm_insn ("fisttp%z0\t%0", operands);
10172 else
10174 if (round_mode != I387_CW_ANY)
10175 output_asm_insn ("fldcw\t%3", operands);
10176 if (stack_top_dies || dimode_p)
10177 output_asm_insn ("fistp%z0\t%0", operands);
10178 else
10179 output_asm_insn ("fist%z0\t%0", operands);
10180 if (round_mode != I387_CW_ANY)
10181 output_asm_insn ("fldcw\t%2", operands);
10184 return "";
10187 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10188 have the values zero or one, indicates the ffreep insn's operand
10189 from the OPERANDS array. */
10191 static const char *
10192 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10194 if (TARGET_USE_FFREEP)
10195 #if HAVE_AS_IX86_FFREEP
10196 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
10197 #else
10199 static char retval[] = ".word\t0xc_df";
10200 int regno = REGNO (operands[opno]);
10202 gcc_assert (FP_REGNO_P (regno));
10204 retval[9] = '0' + (regno - FIRST_STACK_REG);
10205 return retval;
10207 #endif
10209 return opno ? "fstp\t%y1" : "fstp\t%y0";
10213 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10214 should be used. UNORDERED_P is true when fucom should be used. */
10216 const char *
10217 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10219 int stack_top_dies;
10220 rtx cmp_op0, cmp_op1;
10221 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
10223 if (eflags_p)
10225 cmp_op0 = operands[0];
10226 cmp_op1 = operands[1];
10228 else
10230 cmp_op0 = operands[1];
10231 cmp_op1 = operands[2];
10234 if (is_sse)
10236 if (GET_MODE (operands[0]) == SFmode)
10237 if (unordered_p)
10238 return "ucomiss\t{%1, %0|%0, %1}";
10239 else
10240 return "comiss\t{%1, %0|%0, %1}";
10241 else
10242 if (unordered_p)
10243 return "ucomisd\t{%1, %0|%0, %1}";
10244 else
10245 return "comisd\t{%1, %0|%0, %1}";
10248 gcc_assert (STACK_TOP_P (cmp_op0));
10250 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10252 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10254 if (stack_top_dies)
10256 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10257 return output_387_ffreep (operands, 1);
10259 else
10260 return "ftst\n\tfnstsw\t%0";
10263 if (STACK_REG_P (cmp_op1)
10264 && stack_top_dies
10265 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10266 && REGNO (cmp_op1) != FIRST_STACK_REG)
10268 /* If both the top of the 387 stack dies, and the other operand
10269 is also a stack register that dies, then this must be a
10270 `fcompp' float compare */
10272 if (eflags_p)
10274 /* There is no double popping fcomi variant. Fortunately,
10275 eflags is immune from the fstp's cc clobbering. */
10276 if (unordered_p)
10277 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10278 else
10279 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10280 return output_387_ffreep (operands, 0);
10282 else
10284 if (unordered_p)
10285 return "fucompp\n\tfnstsw\t%0";
10286 else
10287 return "fcompp\n\tfnstsw\t%0";
10290 else
10292 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10294 static const char * const alt[16] =
10296 "fcom%z2\t%y2\n\tfnstsw\t%0",
10297 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10298 "fucom%z2\t%y2\n\tfnstsw\t%0",
10299 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10301 "ficom%z2\t%y2\n\tfnstsw\t%0",
10302 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10303 NULL,
10304 NULL,
10306 "fcomi\t{%y1, %0|%0, %y1}",
10307 "fcomip\t{%y1, %0|%0, %y1}",
10308 "fucomi\t{%y1, %0|%0, %y1}",
10309 "fucomip\t{%y1, %0|%0, %y1}",
10311 NULL,
10312 NULL,
10313 NULL,
10314 NULL
10317 int mask;
10318 const char *ret;
10320 mask = eflags_p << 3;
10321 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10322 mask |= unordered_p << 1;
10323 mask |= stack_top_dies;
10325 gcc_assert (mask < 16);
10326 ret = alt[mask];
10327 gcc_assert (ret);
10329 return ret;
10333 void
10334 ix86_output_addr_vec_elt (FILE *file, int value)
10336 const char *directive = ASM_LONG;
10338 #ifdef ASM_QUAD
10339 if (TARGET_64BIT)
10340 directive = ASM_QUAD;
10341 #else
10342 gcc_assert (!TARGET_64BIT);
10343 #endif
10345 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10348 void
10349 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10351 const char *directive = ASM_LONG;
10353 #ifdef ASM_QUAD
10354 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10355 directive = ASM_QUAD;
10356 #else
10357 gcc_assert (!TARGET_64BIT);
10358 #endif
10359 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10360 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10361 fprintf (file, "%s%s%d-%s%d\n",
10362 directive, LPREFIX, value, LPREFIX, rel);
10363 else if (HAVE_AS_GOTOFF_IN_DATA)
10364 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10365 #if TARGET_MACHO
10366 else if (TARGET_MACHO)
10368 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10369 machopic_output_function_base_name (file);
10370 fprintf(file, "\n");
10372 #endif
10373 else
10374 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10375 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10378 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10379 for the target. */
10381 void
10382 ix86_expand_clear (rtx dest)
10384 rtx tmp;
10386 /* We play register width games, which are only valid after reload. */
10387 gcc_assert (reload_completed);
10389 /* Avoid HImode and its attendant prefix byte. */
10390 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10391 dest = gen_rtx_REG (SImode, REGNO (dest));
10392 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10394 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10395 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10397 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10398 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10401 emit_insn (tmp);
10404 /* X is an unchanging MEM. If it is a constant pool reference, return
10405 the constant pool rtx, else NULL. */
10408 maybe_get_pool_constant (rtx x)
10410 x = ix86_delegitimize_address (XEXP (x, 0));
10412 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10413 return get_pool_constant (x);
10415 return NULL_RTX;
10418 void
10419 ix86_expand_move (enum machine_mode mode, rtx operands[])
10421 rtx op0, op1;
10422 enum tls_model model;
10424 op0 = operands[0];
10425 op1 = operands[1];
10427 if (GET_CODE (op1) == SYMBOL_REF)
10429 model = SYMBOL_REF_TLS_MODEL (op1);
10430 if (model)
10432 op1 = legitimize_tls_address (op1, model, true);
10433 op1 = force_operand (op1, op0);
10434 if (op1 == op0)
10435 return;
10437 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10438 && SYMBOL_REF_DLLIMPORT_P (op1))
10439 op1 = legitimize_dllimport_symbol (op1, false);
10441 else if (GET_CODE (op1) == CONST
10442 && GET_CODE (XEXP (op1, 0)) == PLUS
10443 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10445 rtx addend = XEXP (XEXP (op1, 0), 1);
10446 rtx symbol = XEXP (XEXP (op1, 0), 0);
10447 rtx tmp = NULL;
10449 model = SYMBOL_REF_TLS_MODEL (symbol);
10450 if (model)
10451 tmp = legitimize_tls_address (symbol, model, true);
10452 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10453 && SYMBOL_REF_DLLIMPORT_P (symbol))
10454 tmp = legitimize_dllimport_symbol (symbol, true);
10456 if (tmp)
10458 tmp = force_operand (tmp, NULL);
10459 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10460 op0, 1, OPTAB_DIRECT);
10461 if (tmp == op0)
10462 return;
10466 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10468 if (TARGET_MACHO && !TARGET_64BIT)
10470 #if TARGET_MACHO
10471 if (MACHOPIC_PURE)
10473 rtx temp = ((reload_in_progress
10474 || ((op0 && REG_P (op0))
10475 && mode == Pmode))
10476 ? op0 : gen_reg_rtx (Pmode));
10477 op1 = machopic_indirect_data_reference (op1, temp);
10478 op1 = machopic_legitimize_pic_address (op1, mode,
10479 temp == op1 ? 0 : temp);
10481 else if (MACHOPIC_INDIRECT)
10482 op1 = machopic_indirect_data_reference (op1, 0);
10483 if (op0 == op1)
10484 return;
10485 #endif
10487 else
10489 if (MEM_P (op0))
10490 op1 = force_reg (Pmode, op1);
10491 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10493 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10494 op1 = legitimize_pic_address (op1, reg);
10495 if (op0 == op1)
10496 return;
10500 else
10502 if (MEM_P (op0)
10503 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10504 || !push_operand (op0, mode))
10505 && MEM_P (op1))
10506 op1 = force_reg (mode, op1);
10508 if (push_operand (op0, mode)
10509 && ! general_no_elim_operand (op1, mode))
10510 op1 = copy_to_mode_reg (mode, op1);
10512 /* Force large constants in 64bit compilation into register
10513 to get them CSEed. */
10514 if (can_create_pseudo_p ()
10515 && (mode == DImode) && TARGET_64BIT
10516 && immediate_operand (op1, mode)
10517 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10518 && !register_operand (op0, mode)
10519 && optimize)
10520 op1 = copy_to_mode_reg (mode, op1);
10522 if (can_create_pseudo_p ()
10523 && FLOAT_MODE_P (mode)
10524 && GET_CODE (op1) == CONST_DOUBLE)
10526 /* If we are loading a floating point constant to a register,
10527 force the value to memory now, since we'll get better code
10528 out the back end. */
10530 op1 = validize_mem (force_const_mem (mode, op1));
10531 if (!register_operand (op0, mode))
10533 rtx temp = gen_reg_rtx (mode);
10534 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10535 emit_move_insn (op0, temp);
10536 return;
10541 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10544 void
10545 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10547 rtx op0 = operands[0], op1 = operands[1];
10548 unsigned int align = GET_MODE_ALIGNMENT (mode);
10550 /* Force constants other than zero into memory. We do not know how
10551 the instructions used to build constants modify the upper 64 bits
10552 of the register, once we have that information we may be able
10553 to handle some of them more efficiently. */
10554 if (can_create_pseudo_p ()
10555 && register_operand (op0, mode)
10556 && (CONSTANT_P (op1)
10557 || (GET_CODE (op1) == SUBREG
10558 && CONSTANT_P (SUBREG_REG (op1))))
10559 && standard_sse_constant_p (op1) <= 0)
10560 op1 = validize_mem (force_const_mem (mode, op1));
10562 /* We need to check memory alignment for SSE mode since attribute
10563 can make operands unaligned. */
10564 if (can_create_pseudo_p ()
10565 && SSE_REG_MODE_P (mode)
10566 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10567 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10569 rtx tmp[2];
10571 /* ix86_expand_vector_move_misalign() does not like constants ... */
10572 if (CONSTANT_P (op1)
10573 || (GET_CODE (op1) == SUBREG
10574 && CONSTANT_P (SUBREG_REG (op1))))
10575 op1 = validize_mem (force_const_mem (mode, op1));
10577 /* ... nor both arguments in memory. */
10578 if (!register_operand (op0, mode)
10579 && !register_operand (op1, mode))
10580 op1 = force_reg (mode, op1);
10582 tmp[0] = op0; tmp[1] = op1;
10583 ix86_expand_vector_move_misalign (mode, tmp);
10584 return;
10587 /* Make operand1 a register if it isn't already. */
10588 if (can_create_pseudo_p ()
10589 && !register_operand (op0, mode)
10590 && !register_operand (op1, mode))
10592 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10593 return;
10596 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10599 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10600 straight to ix86_expand_vector_move. */
10601 /* Code generation for scalar reg-reg moves of single and double precision data:
10602 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10603 movaps reg, reg
10604 else
10605 movss reg, reg
10606 if (x86_sse_partial_reg_dependency == true)
10607 movapd reg, reg
10608 else
10609 movsd reg, reg
10611 Code generation for scalar loads of double precision data:
10612 if (x86_sse_split_regs == true)
10613 movlpd mem, reg (gas syntax)
10614 else
10615 movsd mem, reg
10617 Code generation for unaligned packed loads of single precision data
10618 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10619 if (x86_sse_unaligned_move_optimal)
10620 movups mem, reg
10622 if (x86_sse_partial_reg_dependency == true)
10624 xorps reg, reg
10625 movlps mem, reg
10626 movhps mem+8, reg
10628 else
10630 movlps mem, reg
10631 movhps mem+8, reg
10634 Code generation for unaligned packed loads of double precision data
10635 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10636 if (x86_sse_unaligned_move_optimal)
10637 movupd mem, reg
10639 if (x86_sse_split_regs == true)
10641 movlpd mem, reg
10642 movhpd mem+8, reg
10644 else
10646 movsd mem, reg
10647 movhpd mem+8, reg
10651 void
10652 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10654 rtx op0, op1, m;
10656 op0 = operands[0];
10657 op1 = operands[1];
10659 if (MEM_P (op1))
10661 /* If we're optimizing for size, movups is the smallest. */
10662 if (optimize_size)
10664 op0 = gen_lowpart (V4SFmode, op0);
10665 op1 = gen_lowpart (V4SFmode, op1);
10666 emit_insn (gen_sse_movups (op0, op1));
10667 return;
10670 /* ??? If we have typed data, then it would appear that using
10671 movdqu is the only way to get unaligned data loaded with
10672 integer type. */
10673 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10675 op0 = gen_lowpart (V16QImode, op0);
10676 op1 = gen_lowpart (V16QImode, op1);
10677 emit_insn (gen_sse2_movdqu (op0, op1));
10678 return;
10681 if (TARGET_SSE2 && mode == V2DFmode)
10683 rtx zero;
10685 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10687 op0 = gen_lowpart (V2DFmode, op0);
10688 op1 = gen_lowpart (V2DFmode, op1);
10689 emit_insn (gen_sse2_movupd (op0, op1));
10690 return;
10693 /* When SSE registers are split into halves, we can avoid
10694 writing to the top half twice. */
10695 if (TARGET_SSE_SPLIT_REGS)
10697 emit_clobber (op0);
10698 zero = op0;
10700 else
10702 /* ??? Not sure about the best option for the Intel chips.
10703 The following would seem to satisfy; the register is
10704 entirely cleared, breaking the dependency chain. We
10705 then store to the upper half, with a dependency depth
10706 of one. A rumor has it that Intel recommends two movsd
10707 followed by an unpacklpd, but this is unconfirmed. And
10708 given that the dependency depth of the unpacklpd would
10709 still be one, I'm not sure why this would be better. */
10710 zero = CONST0_RTX (V2DFmode);
10713 m = adjust_address (op1, DFmode, 0);
10714 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10715 m = adjust_address (op1, DFmode, 8);
10716 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10718 else
10720 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10722 op0 = gen_lowpart (V4SFmode, op0);
10723 op1 = gen_lowpart (V4SFmode, op1);
10724 emit_insn (gen_sse_movups (op0, op1));
10725 return;
10728 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10729 emit_move_insn (op0, CONST0_RTX (mode));
10730 else
10731 emit_clobber (op0);
10733 if (mode != V4SFmode)
10734 op0 = gen_lowpart (V4SFmode, op0);
10735 m = adjust_address (op1, V2SFmode, 0);
10736 emit_insn (gen_sse_loadlps (op0, op0, m));
10737 m = adjust_address (op1, V2SFmode, 8);
10738 emit_insn (gen_sse_loadhps (op0, op0, m));
10741 else if (MEM_P (op0))
10743 /* If we're optimizing for size, movups is the smallest. */
10744 if (optimize_size)
10746 op0 = gen_lowpart (V4SFmode, op0);
10747 op1 = gen_lowpart (V4SFmode, op1);
10748 emit_insn (gen_sse_movups (op0, op1));
10749 return;
10752 /* ??? Similar to above, only less clear because of quote
10753 typeless stores unquote. */
10754 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10755 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10757 op0 = gen_lowpart (V16QImode, op0);
10758 op1 = gen_lowpart (V16QImode, op1);
10759 emit_insn (gen_sse2_movdqu (op0, op1));
10760 return;
10763 if (TARGET_SSE2 && mode == V2DFmode)
10765 m = adjust_address (op0, DFmode, 0);
10766 emit_insn (gen_sse2_storelpd (m, op1));
10767 m = adjust_address (op0, DFmode, 8);
10768 emit_insn (gen_sse2_storehpd (m, op1));
10770 else
10772 if (mode != V4SFmode)
10773 op1 = gen_lowpart (V4SFmode, op1);
10774 m = adjust_address (op0, V2SFmode, 0);
10775 emit_insn (gen_sse_storelps (m, op1));
10776 m = adjust_address (op0, V2SFmode, 8);
10777 emit_insn (gen_sse_storehps (m, op1));
10780 else
10781 gcc_unreachable ();
10784 /* Expand a push in MODE. This is some mode for which we do not support
10785 proper push instructions, at least from the registers that we expect
10786 the value to live in. */
10788 void
10789 ix86_expand_push (enum machine_mode mode, rtx x)
10791 rtx tmp;
10793 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10794 GEN_INT (-GET_MODE_SIZE (mode)),
10795 stack_pointer_rtx, 1, OPTAB_DIRECT);
10796 if (tmp != stack_pointer_rtx)
10797 emit_move_insn (stack_pointer_rtx, tmp);
10799 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10800 emit_move_insn (tmp, x);
10803 /* Helper function of ix86_fixup_binary_operands to canonicalize
10804 operand order. Returns true if the operands should be swapped. */
10806 static bool
10807 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10808 rtx operands[])
10810 rtx dst = operands[0];
10811 rtx src1 = operands[1];
10812 rtx src2 = operands[2];
10814 /* If the operation is not commutative, we can't do anything. */
10815 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10816 return false;
10818 /* Highest priority is that src1 should match dst. */
10819 if (rtx_equal_p (dst, src1))
10820 return false;
10821 if (rtx_equal_p (dst, src2))
10822 return true;
10824 /* Next highest priority is that immediate constants come second. */
10825 if (immediate_operand (src2, mode))
10826 return false;
10827 if (immediate_operand (src1, mode))
10828 return true;
10830 /* Lowest priority is that memory references should come second. */
10831 if (MEM_P (src2))
10832 return false;
10833 if (MEM_P (src1))
10834 return true;
10836 return false;
10840 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10841 destination to use for the operation. If different from the true
10842 destination in operands[0], a copy operation will be required. */
10845 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10846 rtx operands[])
10848 rtx dst = operands[0];
10849 rtx src1 = operands[1];
10850 rtx src2 = operands[2];
10852 /* Canonicalize operand order. */
10853 if (ix86_swap_binary_operands_p (code, mode, operands))
10855 rtx temp;
10857 /* It is invalid to swap operands of different modes. */
10858 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
10860 temp = src1;
10861 src1 = src2;
10862 src2 = temp;
10865 /* Both source operands cannot be in memory. */
10866 if (MEM_P (src1) && MEM_P (src2))
10868 /* Optimization: Only read from memory once. */
10869 if (rtx_equal_p (src1, src2))
10871 src2 = force_reg (mode, src2);
10872 src1 = src2;
10874 else
10875 src2 = force_reg (mode, src2);
10878 /* If the destination is memory, and we do not have matching source
10879 operands, do things in registers. */
10880 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10881 dst = gen_reg_rtx (mode);
10883 /* Source 1 cannot be a constant. */
10884 if (CONSTANT_P (src1))
10885 src1 = force_reg (mode, src1);
10887 /* Source 1 cannot be a non-matching memory. */
10888 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10889 src1 = force_reg (mode, src1);
10891 operands[1] = src1;
10892 operands[2] = src2;
10893 return dst;
10896 /* Similarly, but assume that the destination has already been
10897 set up properly. */
10899 void
10900 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10901 enum machine_mode mode, rtx operands[])
10903 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10904 gcc_assert (dst == operands[0]);
10907 /* Attempt to expand a binary operator. Make the expansion closer to the
10908 actual machine, then just general_operand, which will allow 3 separate
10909 memory references (one output, two input) in a single insn. */
10911 void
10912 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10913 rtx operands[])
10915 rtx src1, src2, dst, op, clob;
10917 dst = ix86_fixup_binary_operands (code, mode, operands);
10918 src1 = operands[1];
10919 src2 = operands[2];
10921 /* Emit the instruction. */
10923 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10924 if (reload_in_progress)
10926 /* Reload doesn't know about the flags register, and doesn't know that
10927 it doesn't want to clobber it. We can only do this with PLUS. */
10928 gcc_assert (code == PLUS);
10929 emit_insn (op);
10931 else
10933 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10934 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10937 /* Fix up the destination if needed. */
10938 if (dst != operands[0])
10939 emit_move_insn (operands[0], dst);
10942 /* Return TRUE or FALSE depending on whether the binary operator meets the
10943 appropriate constraints. */
10946 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10947 rtx operands[3])
10949 rtx dst = operands[0];
10950 rtx src1 = operands[1];
10951 rtx src2 = operands[2];
10953 /* Both source operands cannot be in memory. */
10954 if (MEM_P (src1) && MEM_P (src2))
10955 return 0;
10957 /* Canonicalize operand order for commutative operators. */
10958 if (ix86_swap_binary_operands_p (code, mode, operands))
10960 rtx temp = src1;
10961 src1 = src2;
10962 src2 = temp;
10965 /* If the destination is memory, we must have a matching source operand. */
10966 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10967 return 0;
10969 /* Source 1 cannot be a constant. */
10970 if (CONSTANT_P (src1))
10971 return 0;
10973 /* Source 1 cannot be a non-matching memory. */
10974 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10975 return 0;
10977 return 1;
10980 /* Attempt to expand a unary operator. Make the expansion closer to the
10981 actual machine, then just general_operand, which will allow 2 separate
10982 memory references (one output, one input) in a single insn. */
10984 void
10985 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10986 rtx operands[])
10988 int matching_memory;
10989 rtx src, dst, op, clob;
10991 dst = operands[0];
10992 src = operands[1];
10994 /* If the destination is memory, and we do not have matching source
10995 operands, do things in registers. */
10996 matching_memory = 0;
10997 if (MEM_P (dst))
10999 if (rtx_equal_p (dst, src))
11000 matching_memory = 1;
11001 else
11002 dst = gen_reg_rtx (mode);
11005 /* When source operand is memory, destination must match. */
11006 if (MEM_P (src) && !matching_memory)
11007 src = force_reg (mode, src);
11009 /* Emit the instruction. */
11011 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
11012 if (reload_in_progress || code == NOT)
11014 /* Reload doesn't know about the flags register, and doesn't know that
11015 it doesn't want to clobber it. */
11016 gcc_assert (code == NOT);
11017 emit_insn (op);
11019 else
11021 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11022 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11025 /* Fix up the destination if needed. */
11026 if (dst != operands[0])
11027 emit_move_insn (operands[0], dst);
11030 /* Return TRUE or FALSE depending on whether the unary operator meets the
11031 appropriate constraints. */
11034 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
11035 enum machine_mode mode ATTRIBUTE_UNUSED,
11036 rtx operands[2] ATTRIBUTE_UNUSED)
11038 /* If one of operands is memory, source and destination must match. */
11039 if ((MEM_P (operands[0])
11040 || MEM_P (operands[1]))
11041 && ! rtx_equal_p (operands[0], operands[1]))
11042 return FALSE;
11043 return TRUE;
11046 /* Post-reload splitter for converting an SF or DFmode value in an
11047 SSE register into an unsigned SImode. */
11049 void
11050 ix86_split_convert_uns_si_sse (rtx operands[])
11052 enum machine_mode vecmode;
11053 rtx value, large, zero_or_two31, input, two31, x;
11055 large = operands[1];
11056 zero_or_two31 = operands[2];
11057 input = operands[3];
11058 two31 = operands[4];
11059 vecmode = GET_MODE (large);
11060 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
11062 /* Load up the value into the low element. We must ensure that the other
11063 elements are valid floats -- zero is the easiest such value. */
11064 if (MEM_P (input))
11066 if (vecmode == V4SFmode)
11067 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
11068 else
11069 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
11071 else
11073 input = gen_rtx_REG (vecmode, REGNO (input));
11074 emit_move_insn (value, CONST0_RTX (vecmode));
11075 if (vecmode == V4SFmode)
11076 emit_insn (gen_sse_movss (value, value, input));
11077 else
11078 emit_insn (gen_sse2_movsd (value, value, input));
11081 emit_move_insn (large, two31);
11082 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
11084 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
11085 emit_insn (gen_rtx_SET (VOIDmode, large, x));
11087 x = gen_rtx_AND (vecmode, zero_or_two31, large);
11088 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
11090 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
11091 emit_insn (gen_rtx_SET (VOIDmode, value, x));
11093 large = gen_rtx_REG (V4SImode, REGNO (large));
11094 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
11096 x = gen_rtx_REG (V4SImode, REGNO (value));
11097 if (vecmode == V4SFmode)
11098 emit_insn (gen_sse2_cvttps2dq (x, value));
11099 else
11100 emit_insn (gen_sse2_cvttpd2dq (x, value));
11101 value = x;
11103 emit_insn (gen_xorv4si3 (value, value, large));
11106 /* Convert an unsigned DImode value into a DFmode, using only SSE.
11107 Expects the 64-bit DImode to be supplied in a pair of integral
11108 registers. Requires SSE2; will use SSE3 if available. For x86_32,
11109 -mfpmath=sse, !optimize_size only. */
11111 void
11112 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
11114 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
11115 rtx int_xmm, fp_xmm;
11116 rtx biases, exponents;
11117 rtx x;
11119 int_xmm = gen_reg_rtx (V4SImode);
11120 if (TARGET_INTER_UNIT_MOVES)
11121 emit_insn (gen_movdi_to_sse (int_xmm, input));
11122 else if (TARGET_SSE_SPLIT_REGS)
11124 emit_clobber (int_xmm);
11125 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
11127 else
11129 x = gen_reg_rtx (V2DImode);
11130 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
11131 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
11134 x = gen_rtx_CONST_VECTOR (V4SImode,
11135 gen_rtvec (4, GEN_INT (0x43300000UL),
11136 GEN_INT (0x45300000UL),
11137 const0_rtx, const0_rtx));
11138 exponents = validize_mem (force_const_mem (V4SImode, x));
11140 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
11141 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
11143 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
11144 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
11145 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
11146 (0x1.0p84 + double(fp_value_hi_xmm)).
11147 Note these exponents differ by 32. */
11149 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
11151 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
11152 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
11153 real_ldexp (&bias_lo_rvt, &dconst1, 52);
11154 real_ldexp (&bias_hi_rvt, &dconst1, 84);
11155 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
11156 x = const_double_from_real_value (bias_hi_rvt, DFmode);
11157 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
11158 biases = validize_mem (force_const_mem (V2DFmode, biases));
11159 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
11161 /* Add the upper and lower DFmode values together. */
11162 if (TARGET_SSE3)
11163 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
11164 else
11166 x = copy_to_mode_reg (V2DFmode, fp_xmm);
11167 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
11168 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
11171 ix86_expand_vector_extract (false, target, fp_xmm, 0);
11174 /* Not used, but eases macroization of patterns. */
11175 void
11176 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
11177 rtx input ATTRIBUTE_UNUSED)
11179 gcc_unreachable ();
11182 /* Convert an unsigned SImode value into a DFmode. Only currently used
11183 for SSE, but applicable anywhere. */
11185 void
11186 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
11188 REAL_VALUE_TYPE TWO31r;
11189 rtx x, fp;
11191 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11192 NULL, 1, OPTAB_DIRECT);
11194 fp = gen_reg_rtx (DFmode);
11195 emit_insn (gen_floatsidf2 (fp, x));
11197 real_ldexp (&TWO31r, &dconst1, 31);
11198 x = const_double_from_real_value (TWO31r, DFmode);
11200 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
11201 if (x != target)
11202 emit_move_insn (target, x);
11205 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11206 32-bit mode; otherwise we have a direct convert instruction. */
11208 void
11209 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11211 REAL_VALUE_TYPE TWO32r;
11212 rtx fp_lo, fp_hi, x;
11214 fp_lo = gen_reg_rtx (DFmode);
11215 fp_hi = gen_reg_rtx (DFmode);
11217 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11219 real_ldexp (&TWO32r, &dconst1, 32);
11220 x = const_double_from_real_value (TWO32r, DFmode);
11221 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11223 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11225 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
11226 0, OPTAB_DIRECT);
11227 if (x != target)
11228 emit_move_insn (target, x);
11231 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11232 For x86_32, -mfpmath=sse, !optimize_size only. */
11233 void
11234 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11236 REAL_VALUE_TYPE ONE16r;
11237 rtx fp_hi, fp_lo, int_hi, int_lo, x;
11239 real_ldexp (&ONE16r, &dconst1, 16);
11240 x = const_double_from_real_value (ONE16r, SFmode);
11241 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11242 NULL, 0, OPTAB_DIRECT);
11243 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11244 NULL, 0, OPTAB_DIRECT);
11245 fp_hi = gen_reg_rtx (SFmode);
11246 fp_lo = gen_reg_rtx (SFmode);
11247 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11248 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11249 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11250 0, OPTAB_DIRECT);
11251 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11252 0, OPTAB_DIRECT);
11253 if (!rtx_equal_p (target, fp_hi))
11254 emit_move_insn (target, fp_hi);
11257 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
11258 then replicate the value for all elements of the vector
11259 register. */
11262 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
11264 rtvec v;
11265 switch (mode)
11267 case SImode:
11268 gcc_assert (vect);
11269 v = gen_rtvec (4, value, value, value, value);
11270 return gen_rtx_CONST_VECTOR (V4SImode, v);
11272 case DImode:
11273 gcc_assert (vect);
11274 v = gen_rtvec (2, value, value);
11275 return gen_rtx_CONST_VECTOR (V2DImode, v);
11277 case SFmode:
11278 if (vect)
11279 v = gen_rtvec (4, value, value, value, value);
11280 else
11281 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11282 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11283 return gen_rtx_CONST_VECTOR (V4SFmode, v);
11285 case DFmode:
11286 if (vect)
11287 v = gen_rtvec (2, value, value);
11288 else
11289 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11290 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11292 default:
11293 gcc_unreachable ();
11297 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11298 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11299 for an SSE register. If VECT is true, then replicate the mask for
11300 all elements of the vector register. If INVERT is true, then create
11301 a mask excluding the sign bit. */
11304 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11306 enum machine_mode vec_mode, imode;
11307 HOST_WIDE_INT hi, lo;
11308 int shift = 63;
11309 rtx v;
11310 rtx mask;
11312 /* Find the sign bit, sign extended to 2*HWI. */
11313 switch (mode)
11315 case SImode:
11316 case SFmode:
11317 imode = SImode;
11318 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11319 lo = 0x80000000, hi = lo < 0;
11320 break;
11322 case DImode:
11323 case DFmode:
11324 imode = DImode;
11325 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11326 if (HOST_BITS_PER_WIDE_INT >= 64)
11327 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11328 else
11329 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11330 break;
11332 case TImode:
11333 case TFmode:
11334 vec_mode = VOIDmode;
11335 if (HOST_BITS_PER_WIDE_INT >= 64)
11337 imode = TImode;
11338 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11340 else
11342 rtvec vec;
11344 imode = DImode;
11345 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11347 if (invert)
11349 lo = ~lo, hi = ~hi;
11350 v = constm1_rtx;
11352 else
11353 v = const0_rtx;
11355 mask = immed_double_const (lo, hi, imode);
11357 vec = gen_rtvec (2, v, mask);
11358 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
11359 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
11361 return v;
11363 break;
11365 default:
11366 gcc_unreachable ();
11369 if (invert)
11370 lo = ~lo, hi = ~hi;
11372 /* Force this value into the low part of a fp vector constant. */
11373 mask = immed_double_const (lo, hi, imode);
11374 mask = gen_lowpart (mode, mask);
11376 if (vec_mode == VOIDmode)
11377 return force_reg (mode, mask);
11379 v = ix86_build_const_vector (mode, vect, mask);
11380 return force_reg (vec_mode, v);
11383 /* Generate code for floating point ABS or NEG. */
11385 void
11386 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11387 rtx operands[])
11389 rtx mask, set, use, clob, dst, src;
11390 bool use_sse = false;
11391 bool vector_mode = VECTOR_MODE_P (mode);
11392 enum machine_mode elt_mode = mode;
11394 if (vector_mode)
11396 elt_mode = GET_MODE_INNER (mode);
11397 use_sse = true;
11399 else if (mode == TFmode)
11400 use_sse = true;
11401 else if (TARGET_SSE_MATH)
11402 use_sse = SSE_FLOAT_MODE_P (mode);
11404 /* NEG and ABS performed with SSE use bitwise mask operations.
11405 Create the appropriate mask now. */
11406 if (use_sse)
11407 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11408 else
11409 mask = NULL_RTX;
11411 dst = operands[0];
11412 src = operands[1];
11414 if (vector_mode)
11416 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11417 set = gen_rtx_SET (VOIDmode, dst, set);
11418 emit_insn (set);
11420 else
11422 set = gen_rtx_fmt_e (code, mode, src);
11423 set = gen_rtx_SET (VOIDmode, dst, set);
11424 if (mask)
11426 use = gen_rtx_USE (VOIDmode, mask);
11427 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11428 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11429 gen_rtvec (3, set, use, clob)));
11431 else
11432 emit_insn (set);
11436 /* Expand a copysign operation. Special case operand 0 being a constant. */
11438 void
11439 ix86_expand_copysign (rtx operands[])
11441 enum machine_mode mode;
11442 rtx dest, op0, op1, mask, nmask;
11444 dest = operands[0];
11445 op0 = operands[1];
11446 op1 = operands[2];
11448 mode = GET_MODE (dest);
11450 if (GET_CODE (op0) == CONST_DOUBLE)
11452 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11454 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11455 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11457 if (mode == SFmode || mode == DFmode)
11459 enum machine_mode vmode;
11461 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11463 if (op0 == CONST0_RTX (mode))
11464 op0 = CONST0_RTX (vmode);
11465 else
11467 rtvec v;
11469 if (mode == SFmode)
11470 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11471 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11472 else
11473 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11475 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11478 else if (op0 != CONST0_RTX (mode))
11479 op0 = force_reg (mode, op0);
11481 mask = ix86_build_signbit_mask (mode, 0, 0);
11483 if (mode == SFmode)
11484 copysign_insn = gen_copysignsf3_const;
11485 else if (mode == DFmode)
11486 copysign_insn = gen_copysigndf3_const;
11487 else
11488 copysign_insn = gen_copysigntf3_const;
11490 emit_insn (copysign_insn (dest, op0, op1, mask));
11492 else
11494 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11496 nmask = ix86_build_signbit_mask (mode, 0, 1);
11497 mask = ix86_build_signbit_mask (mode, 0, 0);
11499 if (mode == SFmode)
11500 copysign_insn = gen_copysignsf3_var;
11501 else if (mode == DFmode)
11502 copysign_insn = gen_copysigndf3_var;
11503 else
11504 copysign_insn = gen_copysigntf3_var;
11506 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11510 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11511 be a constant, and so has already been expanded into a vector constant. */
11513 void
11514 ix86_split_copysign_const (rtx operands[])
11516 enum machine_mode mode, vmode;
11517 rtx dest, op0, op1, mask, x;
11519 dest = operands[0];
11520 op0 = operands[1];
11521 op1 = operands[2];
11522 mask = operands[3];
11524 mode = GET_MODE (dest);
11525 vmode = GET_MODE (mask);
11527 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11528 x = gen_rtx_AND (vmode, dest, mask);
11529 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11531 if (op0 != CONST0_RTX (vmode))
11533 x = gen_rtx_IOR (vmode, dest, op0);
11534 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11538 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11539 so we have to do two masks. */
11541 void
11542 ix86_split_copysign_var (rtx operands[])
11544 enum machine_mode mode, vmode;
11545 rtx dest, scratch, op0, op1, mask, nmask, x;
11547 dest = operands[0];
11548 scratch = operands[1];
11549 op0 = operands[2];
11550 op1 = operands[3];
11551 nmask = operands[4];
11552 mask = operands[5];
11554 mode = GET_MODE (dest);
11555 vmode = GET_MODE (mask);
11557 if (rtx_equal_p (op0, op1))
11559 /* Shouldn't happen often (it's useless, obviously), but when it does
11560 we'd generate incorrect code if we continue below. */
11561 emit_move_insn (dest, op0);
11562 return;
11565 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11567 gcc_assert (REGNO (op1) == REGNO (scratch));
11569 x = gen_rtx_AND (vmode, scratch, mask);
11570 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11572 dest = mask;
11573 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11574 x = gen_rtx_NOT (vmode, dest);
11575 x = gen_rtx_AND (vmode, x, op0);
11576 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11578 else
11580 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11582 x = gen_rtx_AND (vmode, scratch, mask);
11584 else /* alternative 2,4 */
11586 gcc_assert (REGNO (mask) == REGNO (scratch));
11587 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11588 x = gen_rtx_AND (vmode, scratch, op1);
11590 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11592 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11594 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11595 x = gen_rtx_AND (vmode, dest, nmask);
11597 else /* alternative 3,4 */
11599 gcc_assert (REGNO (nmask) == REGNO (dest));
11600 dest = nmask;
11601 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11602 x = gen_rtx_AND (vmode, dest, op0);
11604 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11607 x = gen_rtx_IOR (vmode, dest, scratch);
11608 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11611 /* Return TRUE or FALSE depending on whether the first SET in INSN
11612 has source and destination with matching CC modes, and that the
11613 CC mode is at least as constrained as REQ_MODE. */
11616 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11618 rtx set;
11619 enum machine_mode set_mode;
11621 set = PATTERN (insn);
11622 if (GET_CODE (set) == PARALLEL)
11623 set = XVECEXP (set, 0, 0);
11624 gcc_assert (GET_CODE (set) == SET);
11625 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11627 set_mode = GET_MODE (SET_DEST (set));
11628 switch (set_mode)
11630 case CCNOmode:
11631 if (req_mode != CCNOmode
11632 && (req_mode != CCmode
11633 || XEXP (SET_SRC (set), 1) != const0_rtx))
11634 return 0;
11635 break;
11636 case CCmode:
11637 if (req_mode == CCGCmode)
11638 return 0;
11639 /* FALLTHRU */
11640 case CCGCmode:
11641 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11642 return 0;
11643 /* FALLTHRU */
11644 case CCGOCmode:
11645 if (req_mode == CCZmode)
11646 return 0;
11647 /* FALLTHRU */
11648 case CCZmode:
11649 break;
11651 default:
11652 gcc_unreachable ();
11655 return (GET_MODE (SET_SRC (set)) == set_mode);
11658 /* Generate insn patterns to do an integer compare of OPERANDS. */
11660 static rtx
11661 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11663 enum machine_mode cmpmode;
11664 rtx tmp, flags;
11666 cmpmode = SELECT_CC_MODE (code, op0, op1);
11667 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11669 /* This is very simple, but making the interface the same as in the
11670 FP case makes the rest of the code easier. */
11671 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11672 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11674 /* Return the test that should be put into the flags user, i.e.
11675 the bcc, scc, or cmov instruction. */
11676 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11679 /* Figure out whether to use ordered or unordered fp comparisons.
11680 Return the appropriate mode to use. */
11682 enum machine_mode
11683 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11685 /* ??? In order to make all comparisons reversible, we do all comparisons
11686 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11687 all forms trapping and nontrapping comparisons, we can make inequality
11688 comparisons trapping again, since it results in better code when using
11689 FCOM based compares. */
11690 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11693 enum machine_mode
11694 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11696 enum machine_mode mode = GET_MODE (op0);
11698 if (SCALAR_FLOAT_MODE_P (mode))
11700 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11701 return ix86_fp_compare_mode (code);
11704 switch (code)
11706 /* Only zero flag is needed. */
11707 case EQ: /* ZF=0 */
11708 case NE: /* ZF!=0 */
11709 return CCZmode;
11710 /* Codes needing carry flag. */
11711 case GEU: /* CF=0 */
11712 case LTU: /* CF=1 */
11713 /* Detect overflow checks. They need just the carry flag. */
11714 if (GET_CODE (op0) == PLUS
11715 && rtx_equal_p (op1, XEXP (op0, 0)))
11716 return CCCmode;
11717 else
11718 return CCmode;
11719 case GTU: /* CF=0 & ZF=0 */
11720 case LEU: /* CF=1 | ZF=1 */
11721 /* Detect overflow checks. They need just the carry flag. */
11722 if (GET_CODE (op0) == MINUS
11723 && rtx_equal_p (op1, XEXP (op0, 0)))
11724 return CCCmode;
11725 else
11726 return CCmode;
11727 /* Codes possibly doable only with sign flag when
11728 comparing against zero. */
11729 case GE: /* SF=OF or SF=0 */
11730 case LT: /* SF<>OF or SF=1 */
11731 if (op1 == const0_rtx)
11732 return CCGOCmode;
11733 else
11734 /* For other cases Carry flag is not required. */
11735 return CCGCmode;
11736 /* Codes doable only with sign flag when comparing
11737 against zero, but we miss jump instruction for it
11738 so we need to use relational tests against overflow
11739 that thus needs to be zero. */
11740 case GT: /* ZF=0 & SF=OF */
11741 case LE: /* ZF=1 | SF<>OF */
11742 if (op1 == const0_rtx)
11743 return CCNOmode;
11744 else
11745 return CCGCmode;
11746 /* strcmp pattern do (use flags) and combine may ask us for proper
11747 mode. */
11748 case USE:
11749 return CCmode;
11750 default:
11751 gcc_unreachable ();
11755 /* Return the fixed registers used for condition codes. */
11757 static bool
11758 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11760 *p1 = FLAGS_REG;
11761 *p2 = FPSR_REG;
11762 return true;
11765 /* If two condition code modes are compatible, return a condition code
11766 mode which is compatible with both. Otherwise, return
11767 VOIDmode. */
11769 static enum machine_mode
11770 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11772 if (m1 == m2)
11773 return m1;
11775 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11776 return VOIDmode;
11778 if ((m1 == CCGCmode && m2 == CCGOCmode)
11779 || (m1 == CCGOCmode && m2 == CCGCmode))
11780 return CCGCmode;
11782 switch (m1)
11784 default:
11785 gcc_unreachable ();
11787 case CCmode:
11788 case CCGCmode:
11789 case CCGOCmode:
11790 case CCNOmode:
11791 case CCAmode:
11792 case CCCmode:
11793 case CCOmode:
11794 case CCSmode:
11795 case CCZmode:
11796 switch (m2)
11798 default:
11799 return VOIDmode;
11801 case CCmode:
11802 case CCGCmode:
11803 case CCGOCmode:
11804 case CCNOmode:
11805 case CCAmode:
11806 case CCCmode:
11807 case CCOmode:
11808 case CCSmode:
11809 case CCZmode:
11810 return CCmode;
11813 case CCFPmode:
11814 case CCFPUmode:
11815 /* These are only compatible with themselves, which we already
11816 checked above. */
11817 return VOIDmode;
11821 /* Split comparison code CODE into comparisons we can do using branch
11822 instructions. BYPASS_CODE is comparison code for branch that will
11823 branch around FIRST_CODE and SECOND_CODE. If some of branches
11824 is not required, set value to UNKNOWN.
11825 We never require more than two branches. */
11827 void
11828 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11829 enum rtx_code *first_code,
11830 enum rtx_code *second_code)
11832 *first_code = code;
11833 *bypass_code = UNKNOWN;
11834 *second_code = UNKNOWN;
11836 /* The fcomi comparison sets flags as follows:
11838 cmp ZF PF CF
11839 > 0 0 0
11840 < 0 0 1
11841 = 1 0 0
11842 un 1 1 1 */
11844 switch (code)
11846 case GT: /* GTU - CF=0 & ZF=0 */
11847 case GE: /* GEU - CF=0 */
11848 case ORDERED: /* PF=0 */
11849 case UNORDERED: /* PF=1 */
11850 case UNEQ: /* EQ - ZF=1 */
11851 case UNLT: /* LTU - CF=1 */
11852 case UNLE: /* LEU - CF=1 | ZF=1 */
11853 case LTGT: /* EQ - ZF=0 */
11854 break;
11855 case LT: /* LTU - CF=1 - fails on unordered */
11856 *first_code = UNLT;
11857 *bypass_code = UNORDERED;
11858 break;
11859 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11860 *first_code = UNLE;
11861 *bypass_code = UNORDERED;
11862 break;
11863 case EQ: /* EQ - ZF=1 - fails on unordered */
11864 *first_code = UNEQ;
11865 *bypass_code = UNORDERED;
11866 break;
11867 case NE: /* NE - ZF=0 - fails on unordered */
11868 *first_code = LTGT;
11869 *second_code = UNORDERED;
11870 break;
11871 case UNGE: /* GEU - CF=0 - fails on unordered */
11872 *first_code = GE;
11873 *second_code = UNORDERED;
11874 break;
11875 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11876 *first_code = GT;
11877 *second_code = UNORDERED;
11878 break;
11879 default:
11880 gcc_unreachable ();
11882 if (!TARGET_IEEE_FP)
11884 *second_code = UNKNOWN;
11885 *bypass_code = UNKNOWN;
11889 /* Return cost of comparison done fcom + arithmetics operations on AX.
11890 All following functions do use number of instructions as a cost metrics.
11891 In future this should be tweaked to compute bytes for optimize_size and
11892 take into account performance of various instructions on various CPUs. */
11893 static int
11894 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11896 if (!TARGET_IEEE_FP)
11897 return 4;
11898 /* The cost of code output by ix86_expand_fp_compare. */
11899 switch (code)
11901 case UNLE:
11902 case UNLT:
11903 case LTGT:
11904 case GT:
11905 case GE:
11906 case UNORDERED:
11907 case ORDERED:
11908 case UNEQ:
11909 return 4;
11910 break;
11911 case LT:
11912 case NE:
11913 case EQ:
11914 case UNGE:
11915 return 5;
11916 break;
11917 case LE:
11918 case UNGT:
11919 return 6;
11920 break;
11921 default:
11922 gcc_unreachable ();
11926 /* Return cost of comparison done using fcomi operation.
11927 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11928 static int
11929 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11931 enum rtx_code bypass_code, first_code, second_code;
11932 /* Return arbitrarily high cost when instruction is not supported - this
11933 prevents gcc from using it. */
11934 if (!TARGET_CMOVE)
11935 return 1024;
11936 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11937 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11940 /* Return cost of comparison done using sahf operation.
11941 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11942 static int
11943 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11945 enum rtx_code bypass_code, first_code, second_code;
11946 /* Return arbitrarily high cost when instruction is not preferred - this
11947 avoids gcc from using it. */
11948 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11949 return 1024;
11950 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11951 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11954 /* Compute cost of the comparison done using any method.
11955 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11956 static int
11957 ix86_fp_comparison_cost (enum rtx_code code)
11959 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11960 int min;
11962 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11963 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11965 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11966 if (min > sahf_cost)
11967 min = sahf_cost;
11968 if (min > fcomi_cost)
11969 min = fcomi_cost;
11970 return min;
11973 /* Return true if we should use an FCOMI instruction for this
11974 fp comparison. */
11977 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11979 enum rtx_code swapped_code = swap_condition (code);
11981 return ((ix86_fp_comparison_cost (code)
11982 == ix86_fp_comparison_fcomi_cost (code))
11983 || (ix86_fp_comparison_cost (swapped_code)
11984 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11987 /* Swap, force into registers, or otherwise massage the two operands
11988 to a fp comparison. The operands are updated in place; the new
11989 comparison code is returned. */
11991 static enum rtx_code
11992 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11994 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11995 rtx op0 = *pop0, op1 = *pop1;
11996 enum machine_mode op_mode = GET_MODE (op0);
11997 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11999 /* All of the unordered compare instructions only work on registers.
12000 The same is true of the fcomi compare instructions. The XFmode
12001 compare instructions require registers except when comparing
12002 against zero or when converting operand 1 from fixed point to
12003 floating point. */
12005 if (!is_sse
12006 && (fpcmp_mode == CCFPUmode
12007 || (op_mode == XFmode
12008 && ! (standard_80387_constant_p (op0) == 1
12009 || standard_80387_constant_p (op1) == 1)
12010 && GET_CODE (op1) != FLOAT)
12011 || ix86_use_fcomi_compare (code)))
12013 op0 = force_reg (op_mode, op0);
12014 op1 = force_reg (op_mode, op1);
12016 else
12018 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
12019 things around if they appear profitable, otherwise force op0
12020 into a register. */
12022 if (standard_80387_constant_p (op0) == 0
12023 || (MEM_P (op0)
12024 && ! (standard_80387_constant_p (op1) == 0
12025 || MEM_P (op1))))
12027 rtx tmp;
12028 tmp = op0, op0 = op1, op1 = tmp;
12029 code = swap_condition (code);
12032 if (!REG_P (op0))
12033 op0 = force_reg (op_mode, op0);
12035 if (CONSTANT_P (op1))
12037 int tmp = standard_80387_constant_p (op1);
12038 if (tmp == 0)
12039 op1 = validize_mem (force_const_mem (op_mode, op1));
12040 else if (tmp == 1)
12042 if (TARGET_CMOVE)
12043 op1 = force_reg (op_mode, op1);
12045 else
12046 op1 = force_reg (op_mode, op1);
12050 /* Try to rearrange the comparison to make it cheaper. */
12051 if (ix86_fp_comparison_cost (code)
12052 > ix86_fp_comparison_cost (swap_condition (code))
12053 && (REG_P (op1) || can_create_pseudo_p ()))
12055 rtx tmp;
12056 tmp = op0, op0 = op1, op1 = tmp;
12057 code = swap_condition (code);
12058 if (!REG_P (op0))
12059 op0 = force_reg (op_mode, op0);
12062 *pop0 = op0;
12063 *pop1 = op1;
12064 return code;
12067 /* Convert comparison codes we use to represent FP comparison to integer
12068 code that will result in proper branch. Return UNKNOWN if no such code
12069 is available. */
12071 enum rtx_code
12072 ix86_fp_compare_code_to_integer (enum rtx_code code)
12074 switch (code)
12076 case GT:
12077 return GTU;
12078 case GE:
12079 return GEU;
12080 case ORDERED:
12081 case UNORDERED:
12082 return code;
12083 break;
12084 case UNEQ:
12085 return EQ;
12086 break;
12087 case UNLT:
12088 return LTU;
12089 break;
12090 case UNLE:
12091 return LEU;
12092 break;
12093 case LTGT:
12094 return NE;
12095 break;
12096 default:
12097 return UNKNOWN;
12101 /* Generate insn patterns to do a floating point compare of OPERANDS. */
12103 static rtx
12104 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
12105 rtx *second_test, rtx *bypass_test)
12107 enum machine_mode fpcmp_mode, intcmp_mode;
12108 rtx tmp, tmp2;
12109 int cost = ix86_fp_comparison_cost (code);
12110 enum rtx_code bypass_code, first_code, second_code;
12112 fpcmp_mode = ix86_fp_compare_mode (code);
12113 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
12115 if (second_test)
12116 *second_test = NULL_RTX;
12117 if (bypass_test)
12118 *bypass_test = NULL_RTX;
12120 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12122 /* Do fcomi/sahf based test when profitable. */
12123 if (ix86_fp_comparison_arithmetics_cost (code) > cost
12124 && (bypass_code == UNKNOWN || bypass_test)
12125 && (second_code == UNKNOWN || second_test))
12127 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12128 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
12129 tmp);
12130 if (TARGET_CMOVE)
12131 emit_insn (tmp);
12132 else
12134 gcc_assert (TARGET_SAHF);
12136 if (!scratch)
12137 scratch = gen_reg_rtx (HImode);
12138 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
12140 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
12143 /* The FP codes work out to act like unsigned. */
12144 intcmp_mode = fpcmp_mode;
12145 code = first_code;
12146 if (bypass_code != UNKNOWN)
12147 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
12148 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12149 const0_rtx);
12150 if (second_code != UNKNOWN)
12151 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
12152 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12153 const0_rtx);
12155 else
12157 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
12158 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12159 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
12160 if (!scratch)
12161 scratch = gen_reg_rtx (HImode);
12162 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
12164 /* In the unordered case, we have to check C2 for NaN's, which
12165 doesn't happen to work out to anything nice combination-wise.
12166 So do some bit twiddling on the value we've got in AH to come
12167 up with an appropriate set of condition codes. */
12169 intcmp_mode = CCNOmode;
12170 switch (code)
12172 case GT:
12173 case UNGT:
12174 if (code == GT || !TARGET_IEEE_FP)
12176 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12177 code = EQ;
12179 else
12181 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12182 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12183 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
12184 intcmp_mode = CCmode;
12185 code = GEU;
12187 break;
12188 case LT:
12189 case UNLT:
12190 if (code == LT && TARGET_IEEE_FP)
12192 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12193 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
12194 intcmp_mode = CCmode;
12195 code = EQ;
12197 else
12199 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
12200 code = NE;
12202 break;
12203 case GE:
12204 case UNGE:
12205 if (code == GE || !TARGET_IEEE_FP)
12207 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
12208 code = EQ;
12210 else
12212 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12213 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12214 GEN_INT (0x01)));
12215 code = NE;
12217 break;
12218 case LE:
12219 case UNLE:
12220 if (code == LE && TARGET_IEEE_FP)
12222 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12223 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12224 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12225 intcmp_mode = CCmode;
12226 code = LTU;
12228 else
12230 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12231 code = NE;
12233 break;
12234 case EQ:
12235 case UNEQ:
12236 if (code == EQ && TARGET_IEEE_FP)
12238 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12239 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12240 intcmp_mode = CCmode;
12241 code = EQ;
12243 else
12245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12246 code = NE;
12247 break;
12249 break;
12250 case NE:
12251 case LTGT:
12252 if (code == NE && TARGET_IEEE_FP)
12254 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12255 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12256 GEN_INT (0x40)));
12257 code = NE;
12259 else
12261 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12262 code = EQ;
12264 break;
12266 case UNORDERED:
12267 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12268 code = NE;
12269 break;
12270 case ORDERED:
12271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12272 code = EQ;
12273 break;
12275 default:
12276 gcc_unreachable ();
12280 /* Return the test that should be put into the flags user, i.e.
12281 the bcc, scc, or cmov instruction. */
12282 return gen_rtx_fmt_ee (code, VOIDmode,
12283 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12284 const0_rtx);
12288 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12290 rtx op0, op1, ret;
12291 op0 = ix86_compare_op0;
12292 op1 = ix86_compare_op1;
12294 if (second_test)
12295 *second_test = NULL_RTX;
12296 if (bypass_test)
12297 *bypass_test = NULL_RTX;
12299 if (ix86_compare_emitted)
12301 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12302 ix86_compare_emitted = NULL_RTX;
12304 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12306 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12307 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12308 second_test, bypass_test);
12310 else
12311 ret = ix86_expand_int_compare (code, op0, op1);
12313 return ret;
12316 /* Return true if the CODE will result in nontrivial jump sequence. */
12317 bool
12318 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12320 enum rtx_code bypass_code, first_code, second_code;
12321 if (!TARGET_CMOVE)
12322 return true;
12323 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12324 return bypass_code != UNKNOWN || second_code != UNKNOWN;
12327 void
12328 ix86_expand_branch (enum rtx_code code, rtx label)
12330 rtx tmp;
12332 /* If we have emitted a compare insn, go straight to simple.
12333 ix86_expand_compare won't emit anything if ix86_compare_emitted
12334 is non NULL. */
12335 if (ix86_compare_emitted)
12336 goto simple;
12338 switch (GET_MODE (ix86_compare_op0))
12340 case QImode:
12341 case HImode:
12342 case SImode:
12343 simple:
12344 tmp = ix86_expand_compare (code, NULL, NULL);
12345 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12346 gen_rtx_LABEL_REF (VOIDmode, label),
12347 pc_rtx);
12348 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12349 return;
12351 case SFmode:
12352 case DFmode:
12353 case XFmode:
12355 rtvec vec;
12356 int use_fcomi;
12357 enum rtx_code bypass_code, first_code, second_code;
12359 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12360 &ix86_compare_op1);
12362 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12364 /* Check whether we will use the natural sequence with one jump. If
12365 so, we can expand jump early. Otherwise delay expansion by
12366 creating compound insn to not confuse optimizers. */
12367 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12369 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12370 gen_rtx_LABEL_REF (VOIDmode, label),
12371 pc_rtx, NULL_RTX, NULL_RTX);
12373 else
12375 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12376 ix86_compare_op0, ix86_compare_op1);
12377 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12378 gen_rtx_LABEL_REF (VOIDmode, label),
12379 pc_rtx);
12380 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12382 use_fcomi = ix86_use_fcomi_compare (code);
12383 vec = rtvec_alloc (3 + !use_fcomi);
12384 RTVEC_ELT (vec, 0) = tmp;
12385 RTVEC_ELT (vec, 1)
12386 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12387 RTVEC_ELT (vec, 2)
12388 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12389 if (! use_fcomi)
12390 RTVEC_ELT (vec, 3)
12391 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12393 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12395 return;
12398 case DImode:
12399 if (TARGET_64BIT)
12400 goto simple;
12401 case TImode:
12402 /* Expand DImode branch into multiple compare+branch. */
12404 rtx lo[2], hi[2], label2;
12405 enum rtx_code code1, code2, code3;
12406 enum machine_mode submode;
12408 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12410 tmp = ix86_compare_op0;
12411 ix86_compare_op0 = ix86_compare_op1;
12412 ix86_compare_op1 = tmp;
12413 code = swap_condition (code);
12415 if (GET_MODE (ix86_compare_op0) == DImode)
12417 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12418 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12419 submode = SImode;
12421 else
12423 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12424 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12425 submode = DImode;
12428 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12429 avoid two branches. This costs one extra insn, so disable when
12430 optimizing for size. */
12432 if ((code == EQ || code == NE)
12433 && (!optimize_size
12434 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12436 rtx xor0, xor1;
12438 xor1 = hi[0];
12439 if (hi[1] != const0_rtx)
12440 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12441 NULL_RTX, 0, OPTAB_WIDEN);
12443 xor0 = lo[0];
12444 if (lo[1] != const0_rtx)
12445 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12446 NULL_RTX, 0, OPTAB_WIDEN);
12448 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12449 NULL_RTX, 0, OPTAB_WIDEN);
12451 ix86_compare_op0 = tmp;
12452 ix86_compare_op1 = const0_rtx;
12453 ix86_expand_branch (code, label);
12454 return;
12457 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12458 op1 is a constant and the low word is zero, then we can just
12459 examine the high word. Similarly for low word -1 and
12460 less-or-equal-than or greater-than. */
12462 if (CONST_INT_P (hi[1]))
12463 switch (code)
12465 case LT: case LTU: case GE: case GEU:
12466 if (lo[1] == const0_rtx)
12468 ix86_compare_op0 = hi[0];
12469 ix86_compare_op1 = hi[1];
12470 ix86_expand_branch (code, label);
12471 return;
12473 break;
12474 case LE: case LEU: case GT: case GTU:
12475 if (lo[1] == constm1_rtx)
12477 ix86_compare_op0 = hi[0];
12478 ix86_compare_op1 = hi[1];
12479 ix86_expand_branch (code, label);
12480 return;
12482 break;
12483 default:
12484 break;
12487 /* Otherwise, we need two or three jumps. */
12489 label2 = gen_label_rtx ();
12491 code1 = code;
12492 code2 = swap_condition (code);
12493 code3 = unsigned_condition (code);
12495 switch (code)
12497 case LT: case GT: case LTU: case GTU:
12498 break;
12500 case LE: code1 = LT; code2 = GT; break;
12501 case GE: code1 = GT; code2 = LT; break;
12502 case LEU: code1 = LTU; code2 = GTU; break;
12503 case GEU: code1 = GTU; code2 = LTU; break;
12505 case EQ: code1 = UNKNOWN; code2 = NE; break;
12506 case NE: code2 = UNKNOWN; break;
12508 default:
12509 gcc_unreachable ();
12513 * a < b =>
12514 * if (hi(a) < hi(b)) goto true;
12515 * if (hi(a) > hi(b)) goto false;
12516 * if (lo(a) < lo(b)) goto true;
12517 * false:
12520 ix86_compare_op0 = hi[0];
12521 ix86_compare_op1 = hi[1];
12523 if (code1 != UNKNOWN)
12524 ix86_expand_branch (code1, label);
12525 if (code2 != UNKNOWN)
12526 ix86_expand_branch (code2, label2);
12528 ix86_compare_op0 = lo[0];
12529 ix86_compare_op1 = lo[1];
12530 ix86_expand_branch (code3, label);
12532 if (code2 != UNKNOWN)
12533 emit_label (label2);
12534 return;
12537 default:
12538 gcc_unreachable ();
12542 /* Split branch based on floating point condition. */
12543 void
12544 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12545 rtx target1, rtx target2, rtx tmp, rtx pushed)
12547 rtx second, bypass;
12548 rtx label = NULL_RTX;
12549 rtx condition;
12550 int bypass_probability = -1, second_probability = -1, probability = -1;
12551 rtx i;
12553 if (target2 != pc_rtx)
12555 rtx tmp = target2;
12556 code = reverse_condition_maybe_unordered (code);
12557 target2 = target1;
12558 target1 = tmp;
12561 condition = ix86_expand_fp_compare (code, op1, op2,
12562 tmp, &second, &bypass);
12564 /* Remove pushed operand from stack. */
12565 if (pushed)
12566 ix86_free_from_memory (GET_MODE (pushed));
12568 if (split_branch_probability >= 0)
12570 /* Distribute the probabilities across the jumps.
12571 Assume the BYPASS and SECOND to be always test
12572 for UNORDERED. */
12573 probability = split_branch_probability;
12575 /* Value of 1 is low enough to make no need for probability
12576 to be updated. Later we may run some experiments and see
12577 if unordered values are more frequent in practice. */
12578 if (bypass)
12579 bypass_probability = 1;
12580 if (second)
12581 second_probability = 1;
12583 if (bypass != NULL_RTX)
12585 label = gen_label_rtx ();
12586 i = emit_jump_insn (gen_rtx_SET
12587 (VOIDmode, pc_rtx,
12588 gen_rtx_IF_THEN_ELSE (VOIDmode,
12589 bypass,
12590 gen_rtx_LABEL_REF (VOIDmode,
12591 label),
12592 pc_rtx)));
12593 if (bypass_probability >= 0)
12594 REG_NOTES (i)
12595 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12596 GEN_INT (bypass_probability),
12597 REG_NOTES (i));
12599 i = emit_jump_insn (gen_rtx_SET
12600 (VOIDmode, pc_rtx,
12601 gen_rtx_IF_THEN_ELSE (VOIDmode,
12602 condition, target1, target2)));
12603 if (probability >= 0)
12604 REG_NOTES (i)
12605 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12606 GEN_INT (probability),
12607 REG_NOTES (i));
12608 if (second != NULL_RTX)
12610 i = emit_jump_insn (gen_rtx_SET
12611 (VOIDmode, pc_rtx,
12612 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12613 target2)));
12614 if (second_probability >= 0)
12615 REG_NOTES (i)
12616 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12617 GEN_INT (second_probability),
12618 REG_NOTES (i));
12620 if (label != NULL_RTX)
12621 emit_label (label);
12625 ix86_expand_setcc (enum rtx_code code, rtx dest)
12627 rtx ret, tmp, tmpreg, equiv;
12628 rtx second_test, bypass_test;
12630 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12631 return 0; /* FAIL */
12633 gcc_assert (GET_MODE (dest) == QImode);
12635 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12636 PUT_MODE (ret, QImode);
12638 tmp = dest;
12639 tmpreg = dest;
12641 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12642 if (bypass_test || second_test)
12644 rtx test = second_test;
12645 int bypass = 0;
12646 rtx tmp2 = gen_reg_rtx (QImode);
12647 if (bypass_test)
12649 gcc_assert (!second_test);
12650 test = bypass_test;
12651 bypass = 1;
12652 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12654 PUT_MODE (test, QImode);
12655 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12657 if (bypass)
12658 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12659 else
12660 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12663 /* Attach a REG_EQUAL note describing the comparison result. */
12664 if (ix86_compare_op0 && ix86_compare_op1)
12666 equiv = simplify_gen_relational (code, QImode,
12667 GET_MODE (ix86_compare_op0),
12668 ix86_compare_op0, ix86_compare_op1);
12669 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12672 return 1; /* DONE */
12675 /* Expand comparison setting or clearing carry flag. Return true when
12676 successful and set pop for the operation. */
12677 static bool
12678 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12680 enum machine_mode mode =
12681 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12683 /* Do not handle DImode compares that go through special path. */
12684 if (mode == (TARGET_64BIT ? TImode : DImode))
12685 return false;
12687 if (SCALAR_FLOAT_MODE_P (mode))
12689 rtx second_test = NULL, bypass_test = NULL;
12690 rtx compare_op, compare_seq;
12692 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12694 /* Shortcut: following common codes never translate
12695 into carry flag compares. */
12696 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12697 || code == ORDERED || code == UNORDERED)
12698 return false;
12700 /* These comparisons require zero flag; swap operands so they won't. */
12701 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12702 && !TARGET_IEEE_FP)
12704 rtx tmp = op0;
12705 op0 = op1;
12706 op1 = tmp;
12707 code = swap_condition (code);
12710 /* Try to expand the comparison and verify that we end up with
12711 carry flag based comparison. This fails to be true only when
12712 we decide to expand comparison using arithmetic that is not
12713 too common scenario. */
12714 start_sequence ();
12715 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12716 &second_test, &bypass_test);
12717 compare_seq = get_insns ();
12718 end_sequence ();
12720 if (second_test || bypass_test)
12721 return false;
12723 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12724 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12725 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12726 else
12727 code = GET_CODE (compare_op);
12729 if (code != LTU && code != GEU)
12730 return false;
12732 emit_insn (compare_seq);
12733 *pop = compare_op;
12734 return true;
12737 if (!INTEGRAL_MODE_P (mode))
12738 return false;
12740 switch (code)
12742 case LTU:
12743 case GEU:
12744 break;
12746 /* Convert a==0 into (unsigned)a<1. */
12747 case EQ:
12748 case NE:
12749 if (op1 != const0_rtx)
12750 return false;
12751 op1 = const1_rtx;
12752 code = (code == EQ ? LTU : GEU);
12753 break;
12755 /* Convert a>b into b<a or a>=b-1. */
12756 case GTU:
12757 case LEU:
12758 if (CONST_INT_P (op1))
12760 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12761 /* Bail out on overflow. We still can swap operands but that
12762 would force loading of the constant into register. */
12763 if (op1 == const0_rtx
12764 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12765 return false;
12766 code = (code == GTU ? GEU : LTU);
12768 else
12770 rtx tmp = op1;
12771 op1 = op0;
12772 op0 = tmp;
12773 code = (code == GTU ? LTU : GEU);
12775 break;
12777 /* Convert a>=0 into (unsigned)a<0x80000000. */
12778 case LT:
12779 case GE:
12780 if (mode == DImode || op1 != const0_rtx)
12781 return false;
12782 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12783 code = (code == LT ? GEU : LTU);
12784 break;
12785 case LE:
12786 case GT:
12787 if (mode == DImode || op1 != constm1_rtx)
12788 return false;
12789 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12790 code = (code == LE ? GEU : LTU);
12791 break;
12793 default:
12794 return false;
12796 /* Swapping operands may cause constant to appear as first operand. */
12797 if (!nonimmediate_operand (op0, VOIDmode))
12799 if (!can_create_pseudo_p ())
12800 return false;
12801 op0 = force_reg (mode, op0);
12803 ix86_compare_op0 = op0;
12804 ix86_compare_op1 = op1;
12805 *pop = ix86_expand_compare (code, NULL, NULL);
12806 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12807 return true;
12811 ix86_expand_int_movcc (rtx operands[])
12813 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12814 rtx compare_seq, compare_op;
12815 rtx second_test, bypass_test;
12816 enum machine_mode mode = GET_MODE (operands[0]);
12817 bool sign_bit_compare_p = false;;
12819 start_sequence ();
12820 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12821 compare_seq = get_insns ();
12822 end_sequence ();
12824 compare_code = GET_CODE (compare_op);
12826 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12827 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12828 sign_bit_compare_p = true;
12830 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12831 HImode insns, we'd be swallowed in word prefix ops. */
12833 if ((mode != HImode || TARGET_FAST_PREFIX)
12834 && (mode != (TARGET_64BIT ? TImode : DImode))
12835 && CONST_INT_P (operands[2])
12836 && CONST_INT_P (operands[3]))
12838 rtx out = operands[0];
12839 HOST_WIDE_INT ct = INTVAL (operands[2]);
12840 HOST_WIDE_INT cf = INTVAL (operands[3]);
12841 HOST_WIDE_INT diff;
12843 diff = ct - cf;
12844 /* Sign bit compares are better done using shifts than we do by using
12845 sbb. */
12846 if (sign_bit_compare_p
12847 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12848 ix86_compare_op1, &compare_op))
12850 /* Detect overlap between destination and compare sources. */
12851 rtx tmp = out;
12853 if (!sign_bit_compare_p)
12855 bool fpcmp = false;
12857 compare_code = GET_CODE (compare_op);
12859 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12860 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12862 fpcmp = true;
12863 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12866 /* To simplify rest of code, restrict to the GEU case. */
12867 if (compare_code == LTU)
12869 HOST_WIDE_INT tmp = ct;
12870 ct = cf;
12871 cf = tmp;
12872 compare_code = reverse_condition (compare_code);
12873 code = reverse_condition (code);
12875 else
12877 if (fpcmp)
12878 PUT_CODE (compare_op,
12879 reverse_condition_maybe_unordered
12880 (GET_CODE (compare_op)));
12881 else
12882 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12884 diff = ct - cf;
12886 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12887 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12888 tmp = gen_reg_rtx (mode);
12890 if (mode == DImode)
12891 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12892 else
12893 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12895 else
12897 if (code == GT || code == GE)
12898 code = reverse_condition (code);
12899 else
12901 HOST_WIDE_INT tmp = ct;
12902 ct = cf;
12903 cf = tmp;
12904 diff = ct - cf;
12906 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12907 ix86_compare_op1, VOIDmode, 0, -1);
12910 if (diff == 1)
12913 * cmpl op0,op1
12914 * sbbl dest,dest
12915 * [addl dest, ct]
12917 * Size 5 - 8.
12919 if (ct)
12920 tmp = expand_simple_binop (mode, PLUS,
12921 tmp, GEN_INT (ct),
12922 copy_rtx (tmp), 1, OPTAB_DIRECT);
12924 else if (cf == -1)
12927 * cmpl op0,op1
12928 * sbbl dest,dest
12929 * orl $ct, dest
12931 * Size 8.
12933 tmp = expand_simple_binop (mode, IOR,
12934 tmp, GEN_INT (ct),
12935 copy_rtx (tmp), 1, OPTAB_DIRECT);
12937 else if (diff == -1 && ct)
12940 * cmpl op0,op1
12941 * sbbl dest,dest
12942 * notl dest
12943 * [addl dest, cf]
12945 * Size 8 - 11.
12947 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12948 if (cf)
12949 tmp = expand_simple_binop (mode, PLUS,
12950 copy_rtx (tmp), GEN_INT (cf),
12951 copy_rtx (tmp), 1, OPTAB_DIRECT);
12953 else
12956 * cmpl op0,op1
12957 * sbbl dest,dest
12958 * [notl dest]
12959 * andl cf - ct, dest
12960 * [addl dest, ct]
12962 * Size 8 - 11.
12965 if (cf == 0)
12967 cf = ct;
12968 ct = 0;
12969 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12972 tmp = expand_simple_binop (mode, AND,
12973 copy_rtx (tmp),
12974 gen_int_mode (cf - ct, mode),
12975 copy_rtx (tmp), 1, OPTAB_DIRECT);
12976 if (ct)
12977 tmp = expand_simple_binop (mode, PLUS,
12978 copy_rtx (tmp), GEN_INT (ct),
12979 copy_rtx (tmp), 1, OPTAB_DIRECT);
12982 if (!rtx_equal_p (tmp, out))
12983 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12985 return 1; /* DONE */
12988 if (diff < 0)
12990 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12992 HOST_WIDE_INT tmp;
12993 tmp = ct, ct = cf, cf = tmp;
12994 diff = -diff;
12996 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12998 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13000 /* We may be reversing unordered compare to normal compare, that
13001 is not valid in general (we may convert non-trapping condition
13002 to trapping one), however on i386 we currently emit all
13003 comparisons unordered. */
13004 compare_code = reverse_condition_maybe_unordered (compare_code);
13005 code = reverse_condition_maybe_unordered (code);
13007 else
13009 compare_code = reverse_condition (compare_code);
13010 code = reverse_condition (code);
13014 compare_code = UNKNOWN;
13015 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
13016 && CONST_INT_P (ix86_compare_op1))
13018 if (ix86_compare_op1 == const0_rtx
13019 && (code == LT || code == GE))
13020 compare_code = code;
13021 else if (ix86_compare_op1 == constm1_rtx)
13023 if (code == LE)
13024 compare_code = LT;
13025 else if (code == GT)
13026 compare_code = GE;
13030 /* Optimize dest = (op0 < 0) ? -1 : cf. */
13031 if (compare_code != UNKNOWN
13032 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
13033 && (cf == -1 || ct == -1))
13035 /* If lea code below could be used, only optimize
13036 if it results in a 2 insn sequence. */
13038 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
13039 || diff == 3 || diff == 5 || diff == 9)
13040 || (compare_code == LT && ct == -1)
13041 || (compare_code == GE && cf == -1))
13044 * notl op1 (if necessary)
13045 * sarl $31, op1
13046 * orl cf, op1
13048 if (ct != -1)
13050 cf = ct;
13051 ct = -1;
13052 code = reverse_condition (code);
13055 out = emit_store_flag (out, code, ix86_compare_op0,
13056 ix86_compare_op1, VOIDmode, 0, -1);
13058 out = expand_simple_binop (mode, IOR,
13059 out, GEN_INT (cf),
13060 out, 1, OPTAB_DIRECT);
13061 if (out != operands[0])
13062 emit_move_insn (operands[0], out);
13064 return 1; /* DONE */
13069 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
13070 || diff == 3 || diff == 5 || diff == 9)
13071 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
13072 && (mode != DImode
13073 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
13076 * xorl dest,dest
13077 * cmpl op1,op2
13078 * setcc dest
13079 * lea cf(dest*(ct-cf)),dest
13081 * Size 14.
13083 * This also catches the degenerate setcc-only case.
13086 rtx tmp;
13087 int nops;
13089 out = emit_store_flag (out, code, ix86_compare_op0,
13090 ix86_compare_op1, VOIDmode, 0, 1);
13092 nops = 0;
13093 /* On x86_64 the lea instruction operates on Pmode, so we need
13094 to get arithmetics done in proper mode to match. */
13095 if (diff == 1)
13096 tmp = copy_rtx (out);
13097 else
13099 rtx out1;
13100 out1 = copy_rtx (out);
13101 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
13102 nops++;
13103 if (diff & 1)
13105 tmp = gen_rtx_PLUS (mode, tmp, out1);
13106 nops++;
13109 if (cf != 0)
13111 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
13112 nops++;
13114 if (!rtx_equal_p (tmp, out))
13116 if (nops == 1)
13117 out = force_operand (tmp, copy_rtx (out));
13118 else
13119 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
13121 if (!rtx_equal_p (out, operands[0]))
13122 emit_move_insn (operands[0], copy_rtx (out));
13124 return 1; /* DONE */
13128 * General case: Jumpful:
13129 * xorl dest,dest cmpl op1, op2
13130 * cmpl op1, op2 movl ct, dest
13131 * setcc dest jcc 1f
13132 * decl dest movl cf, dest
13133 * andl (cf-ct),dest 1:
13134 * addl ct,dest
13136 * Size 20. Size 14.
13138 * This is reasonably steep, but branch mispredict costs are
13139 * high on modern cpus, so consider failing only if optimizing
13140 * for space.
13143 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13144 && BRANCH_COST >= 2)
13146 if (cf == 0)
13148 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13150 cf = ct;
13151 ct = 0;
13153 if (SCALAR_FLOAT_MODE_P (cmp_mode))
13155 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13157 /* We may be reversing unordered compare to normal compare,
13158 that is not valid in general (we may convert non-trapping
13159 condition to trapping one), however on i386 we currently
13160 emit all comparisons unordered. */
13161 code = reverse_condition_maybe_unordered (code);
13163 else
13165 code = reverse_condition (code);
13166 if (compare_code != UNKNOWN)
13167 compare_code = reverse_condition (compare_code);
13171 if (compare_code != UNKNOWN)
13173 /* notl op1 (if needed)
13174 sarl $31, op1
13175 andl (cf-ct), op1
13176 addl ct, op1
13178 For x < 0 (resp. x <= -1) there will be no notl,
13179 so if possible swap the constants to get rid of the
13180 complement.
13181 True/false will be -1/0 while code below (store flag
13182 followed by decrement) is 0/-1, so the constants need
13183 to be exchanged once more. */
13185 if (compare_code == GE || !cf)
13187 code = reverse_condition (code);
13188 compare_code = LT;
13190 else
13192 HOST_WIDE_INT tmp = cf;
13193 cf = ct;
13194 ct = tmp;
13197 out = emit_store_flag (out, code, ix86_compare_op0,
13198 ix86_compare_op1, VOIDmode, 0, -1);
13200 else
13202 out = emit_store_flag (out, code, ix86_compare_op0,
13203 ix86_compare_op1, VOIDmode, 0, 1);
13205 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13206 copy_rtx (out), 1, OPTAB_DIRECT);
13209 out = expand_simple_binop (mode, AND, copy_rtx (out),
13210 gen_int_mode (cf - ct, mode),
13211 copy_rtx (out), 1, OPTAB_DIRECT);
13212 if (ct)
13213 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13214 copy_rtx (out), 1, OPTAB_DIRECT);
13215 if (!rtx_equal_p (out, operands[0]))
13216 emit_move_insn (operands[0], copy_rtx (out));
13218 return 1; /* DONE */
13222 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13224 /* Try a few things more with specific constants and a variable. */
13226 optab op;
13227 rtx var, orig_out, out, tmp;
13229 if (BRANCH_COST <= 2)
13230 return 0; /* FAIL */
13232 /* If one of the two operands is an interesting constant, load a
13233 constant with the above and mask it in with a logical operation. */
13235 if (CONST_INT_P (operands[2]))
13237 var = operands[3];
13238 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13239 operands[3] = constm1_rtx, op = and_optab;
13240 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13241 operands[3] = const0_rtx, op = ior_optab;
13242 else
13243 return 0; /* FAIL */
13245 else if (CONST_INT_P (operands[3]))
13247 var = operands[2];
13248 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13249 operands[2] = constm1_rtx, op = and_optab;
13250 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
13251 operands[2] = const0_rtx, op = ior_optab;
13252 else
13253 return 0; /* FAIL */
13255 else
13256 return 0; /* FAIL */
13258 orig_out = operands[0];
13259 tmp = gen_reg_rtx (mode);
13260 operands[0] = tmp;
13262 /* Recurse to get the constant loaded. */
13263 if (ix86_expand_int_movcc (operands) == 0)
13264 return 0; /* FAIL */
13266 /* Mask in the interesting variable. */
13267 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13268 OPTAB_WIDEN);
13269 if (!rtx_equal_p (out, orig_out))
13270 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13272 return 1; /* DONE */
13276 * For comparison with above,
13278 * movl cf,dest
13279 * movl ct,tmp
13280 * cmpl op1,op2
13281 * cmovcc tmp,dest
13283 * Size 15.
13286 if (! nonimmediate_operand (operands[2], mode))
13287 operands[2] = force_reg (mode, operands[2]);
13288 if (! nonimmediate_operand (operands[3], mode))
13289 operands[3] = force_reg (mode, operands[3]);
13291 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13293 rtx tmp = gen_reg_rtx (mode);
13294 emit_move_insn (tmp, operands[3]);
13295 operands[3] = tmp;
13297 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13299 rtx tmp = gen_reg_rtx (mode);
13300 emit_move_insn (tmp, operands[2]);
13301 operands[2] = tmp;
13304 if (! register_operand (operands[2], VOIDmode)
13305 && (mode == QImode
13306 || ! register_operand (operands[3], VOIDmode)))
13307 operands[2] = force_reg (mode, operands[2]);
13309 if (mode == QImode
13310 && ! register_operand (operands[3], VOIDmode))
13311 operands[3] = force_reg (mode, operands[3]);
13313 emit_insn (compare_seq);
13314 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13315 gen_rtx_IF_THEN_ELSE (mode,
13316 compare_op, operands[2],
13317 operands[3])));
13318 if (bypass_test)
13319 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13320 gen_rtx_IF_THEN_ELSE (mode,
13321 bypass_test,
13322 copy_rtx (operands[3]),
13323 copy_rtx (operands[0]))));
13324 if (second_test)
13325 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13326 gen_rtx_IF_THEN_ELSE (mode,
13327 second_test,
13328 copy_rtx (operands[2]),
13329 copy_rtx (operands[0]))));
13331 return 1; /* DONE */
13334 /* Swap, force into registers, or otherwise massage the two operands
13335 to an sse comparison with a mask result. Thus we differ a bit from
13336 ix86_prepare_fp_compare_args which expects to produce a flags result.
13338 The DEST operand exists to help determine whether to commute commutative
13339 operators. The POP0/POP1 operands are updated in place. The new
13340 comparison code is returned, or UNKNOWN if not implementable. */
13342 static enum rtx_code
13343 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13344 rtx *pop0, rtx *pop1)
13346 rtx tmp;
13348 switch (code)
13350 case LTGT:
13351 case UNEQ:
13352 /* We have no LTGT as an operator. We could implement it with
13353 NE & ORDERED, but this requires an extra temporary. It's
13354 not clear that it's worth it. */
13355 return UNKNOWN;
13357 case LT:
13358 case LE:
13359 case UNGT:
13360 case UNGE:
13361 /* These are supported directly. */
13362 break;
13364 case EQ:
13365 case NE:
13366 case UNORDERED:
13367 case ORDERED:
13368 /* For commutative operators, try to canonicalize the destination
13369 operand to be first in the comparison - this helps reload to
13370 avoid extra moves. */
13371 if (!dest || !rtx_equal_p (dest, *pop1))
13372 break;
13373 /* FALLTHRU */
13375 case GE:
13376 case GT:
13377 case UNLE:
13378 case UNLT:
13379 /* These are not supported directly. Swap the comparison operands
13380 to transform into something that is supported. */
13381 tmp = *pop0;
13382 *pop0 = *pop1;
13383 *pop1 = tmp;
13384 code = swap_condition (code);
13385 break;
13387 default:
13388 gcc_unreachable ();
13391 return code;
13394 /* Detect conditional moves that exactly match min/max operational
13395 semantics. Note that this is IEEE safe, as long as we don't
13396 interchange the operands.
13398 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13399 and TRUE if the operation is successful and instructions are emitted. */
13401 static bool
13402 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13403 rtx cmp_op1, rtx if_true, rtx if_false)
13405 enum machine_mode mode;
13406 bool is_min;
13407 rtx tmp;
13409 if (code == LT)
13411 else if (code == UNGE)
13413 tmp = if_true;
13414 if_true = if_false;
13415 if_false = tmp;
13417 else
13418 return false;
13420 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13421 is_min = true;
13422 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13423 is_min = false;
13424 else
13425 return false;
13427 mode = GET_MODE (dest);
13429 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13430 but MODE may be a vector mode and thus not appropriate. */
13431 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13433 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13434 rtvec v;
13436 if_true = force_reg (mode, if_true);
13437 v = gen_rtvec (2, if_true, if_false);
13438 tmp = gen_rtx_UNSPEC (mode, v, u);
13440 else
13442 code = is_min ? SMIN : SMAX;
13443 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13446 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13447 return true;
13450 /* Expand an sse vector comparison. Return the register with the result. */
13452 static rtx
13453 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13454 rtx op_true, rtx op_false)
13456 enum machine_mode mode = GET_MODE (dest);
13457 rtx x;
13459 cmp_op0 = force_reg (mode, cmp_op0);
13460 if (!nonimmediate_operand (cmp_op1, mode))
13461 cmp_op1 = force_reg (mode, cmp_op1);
13463 if (optimize
13464 || reg_overlap_mentioned_p (dest, op_true)
13465 || reg_overlap_mentioned_p (dest, op_false))
13466 dest = gen_reg_rtx (mode);
13468 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13469 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13471 return dest;
13474 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13475 operations. This is used for both scalar and vector conditional moves. */
13477 static void
13478 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13480 enum machine_mode mode = GET_MODE (dest);
13481 rtx t2, t3, x;
13483 if (op_false == CONST0_RTX (mode))
13485 op_true = force_reg (mode, op_true);
13486 x = gen_rtx_AND (mode, cmp, op_true);
13487 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13489 else if (op_true == CONST0_RTX (mode))
13491 op_false = force_reg (mode, op_false);
13492 x = gen_rtx_NOT (mode, cmp);
13493 x = gen_rtx_AND (mode, x, op_false);
13494 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13496 else if (TARGET_SSE5)
13498 rtx pcmov = gen_rtx_SET (mode, dest,
13499 gen_rtx_IF_THEN_ELSE (mode, cmp,
13500 op_true,
13501 op_false));
13502 emit_insn (pcmov);
13504 else
13506 op_true = force_reg (mode, op_true);
13507 op_false = force_reg (mode, op_false);
13509 t2 = gen_reg_rtx (mode);
13510 if (optimize)
13511 t3 = gen_reg_rtx (mode);
13512 else
13513 t3 = dest;
13515 x = gen_rtx_AND (mode, op_true, cmp);
13516 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13518 x = gen_rtx_NOT (mode, cmp);
13519 x = gen_rtx_AND (mode, x, op_false);
13520 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13522 x = gen_rtx_IOR (mode, t3, t2);
13523 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13527 /* Expand a floating-point conditional move. Return true if successful. */
13530 ix86_expand_fp_movcc (rtx operands[])
13532 enum machine_mode mode = GET_MODE (operands[0]);
13533 enum rtx_code code = GET_CODE (operands[1]);
13534 rtx tmp, compare_op, second_test, bypass_test;
13536 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13538 enum machine_mode cmode;
13540 /* Since we've no cmove for sse registers, don't force bad register
13541 allocation just to gain access to it. Deny movcc when the
13542 comparison mode doesn't match the move mode. */
13543 cmode = GET_MODE (ix86_compare_op0);
13544 if (cmode == VOIDmode)
13545 cmode = GET_MODE (ix86_compare_op1);
13546 if (cmode != mode)
13547 return 0;
13549 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13550 &ix86_compare_op0,
13551 &ix86_compare_op1);
13552 if (code == UNKNOWN)
13553 return 0;
13555 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13556 ix86_compare_op1, operands[2],
13557 operands[3]))
13558 return 1;
13560 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13561 ix86_compare_op1, operands[2], operands[3]);
13562 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13563 return 1;
13566 /* The floating point conditional move instructions don't directly
13567 support conditions resulting from a signed integer comparison. */
13569 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13571 /* The floating point conditional move instructions don't directly
13572 support signed integer comparisons. */
13574 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13576 gcc_assert (!second_test && !bypass_test);
13577 tmp = gen_reg_rtx (QImode);
13578 ix86_expand_setcc (code, tmp);
13579 code = NE;
13580 ix86_compare_op0 = tmp;
13581 ix86_compare_op1 = const0_rtx;
13582 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13584 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13586 tmp = gen_reg_rtx (mode);
13587 emit_move_insn (tmp, operands[3]);
13588 operands[3] = tmp;
13590 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13592 tmp = gen_reg_rtx (mode);
13593 emit_move_insn (tmp, operands[2]);
13594 operands[2] = tmp;
13597 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13598 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13599 operands[2], operands[3])));
13600 if (bypass_test)
13601 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13602 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13603 operands[3], operands[0])));
13604 if (second_test)
13605 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13606 gen_rtx_IF_THEN_ELSE (mode, second_test,
13607 operands[2], operands[0])));
13609 return 1;
13612 /* Expand a floating-point vector conditional move; a vcond operation
13613 rather than a movcc operation. */
13615 bool
13616 ix86_expand_fp_vcond (rtx operands[])
13618 enum rtx_code code = GET_CODE (operands[3]);
13619 rtx cmp;
13621 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13622 &operands[4], &operands[5]);
13623 if (code == UNKNOWN)
13624 return false;
13626 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13627 operands[5], operands[1], operands[2]))
13628 return true;
13630 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13631 operands[1], operands[2]);
13632 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13633 return true;
13636 /* Expand a signed/unsigned integral vector conditional move. */
13638 bool
13639 ix86_expand_int_vcond (rtx operands[])
13641 enum machine_mode mode = GET_MODE (operands[0]);
13642 enum rtx_code code = GET_CODE (operands[3]);
13643 bool negate = false;
13644 rtx x, cop0, cop1;
13646 cop0 = operands[4];
13647 cop1 = operands[5];
13649 /* SSE5 supports all of the comparisons on all vector int types. */
13650 if (!TARGET_SSE5)
13652 /* Canonicalize the comparison to EQ, GT, GTU. */
13653 switch (code)
13655 case EQ:
13656 case GT:
13657 case GTU:
13658 break;
13660 case NE:
13661 case LE:
13662 case LEU:
13663 code = reverse_condition (code);
13664 negate = true;
13665 break;
13667 case GE:
13668 case GEU:
13669 code = reverse_condition (code);
13670 negate = true;
13671 /* FALLTHRU */
13673 case LT:
13674 case LTU:
13675 code = swap_condition (code);
13676 x = cop0, cop0 = cop1, cop1 = x;
13677 break;
13679 default:
13680 gcc_unreachable ();
13683 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13684 if (mode == V2DImode)
13686 switch (code)
13688 case EQ:
13689 /* SSE4.1 supports EQ. */
13690 if (!TARGET_SSE4_1)
13691 return false;
13692 break;
13694 case GT:
13695 case GTU:
13696 /* SSE4.2 supports GT/GTU. */
13697 if (!TARGET_SSE4_2)
13698 return false;
13699 break;
13701 default:
13702 gcc_unreachable ();
13706 /* Unsigned parallel compare is not supported by the hardware. Play some
13707 tricks to turn this into a signed comparison against 0. */
13708 if (code == GTU)
13710 cop0 = force_reg (mode, cop0);
13712 switch (mode)
13714 case V4SImode:
13715 case V2DImode:
13717 rtx t1, t2, mask;
13719 /* Perform a parallel modulo subtraction. */
13720 t1 = gen_reg_rtx (mode);
13721 emit_insn ((mode == V4SImode
13722 ? gen_subv4si3
13723 : gen_subv2di3) (t1, cop0, cop1));
13725 /* Extract the original sign bit of op0. */
13726 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13727 true, false);
13728 t2 = gen_reg_rtx (mode);
13729 emit_insn ((mode == V4SImode
13730 ? gen_andv4si3
13731 : gen_andv2di3) (t2, cop0, mask));
13733 /* XOR it back into the result of the subtraction. This results
13734 in the sign bit set iff we saw unsigned underflow. */
13735 x = gen_reg_rtx (mode);
13736 emit_insn ((mode == V4SImode
13737 ? gen_xorv4si3
13738 : gen_xorv2di3) (x, t1, t2));
13740 code = GT;
13742 break;
13744 case V16QImode:
13745 case V8HImode:
13746 /* Perform a parallel unsigned saturating subtraction. */
13747 x = gen_reg_rtx (mode);
13748 emit_insn (gen_rtx_SET (VOIDmode, x,
13749 gen_rtx_US_MINUS (mode, cop0, cop1)));
13751 code = EQ;
13752 negate = !negate;
13753 break;
13755 default:
13756 gcc_unreachable ();
13759 cop0 = x;
13760 cop1 = CONST0_RTX (mode);
13764 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13765 operands[1+negate], operands[2-negate]);
13767 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13768 operands[2-negate]);
13769 return true;
13772 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13773 true if we should do zero extension, else sign extension. HIGH_P is
13774 true if we want the N/2 high elements, else the low elements. */
13776 void
13777 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13779 enum machine_mode imode = GET_MODE (operands[1]);
13780 rtx (*unpack)(rtx, rtx, rtx);
13781 rtx se, dest;
13783 switch (imode)
13785 case V16QImode:
13786 if (high_p)
13787 unpack = gen_vec_interleave_highv16qi;
13788 else
13789 unpack = gen_vec_interleave_lowv16qi;
13790 break;
13791 case V8HImode:
13792 if (high_p)
13793 unpack = gen_vec_interleave_highv8hi;
13794 else
13795 unpack = gen_vec_interleave_lowv8hi;
13796 break;
13797 case V4SImode:
13798 if (high_p)
13799 unpack = gen_vec_interleave_highv4si;
13800 else
13801 unpack = gen_vec_interleave_lowv4si;
13802 break;
13803 default:
13804 gcc_unreachable ();
13807 dest = gen_lowpart (imode, operands[0]);
13809 if (unsigned_p)
13810 se = force_reg (imode, CONST0_RTX (imode));
13811 else
13812 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13813 operands[1], pc_rtx, pc_rtx);
13815 emit_insn (unpack (dest, operands[1], se));
13818 /* This function performs the same task as ix86_expand_sse_unpack,
13819 but with SSE4.1 instructions. */
13821 void
13822 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13824 enum machine_mode imode = GET_MODE (operands[1]);
13825 rtx (*unpack)(rtx, rtx);
13826 rtx src, dest;
13828 switch (imode)
13830 case V16QImode:
13831 if (unsigned_p)
13832 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13833 else
13834 unpack = gen_sse4_1_extendv8qiv8hi2;
13835 break;
13836 case V8HImode:
13837 if (unsigned_p)
13838 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13839 else
13840 unpack = gen_sse4_1_extendv4hiv4si2;
13841 break;
13842 case V4SImode:
13843 if (unsigned_p)
13844 unpack = gen_sse4_1_zero_extendv2siv2di2;
13845 else
13846 unpack = gen_sse4_1_extendv2siv2di2;
13847 break;
13848 default:
13849 gcc_unreachable ();
13852 dest = operands[0];
13853 if (high_p)
13855 /* Shift higher 8 bytes to lower 8 bytes. */
13856 src = gen_reg_rtx (imode);
13857 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13858 gen_lowpart (TImode, operands[1]),
13859 GEN_INT (64)));
13861 else
13862 src = operands[1];
13864 emit_insn (unpack (dest, src));
13867 /* This function performs the same task as ix86_expand_sse_unpack,
13868 but with sse5 instructions. */
13870 void
13871 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13873 enum machine_mode imode = GET_MODE (operands[1]);
13874 int pperm_bytes[16];
13875 int i;
13876 int h = (high_p) ? 8 : 0;
13877 int h2;
13878 int sign_extend;
13879 rtvec v = rtvec_alloc (16);
13880 rtvec vs;
13881 rtx x, p;
13882 rtx op0 = operands[0], op1 = operands[1];
13884 switch (imode)
13886 case V16QImode:
13887 vs = rtvec_alloc (8);
13888 h2 = (high_p) ? 8 : 0;
13889 for (i = 0; i < 8; i++)
13891 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13892 pperm_bytes[2*i+1] = ((unsigned_p)
13893 ? PPERM_ZERO
13894 : PPERM_SIGN | PPERM_SRC2 | i | h);
13897 for (i = 0; i < 16; i++)
13898 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13900 for (i = 0; i < 8; i++)
13901 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13903 p = gen_rtx_PARALLEL (VOIDmode, vs);
13904 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13905 if (unsigned_p)
13906 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13907 else
13908 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13909 break;
13911 case V8HImode:
13912 vs = rtvec_alloc (4);
13913 h2 = (high_p) ? 4 : 0;
13914 for (i = 0; i < 4; i++)
13916 sign_extend = ((unsigned_p)
13917 ? PPERM_ZERO
13918 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13919 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13920 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13921 pperm_bytes[4*i+2] = sign_extend;
13922 pperm_bytes[4*i+3] = sign_extend;
13925 for (i = 0; i < 16; i++)
13926 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13928 for (i = 0; i < 4; i++)
13929 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13931 p = gen_rtx_PARALLEL (VOIDmode, vs);
13932 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13933 if (unsigned_p)
13934 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13935 else
13936 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13937 break;
13939 case V4SImode:
13940 vs = rtvec_alloc (2);
13941 h2 = (high_p) ? 2 : 0;
13942 for (i = 0; i < 2; i++)
13944 sign_extend = ((unsigned_p)
13945 ? PPERM_ZERO
13946 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13947 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13948 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13949 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13950 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13951 pperm_bytes[8*i+4] = sign_extend;
13952 pperm_bytes[8*i+5] = sign_extend;
13953 pperm_bytes[8*i+6] = sign_extend;
13954 pperm_bytes[8*i+7] = sign_extend;
13957 for (i = 0; i < 16; i++)
13958 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13960 for (i = 0; i < 2; i++)
13961 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13963 p = gen_rtx_PARALLEL (VOIDmode, vs);
13964 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13965 if (unsigned_p)
13966 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13967 else
13968 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13969 break;
13971 default:
13972 gcc_unreachable ();
13975 return;
13978 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13979 next narrower integer vector type */
13980 void
13981 ix86_expand_sse5_pack (rtx operands[3])
13983 enum machine_mode imode = GET_MODE (operands[0]);
13984 int pperm_bytes[16];
13985 int i;
13986 rtvec v = rtvec_alloc (16);
13987 rtx x;
13988 rtx op0 = operands[0];
13989 rtx op1 = operands[1];
13990 rtx op2 = operands[2];
13992 switch (imode)
13994 case V16QImode:
13995 for (i = 0; i < 8; i++)
13997 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13998 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
14001 for (i = 0; i < 16; i++)
14002 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14004 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14005 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
14006 break;
14008 case V8HImode:
14009 for (i = 0; i < 4; i++)
14011 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
14012 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
14013 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
14014 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
14017 for (i = 0; i < 16; i++)
14018 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14020 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14021 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
14022 break;
14024 case V4SImode:
14025 for (i = 0; i < 2; i++)
14027 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
14028 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
14029 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
14030 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
14031 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
14032 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
14033 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
14034 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
14037 for (i = 0; i < 16; i++)
14038 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14040 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14041 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
14042 break;
14044 default:
14045 gcc_unreachable ();
14048 return;
14051 /* Expand conditional increment or decrement using adb/sbb instructions.
14052 The default case using setcc followed by the conditional move can be
14053 done by generic code. */
14055 ix86_expand_int_addcc (rtx operands[])
14057 enum rtx_code code = GET_CODE (operands[1]);
14058 rtx compare_op;
14059 rtx val = const0_rtx;
14060 bool fpcmp = false;
14061 enum machine_mode mode = GET_MODE (operands[0]);
14063 if (operands[3] != const1_rtx
14064 && operands[3] != constm1_rtx)
14065 return 0;
14066 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14067 ix86_compare_op1, &compare_op))
14068 return 0;
14069 code = GET_CODE (compare_op);
14071 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14072 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14074 fpcmp = true;
14075 code = ix86_fp_compare_code_to_integer (code);
14078 if (code != LTU)
14080 val = constm1_rtx;
14081 if (fpcmp)
14082 PUT_CODE (compare_op,
14083 reverse_condition_maybe_unordered
14084 (GET_CODE (compare_op)));
14085 else
14086 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14088 PUT_MODE (compare_op, mode);
14090 /* Construct either adc or sbb insn. */
14091 if ((code == LTU) == (operands[3] == constm1_rtx))
14093 switch (GET_MODE (operands[0]))
14095 case QImode:
14096 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
14097 break;
14098 case HImode:
14099 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
14100 break;
14101 case SImode:
14102 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
14103 break;
14104 case DImode:
14105 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14106 break;
14107 default:
14108 gcc_unreachable ();
14111 else
14113 switch (GET_MODE (operands[0]))
14115 case QImode:
14116 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
14117 break;
14118 case HImode:
14119 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
14120 break;
14121 case SImode:
14122 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
14123 break;
14124 case DImode:
14125 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14126 break;
14127 default:
14128 gcc_unreachable ();
14131 return 1; /* DONE */
14135 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
14136 works for floating pointer parameters and nonoffsetable memories.
14137 For pushes, it returns just stack offsets; the values will be saved
14138 in the right order. Maximally three parts are generated. */
14140 static int
14141 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
14143 int size;
14145 if (!TARGET_64BIT)
14146 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
14147 else
14148 size = (GET_MODE_SIZE (mode) + 4) / 8;
14150 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
14151 gcc_assert (size >= 2 && size <= 4);
14153 /* Optimize constant pool reference to immediates. This is used by fp
14154 moves, that force all constants to memory to allow combining. */
14155 if (MEM_P (operand) && MEM_READONLY_P (operand))
14157 rtx tmp = maybe_get_pool_constant (operand);
14158 if (tmp)
14159 operand = tmp;
14162 if (MEM_P (operand) && !offsettable_memref_p (operand))
14164 /* The only non-offsetable memories we handle are pushes. */
14165 int ok = push_operand (operand, VOIDmode);
14167 gcc_assert (ok);
14169 operand = copy_rtx (operand);
14170 PUT_MODE (operand, Pmode);
14171 parts[0] = parts[1] = parts[2] = parts[3] = operand;
14172 return size;
14175 if (GET_CODE (operand) == CONST_VECTOR)
14177 enum machine_mode imode = int_mode_for_mode (mode);
14178 /* Caution: if we looked through a constant pool memory above,
14179 the operand may actually have a different mode now. That's
14180 ok, since we want to pun this all the way back to an integer. */
14181 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
14182 gcc_assert (operand != NULL);
14183 mode = imode;
14186 if (!TARGET_64BIT)
14188 if (mode == DImode)
14189 split_di (&operand, 1, &parts[0], &parts[1]);
14190 else
14192 int i;
14194 if (REG_P (operand))
14196 gcc_assert (reload_completed);
14197 for (i = 0; i < size; i++)
14198 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
14200 else if (offsettable_memref_p (operand))
14202 operand = adjust_address (operand, SImode, 0);
14203 parts[0] = operand;
14204 for (i = 1; i < size; i++)
14205 parts[i] = adjust_address (operand, SImode, 4 * i);
14207 else if (GET_CODE (operand) == CONST_DOUBLE)
14209 REAL_VALUE_TYPE r;
14210 long l[4];
14212 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14213 switch (mode)
14215 case TFmode:
14216 real_to_target (l, &r, mode);
14217 parts[3] = gen_int_mode (l[3], SImode);
14218 parts[2] = gen_int_mode (l[2], SImode);
14219 break;
14220 case XFmode:
14221 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14222 parts[2] = gen_int_mode (l[2], SImode);
14223 break;
14224 case DFmode:
14225 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14226 break;
14227 default:
14228 gcc_unreachable ();
14230 parts[1] = gen_int_mode (l[1], SImode);
14231 parts[0] = gen_int_mode (l[0], SImode);
14233 else
14234 gcc_unreachable ();
14237 else
14239 if (mode == TImode)
14240 split_ti (&operand, 1, &parts[0], &parts[1]);
14241 if (mode == XFmode || mode == TFmode)
14243 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14244 if (REG_P (operand))
14246 gcc_assert (reload_completed);
14247 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14248 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14250 else if (offsettable_memref_p (operand))
14252 operand = adjust_address (operand, DImode, 0);
14253 parts[0] = operand;
14254 parts[1] = adjust_address (operand, upper_mode, 8);
14256 else if (GET_CODE (operand) == CONST_DOUBLE)
14258 REAL_VALUE_TYPE r;
14259 long l[4];
14261 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14262 real_to_target (l, &r, mode);
14264 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14265 if (HOST_BITS_PER_WIDE_INT >= 64)
14266 parts[0]
14267 = gen_int_mode
14268 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14269 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14270 DImode);
14271 else
14272 parts[0] = immed_double_const (l[0], l[1], DImode);
14274 if (upper_mode == SImode)
14275 parts[1] = gen_int_mode (l[2], SImode);
14276 else if (HOST_BITS_PER_WIDE_INT >= 64)
14277 parts[1]
14278 = gen_int_mode
14279 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14280 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14281 DImode);
14282 else
14283 parts[1] = immed_double_const (l[2], l[3], DImode);
14285 else
14286 gcc_unreachable ();
14290 return size;
14293 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
14294 Return false when normal moves are needed; true when all required
14295 insns have been emitted. Operands 2-4 contain the input values
14296 int the correct order; operands 5-7 contain the output values. */
14298 void
14299 ix86_split_long_move (rtx operands[])
14301 rtx part[2][4];
14302 int nparts, i, j;
14303 int push = 0;
14304 int collisions = 0;
14305 enum machine_mode mode = GET_MODE (operands[0]);
14306 bool collisionparts[4];
14308 /* The DFmode expanders may ask us to move double.
14309 For 64bit target this is single move. By hiding the fact
14310 here we simplify i386.md splitters. */
14311 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14313 /* Optimize constant pool reference to immediates. This is used by
14314 fp moves, that force all constants to memory to allow combining. */
14316 if (MEM_P (operands[1])
14317 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14318 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14319 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14320 if (push_operand (operands[0], VOIDmode))
14322 operands[0] = copy_rtx (operands[0]);
14323 PUT_MODE (operands[0], Pmode);
14325 else
14326 operands[0] = gen_lowpart (DImode, operands[0]);
14327 operands[1] = gen_lowpart (DImode, operands[1]);
14328 emit_move_insn (operands[0], operands[1]);
14329 return;
14332 /* The only non-offsettable memory we handle is push. */
14333 if (push_operand (operands[0], VOIDmode))
14334 push = 1;
14335 else
14336 gcc_assert (!MEM_P (operands[0])
14337 || offsettable_memref_p (operands[0]));
14339 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14340 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14342 /* When emitting push, take care for source operands on the stack. */
14343 if (push && MEM_P (operands[1])
14344 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14345 for (i = 0; i < nparts - 1; i++)
14346 part[1][i] = change_address (part[1][i],
14347 GET_MODE (part[1][i]),
14348 XEXP (part[1][i + 1], 0));
14350 /* We need to do copy in the right order in case an address register
14351 of the source overlaps the destination. */
14352 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14354 rtx tmp;
14356 for (i = 0; i < nparts; i++)
14358 collisionparts[i]
14359 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
14360 if (collisionparts[i])
14361 collisions++;
14364 /* Collision in the middle part can be handled by reordering. */
14365 if (collisions == 1 && nparts == 3 && collisionparts [1])
14367 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14368 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14370 else if (collisions == 1
14371 && nparts == 4
14372 && (collisionparts [1] || collisionparts [2]))
14374 if (collisionparts [1])
14376 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14377 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14379 else
14381 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
14382 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
14386 /* If there are more collisions, we can't handle it by reordering.
14387 Do an lea to the last part and use only one colliding move. */
14388 else if (collisions > 1)
14390 rtx base;
14392 collisions = 1;
14394 base = part[0][nparts - 1];
14396 /* Handle the case when the last part isn't valid for lea.
14397 Happens in 64-bit mode storing the 12-byte XFmode. */
14398 if (GET_MODE (base) != Pmode)
14399 base = gen_rtx_REG (Pmode, REGNO (base));
14401 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14402 part[1][0] = replace_equiv_address (part[1][0], base);
14403 for (i = 1; i < nparts; i++)
14405 tmp = plus_constant (base, UNITS_PER_WORD * i);
14406 part[1][i] = replace_equiv_address (part[1][i], tmp);
14411 if (push)
14413 if (!TARGET_64BIT)
14415 if (nparts == 3)
14417 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14418 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14419 emit_move_insn (part[0][2], part[1][2]);
14421 else if (nparts == 4)
14423 emit_move_insn (part[0][3], part[1][3]);
14424 emit_move_insn (part[0][2], part[1][2]);
14427 else
14429 /* In 64bit mode we don't have 32bit push available. In case this is
14430 register, it is OK - we will just use larger counterpart. We also
14431 retype memory - these comes from attempt to avoid REX prefix on
14432 moving of second half of TFmode value. */
14433 if (GET_MODE (part[1][1]) == SImode)
14435 switch (GET_CODE (part[1][1]))
14437 case MEM:
14438 part[1][1] = adjust_address (part[1][1], DImode, 0);
14439 break;
14441 case REG:
14442 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14443 break;
14445 default:
14446 gcc_unreachable ();
14449 if (GET_MODE (part[1][0]) == SImode)
14450 part[1][0] = part[1][1];
14453 emit_move_insn (part[0][1], part[1][1]);
14454 emit_move_insn (part[0][0], part[1][0]);
14455 return;
14458 /* Choose correct order to not overwrite the source before it is copied. */
14459 if ((REG_P (part[0][0])
14460 && REG_P (part[1][1])
14461 && (REGNO (part[0][0]) == REGNO (part[1][1])
14462 || (nparts == 3
14463 && REGNO (part[0][0]) == REGNO (part[1][2]))
14464 || (nparts == 4
14465 && REGNO (part[0][0]) == REGNO (part[1][3]))))
14466 || (collisions > 0
14467 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14469 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
14471 operands[2 + i] = part[0][j];
14472 operands[6 + i] = part[1][j];
14475 else
14477 for (i = 0; i < nparts; i++)
14479 operands[2 + i] = part[0][i];
14480 operands[6 + i] = part[1][i];
14484 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14485 if (optimize_size)
14487 for (j = 0; j < nparts - 1; j++)
14488 if (CONST_INT_P (operands[6 + j])
14489 && operands[6 + j] != const0_rtx
14490 && REG_P (operands[2 + j]))
14491 for (i = j; i < nparts - 1; i++)
14492 if (CONST_INT_P (operands[7 + i])
14493 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
14494 operands[7 + i] = operands[2 + j];
14497 for (i = 0; i < nparts; i++)
14498 emit_move_insn (operands[2 + i], operands[6 + i]);
14500 return;
14503 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14504 left shift by a constant, either using a single shift or
14505 a sequence of add instructions. */
14507 static void
14508 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14510 if (count == 1)
14512 emit_insn ((mode == DImode
14513 ? gen_addsi3
14514 : gen_adddi3) (operand, operand, operand));
14516 else if (!optimize_size
14517 && count * ix86_cost->add <= ix86_cost->shift_const)
14519 int i;
14520 for (i=0; i<count; i++)
14522 emit_insn ((mode == DImode
14523 ? gen_addsi3
14524 : gen_adddi3) (operand, operand, operand));
14527 else
14528 emit_insn ((mode == DImode
14529 ? gen_ashlsi3
14530 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14533 void
14534 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14536 rtx low[2], high[2];
14537 int count;
14538 const int single_width = mode == DImode ? 32 : 64;
14540 if (CONST_INT_P (operands[2]))
14542 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14543 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14545 if (count >= single_width)
14547 emit_move_insn (high[0], low[1]);
14548 emit_move_insn (low[0], const0_rtx);
14550 if (count > single_width)
14551 ix86_expand_ashl_const (high[0], count - single_width, mode);
14553 else
14555 if (!rtx_equal_p (operands[0], operands[1]))
14556 emit_move_insn (operands[0], operands[1]);
14557 emit_insn ((mode == DImode
14558 ? gen_x86_shld
14559 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14560 ix86_expand_ashl_const (low[0], count, mode);
14562 return;
14565 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14567 if (operands[1] == const1_rtx)
14569 /* Assuming we've chosen a QImode capable registers, then 1 << N
14570 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14571 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14573 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14575 ix86_expand_clear (low[0]);
14576 ix86_expand_clear (high[0]);
14577 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14579 d = gen_lowpart (QImode, low[0]);
14580 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14581 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14582 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14584 d = gen_lowpart (QImode, high[0]);
14585 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14586 s = gen_rtx_NE (QImode, flags, const0_rtx);
14587 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14590 /* Otherwise, we can get the same results by manually performing
14591 a bit extract operation on bit 5/6, and then performing the two
14592 shifts. The two methods of getting 0/1 into low/high are exactly
14593 the same size. Avoiding the shift in the bit extract case helps
14594 pentium4 a bit; no one else seems to care much either way. */
14595 else
14597 rtx x;
14599 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14600 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14601 else
14602 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14603 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14605 emit_insn ((mode == DImode
14606 ? gen_lshrsi3
14607 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14608 emit_insn ((mode == DImode
14609 ? gen_andsi3
14610 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14611 emit_move_insn (low[0], high[0]);
14612 emit_insn ((mode == DImode
14613 ? gen_xorsi3
14614 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14617 emit_insn ((mode == DImode
14618 ? gen_ashlsi3
14619 : gen_ashldi3) (low[0], low[0], operands[2]));
14620 emit_insn ((mode == DImode
14621 ? gen_ashlsi3
14622 : gen_ashldi3) (high[0], high[0], operands[2]));
14623 return;
14626 if (operands[1] == constm1_rtx)
14628 /* For -1 << N, we can avoid the shld instruction, because we
14629 know that we're shifting 0...31/63 ones into a -1. */
14630 emit_move_insn (low[0], constm1_rtx);
14631 if (optimize_size)
14632 emit_move_insn (high[0], low[0]);
14633 else
14634 emit_move_insn (high[0], constm1_rtx);
14636 else
14638 if (!rtx_equal_p (operands[0], operands[1]))
14639 emit_move_insn (operands[0], operands[1]);
14641 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14642 emit_insn ((mode == DImode
14643 ? gen_x86_shld
14644 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14647 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14649 if (TARGET_CMOVE && scratch)
14651 ix86_expand_clear (scratch);
14652 emit_insn ((mode == DImode
14653 ? gen_x86_shift_adj_1
14654 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14656 else
14657 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14660 void
14661 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14663 rtx low[2], high[2];
14664 int count;
14665 const int single_width = mode == DImode ? 32 : 64;
14667 if (CONST_INT_P (operands[2]))
14669 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14670 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14672 if (count == single_width * 2 - 1)
14674 emit_move_insn (high[0], high[1]);
14675 emit_insn ((mode == DImode
14676 ? gen_ashrsi3
14677 : gen_ashrdi3) (high[0], high[0],
14678 GEN_INT (single_width - 1)));
14679 emit_move_insn (low[0], high[0]);
14682 else if (count >= single_width)
14684 emit_move_insn (low[0], high[1]);
14685 emit_move_insn (high[0], low[0]);
14686 emit_insn ((mode == DImode
14687 ? gen_ashrsi3
14688 : gen_ashrdi3) (high[0], high[0],
14689 GEN_INT (single_width - 1)));
14690 if (count > single_width)
14691 emit_insn ((mode == DImode
14692 ? gen_ashrsi3
14693 : gen_ashrdi3) (low[0], low[0],
14694 GEN_INT (count - single_width)));
14696 else
14698 if (!rtx_equal_p (operands[0], operands[1]))
14699 emit_move_insn (operands[0], operands[1]);
14700 emit_insn ((mode == DImode
14701 ? gen_x86_shrd
14702 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14703 emit_insn ((mode == DImode
14704 ? gen_ashrsi3
14705 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14708 else
14710 if (!rtx_equal_p (operands[0], operands[1]))
14711 emit_move_insn (operands[0], operands[1]);
14713 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14715 emit_insn ((mode == DImode
14716 ? gen_x86_shrd
14717 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14718 emit_insn ((mode == DImode
14719 ? gen_ashrsi3
14720 : gen_ashrdi3) (high[0], high[0], operands[2]));
14722 if (TARGET_CMOVE && scratch)
14724 emit_move_insn (scratch, high[0]);
14725 emit_insn ((mode == DImode
14726 ? gen_ashrsi3
14727 : gen_ashrdi3) (scratch, scratch,
14728 GEN_INT (single_width - 1)));
14729 emit_insn ((mode == DImode
14730 ? gen_x86_shift_adj_1
14731 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14732 scratch));
14734 else
14735 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14739 void
14740 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14742 rtx low[2], high[2];
14743 int count;
14744 const int single_width = mode == DImode ? 32 : 64;
14746 if (CONST_INT_P (operands[2]))
14748 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14749 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14751 if (count >= single_width)
14753 emit_move_insn (low[0], high[1]);
14754 ix86_expand_clear (high[0]);
14756 if (count > single_width)
14757 emit_insn ((mode == DImode
14758 ? gen_lshrsi3
14759 : gen_lshrdi3) (low[0], low[0],
14760 GEN_INT (count - single_width)));
14762 else
14764 if (!rtx_equal_p (operands[0], operands[1]))
14765 emit_move_insn (operands[0], operands[1]);
14766 emit_insn ((mode == DImode
14767 ? gen_x86_shrd
14768 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14769 emit_insn ((mode == DImode
14770 ? gen_lshrsi3
14771 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14774 else
14776 if (!rtx_equal_p (operands[0], operands[1]))
14777 emit_move_insn (operands[0], operands[1]);
14779 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14781 emit_insn ((mode == DImode
14782 ? gen_x86_shrd
14783 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14784 emit_insn ((mode == DImode
14785 ? gen_lshrsi3
14786 : gen_lshrdi3) (high[0], high[0], operands[2]));
14788 /* Heh. By reversing the arguments, we can reuse this pattern. */
14789 if (TARGET_CMOVE && scratch)
14791 ix86_expand_clear (scratch);
14792 emit_insn ((mode == DImode
14793 ? gen_x86_shift_adj_1
14794 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14795 scratch));
14797 else
14798 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14802 /* Predict just emitted jump instruction to be taken with probability PROB. */
14803 static void
14804 predict_jump (int prob)
14806 rtx insn = get_last_insn ();
14807 gcc_assert (JUMP_P (insn));
14808 REG_NOTES (insn)
14809 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14810 GEN_INT (prob),
14811 REG_NOTES (insn));
14814 /* Helper function for the string operations below. Dest VARIABLE whether
14815 it is aligned to VALUE bytes. If true, jump to the label. */
14816 static rtx
14817 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14819 rtx label = gen_label_rtx ();
14820 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14821 if (GET_MODE (variable) == DImode)
14822 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14823 else
14824 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14825 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14826 1, label);
14827 if (epilogue)
14828 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14829 else
14830 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14831 return label;
14834 /* Adjust COUNTER by the VALUE. */
14835 static void
14836 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14838 if (GET_MODE (countreg) == DImode)
14839 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14840 else
14841 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14844 /* Zero extend possibly SImode EXP to Pmode register. */
14846 ix86_zero_extend_to_Pmode (rtx exp)
14848 rtx r;
14849 if (GET_MODE (exp) == VOIDmode)
14850 return force_reg (Pmode, exp);
14851 if (GET_MODE (exp) == Pmode)
14852 return copy_to_mode_reg (Pmode, exp);
14853 r = gen_reg_rtx (Pmode);
14854 emit_insn (gen_zero_extendsidi2 (r, exp));
14855 return r;
14858 /* Divide COUNTREG by SCALE. */
14859 static rtx
14860 scale_counter (rtx countreg, int scale)
14862 rtx sc;
14863 rtx piece_size_mask;
14865 if (scale == 1)
14866 return countreg;
14867 if (CONST_INT_P (countreg))
14868 return GEN_INT (INTVAL (countreg) / scale);
14869 gcc_assert (REG_P (countreg));
14871 piece_size_mask = GEN_INT (scale - 1);
14872 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14873 GEN_INT (exact_log2 (scale)),
14874 NULL, 1, OPTAB_DIRECT);
14875 return sc;
14878 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14879 DImode for constant loop counts. */
14881 static enum machine_mode
14882 counter_mode (rtx count_exp)
14884 if (GET_MODE (count_exp) != VOIDmode)
14885 return GET_MODE (count_exp);
14886 if (GET_CODE (count_exp) != CONST_INT)
14887 return Pmode;
14888 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14889 return DImode;
14890 return SImode;
14893 /* When SRCPTR is non-NULL, output simple loop to move memory
14894 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14895 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14896 equivalent loop to set memory by VALUE (supposed to be in MODE).
14898 The size is rounded down to whole number of chunk size moved at once.
14899 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14902 static void
14903 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14904 rtx destptr, rtx srcptr, rtx value,
14905 rtx count, enum machine_mode mode, int unroll,
14906 int expected_size)
14908 rtx out_label, top_label, iter, tmp;
14909 enum machine_mode iter_mode = counter_mode (count);
14910 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14911 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14912 rtx size;
14913 rtx x_addr;
14914 rtx y_addr;
14915 int i;
14917 top_label = gen_label_rtx ();
14918 out_label = gen_label_rtx ();
14919 iter = gen_reg_rtx (iter_mode);
14921 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14922 NULL, 1, OPTAB_DIRECT);
14923 /* Those two should combine. */
14924 if (piece_size == const1_rtx)
14926 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14927 true, out_label);
14928 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14930 emit_move_insn (iter, const0_rtx);
14932 emit_label (top_label);
14934 tmp = convert_modes (Pmode, iter_mode, iter, true);
14935 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14936 destmem = change_address (destmem, mode, x_addr);
14938 if (srcmem)
14940 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14941 srcmem = change_address (srcmem, mode, y_addr);
14943 /* When unrolling for chips that reorder memory reads and writes,
14944 we can save registers by using single temporary.
14945 Also using 4 temporaries is overkill in 32bit mode. */
14946 if (!TARGET_64BIT && 0)
14948 for (i = 0; i < unroll; i++)
14950 if (i)
14952 destmem =
14953 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14954 srcmem =
14955 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14957 emit_move_insn (destmem, srcmem);
14960 else
14962 rtx tmpreg[4];
14963 gcc_assert (unroll <= 4);
14964 for (i = 0; i < unroll; i++)
14966 tmpreg[i] = gen_reg_rtx (mode);
14967 if (i)
14969 srcmem =
14970 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14972 emit_move_insn (tmpreg[i], srcmem);
14974 for (i = 0; i < unroll; i++)
14976 if (i)
14978 destmem =
14979 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14981 emit_move_insn (destmem, tmpreg[i]);
14985 else
14986 for (i = 0; i < unroll; i++)
14988 if (i)
14989 destmem =
14990 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14991 emit_move_insn (destmem, value);
14994 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14995 true, OPTAB_LIB_WIDEN);
14996 if (tmp != iter)
14997 emit_move_insn (iter, tmp);
14999 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
15000 true, top_label);
15001 if (expected_size != -1)
15003 expected_size /= GET_MODE_SIZE (mode) * unroll;
15004 if (expected_size == 0)
15005 predict_jump (0);
15006 else if (expected_size > REG_BR_PROB_BASE)
15007 predict_jump (REG_BR_PROB_BASE - 1);
15008 else
15009 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
15011 else
15012 predict_jump (REG_BR_PROB_BASE * 80 / 100);
15013 iter = ix86_zero_extend_to_Pmode (iter);
15014 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
15015 true, OPTAB_LIB_WIDEN);
15016 if (tmp != destptr)
15017 emit_move_insn (destptr, tmp);
15018 if (srcptr)
15020 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
15021 true, OPTAB_LIB_WIDEN);
15022 if (tmp != srcptr)
15023 emit_move_insn (srcptr, tmp);
15025 emit_label (out_label);
15028 /* Output "rep; mov" instruction.
15029 Arguments have same meaning as for previous function */
15030 static void
15031 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
15032 rtx destptr, rtx srcptr,
15033 rtx count,
15034 enum machine_mode mode)
15036 rtx destexp;
15037 rtx srcexp;
15038 rtx countreg;
15040 /* If the size is known, it is shorter to use rep movs. */
15041 if (mode == QImode && CONST_INT_P (count)
15042 && !(INTVAL (count) & 3))
15043 mode = SImode;
15045 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15046 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15047 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
15048 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
15049 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
15050 if (mode != QImode)
15052 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15053 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15054 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15055 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
15056 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15057 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
15059 else
15061 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15062 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
15064 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
15065 destexp, srcexp));
15068 /* Output "rep; stos" instruction.
15069 Arguments have same meaning as for previous function */
15070 static void
15071 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
15072 rtx count,
15073 enum machine_mode mode)
15075 rtx destexp;
15076 rtx countreg;
15078 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15079 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15080 value = force_reg (mode, gen_lowpart (mode, value));
15081 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
15082 if (mode != QImode)
15084 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15085 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15086 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15088 else
15089 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15090 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
15093 static void
15094 emit_strmov (rtx destmem, rtx srcmem,
15095 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
15097 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
15098 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
15099 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15102 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
15103 static void
15104 expand_movmem_epilogue (rtx destmem, rtx srcmem,
15105 rtx destptr, rtx srcptr, rtx count, int max_size)
15107 rtx src, dest;
15108 if (CONST_INT_P (count))
15110 HOST_WIDE_INT countval = INTVAL (count);
15111 int offset = 0;
15113 if ((countval & 0x10) && max_size > 16)
15115 if (TARGET_64BIT)
15117 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15118 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
15120 else
15121 gcc_unreachable ();
15122 offset += 16;
15124 if ((countval & 0x08) && max_size > 8)
15126 if (TARGET_64BIT)
15127 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15128 else
15130 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15131 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
15133 offset += 8;
15135 if ((countval & 0x04) && max_size > 4)
15137 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15138 offset += 4;
15140 if ((countval & 0x02) && max_size > 2)
15142 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
15143 offset += 2;
15145 if ((countval & 0x01) && max_size > 1)
15147 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
15148 offset += 1;
15150 return;
15152 if (max_size > 8)
15154 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
15155 count, 1, OPTAB_DIRECT);
15156 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
15157 count, QImode, 1, 4);
15158 return;
15161 /* When there are stringops, we can cheaply increase dest and src pointers.
15162 Otherwise we save code size by maintaining offset (zero is readily
15163 available from preceding rep operation) and using x86 addressing modes.
15165 if (TARGET_SINGLE_STRINGOP)
15167 if (max_size > 4)
15169 rtx label = ix86_expand_aligntest (count, 4, true);
15170 src = change_address (srcmem, SImode, srcptr);
15171 dest = change_address (destmem, SImode, destptr);
15172 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15173 emit_label (label);
15174 LABEL_NUSES (label) = 1;
15176 if (max_size > 2)
15178 rtx label = ix86_expand_aligntest (count, 2, true);
15179 src = change_address (srcmem, HImode, srcptr);
15180 dest = change_address (destmem, HImode, destptr);
15181 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15182 emit_label (label);
15183 LABEL_NUSES (label) = 1;
15185 if (max_size > 1)
15187 rtx label = ix86_expand_aligntest (count, 1, true);
15188 src = change_address (srcmem, QImode, srcptr);
15189 dest = change_address (destmem, QImode, destptr);
15190 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15191 emit_label (label);
15192 LABEL_NUSES (label) = 1;
15195 else
15197 rtx offset = force_reg (Pmode, const0_rtx);
15198 rtx tmp;
15200 if (max_size > 4)
15202 rtx label = ix86_expand_aligntest (count, 4, true);
15203 src = change_address (srcmem, SImode, srcptr);
15204 dest = change_address (destmem, SImode, destptr);
15205 emit_move_insn (dest, src);
15206 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15207 true, OPTAB_LIB_WIDEN);
15208 if (tmp != offset)
15209 emit_move_insn (offset, tmp);
15210 emit_label (label);
15211 LABEL_NUSES (label) = 1;
15213 if (max_size > 2)
15215 rtx label = ix86_expand_aligntest (count, 2, true);
15216 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15217 src = change_address (srcmem, HImode, tmp);
15218 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15219 dest = change_address (destmem, HImode, tmp);
15220 emit_move_insn (dest, src);
15221 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15222 true, OPTAB_LIB_WIDEN);
15223 if (tmp != offset)
15224 emit_move_insn (offset, tmp);
15225 emit_label (label);
15226 LABEL_NUSES (label) = 1;
15228 if (max_size > 1)
15230 rtx label = ix86_expand_aligntest (count, 1, true);
15231 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15232 src = change_address (srcmem, QImode, tmp);
15233 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15234 dest = change_address (destmem, QImode, tmp);
15235 emit_move_insn (dest, src);
15236 emit_label (label);
15237 LABEL_NUSES (label) = 1;
15242 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15243 static void
15244 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15245 rtx count, int max_size)
15247 count =
15248 expand_simple_binop (counter_mode (count), AND, count,
15249 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15250 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15251 gen_lowpart (QImode, value), count, QImode,
15252 1, max_size / 2);
15255 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15256 static void
15257 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15259 rtx dest;
15261 if (CONST_INT_P (count))
15263 HOST_WIDE_INT countval = INTVAL (count);
15264 int offset = 0;
15266 if ((countval & 0x10) && max_size > 16)
15268 if (TARGET_64BIT)
15270 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15271 emit_insn (gen_strset (destptr, dest, value));
15272 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15273 emit_insn (gen_strset (destptr, dest, value));
15275 else
15276 gcc_unreachable ();
15277 offset += 16;
15279 if ((countval & 0x08) && max_size > 8)
15281 if (TARGET_64BIT)
15283 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15284 emit_insn (gen_strset (destptr, dest, value));
15286 else
15288 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15289 emit_insn (gen_strset (destptr, dest, value));
15290 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15291 emit_insn (gen_strset (destptr, dest, value));
15293 offset += 8;
15295 if ((countval & 0x04) && max_size > 4)
15297 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15298 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15299 offset += 4;
15301 if ((countval & 0x02) && max_size > 2)
15303 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15304 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15305 offset += 2;
15307 if ((countval & 0x01) && max_size > 1)
15309 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15310 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15311 offset += 1;
15313 return;
15315 if (max_size > 32)
15317 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15318 return;
15320 if (max_size > 16)
15322 rtx label = ix86_expand_aligntest (count, 16, true);
15323 if (TARGET_64BIT)
15325 dest = change_address (destmem, DImode, destptr);
15326 emit_insn (gen_strset (destptr, dest, value));
15327 emit_insn (gen_strset (destptr, dest, value));
15329 else
15331 dest = change_address (destmem, SImode, destptr);
15332 emit_insn (gen_strset (destptr, dest, value));
15333 emit_insn (gen_strset (destptr, dest, value));
15334 emit_insn (gen_strset (destptr, dest, value));
15335 emit_insn (gen_strset (destptr, dest, value));
15337 emit_label (label);
15338 LABEL_NUSES (label) = 1;
15340 if (max_size > 8)
15342 rtx label = ix86_expand_aligntest (count, 8, true);
15343 if (TARGET_64BIT)
15345 dest = change_address (destmem, DImode, destptr);
15346 emit_insn (gen_strset (destptr, dest, value));
15348 else
15350 dest = change_address (destmem, SImode, destptr);
15351 emit_insn (gen_strset (destptr, dest, value));
15352 emit_insn (gen_strset (destptr, dest, value));
15354 emit_label (label);
15355 LABEL_NUSES (label) = 1;
15357 if (max_size > 4)
15359 rtx label = ix86_expand_aligntest (count, 4, true);
15360 dest = change_address (destmem, SImode, destptr);
15361 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15362 emit_label (label);
15363 LABEL_NUSES (label) = 1;
15365 if (max_size > 2)
15367 rtx label = ix86_expand_aligntest (count, 2, true);
15368 dest = change_address (destmem, HImode, destptr);
15369 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15370 emit_label (label);
15371 LABEL_NUSES (label) = 1;
15373 if (max_size > 1)
15375 rtx label = ix86_expand_aligntest (count, 1, true);
15376 dest = change_address (destmem, QImode, destptr);
15377 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15378 emit_label (label);
15379 LABEL_NUSES (label) = 1;
15383 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15384 DESIRED_ALIGNMENT. */
15385 static void
15386 expand_movmem_prologue (rtx destmem, rtx srcmem,
15387 rtx destptr, rtx srcptr, rtx count,
15388 int align, int desired_alignment)
15390 if (align <= 1 && desired_alignment > 1)
15392 rtx label = ix86_expand_aligntest (destptr, 1, false);
15393 srcmem = change_address (srcmem, QImode, srcptr);
15394 destmem = change_address (destmem, QImode, destptr);
15395 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15396 ix86_adjust_counter (count, 1);
15397 emit_label (label);
15398 LABEL_NUSES (label) = 1;
15400 if (align <= 2 && desired_alignment > 2)
15402 rtx label = ix86_expand_aligntest (destptr, 2, false);
15403 srcmem = change_address (srcmem, HImode, srcptr);
15404 destmem = change_address (destmem, HImode, destptr);
15405 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15406 ix86_adjust_counter (count, 2);
15407 emit_label (label);
15408 LABEL_NUSES (label) = 1;
15410 if (align <= 4 && desired_alignment > 4)
15412 rtx label = ix86_expand_aligntest (destptr, 4, false);
15413 srcmem = change_address (srcmem, SImode, srcptr);
15414 destmem = change_address (destmem, SImode, destptr);
15415 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15416 ix86_adjust_counter (count, 4);
15417 emit_label (label);
15418 LABEL_NUSES (label) = 1;
15420 gcc_assert (desired_alignment <= 8);
15423 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15424 DESIRED_ALIGNMENT. */
15425 static void
15426 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15427 int align, int desired_alignment)
15429 if (align <= 1 && desired_alignment > 1)
15431 rtx label = ix86_expand_aligntest (destptr, 1, false);
15432 destmem = change_address (destmem, QImode, destptr);
15433 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15434 ix86_adjust_counter (count, 1);
15435 emit_label (label);
15436 LABEL_NUSES (label) = 1;
15438 if (align <= 2 && desired_alignment > 2)
15440 rtx label = ix86_expand_aligntest (destptr, 2, false);
15441 destmem = change_address (destmem, HImode, destptr);
15442 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15443 ix86_adjust_counter (count, 2);
15444 emit_label (label);
15445 LABEL_NUSES (label) = 1;
15447 if (align <= 4 && desired_alignment > 4)
15449 rtx label = ix86_expand_aligntest (destptr, 4, false);
15450 destmem = change_address (destmem, SImode, destptr);
15451 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15452 ix86_adjust_counter (count, 4);
15453 emit_label (label);
15454 LABEL_NUSES (label) = 1;
15456 gcc_assert (desired_alignment <= 8);
15459 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15460 static enum stringop_alg
15461 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15462 int *dynamic_check)
15464 const struct stringop_algs * algs;
15465 /* Algorithms using the rep prefix want at least edi and ecx;
15466 additionally, memset wants eax and memcpy wants esi. Don't
15467 consider such algorithms if the user has appropriated those
15468 registers for their own purposes. */
15469 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15470 || (memset
15471 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15473 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15474 || (alg != rep_prefix_1_byte \
15475 && alg != rep_prefix_4_byte \
15476 && alg != rep_prefix_8_byte))
15478 *dynamic_check = -1;
15479 if (memset)
15480 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15481 else
15482 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15483 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15484 return stringop_alg;
15485 /* rep; movq or rep; movl is the smallest variant. */
15486 else if (optimize_size)
15488 if (!count || (count & 3))
15489 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15490 else
15491 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15493 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15495 else if (expected_size != -1 && expected_size < 4)
15496 return loop_1_byte;
15497 else if (expected_size != -1)
15499 unsigned int i;
15500 enum stringop_alg alg = libcall;
15501 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15503 /* We get here if the algorithms that were not libcall-based
15504 were rep-prefix based and we are unable to use rep prefixes
15505 based on global register usage. Break out of the loop and
15506 use the heuristic below. */
15507 if (algs->size[i].max == 0)
15508 break;
15509 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15511 enum stringop_alg candidate = algs->size[i].alg;
15513 if (candidate != libcall && ALG_USABLE_P (candidate))
15514 alg = candidate;
15515 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15516 last non-libcall inline algorithm. */
15517 if (TARGET_INLINE_ALL_STRINGOPS)
15519 /* When the current size is best to be copied by a libcall,
15520 but we are still forced to inline, run the heuristic below
15521 that will pick code for medium sized blocks. */
15522 if (alg != libcall)
15523 return alg;
15524 break;
15526 else if (ALG_USABLE_P (candidate))
15527 return candidate;
15530 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15532 /* When asked to inline the call anyway, try to pick meaningful choice.
15533 We look for maximal size of block that is faster to copy by hand and
15534 take blocks of at most of that size guessing that average size will
15535 be roughly half of the block.
15537 If this turns out to be bad, we might simply specify the preferred
15538 choice in ix86_costs. */
15539 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15540 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15542 int max = -1;
15543 enum stringop_alg alg;
15544 int i;
15545 bool any_alg_usable_p = true;
15547 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15549 enum stringop_alg candidate = algs->size[i].alg;
15550 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15552 if (candidate != libcall && candidate
15553 && ALG_USABLE_P (candidate))
15554 max = algs->size[i].max;
15556 /* If there aren't any usable algorithms, then recursing on
15557 smaller sizes isn't going to find anything. Just return the
15558 simple byte-at-a-time copy loop. */
15559 if (!any_alg_usable_p)
15561 /* Pick something reasonable. */
15562 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15563 *dynamic_check = 128;
15564 return loop_1_byte;
15566 if (max == -1)
15567 max = 4096;
15568 alg = decide_alg (count, max / 2, memset, dynamic_check);
15569 gcc_assert (*dynamic_check == -1);
15570 gcc_assert (alg != libcall);
15571 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15572 *dynamic_check = max;
15573 return alg;
15575 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15576 #undef ALG_USABLE_P
15579 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15580 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15581 static int
15582 decide_alignment (int align,
15583 enum stringop_alg alg,
15584 int expected_size)
15586 int desired_align = 0;
15587 switch (alg)
15589 case no_stringop:
15590 gcc_unreachable ();
15591 case loop:
15592 case unrolled_loop:
15593 desired_align = GET_MODE_SIZE (Pmode);
15594 break;
15595 case rep_prefix_8_byte:
15596 desired_align = 8;
15597 break;
15598 case rep_prefix_4_byte:
15599 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15600 copying whole cacheline at once. */
15601 if (TARGET_PENTIUMPRO)
15602 desired_align = 8;
15603 else
15604 desired_align = 4;
15605 break;
15606 case rep_prefix_1_byte:
15607 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15608 copying whole cacheline at once. */
15609 if (TARGET_PENTIUMPRO)
15610 desired_align = 8;
15611 else
15612 desired_align = 1;
15613 break;
15614 case loop_1_byte:
15615 desired_align = 1;
15616 break;
15617 case libcall:
15618 return 0;
15621 if (optimize_size)
15622 desired_align = 1;
15623 if (desired_align < align)
15624 desired_align = align;
15625 if (expected_size != -1 && expected_size < 4)
15626 desired_align = align;
15627 return desired_align;
15630 /* Return the smallest power of 2 greater than VAL. */
15631 static int
15632 smallest_pow2_greater_than (int val)
15634 int ret = 1;
15635 while (ret <= val)
15636 ret <<= 1;
15637 return ret;
15640 /* Expand string move (memcpy) operation. Use i386 string operations when
15641 profitable. expand_setmem contains similar code. The code depends upon
15642 architecture, block size and alignment, but always has the same
15643 overall structure:
15645 1) Prologue guard: Conditional that jumps up to epilogues for small
15646 blocks that can be handled by epilogue alone. This is faster but
15647 also needed for correctness, since prologue assume the block is larger
15648 than the desired alignment.
15650 Optional dynamic check for size and libcall for large
15651 blocks is emitted here too, with -minline-stringops-dynamically.
15653 2) Prologue: copy first few bytes in order to get destination aligned
15654 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15655 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15656 We emit either a jump tree on power of two sized blocks, or a byte loop.
15658 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15659 with specified algorithm.
15661 4) Epilogue: code copying tail of the block that is too small to be
15662 handled by main body (or up to size guarded by prologue guard). */
15665 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15666 rtx expected_align_exp, rtx expected_size_exp)
15668 rtx destreg;
15669 rtx srcreg;
15670 rtx label = NULL;
15671 rtx tmp;
15672 rtx jump_around_label = NULL;
15673 HOST_WIDE_INT align = 1;
15674 unsigned HOST_WIDE_INT count = 0;
15675 HOST_WIDE_INT expected_size = -1;
15676 int size_needed = 0, epilogue_size_needed;
15677 int desired_align = 0;
15678 enum stringop_alg alg;
15679 int dynamic_check;
15681 if (CONST_INT_P (align_exp))
15682 align = INTVAL (align_exp);
15683 /* i386 can do misaligned access on reasonably increased cost. */
15684 if (CONST_INT_P (expected_align_exp)
15685 && INTVAL (expected_align_exp) > align)
15686 align = INTVAL (expected_align_exp);
15687 if (CONST_INT_P (count_exp))
15688 count = expected_size = INTVAL (count_exp);
15689 if (CONST_INT_P (expected_size_exp) && count == 0)
15690 expected_size = INTVAL (expected_size_exp);
15692 /* Make sure we don't need to care about overflow later on. */
15693 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15694 return 0;
15696 /* Step 0: Decide on preferred algorithm, desired alignment and
15697 size of chunks to be copied by main loop. */
15699 alg = decide_alg (count, expected_size, false, &dynamic_check);
15700 desired_align = decide_alignment (align, alg, expected_size);
15702 if (!TARGET_ALIGN_STRINGOPS)
15703 align = desired_align;
15705 if (alg == libcall)
15706 return 0;
15707 gcc_assert (alg != no_stringop);
15708 if (!count)
15709 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15710 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15711 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15712 switch (alg)
15714 case libcall:
15715 case no_stringop:
15716 gcc_unreachable ();
15717 case loop:
15718 size_needed = GET_MODE_SIZE (Pmode);
15719 break;
15720 case unrolled_loop:
15721 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15722 break;
15723 case rep_prefix_8_byte:
15724 size_needed = 8;
15725 break;
15726 case rep_prefix_4_byte:
15727 size_needed = 4;
15728 break;
15729 case rep_prefix_1_byte:
15730 case loop_1_byte:
15731 size_needed = 1;
15732 break;
15735 epilogue_size_needed = size_needed;
15737 /* Step 1: Prologue guard. */
15739 /* Alignment code needs count to be in register. */
15740 if (CONST_INT_P (count_exp) && desired_align > align)
15741 count_exp = force_reg (counter_mode (count_exp), count_exp);
15742 gcc_assert (desired_align >= 1 && align >= 1);
15744 /* Ensure that alignment prologue won't copy past end of block. */
15745 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15747 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15748 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15749 Make sure it is power of 2. */
15750 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15752 if (CONST_INT_P (count_exp))
15754 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15755 goto epilogue;
15757 else
15759 label = gen_label_rtx ();
15760 emit_cmp_and_jump_insns (count_exp,
15761 GEN_INT (epilogue_size_needed),
15762 LTU, 0, counter_mode (count_exp), 1, label);
15763 if (expected_size == -1 || expected_size < epilogue_size_needed)
15764 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15765 else
15766 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15770 /* Emit code to decide on runtime whether library call or inline should be
15771 used. */
15772 if (dynamic_check != -1)
15774 if (CONST_INT_P (count_exp))
15776 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15778 emit_block_move_via_libcall (dst, src, count_exp, false);
15779 count_exp = const0_rtx;
15780 goto epilogue;
15783 else
15785 rtx hot_label = gen_label_rtx ();
15786 jump_around_label = gen_label_rtx ();
15787 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15788 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15789 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15790 emit_block_move_via_libcall (dst, src, count_exp, false);
15791 emit_jump (jump_around_label);
15792 emit_label (hot_label);
15796 /* Step 2: Alignment prologue. */
15798 if (desired_align > align)
15800 /* Except for the first move in epilogue, we no longer know
15801 constant offset in aliasing info. It don't seems to worth
15802 the pain to maintain it for the first move, so throw away
15803 the info early. */
15804 src = change_address (src, BLKmode, srcreg);
15805 dst = change_address (dst, BLKmode, destreg);
15806 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15807 desired_align);
15809 if (label && size_needed == 1)
15811 emit_label (label);
15812 LABEL_NUSES (label) = 1;
15813 label = NULL;
15816 /* Step 3: Main loop. */
15818 switch (alg)
15820 case libcall:
15821 case no_stringop:
15822 gcc_unreachable ();
15823 case loop_1_byte:
15824 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15825 count_exp, QImode, 1, expected_size);
15826 break;
15827 case loop:
15828 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15829 count_exp, Pmode, 1, expected_size);
15830 break;
15831 case unrolled_loop:
15832 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15833 registers for 4 temporaries anyway. */
15834 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15835 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15836 expected_size);
15837 break;
15838 case rep_prefix_8_byte:
15839 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15840 DImode);
15841 break;
15842 case rep_prefix_4_byte:
15843 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15844 SImode);
15845 break;
15846 case rep_prefix_1_byte:
15847 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15848 QImode);
15849 break;
15851 /* Adjust properly the offset of src and dest memory for aliasing. */
15852 if (CONST_INT_P (count_exp))
15854 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15855 (count / size_needed) * size_needed);
15856 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15857 (count / size_needed) * size_needed);
15859 else
15861 src = change_address (src, BLKmode, srcreg);
15862 dst = change_address (dst, BLKmode, destreg);
15865 /* Step 4: Epilogue to copy the remaining bytes. */
15866 epilogue:
15867 if (label)
15869 /* When the main loop is done, COUNT_EXP might hold original count,
15870 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15871 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15872 bytes. Compensate if needed. */
15874 if (size_needed < epilogue_size_needed)
15876 tmp =
15877 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15878 GEN_INT (size_needed - 1), count_exp, 1,
15879 OPTAB_DIRECT);
15880 if (tmp != count_exp)
15881 emit_move_insn (count_exp, tmp);
15883 emit_label (label);
15884 LABEL_NUSES (label) = 1;
15887 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15888 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15889 epilogue_size_needed);
15890 if (jump_around_label)
15891 emit_label (jump_around_label);
15892 return 1;
15895 /* Helper function for memcpy. For QImode value 0xXY produce
15896 0xXYXYXYXY of wide specified by MODE. This is essentially
15897 a * 0x10101010, but we can do slightly better than
15898 synth_mult by unwinding the sequence by hand on CPUs with
15899 slow multiply. */
15900 static rtx
15901 promote_duplicated_reg (enum machine_mode mode, rtx val)
15903 enum machine_mode valmode = GET_MODE (val);
15904 rtx tmp;
15905 int nops = mode == DImode ? 3 : 2;
15907 gcc_assert (mode == SImode || mode == DImode);
15908 if (val == const0_rtx)
15909 return copy_to_mode_reg (mode, const0_rtx);
15910 if (CONST_INT_P (val))
15912 HOST_WIDE_INT v = INTVAL (val) & 255;
15914 v |= v << 8;
15915 v |= v << 16;
15916 if (mode == DImode)
15917 v |= (v << 16) << 16;
15918 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15921 if (valmode == VOIDmode)
15922 valmode = QImode;
15923 if (valmode != QImode)
15924 val = gen_lowpart (QImode, val);
15925 if (mode == QImode)
15926 return val;
15927 if (!TARGET_PARTIAL_REG_STALL)
15928 nops--;
15929 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15930 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15931 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15932 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15934 rtx reg = convert_modes (mode, QImode, val, true);
15935 tmp = promote_duplicated_reg (mode, const1_rtx);
15936 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15937 OPTAB_DIRECT);
15939 else
15941 rtx reg = convert_modes (mode, QImode, val, true);
15943 if (!TARGET_PARTIAL_REG_STALL)
15944 if (mode == SImode)
15945 emit_insn (gen_movsi_insv_1 (reg, reg));
15946 else
15947 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15948 else
15950 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15951 NULL, 1, OPTAB_DIRECT);
15952 reg =
15953 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15955 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15956 NULL, 1, OPTAB_DIRECT);
15957 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15958 if (mode == SImode)
15959 return reg;
15960 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15961 NULL, 1, OPTAB_DIRECT);
15962 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15963 return reg;
15967 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15968 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15969 alignment from ALIGN to DESIRED_ALIGN. */
15970 static rtx
15971 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15973 rtx promoted_val;
15975 if (TARGET_64BIT
15976 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15977 promoted_val = promote_duplicated_reg (DImode, val);
15978 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15979 promoted_val = promote_duplicated_reg (SImode, val);
15980 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15981 promoted_val = promote_duplicated_reg (HImode, val);
15982 else
15983 promoted_val = val;
15985 return promoted_val;
15988 /* Expand string clear operation (bzero). Use i386 string operations when
15989 profitable. See expand_movmem comment for explanation of individual
15990 steps performed. */
15992 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15993 rtx expected_align_exp, rtx expected_size_exp)
15995 rtx destreg;
15996 rtx label = NULL;
15997 rtx tmp;
15998 rtx jump_around_label = NULL;
15999 HOST_WIDE_INT align = 1;
16000 unsigned HOST_WIDE_INT count = 0;
16001 HOST_WIDE_INT expected_size = -1;
16002 int size_needed = 0, epilogue_size_needed;
16003 int desired_align = 0;
16004 enum stringop_alg alg;
16005 rtx promoted_val = NULL;
16006 bool force_loopy_epilogue = false;
16007 int dynamic_check;
16009 if (CONST_INT_P (align_exp))
16010 align = INTVAL (align_exp);
16011 /* i386 can do misaligned access on reasonably increased cost. */
16012 if (CONST_INT_P (expected_align_exp)
16013 && INTVAL (expected_align_exp) > align)
16014 align = INTVAL (expected_align_exp);
16015 if (CONST_INT_P (count_exp))
16016 count = expected_size = INTVAL (count_exp);
16017 if (CONST_INT_P (expected_size_exp) && count == 0)
16018 expected_size = INTVAL (expected_size_exp);
16020 /* Make sure we don't need to care about overflow later on. */
16021 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
16022 return 0;
16024 /* Step 0: Decide on preferred algorithm, desired alignment and
16025 size of chunks to be copied by main loop. */
16027 alg = decide_alg (count, expected_size, true, &dynamic_check);
16028 desired_align = decide_alignment (align, alg, expected_size);
16030 if (!TARGET_ALIGN_STRINGOPS)
16031 align = desired_align;
16033 if (alg == libcall)
16034 return 0;
16035 gcc_assert (alg != no_stringop);
16036 if (!count)
16037 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
16038 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16039 switch (alg)
16041 case libcall:
16042 case no_stringop:
16043 gcc_unreachable ();
16044 case loop:
16045 size_needed = GET_MODE_SIZE (Pmode);
16046 break;
16047 case unrolled_loop:
16048 size_needed = GET_MODE_SIZE (Pmode) * 4;
16049 break;
16050 case rep_prefix_8_byte:
16051 size_needed = 8;
16052 break;
16053 case rep_prefix_4_byte:
16054 size_needed = 4;
16055 break;
16056 case rep_prefix_1_byte:
16057 case loop_1_byte:
16058 size_needed = 1;
16059 break;
16061 epilogue_size_needed = size_needed;
16063 /* Step 1: Prologue guard. */
16065 /* Alignment code needs count to be in register. */
16066 if (CONST_INT_P (count_exp) && desired_align > align)
16068 enum machine_mode mode = SImode;
16069 if (TARGET_64BIT && (count & ~0xffffffff))
16070 mode = DImode;
16071 count_exp = force_reg (mode, count_exp);
16073 /* Do the cheap promotion to allow better CSE across the
16074 main loop and epilogue (ie one load of the big constant in the
16075 front of all code. */
16076 if (CONST_INT_P (val_exp))
16077 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16078 desired_align, align);
16079 /* Ensure that alignment prologue won't copy past end of block. */
16080 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
16082 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
16083 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
16084 Make sure it is power of 2. */
16085 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
16087 /* To improve performance of small blocks, we jump around the VAL
16088 promoting mode. This mean that if the promoted VAL is not constant,
16089 we might not use it in the epilogue and have to use byte
16090 loop variant. */
16091 if (epilogue_size_needed > 2 && !promoted_val)
16092 force_loopy_epilogue = true;
16093 label = gen_label_rtx ();
16094 emit_cmp_and_jump_insns (count_exp,
16095 GEN_INT (epilogue_size_needed),
16096 LTU, 0, counter_mode (count_exp), 1, label);
16097 if (GET_CODE (count_exp) == CONST_INT)
16099 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
16100 predict_jump (REG_BR_PROB_BASE * 60 / 100);
16101 else
16102 predict_jump (REG_BR_PROB_BASE * 20 / 100);
16104 if (dynamic_check != -1)
16106 rtx hot_label = gen_label_rtx ();
16107 jump_around_label = gen_label_rtx ();
16108 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
16109 LEU, 0, counter_mode (count_exp), 1, hot_label);
16110 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16111 set_storage_via_libcall (dst, count_exp, val_exp, false);
16112 emit_jump (jump_around_label);
16113 emit_label (hot_label);
16116 /* Step 2: Alignment prologue. */
16118 /* Do the expensive promotion once we branched off the small blocks. */
16119 if (!promoted_val)
16120 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16121 desired_align, align);
16122 gcc_assert (desired_align >= 1 && align >= 1);
16124 if (desired_align > align)
16126 /* Except for the first move in epilogue, we no longer know
16127 constant offset in aliasing info. It don't seems to worth
16128 the pain to maintain it for the first move, so throw away
16129 the info early. */
16130 dst = change_address (dst, BLKmode, destreg);
16131 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
16132 desired_align);
16134 if (label && size_needed == 1)
16136 emit_label (label);
16137 LABEL_NUSES (label) = 1;
16138 label = NULL;
16141 /* Step 3: Main loop. */
16143 switch (alg)
16145 case libcall:
16146 case no_stringop:
16147 gcc_unreachable ();
16148 case loop_1_byte:
16149 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16150 count_exp, QImode, 1, expected_size);
16151 break;
16152 case loop:
16153 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16154 count_exp, Pmode, 1, expected_size);
16155 break;
16156 case unrolled_loop:
16157 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16158 count_exp, Pmode, 4, expected_size);
16159 break;
16160 case rep_prefix_8_byte:
16161 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16162 DImode);
16163 break;
16164 case rep_prefix_4_byte:
16165 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16166 SImode);
16167 break;
16168 case rep_prefix_1_byte:
16169 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16170 QImode);
16171 break;
16173 /* Adjust properly the offset of src and dest memory for aliasing. */
16174 if (CONST_INT_P (count_exp))
16175 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16176 (count / size_needed) * size_needed);
16177 else
16178 dst = change_address (dst, BLKmode, destreg);
16180 /* Step 4: Epilogue to copy the remaining bytes. */
16182 if (label)
16184 /* When the main loop is done, COUNT_EXP might hold original count,
16185 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16186 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16187 bytes. Compensate if needed. */
16189 if (size_needed < desired_align - align)
16191 tmp =
16192 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16193 GEN_INT (size_needed - 1), count_exp, 1,
16194 OPTAB_DIRECT);
16195 size_needed = desired_align - align + 1;
16196 if (tmp != count_exp)
16197 emit_move_insn (count_exp, tmp);
16199 emit_label (label);
16200 LABEL_NUSES (label) = 1;
16202 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16204 if (force_loopy_epilogue)
16205 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16206 size_needed);
16207 else
16208 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16209 size_needed);
16211 if (jump_around_label)
16212 emit_label (jump_around_label);
16213 return 1;
16216 /* Expand the appropriate insns for doing strlen if not just doing
16217 repnz; scasb
16219 out = result, initialized with the start address
16220 align_rtx = alignment of the address.
16221 scratch = scratch register, initialized with the startaddress when
16222 not aligned, otherwise undefined
16224 This is just the body. It needs the initializations mentioned above and
16225 some address computing at the end. These things are done in i386.md. */
16227 static void
16228 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16230 int align;
16231 rtx tmp;
16232 rtx align_2_label = NULL_RTX;
16233 rtx align_3_label = NULL_RTX;
16234 rtx align_4_label = gen_label_rtx ();
16235 rtx end_0_label = gen_label_rtx ();
16236 rtx mem;
16237 rtx tmpreg = gen_reg_rtx (SImode);
16238 rtx scratch = gen_reg_rtx (SImode);
16239 rtx cmp;
16241 align = 0;
16242 if (CONST_INT_P (align_rtx))
16243 align = INTVAL (align_rtx);
16245 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16247 /* Is there a known alignment and is it less than 4? */
16248 if (align < 4)
16250 rtx scratch1 = gen_reg_rtx (Pmode);
16251 emit_move_insn (scratch1, out);
16252 /* Is there a known alignment and is it not 2? */
16253 if (align != 2)
16255 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16256 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16258 /* Leave just the 3 lower bits. */
16259 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16260 NULL_RTX, 0, OPTAB_WIDEN);
16262 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16263 Pmode, 1, align_4_label);
16264 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16265 Pmode, 1, align_2_label);
16266 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16267 Pmode, 1, align_3_label);
16269 else
16271 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16272 check if is aligned to 4 - byte. */
16274 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16275 NULL_RTX, 0, OPTAB_WIDEN);
16277 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16278 Pmode, 1, align_4_label);
16281 mem = change_address (src, QImode, out);
16283 /* Now compare the bytes. */
16285 /* Compare the first n unaligned byte on a byte per byte basis. */
16286 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16287 QImode, 1, end_0_label);
16289 /* Increment the address. */
16290 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16292 /* Not needed with an alignment of 2 */
16293 if (align != 2)
16295 emit_label (align_2_label);
16297 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16298 end_0_label);
16300 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16302 emit_label (align_3_label);
16305 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16306 end_0_label);
16308 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16311 /* Generate loop to check 4 bytes at a time. It is not a good idea to
16312 align this loop. It gives only huge programs, but does not help to
16313 speed up. */
16314 emit_label (align_4_label);
16316 mem = change_address (src, SImode, out);
16317 emit_move_insn (scratch, mem);
16318 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
16320 /* This formula yields a nonzero result iff one of the bytes is zero.
16321 This saves three branches inside loop and many cycles. */
16323 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16324 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16325 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16326 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16327 gen_int_mode (0x80808080, SImode)));
16328 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16329 align_4_label);
16331 if (TARGET_CMOVE)
16333 rtx reg = gen_reg_rtx (SImode);
16334 rtx reg2 = gen_reg_rtx (Pmode);
16335 emit_move_insn (reg, tmpreg);
16336 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16338 /* If zero is not in the first two bytes, move two bytes forward. */
16339 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16340 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16341 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16342 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16343 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16344 reg,
16345 tmpreg)));
16346 /* Emit lea manually to avoid clobbering of flags. */
16347 emit_insn (gen_rtx_SET (SImode, reg2,
16348 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16350 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16351 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16352 emit_insn (gen_rtx_SET (VOIDmode, out,
16353 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16354 reg2,
16355 out)));
16358 else
16360 rtx end_2_label = gen_label_rtx ();
16361 /* Is zero in the first two bytes? */
16363 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16364 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16365 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16366 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16367 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16368 pc_rtx);
16369 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16370 JUMP_LABEL (tmp) = end_2_label;
16372 /* Not in the first two. Move two bytes forward. */
16373 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16374 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
16376 emit_label (end_2_label);
16380 /* Avoid branch in fixing the byte. */
16381 tmpreg = gen_lowpart (QImode, tmpreg);
16382 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16383 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16384 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
16386 emit_label (end_0_label);
16389 /* Expand strlen. */
16392 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16394 rtx addr, scratch1, scratch2, scratch3, scratch4;
16396 /* The generic case of strlen expander is long. Avoid it's
16397 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16399 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16400 && !TARGET_INLINE_ALL_STRINGOPS
16401 && !optimize_size
16402 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16403 return 0;
16405 addr = force_reg (Pmode, XEXP (src, 0));
16406 scratch1 = gen_reg_rtx (Pmode);
16408 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16409 && !optimize_size)
16411 /* Well it seems that some optimizer does not combine a call like
16412 foo(strlen(bar), strlen(bar));
16413 when the move and the subtraction is done here. It does calculate
16414 the length just once when these instructions are done inside of
16415 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16416 often used and I use one fewer register for the lifetime of
16417 output_strlen_unroll() this is better. */
16419 emit_move_insn (out, addr);
16421 ix86_expand_strlensi_unroll_1 (out, src, align);
16423 /* strlensi_unroll_1 returns the address of the zero at the end of
16424 the string, like memchr(), so compute the length by subtracting
16425 the start address. */
16426 emit_insn ((*ix86_gen_sub3) (out, out, addr));
16428 else
16430 rtx unspec;
16432 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16433 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16434 return false;
16436 scratch2 = gen_reg_rtx (Pmode);
16437 scratch3 = gen_reg_rtx (Pmode);
16438 scratch4 = force_reg (Pmode, constm1_rtx);
16440 emit_move_insn (scratch3, addr);
16441 eoschar = force_reg (QImode, eoschar);
16443 src = replace_equiv_address_nv (src, scratch3);
16445 /* If .md starts supporting :P, this can be done in .md. */
16446 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16447 scratch4), UNSPEC_SCAS);
16448 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16449 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
16450 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
16452 return 1;
16455 /* For given symbol (function) construct code to compute address of it's PLT
16456 entry in large x86-64 PIC model. */
16458 construct_plt_address (rtx symbol)
16460 rtx tmp = gen_reg_rtx (Pmode);
16461 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16463 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16464 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16466 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16467 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16468 return tmp;
16471 void
16472 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16473 rtx callarg2 ATTRIBUTE_UNUSED,
16474 rtx pop, int sibcall)
16476 rtx use = NULL, call;
16478 if (pop == const0_rtx)
16479 pop = NULL;
16480 gcc_assert (!TARGET_64BIT || !pop);
16482 if (TARGET_MACHO && !TARGET_64BIT)
16484 #if TARGET_MACHO
16485 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16486 fnaddr = machopic_indirect_call_target (fnaddr);
16487 #endif
16489 else
16491 /* Static functions and indirect calls don't need the pic register. */
16492 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16493 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16494 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16495 use_reg (&use, pic_offset_table_rtx);
16498 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16500 rtx al = gen_rtx_REG (QImode, AX_REG);
16501 emit_move_insn (al, callarg2);
16502 use_reg (&use, al);
16505 if (ix86_cmodel == CM_LARGE_PIC
16506 && GET_CODE (fnaddr) == MEM
16507 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16508 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16509 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16510 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16512 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16513 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16515 if (sibcall && TARGET_64BIT
16516 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16518 rtx addr;
16519 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16520 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16521 emit_move_insn (fnaddr, addr);
16522 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16525 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16526 if (retval)
16527 call = gen_rtx_SET (VOIDmode, retval, call);
16528 if (pop)
16530 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16531 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16532 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16535 call = emit_call_insn (call);
16536 if (use)
16537 CALL_INSN_FUNCTION_USAGE (call) = use;
16541 /* Clear stack slot assignments remembered from previous functions.
16542 This is called from INIT_EXPANDERS once before RTL is emitted for each
16543 function. */
16545 static struct machine_function *
16546 ix86_init_machine_status (void)
16548 struct machine_function *f;
16550 f = GGC_CNEW (struct machine_function);
16551 f->use_fast_prologue_epilogue_nregs = -1;
16552 f->tls_descriptor_call_expanded_p = 0;
16553 f->call_abi = DEFAULT_ABI;
16555 return f;
16558 /* Return a MEM corresponding to a stack slot with mode MODE.
16559 Allocate a new slot if necessary.
16561 The RTL for a function can have several slots available: N is
16562 which slot to use. */
16565 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16567 struct stack_local_entry *s;
16569 gcc_assert (n < MAX_386_STACK_LOCALS);
16571 /* Virtual slot is valid only before vregs are instantiated. */
16572 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16574 for (s = ix86_stack_locals; s; s = s->next)
16575 if (s->mode == mode && s->n == n)
16576 return copy_rtx (s->rtl);
16578 s = (struct stack_local_entry *)
16579 ggc_alloc (sizeof (struct stack_local_entry));
16580 s->n = n;
16581 s->mode = mode;
16582 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16584 s->next = ix86_stack_locals;
16585 ix86_stack_locals = s;
16586 return s->rtl;
16589 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16591 static GTY(()) rtx ix86_tls_symbol;
16593 ix86_tls_get_addr (void)
16596 if (!ix86_tls_symbol)
16598 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16599 (TARGET_ANY_GNU_TLS
16600 && !TARGET_64BIT)
16601 ? "___tls_get_addr"
16602 : "__tls_get_addr");
16605 return ix86_tls_symbol;
16608 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16610 static GTY(()) rtx ix86_tls_module_base_symbol;
16612 ix86_tls_module_base (void)
16615 if (!ix86_tls_module_base_symbol)
16617 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16618 "_TLS_MODULE_BASE_");
16619 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16620 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16623 return ix86_tls_module_base_symbol;
16626 /* Calculate the length of the memory address in the instruction
16627 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16630 memory_address_length (rtx addr)
16632 struct ix86_address parts;
16633 rtx base, index, disp;
16634 int len;
16635 int ok;
16637 if (GET_CODE (addr) == PRE_DEC
16638 || GET_CODE (addr) == POST_INC
16639 || GET_CODE (addr) == PRE_MODIFY
16640 || GET_CODE (addr) == POST_MODIFY)
16641 return 0;
16643 ok = ix86_decompose_address (addr, &parts);
16644 gcc_assert (ok);
16646 if (parts.base && GET_CODE (parts.base) == SUBREG)
16647 parts.base = SUBREG_REG (parts.base);
16648 if (parts.index && GET_CODE (parts.index) == SUBREG)
16649 parts.index = SUBREG_REG (parts.index);
16651 base = parts.base;
16652 index = parts.index;
16653 disp = parts.disp;
16654 len = 0;
16656 /* Rule of thumb:
16657 - esp as the base always wants an index,
16658 - ebp as the base always wants a displacement. */
16660 /* Register Indirect. */
16661 if (base && !index && !disp)
16663 /* esp (for its index) and ebp (for its displacement) need
16664 the two-byte modrm form. */
16665 if (addr == stack_pointer_rtx
16666 || addr == arg_pointer_rtx
16667 || addr == frame_pointer_rtx
16668 || addr == hard_frame_pointer_rtx)
16669 len = 1;
16672 /* Direct Addressing. */
16673 else if (disp && !base && !index)
16674 len = 4;
16676 else
16678 /* Find the length of the displacement constant. */
16679 if (disp)
16681 if (base && satisfies_constraint_K (disp))
16682 len = 1;
16683 else
16684 len = 4;
16686 /* ebp always wants a displacement. */
16687 else if (base == hard_frame_pointer_rtx)
16688 len = 1;
16690 /* An index requires the two-byte modrm form.... */
16691 if (index
16692 /* ...like esp, which always wants an index. */
16693 || base == stack_pointer_rtx
16694 || base == arg_pointer_rtx
16695 || base == frame_pointer_rtx)
16696 len += 1;
16699 return len;
16702 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16703 is set, expect that insn have 8bit immediate alternative. */
16705 ix86_attr_length_immediate_default (rtx insn, int shortform)
16707 int len = 0;
16708 int i;
16709 extract_insn_cached (insn);
16710 for (i = recog_data.n_operands - 1; i >= 0; --i)
16711 if (CONSTANT_P (recog_data.operand[i]))
16713 gcc_assert (!len);
16714 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16715 len = 1;
16716 else
16718 switch (get_attr_mode (insn))
16720 case MODE_QI:
16721 len+=1;
16722 break;
16723 case MODE_HI:
16724 len+=2;
16725 break;
16726 case MODE_SI:
16727 len+=4;
16728 break;
16729 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16730 case MODE_DI:
16731 len+=4;
16732 break;
16733 default:
16734 fatal_insn ("unknown insn mode", insn);
16738 return len;
16740 /* Compute default value for "length_address" attribute. */
16742 ix86_attr_length_address_default (rtx insn)
16744 int i;
16746 if (get_attr_type (insn) == TYPE_LEA)
16748 rtx set = PATTERN (insn);
16750 if (GET_CODE (set) == PARALLEL)
16751 set = XVECEXP (set, 0, 0);
16753 gcc_assert (GET_CODE (set) == SET);
16755 return memory_address_length (SET_SRC (set));
16758 extract_insn_cached (insn);
16759 for (i = recog_data.n_operands - 1; i >= 0; --i)
16760 if (MEM_P (recog_data.operand[i]))
16762 return memory_address_length (XEXP (recog_data.operand[i], 0));
16763 break;
16765 return 0;
16768 /* Return the maximum number of instructions a cpu can issue. */
16770 static int
16771 ix86_issue_rate (void)
16773 switch (ix86_tune)
16775 case PROCESSOR_PENTIUM:
16776 case PROCESSOR_K6:
16777 return 2;
16779 case PROCESSOR_PENTIUMPRO:
16780 case PROCESSOR_PENTIUM4:
16781 case PROCESSOR_ATHLON:
16782 case PROCESSOR_K8:
16783 case PROCESSOR_AMDFAM10:
16784 case PROCESSOR_NOCONA:
16785 case PROCESSOR_GENERIC32:
16786 case PROCESSOR_GENERIC64:
16787 return 3;
16789 case PROCESSOR_CORE2:
16790 return 4;
16792 default:
16793 return 1;
16797 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16798 by DEP_INSN and nothing set by DEP_INSN. */
16800 static int
16801 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16803 rtx set, set2;
16805 /* Simplify the test for uninteresting insns. */
16806 if (insn_type != TYPE_SETCC
16807 && insn_type != TYPE_ICMOV
16808 && insn_type != TYPE_FCMOV
16809 && insn_type != TYPE_IBR)
16810 return 0;
16812 if ((set = single_set (dep_insn)) != 0)
16814 set = SET_DEST (set);
16815 set2 = NULL_RTX;
16817 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16818 && XVECLEN (PATTERN (dep_insn), 0) == 2
16819 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16820 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16822 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16823 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16825 else
16826 return 0;
16828 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16829 return 0;
16831 /* This test is true if the dependent insn reads the flags but
16832 not any other potentially set register. */
16833 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16834 return 0;
16836 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16837 return 0;
16839 return 1;
16842 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16843 address with operands set by DEP_INSN. */
16845 static int
16846 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16848 rtx addr;
16850 if (insn_type == TYPE_LEA
16851 && TARGET_PENTIUM)
16853 addr = PATTERN (insn);
16855 if (GET_CODE (addr) == PARALLEL)
16856 addr = XVECEXP (addr, 0, 0);
16858 gcc_assert (GET_CODE (addr) == SET);
16860 addr = SET_SRC (addr);
16862 else
16864 int i;
16865 extract_insn_cached (insn);
16866 for (i = recog_data.n_operands - 1; i >= 0; --i)
16867 if (MEM_P (recog_data.operand[i]))
16869 addr = XEXP (recog_data.operand[i], 0);
16870 goto found;
16872 return 0;
16873 found:;
16876 return modified_in_p (addr, dep_insn);
16879 static int
16880 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16882 enum attr_type insn_type, dep_insn_type;
16883 enum attr_memory memory;
16884 rtx set, set2;
16885 int dep_insn_code_number;
16887 /* Anti and output dependencies have zero cost on all CPUs. */
16888 if (REG_NOTE_KIND (link) != 0)
16889 return 0;
16891 dep_insn_code_number = recog_memoized (dep_insn);
16893 /* If we can't recognize the insns, we can't really do anything. */
16894 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16895 return cost;
16897 insn_type = get_attr_type (insn);
16898 dep_insn_type = get_attr_type (dep_insn);
16900 switch (ix86_tune)
16902 case PROCESSOR_PENTIUM:
16903 /* Address Generation Interlock adds a cycle of latency. */
16904 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16905 cost += 1;
16907 /* ??? Compares pair with jump/setcc. */
16908 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16909 cost = 0;
16911 /* Floating point stores require value to be ready one cycle earlier. */
16912 if (insn_type == TYPE_FMOV
16913 && get_attr_memory (insn) == MEMORY_STORE
16914 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16915 cost += 1;
16916 break;
16918 case PROCESSOR_PENTIUMPRO:
16919 memory = get_attr_memory (insn);
16921 /* INT->FP conversion is expensive. */
16922 if (get_attr_fp_int_src (dep_insn))
16923 cost += 5;
16925 /* There is one cycle extra latency between an FP op and a store. */
16926 if (insn_type == TYPE_FMOV
16927 && (set = single_set (dep_insn)) != NULL_RTX
16928 && (set2 = single_set (insn)) != NULL_RTX
16929 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16930 && MEM_P (SET_DEST (set2)))
16931 cost += 1;
16933 /* Show ability of reorder buffer to hide latency of load by executing
16934 in parallel with previous instruction in case
16935 previous instruction is not needed to compute the address. */
16936 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16937 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16939 /* Claim moves to take one cycle, as core can issue one load
16940 at time and the next load can start cycle later. */
16941 if (dep_insn_type == TYPE_IMOV
16942 || dep_insn_type == TYPE_FMOV)
16943 cost = 1;
16944 else if (cost > 1)
16945 cost--;
16947 break;
16949 case PROCESSOR_K6:
16950 memory = get_attr_memory (insn);
16952 /* The esp dependency is resolved before the instruction is really
16953 finished. */
16954 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16955 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16956 return 1;
16958 /* INT->FP conversion is expensive. */
16959 if (get_attr_fp_int_src (dep_insn))
16960 cost += 5;
16962 /* Show ability of reorder buffer to hide latency of load by executing
16963 in parallel with previous instruction in case
16964 previous instruction is not needed to compute the address. */
16965 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16966 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16968 /* Claim moves to take one cycle, as core can issue one load
16969 at time and the next load can start cycle later. */
16970 if (dep_insn_type == TYPE_IMOV
16971 || dep_insn_type == TYPE_FMOV)
16972 cost = 1;
16973 else if (cost > 2)
16974 cost -= 2;
16975 else
16976 cost = 1;
16978 break;
16980 case PROCESSOR_ATHLON:
16981 case PROCESSOR_K8:
16982 case PROCESSOR_AMDFAM10:
16983 case PROCESSOR_GENERIC32:
16984 case PROCESSOR_GENERIC64:
16985 memory = get_attr_memory (insn);
16987 /* Show ability of reorder buffer to hide latency of load by executing
16988 in parallel with previous instruction in case
16989 previous instruction is not needed to compute the address. */
16990 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16991 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16993 enum attr_unit unit = get_attr_unit (insn);
16994 int loadcost = 3;
16996 /* Because of the difference between the length of integer and
16997 floating unit pipeline preparation stages, the memory operands
16998 for floating point are cheaper.
17000 ??? For Athlon it the difference is most probably 2. */
17001 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
17002 loadcost = 3;
17003 else
17004 loadcost = TARGET_ATHLON ? 2 : 0;
17006 if (cost >= loadcost)
17007 cost -= loadcost;
17008 else
17009 cost = 0;
17012 default:
17013 break;
17016 return cost;
17019 /* How many alternative schedules to try. This should be as wide as the
17020 scheduling freedom in the DFA, but no wider. Making this value too
17021 large results extra work for the scheduler. */
17023 static int
17024 ia32_multipass_dfa_lookahead (void)
17026 switch (ix86_tune)
17028 case PROCESSOR_PENTIUM:
17029 return 2;
17031 case PROCESSOR_PENTIUMPRO:
17032 case PROCESSOR_K6:
17033 return 1;
17035 default:
17036 return 0;
17041 /* Compute the alignment given to a constant that is being placed in memory.
17042 EXP is the constant and ALIGN is the alignment that the object would
17043 ordinarily have.
17044 The value of this function is used instead of that alignment to align
17045 the object. */
17048 ix86_constant_alignment (tree exp, int align)
17050 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17051 || TREE_CODE (exp) == INTEGER_CST)
17053 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
17054 return 64;
17055 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
17056 return 128;
17058 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17059 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17060 return BITS_PER_WORD;
17062 return align;
17065 /* Compute the alignment for a static variable.
17066 TYPE is the data type, and ALIGN is the alignment that
17067 the object would ordinarily have. The value of this function is used
17068 instead of that alignment to align the object. */
17071 ix86_data_alignment (tree type, int align)
17073 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
17075 if (AGGREGATE_TYPE_P (type)
17076 && TYPE_SIZE (type)
17077 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17078 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
17079 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
17080 && align < max_align)
17081 align = max_align;
17083 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17084 to 16byte boundary. */
17085 if (TARGET_64BIT)
17087 if (AGGREGATE_TYPE_P (type)
17088 && TYPE_SIZE (type)
17089 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17090 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
17091 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17092 return 128;
17095 if (TREE_CODE (type) == ARRAY_TYPE)
17097 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17098 return 64;
17099 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17100 return 128;
17102 else if (TREE_CODE (type) == COMPLEX_TYPE)
17105 if (TYPE_MODE (type) == DCmode && align < 64)
17106 return 64;
17107 if ((TYPE_MODE (type) == XCmode
17108 || TYPE_MODE (type) == TCmode) && align < 128)
17109 return 128;
17111 else if ((TREE_CODE (type) == RECORD_TYPE
17112 || TREE_CODE (type) == UNION_TYPE
17113 || TREE_CODE (type) == QUAL_UNION_TYPE)
17114 && TYPE_FIELDS (type))
17116 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17117 return 64;
17118 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17119 return 128;
17121 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17122 || TREE_CODE (type) == INTEGER_TYPE)
17124 if (TYPE_MODE (type) == DFmode && align < 64)
17125 return 64;
17126 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17127 return 128;
17130 return align;
17133 /* Compute the alignment for a local variable or a stack slot. TYPE is
17134 the data type, MODE is the widest mode available and ALIGN is the
17135 alignment that the object would ordinarily have. The value of this
17136 macro is used instead of that alignment to align the object. */
17138 unsigned int
17139 ix86_local_alignment (tree type, enum machine_mode mode,
17140 unsigned int align)
17142 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17143 register in MODE. We will return the largest alignment of XF
17144 and DF. */
17145 if (!type)
17147 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17148 align = GET_MODE_ALIGNMENT (DFmode);
17149 return align;
17152 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17153 to 16byte boundary. */
17154 if (TARGET_64BIT)
17156 if (AGGREGATE_TYPE_P (type)
17157 && TYPE_SIZE (type)
17158 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17159 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
17160 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17161 return 128;
17163 if (TREE_CODE (type) == ARRAY_TYPE)
17165 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17166 return 64;
17167 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17168 return 128;
17170 else if (TREE_CODE (type) == COMPLEX_TYPE)
17172 if (TYPE_MODE (type) == DCmode && align < 64)
17173 return 64;
17174 if ((TYPE_MODE (type) == XCmode
17175 || TYPE_MODE (type) == TCmode) && align < 128)
17176 return 128;
17178 else if ((TREE_CODE (type) == RECORD_TYPE
17179 || TREE_CODE (type) == UNION_TYPE
17180 || TREE_CODE (type) == QUAL_UNION_TYPE)
17181 && TYPE_FIELDS (type))
17183 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17184 return 64;
17185 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17186 return 128;
17188 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17189 || TREE_CODE (type) == INTEGER_TYPE)
17192 if (TYPE_MODE (type) == DFmode && align < 64)
17193 return 64;
17194 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17195 return 128;
17197 return align;
17200 /* Emit RTL insns to initialize the variable parts of a trampoline.
17201 FNADDR is an RTX for the address of the function's pure code.
17202 CXT is an RTX for the static chain value for the function. */
17203 void
17204 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17206 if (!TARGET_64BIT)
17208 /* Compute offset from the end of the jmp to the target function. */
17209 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17210 plus_constant (tramp, 10),
17211 NULL_RTX, 1, OPTAB_DIRECT);
17212 emit_move_insn (gen_rtx_MEM (QImode, tramp),
17213 gen_int_mode (0xb9, QImode));
17214 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17215 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17216 gen_int_mode (0xe9, QImode));
17217 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17219 else
17221 int offset = 0;
17222 /* Try to load address using shorter movl instead of movabs.
17223 We may want to support movq for kernel mode, but kernel does not use
17224 trampolines at the moment. */
17225 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17227 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17228 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17229 gen_int_mode (0xbb41, HImode));
17230 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17231 gen_lowpart (SImode, fnaddr));
17232 offset += 6;
17234 else
17236 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17237 gen_int_mode (0xbb49, HImode));
17238 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17239 fnaddr);
17240 offset += 10;
17242 /* Load static chain using movabs to r10. */
17243 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17244 gen_int_mode (0xba49, HImode));
17245 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17246 cxt);
17247 offset += 10;
17248 /* Jump to the r11 */
17249 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17250 gen_int_mode (0xff49, HImode));
17251 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17252 gen_int_mode (0xe3, QImode));
17253 offset += 3;
17254 gcc_assert (offset <= TRAMPOLINE_SIZE);
17257 #ifdef ENABLE_EXECUTE_STACK
17258 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17259 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17260 #endif
17263 /* Codes for all the SSE/MMX builtins. */
17264 enum ix86_builtins
17266 IX86_BUILTIN_ADDPS,
17267 IX86_BUILTIN_ADDSS,
17268 IX86_BUILTIN_DIVPS,
17269 IX86_BUILTIN_DIVSS,
17270 IX86_BUILTIN_MULPS,
17271 IX86_BUILTIN_MULSS,
17272 IX86_BUILTIN_SUBPS,
17273 IX86_BUILTIN_SUBSS,
17275 IX86_BUILTIN_CMPEQPS,
17276 IX86_BUILTIN_CMPLTPS,
17277 IX86_BUILTIN_CMPLEPS,
17278 IX86_BUILTIN_CMPGTPS,
17279 IX86_BUILTIN_CMPGEPS,
17280 IX86_BUILTIN_CMPNEQPS,
17281 IX86_BUILTIN_CMPNLTPS,
17282 IX86_BUILTIN_CMPNLEPS,
17283 IX86_BUILTIN_CMPNGTPS,
17284 IX86_BUILTIN_CMPNGEPS,
17285 IX86_BUILTIN_CMPORDPS,
17286 IX86_BUILTIN_CMPUNORDPS,
17287 IX86_BUILTIN_CMPEQSS,
17288 IX86_BUILTIN_CMPLTSS,
17289 IX86_BUILTIN_CMPLESS,
17290 IX86_BUILTIN_CMPNEQSS,
17291 IX86_BUILTIN_CMPNLTSS,
17292 IX86_BUILTIN_CMPNLESS,
17293 IX86_BUILTIN_CMPNGTSS,
17294 IX86_BUILTIN_CMPNGESS,
17295 IX86_BUILTIN_CMPORDSS,
17296 IX86_BUILTIN_CMPUNORDSS,
17298 IX86_BUILTIN_COMIEQSS,
17299 IX86_BUILTIN_COMILTSS,
17300 IX86_BUILTIN_COMILESS,
17301 IX86_BUILTIN_COMIGTSS,
17302 IX86_BUILTIN_COMIGESS,
17303 IX86_BUILTIN_COMINEQSS,
17304 IX86_BUILTIN_UCOMIEQSS,
17305 IX86_BUILTIN_UCOMILTSS,
17306 IX86_BUILTIN_UCOMILESS,
17307 IX86_BUILTIN_UCOMIGTSS,
17308 IX86_BUILTIN_UCOMIGESS,
17309 IX86_BUILTIN_UCOMINEQSS,
17311 IX86_BUILTIN_CVTPI2PS,
17312 IX86_BUILTIN_CVTPS2PI,
17313 IX86_BUILTIN_CVTSI2SS,
17314 IX86_BUILTIN_CVTSI642SS,
17315 IX86_BUILTIN_CVTSS2SI,
17316 IX86_BUILTIN_CVTSS2SI64,
17317 IX86_BUILTIN_CVTTPS2PI,
17318 IX86_BUILTIN_CVTTSS2SI,
17319 IX86_BUILTIN_CVTTSS2SI64,
17321 IX86_BUILTIN_MAXPS,
17322 IX86_BUILTIN_MAXSS,
17323 IX86_BUILTIN_MINPS,
17324 IX86_BUILTIN_MINSS,
17326 IX86_BUILTIN_LOADUPS,
17327 IX86_BUILTIN_STOREUPS,
17328 IX86_BUILTIN_MOVSS,
17330 IX86_BUILTIN_MOVHLPS,
17331 IX86_BUILTIN_MOVLHPS,
17332 IX86_BUILTIN_LOADHPS,
17333 IX86_BUILTIN_LOADLPS,
17334 IX86_BUILTIN_STOREHPS,
17335 IX86_BUILTIN_STORELPS,
17337 IX86_BUILTIN_MASKMOVQ,
17338 IX86_BUILTIN_MOVMSKPS,
17339 IX86_BUILTIN_PMOVMSKB,
17341 IX86_BUILTIN_MOVNTPS,
17342 IX86_BUILTIN_MOVNTQ,
17344 IX86_BUILTIN_LOADDQU,
17345 IX86_BUILTIN_STOREDQU,
17347 IX86_BUILTIN_PACKSSWB,
17348 IX86_BUILTIN_PACKSSDW,
17349 IX86_BUILTIN_PACKUSWB,
17351 IX86_BUILTIN_PADDB,
17352 IX86_BUILTIN_PADDW,
17353 IX86_BUILTIN_PADDD,
17354 IX86_BUILTIN_PADDQ,
17355 IX86_BUILTIN_PADDSB,
17356 IX86_BUILTIN_PADDSW,
17357 IX86_BUILTIN_PADDUSB,
17358 IX86_BUILTIN_PADDUSW,
17359 IX86_BUILTIN_PSUBB,
17360 IX86_BUILTIN_PSUBW,
17361 IX86_BUILTIN_PSUBD,
17362 IX86_BUILTIN_PSUBQ,
17363 IX86_BUILTIN_PSUBSB,
17364 IX86_BUILTIN_PSUBSW,
17365 IX86_BUILTIN_PSUBUSB,
17366 IX86_BUILTIN_PSUBUSW,
17368 IX86_BUILTIN_PAND,
17369 IX86_BUILTIN_PANDN,
17370 IX86_BUILTIN_POR,
17371 IX86_BUILTIN_PXOR,
17373 IX86_BUILTIN_PAVGB,
17374 IX86_BUILTIN_PAVGW,
17376 IX86_BUILTIN_PCMPEQB,
17377 IX86_BUILTIN_PCMPEQW,
17378 IX86_BUILTIN_PCMPEQD,
17379 IX86_BUILTIN_PCMPGTB,
17380 IX86_BUILTIN_PCMPGTW,
17381 IX86_BUILTIN_PCMPGTD,
17383 IX86_BUILTIN_PMADDWD,
17385 IX86_BUILTIN_PMAXSW,
17386 IX86_BUILTIN_PMAXUB,
17387 IX86_BUILTIN_PMINSW,
17388 IX86_BUILTIN_PMINUB,
17390 IX86_BUILTIN_PMULHUW,
17391 IX86_BUILTIN_PMULHW,
17392 IX86_BUILTIN_PMULLW,
17394 IX86_BUILTIN_PSADBW,
17395 IX86_BUILTIN_PSHUFW,
17397 IX86_BUILTIN_PSLLW,
17398 IX86_BUILTIN_PSLLD,
17399 IX86_BUILTIN_PSLLQ,
17400 IX86_BUILTIN_PSRAW,
17401 IX86_BUILTIN_PSRAD,
17402 IX86_BUILTIN_PSRLW,
17403 IX86_BUILTIN_PSRLD,
17404 IX86_BUILTIN_PSRLQ,
17405 IX86_BUILTIN_PSLLWI,
17406 IX86_BUILTIN_PSLLDI,
17407 IX86_BUILTIN_PSLLQI,
17408 IX86_BUILTIN_PSRAWI,
17409 IX86_BUILTIN_PSRADI,
17410 IX86_BUILTIN_PSRLWI,
17411 IX86_BUILTIN_PSRLDI,
17412 IX86_BUILTIN_PSRLQI,
17414 IX86_BUILTIN_PUNPCKHBW,
17415 IX86_BUILTIN_PUNPCKHWD,
17416 IX86_BUILTIN_PUNPCKHDQ,
17417 IX86_BUILTIN_PUNPCKLBW,
17418 IX86_BUILTIN_PUNPCKLWD,
17419 IX86_BUILTIN_PUNPCKLDQ,
17421 IX86_BUILTIN_SHUFPS,
17423 IX86_BUILTIN_RCPPS,
17424 IX86_BUILTIN_RCPSS,
17425 IX86_BUILTIN_RSQRTPS,
17426 IX86_BUILTIN_RSQRTPS_NR,
17427 IX86_BUILTIN_RSQRTSS,
17428 IX86_BUILTIN_RSQRTF,
17429 IX86_BUILTIN_SQRTPS,
17430 IX86_BUILTIN_SQRTPS_NR,
17431 IX86_BUILTIN_SQRTSS,
17433 IX86_BUILTIN_UNPCKHPS,
17434 IX86_BUILTIN_UNPCKLPS,
17436 IX86_BUILTIN_ANDPS,
17437 IX86_BUILTIN_ANDNPS,
17438 IX86_BUILTIN_ORPS,
17439 IX86_BUILTIN_XORPS,
17441 IX86_BUILTIN_EMMS,
17442 IX86_BUILTIN_LDMXCSR,
17443 IX86_BUILTIN_STMXCSR,
17444 IX86_BUILTIN_SFENCE,
17446 /* 3DNow! Original */
17447 IX86_BUILTIN_FEMMS,
17448 IX86_BUILTIN_PAVGUSB,
17449 IX86_BUILTIN_PF2ID,
17450 IX86_BUILTIN_PFACC,
17451 IX86_BUILTIN_PFADD,
17452 IX86_BUILTIN_PFCMPEQ,
17453 IX86_BUILTIN_PFCMPGE,
17454 IX86_BUILTIN_PFCMPGT,
17455 IX86_BUILTIN_PFMAX,
17456 IX86_BUILTIN_PFMIN,
17457 IX86_BUILTIN_PFMUL,
17458 IX86_BUILTIN_PFRCP,
17459 IX86_BUILTIN_PFRCPIT1,
17460 IX86_BUILTIN_PFRCPIT2,
17461 IX86_BUILTIN_PFRSQIT1,
17462 IX86_BUILTIN_PFRSQRT,
17463 IX86_BUILTIN_PFSUB,
17464 IX86_BUILTIN_PFSUBR,
17465 IX86_BUILTIN_PI2FD,
17466 IX86_BUILTIN_PMULHRW,
17468 /* 3DNow! Athlon Extensions */
17469 IX86_BUILTIN_PF2IW,
17470 IX86_BUILTIN_PFNACC,
17471 IX86_BUILTIN_PFPNACC,
17472 IX86_BUILTIN_PI2FW,
17473 IX86_BUILTIN_PSWAPDSI,
17474 IX86_BUILTIN_PSWAPDSF,
17476 /* SSE2 */
17477 IX86_BUILTIN_ADDPD,
17478 IX86_BUILTIN_ADDSD,
17479 IX86_BUILTIN_DIVPD,
17480 IX86_BUILTIN_DIVSD,
17481 IX86_BUILTIN_MULPD,
17482 IX86_BUILTIN_MULSD,
17483 IX86_BUILTIN_SUBPD,
17484 IX86_BUILTIN_SUBSD,
17486 IX86_BUILTIN_CMPEQPD,
17487 IX86_BUILTIN_CMPLTPD,
17488 IX86_BUILTIN_CMPLEPD,
17489 IX86_BUILTIN_CMPGTPD,
17490 IX86_BUILTIN_CMPGEPD,
17491 IX86_BUILTIN_CMPNEQPD,
17492 IX86_BUILTIN_CMPNLTPD,
17493 IX86_BUILTIN_CMPNLEPD,
17494 IX86_BUILTIN_CMPNGTPD,
17495 IX86_BUILTIN_CMPNGEPD,
17496 IX86_BUILTIN_CMPORDPD,
17497 IX86_BUILTIN_CMPUNORDPD,
17498 IX86_BUILTIN_CMPEQSD,
17499 IX86_BUILTIN_CMPLTSD,
17500 IX86_BUILTIN_CMPLESD,
17501 IX86_BUILTIN_CMPNEQSD,
17502 IX86_BUILTIN_CMPNLTSD,
17503 IX86_BUILTIN_CMPNLESD,
17504 IX86_BUILTIN_CMPORDSD,
17505 IX86_BUILTIN_CMPUNORDSD,
17507 IX86_BUILTIN_COMIEQSD,
17508 IX86_BUILTIN_COMILTSD,
17509 IX86_BUILTIN_COMILESD,
17510 IX86_BUILTIN_COMIGTSD,
17511 IX86_BUILTIN_COMIGESD,
17512 IX86_BUILTIN_COMINEQSD,
17513 IX86_BUILTIN_UCOMIEQSD,
17514 IX86_BUILTIN_UCOMILTSD,
17515 IX86_BUILTIN_UCOMILESD,
17516 IX86_BUILTIN_UCOMIGTSD,
17517 IX86_BUILTIN_UCOMIGESD,
17518 IX86_BUILTIN_UCOMINEQSD,
17520 IX86_BUILTIN_MAXPD,
17521 IX86_BUILTIN_MAXSD,
17522 IX86_BUILTIN_MINPD,
17523 IX86_BUILTIN_MINSD,
17525 IX86_BUILTIN_ANDPD,
17526 IX86_BUILTIN_ANDNPD,
17527 IX86_BUILTIN_ORPD,
17528 IX86_BUILTIN_XORPD,
17530 IX86_BUILTIN_SQRTPD,
17531 IX86_BUILTIN_SQRTSD,
17533 IX86_BUILTIN_UNPCKHPD,
17534 IX86_BUILTIN_UNPCKLPD,
17536 IX86_BUILTIN_SHUFPD,
17538 IX86_BUILTIN_LOADUPD,
17539 IX86_BUILTIN_STOREUPD,
17540 IX86_BUILTIN_MOVSD,
17542 IX86_BUILTIN_LOADHPD,
17543 IX86_BUILTIN_LOADLPD,
17545 IX86_BUILTIN_CVTDQ2PD,
17546 IX86_BUILTIN_CVTDQ2PS,
17548 IX86_BUILTIN_CVTPD2DQ,
17549 IX86_BUILTIN_CVTPD2PI,
17550 IX86_BUILTIN_CVTPD2PS,
17551 IX86_BUILTIN_CVTTPD2DQ,
17552 IX86_BUILTIN_CVTTPD2PI,
17554 IX86_BUILTIN_CVTPI2PD,
17555 IX86_BUILTIN_CVTSI2SD,
17556 IX86_BUILTIN_CVTSI642SD,
17558 IX86_BUILTIN_CVTSD2SI,
17559 IX86_BUILTIN_CVTSD2SI64,
17560 IX86_BUILTIN_CVTSD2SS,
17561 IX86_BUILTIN_CVTSS2SD,
17562 IX86_BUILTIN_CVTTSD2SI,
17563 IX86_BUILTIN_CVTTSD2SI64,
17565 IX86_BUILTIN_CVTPS2DQ,
17566 IX86_BUILTIN_CVTPS2PD,
17567 IX86_BUILTIN_CVTTPS2DQ,
17569 IX86_BUILTIN_MOVNTI,
17570 IX86_BUILTIN_MOVNTPD,
17571 IX86_BUILTIN_MOVNTDQ,
17573 /* SSE2 MMX */
17574 IX86_BUILTIN_MASKMOVDQU,
17575 IX86_BUILTIN_MOVMSKPD,
17576 IX86_BUILTIN_PMOVMSKB128,
17578 IX86_BUILTIN_PACKSSWB128,
17579 IX86_BUILTIN_PACKSSDW128,
17580 IX86_BUILTIN_PACKUSWB128,
17582 IX86_BUILTIN_PADDB128,
17583 IX86_BUILTIN_PADDW128,
17584 IX86_BUILTIN_PADDD128,
17585 IX86_BUILTIN_PADDQ128,
17586 IX86_BUILTIN_PADDSB128,
17587 IX86_BUILTIN_PADDSW128,
17588 IX86_BUILTIN_PADDUSB128,
17589 IX86_BUILTIN_PADDUSW128,
17590 IX86_BUILTIN_PSUBB128,
17591 IX86_BUILTIN_PSUBW128,
17592 IX86_BUILTIN_PSUBD128,
17593 IX86_BUILTIN_PSUBQ128,
17594 IX86_BUILTIN_PSUBSB128,
17595 IX86_BUILTIN_PSUBSW128,
17596 IX86_BUILTIN_PSUBUSB128,
17597 IX86_BUILTIN_PSUBUSW128,
17599 IX86_BUILTIN_PAND128,
17600 IX86_BUILTIN_PANDN128,
17601 IX86_BUILTIN_POR128,
17602 IX86_BUILTIN_PXOR128,
17604 IX86_BUILTIN_PAVGB128,
17605 IX86_BUILTIN_PAVGW128,
17607 IX86_BUILTIN_PCMPEQB128,
17608 IX86_BUILTIN_PCMPEQW128,
17609 IX86_BUILTIN_PCMPEQD128,
17610 IX86_BUILTIN_PCMPGTB128,
17611 IX86_BUILTIN_PCMPGTW128,
17612 IX86_BUILTIN_PCMPGTD128,
17614 IX86_BUILTIN_PMADDWD128,
17616 IX86_BUILTIN_PMAXSW128,
17617 IX86_BUILTIN_PMAXUB128,
17618 IX86_BUILTIN_PMINSW128,
17619 IX86_BUILTIN_PMINUB128,
17621 IX86_BUILTIN_PMULUDQ,
17622 IX86_BUILTIN_PMULUDQ128,
17623 IX86_BUILTIN_PMULHUW128,
17624 IX86_BUILTIN_PMULHW128,
17625 IX86_BUILTIN_PMULLW128,
17627 IX86_BUILTIN_PSADBW128,
17628 IX86_BUILTIN_PSHUFHW,
17629 IX86_BUILTIN_PSHUFLW,
17630 IX86_BUILTIN_PSHUFD,
17632 IX86_BUILTIN_PSLLDQI128,
17633 IX86_BUILTIN_PSLLWI128,
17634 IX86_BUILTIN_PSLLDI128,
17635 IX86_BUILTIN_PSLLQI128,
17636 IX86_BUILTIN_PSRAWI128,
17637 IX86_BUILTIN_PSRADI128,
17638 IX86_BUILTIN_PSRLDQI128,
17639 IX86_BUILTIN_PSRLWI128,
17640 IX86_BUILTIN_PSRLDI128,
17641 IX86_BUILTIN_PSRLQI128,
17643 IX86_BUILTIN_PSLLDQ128,
17644 IX86_BUILTIN_PSLLW128,
17645 IX86_BUILTIN_PSLLD128,
17646 IX86_BUILTIN_PSLLQ128,
17647 IX86_BUILTIN_PSRAW128,
17648 IX86_BUILTIN_PSRAD128,
17649 IX86_BUILTIN_PSRLW128,
17650 IX86_BUILTIN_PSRLD128,
17651 IX86_BUILTIN_PSRLQ128,
17653 IX86_BUILTIN_PUNPCKHBW128,
17654 IX86_BUILTIN_PUNPCKHWD128,
17655 IX86_BUILTIN_PUNPCKHDQ128,
17656 IX86_BUILTIN_PUNPCKHQDQ128,
17657 IX86_BUILTIN_PUNPCKLBW128,
17658 IX86_BUILTIN_PUNPCKLWD128,
17659 IX86_BUILTIN_PUNPCKLDQ128,
17660 IX86_BUILTIN_PUNPCKLQDQ128,
17662 IX86_BUILTIN_CLFLUSH,
17663 IX86_BUILTIN_MFENCE,
17664 IX86_BUILTIN_LFENCE,
17666 /* SSE3. */
17667 IX86_BUILTIN_ADDSUBPS,
17668 IX86_BUILTIN_HADDPS,
17669 IX86_BUILTIN_HSUBPS,
17670 IX86_BUILTIN_MOVSHDUP,
17671 IX86_BUILTIN_MOVSLDUP,
17672 IX86_BUILTIN_ADDSUBPD,
17673 IX86_BUILTIN_HADDPD,
17674 IX86_BUILTIN_HSUBPD,
17675 IX86_BUILTIN_LDDQU,
17677 IX86_BUILTIN_MONITOR,
17678 IX86_BUILTIN_MWAIT,
17680 /* SSSE3. */
17681 IX86_BUILTIN_PHADDW,
17682 IX86_BUILTIN_PHADDD,
17683 IX86_BUILTIN_PHADDSW,
17684 IX86_BUILTIN_PHSUBW,
17685 IX86_BUILTIN_PHSUBD,
17686 IX86_BUILTIN_PHSUBSW,
17687 IX86_BUILTIN_PMADDUBSW,
17688 IX86_BUILTIN_PMULHRSW,
17689 IX86_BUILTIN_PSHUFB,
17690 IX86_BUILTIN_PSIGNB,
17691 IX86_BUILTIN_PSIGNW,
17692 IX86_BUILTIN_PSIGND,
17693 IX86_BUILTIN_PALIGNR,
17694 IX86_BUILTIN_PABSB,
17695 IX86_BUILTIN_PABSW,
17696 IX86_BUILTIN_PABSD,
17698 IX86_BUILTIN_PHADDW128,
17699 IX86_BUILTIN_PHADDD128,
17700 IX86_BUILTIN_PHADDSW128,
17701 IX86_BUILTIN_PHSUBW128,
17702 IX86_BUILTIN_PHSUBD128,
17703 IX86_BUILTIN_PHSUBSW128,
17704 IX86_BUILTIN_PMADDUBSW128,
17705 IX86_BUILTIN_PMULHRSW128,
17706 IX86_BUILTIN_PSHUFB128,
17707 IX86_BUILTIN_PSIGNB128,
17708 IX86_BUILTIN_PSIGNW128,
17709 IX86_BUILTIN_PSIGND128,
17710 IX86_BUILTIN_PALIGNR128,
17711 IX86_BUILTIN_PABSB128,
17712 IX86_BUILTIN_PABSW128,
17713 IX86_BUILTIN_PABSD128,
17715 /* AMDFAM10 - SSE4A New Instructions. */
17716 IX86_BUILTIN_MOVNTSD,
17717 IX86_BUILTIN_MOVNTSS,
17718 IX86_BUILTIN_EXTRQI,
17719 IX86_BUILTIN_EXTRQ,
17720 IX86_BUILTIN_INSERTQI,
17721 IX86_BUILTIN_INSERTQ,
17723 /* SSE4.1. */
17724 IX86_BUILTIN_BLENDPD,
17725 IX86_BUILTIN_BLENDPS,
17726 IX86_BUILTIN_BLENDVPD,
17727 IX86_BUILTIN_BLENDVPS,
17728 IX86_BUILTIN_PBLENDVB128,
17729 IX86_BUILTIN_PBLENDW128,
17731 IX86_BUILTIN_DPPD,
17732 IX86_BUILTIN_DPPS,
17734 IX86_BUILTIN_INSERTPS128,
17736 IX86_BUILTIN_MOVNTDQA,
17737 IX86_BUILTIN_MPSADBW128,
17738 IX86_BUILTIN_PACKUSDW128,
17739 IX86_BUILTIN_PCMPEQQ,
17740 IX86_BUILTIN_PHMINPOSUW128,
17742 IX86_BUILTIN_PMAXSB128,
17743 IX86_BUILTIN_PMAXSD128,
17744 IX86_BUILTIN_PMAXUD128,
17745 IX86_BUILTIN_PMAXUW128,
17747 IX86_BUILTIN_PMINSB128,
17748 IX86_BUILTIN_PMINSD128,
17749 IX86_BUILTIN_PMINUD128,
17750 IX86_BUILTIN_PMINUW128,
17752 IX86_BUILTIN_PMOVSXBW128,
17753 IX86_BUILTIN_PMOVSXBD128,
17754 IX86_BUILTIN_PMOVSXBQ128,
17755 IX86_BUILTIN_PMOVSXWD128,
17756 IX86_BUILTIN_PMOVSXWQ128,
17757 IX86_BUILTIN_PMOVSXDQ128,
17759 IX86_BUILTIN_PMOVZXBW128,
17760 IX86_BUILTIN_PMOVZXBD128,
17761 IX86_BUILTIN_PMOVZXBQ128,
17762 IX86_BUILTIN_PMOVZXWD128,
17763 IX86_BUILTIN_PMOVZXWQ128,
17764 IX86_BUILTIN_PMOVZXDQ128,
17766 IX86_BUILTIN_PMULDQ128,
17767 IX86_BUILTIN_PMULLD128,
17769 IX86_BUILTIN_ROUNDPD,
17770 IX86_BUILTIN_ROUNDPS,
17771 IX86_BUILTIN_ROUNDSD,
17772 IX86_BUILTIN_ROUNDSS,
17774 IX86_BUILTIN_PTESTZ,
17775 IX86_BUILTIN_PTESTC,
17776 IX86_BUILTIN_PTESTNZC,
17778 IX86_BUILTIN_VEC_INIT_V2SI,
17779 IX86_BUILTIN_VEC_INIT_V4HI,
17780 IX86_BUILTIN_VEC_INIT_V8QI,
17781 IX86_BUILTIN_VEC_EXT_V2DF,
17782 IX86_BUILTIN_VEC_EXT_V2DI,
17783 IX86_BUILTIN_VEC_EXT_V4SF,
17784 IX86_BUILTIN_VEC_EXT_V4SI,
17785 IX86_BUILTIN_VEC_EXT_V8HI,
17786 IX86_BUILTIN_VEC_EXT_V2SI,
17787 IX86_BUILTIN_VEC_EXT_V4HI,
17788 IX86_BUILTIN_VEC_EXT_V16QI,
17789 IX86_BUILTIN_VEC_SET_V2DI,
17790 IX86_BUILTIN_VEC_SET_V4SF,
17791 IX86_BUILTIN_VEC_SET_V4SI,
17792 IX86_BUILTIN_VEC_SET_V8HI,
17793 IX86_BUILTIN_VEC_SET_V4HI,
17794 IX86_BUILTIN_VEC_SET_V16QI,
17796 IX86_BUILTIN_VEC_PACK_SFIX,
17798 /* SSE4.2. */
17799 IX86_BUILTIN_CRC32QI,
17800 IX86_BUILTIN_CRC32HI,
17801 IX86_BUILTIN_CRC32SI,
17802 IX86_BUILTIN_CRC32DI,
17804 IX86_BUILTIN_PCMPESTRI128,
17805 IX86_BUILTIN_PCMPESTRM128,
17806 IX86_BUILTIN_PCMPESTRA128,
17807 IX86_BUILTIN_PCMPESTRC128,
17808 IX86_BUILTIN_PCMPESTRO128,
17809 IX86_BUILTIN_PCMPESTRS128,
17810 IX86_BUILTIN_PCMPESTRZ128,
17811 IX86_BUILTIN_PCMPISTRI128,
17812 IX86_BUILTIN_PCMPISTRM128,
17813 IX86_BUILTIN_PCMPISTRA128,
17814 IX86_BUILTIN_PCMPISTRC128,
17815 IX86_BUILTIN_PCMPISTRO128,
17816 IX86_BUILTIN_PCMPISTRS128,
17817 IX86_BUILTIN_PCMPISTRZ128,
17819 IX86_BUILTIN_PCMPGTQ,
17821 /* AES instructions */
17822 IX86_BUILTIN_AESENC128,
17823 IX86_BUILTIN_AESENCLAST128,
17824 IX86_BUILTIN_AESDEC128,
17825 IX86_BUILTIN_AESDECLAST128,
17826 IX86_BUILTIN_AESIMC128,
17827 IX86_BUILTIN_AESKEYGENASSIST128,
17829 /* PCLMUL instruction */
17830 IX86_BUILTIN_PCLMULQDQ128,
17832 /* TFmode support builtins. */
17833 IX86_BUILTIN_INFQ,
17834 IX86_BUILTIN_FABSQ,
17835 IX86_BUILTIN_COPYSIGNQ,
17837 /* SSE5 instructions */
17838 IX86_BUILTIN_FMADDSS,
17839 IX86_BUILTIN_FMADDSD,
17840 IX86_BUILTIN_FMADDPS,
17841 IX86_BUILTIN_FMADDPD,
17842 IX86_BUILTIN_FMSUBSS,
17843 IX86_BUILTIN_FMSUBSD,
17844 IX86_BUILTIN_FMSUBPS,
17845 IX86_BUILTIN_FMSUBPD,
17846 IX86_BUILTIN_FNMADDSS,
17847 IX86_BUILTIN_FNMADDSD,
17848 IX86_BUILTIN_FNMADDPS,
17849 IX86_BUILTIN_FNMADDPD,
17850 IX86_BUILTIN_FNMSUBSS,
17851 IX86_BUILTIN_FNMSUBSD,
17852 IX86_BUILTIN_FNMSUBPS,
17853 IX86_BUILTIN_FNMSUBPD,
17854 IX86_BUILTIN_PCMOV_V2DI,
17855 IX86_BUILTIN_PCMOV_V4SI,
17856 IX86_BUILTIN_PCMOV_V8HI,
17857 IX86_BUILTIN_PCMOV_V16QI,
17858 IX86_BUILTIN_PCMOV_V4SF,
17859 IX86_BUILTIN_PCMOV_V2DF,
17860 IX86_BUILTIN_PPERM,
17861 IX86_BUILTIN_PERMPS,
17862 IX86_BUILTIN_PERMPD,
17863 IX86_BUILTIN_PMACSSWW,
17864 IX86_BUILTIN_PMACSWW,
17865 IX86_BUILTIN_PMACSSWD,
17866 IX86_BUILTIN_PMACSWD,
17867 IX86_BUILTIN_PMACSSDD,
17868 IX86_BUILTIN_PMACSDD,
17869 IX86_BUILTIN_PMACSSDQL,
17870 IX86_BUILTIN_PMACSSDQH,
17871 IX86_BUILTIN_PMACSDQL,
17872 IX86_BUILTIN_PMACSDQH,
17873 IX86_BUILTIN_PMADCSSWD,
17874 IX86_BUILTIN_PMADCSWD,
17875 IX86_BUILTIN_PHADDBW,
17876 IX86_BUILTIN_PHADDBD,
17877 IX86_BUILTIN_PHADDBQ,
17878 IX86_BUILTIN_PHADDWD,
17879 IX86_BUILTIN_PHADDWQ,
17880 IX86_BUILTIN_PHADDDQ,
17881 IX86_BUILTIN_PHADDUBW,
17882 IX86_BUILTIN_PHADDUBD,
17883 IX86_BUILTIN_PHADDUBQ,
17884 IX86_BUILTIN_PHADDUWD,
17885 IX86_BUILTIN_PHADDUWQ,
17886 IX86_BUILTIN_PHADDUDQ,
17887 IX86_BUILTIN_PHSUBBW,
17888 IX86_BUILTIN_PHSUBWD,
17889 IX86_BUILTIN_PHSUBDQ,
17890 IX86_BUILTIN_PROTB,
17891 IX86_BUILTIN_PROTW,
17892 IX86_BUILTIN_PROTD,
17893 IX86_BUILTIN_PROTQ,
17894 IX86_BUILTIN_PROTB_IMM,
17895 IX86_BUILTIN_PROTW_IMM,
17896 IX86_BUILTIN_PROTD_IMM,
17897 IX86_BUILTIN_PROTQ_IMM,
17898 IX86_BUILTIN_PSHLB,
17899 IX86_BUILTIN_PSHLW,
17900 IX86_BUILTIN_PSHLD,
17901 IX86_BUILTIN_PSHLQ,
17902 IX86_BUILTIN_PSHAB,
17903 IX86_BUILTIN_PSHAW,
17904 IX86_BUILTIN_PSHAD,
17905 IX86_BUILTIN_PSHAQ,
17906 IX86_BUILTIN_FRCZSS,
17907 IX86_BUILTIN_FRCZSD,
17908 IX86_BUILTIN_FRCZPS,
17909 IX86_BUILTIN_FRCZPD,
17910 IX86_BUILTIN_CVTPH2PS,
17911 IX86_BUILTIN_CVTPS2PH,
17913 IX86_BUILTIN_COMEQSS,
17914 IX86_BUILTIN_COMNESS,
17915 IX86_BUILTIN_COMLTSS,
17916 IX86_BUILTIN_COMLESS,
17917 IX86_BUILTIN_COMGTSS,
17918 IX86_BUILTIN_COMGESS,
17919 IX86_BUILTIN_COMUEQSS,
17920 IX86_BUILTIN_COMUNESS,
17921 IX86_BUILTIN_COMULTSS,
17922 IX86_BUILTIN_COMULESS,
17923 IX86_BUILTIN_COMUGTSS,
17924 IX86_BUILTIN_COMUGESS,
17925 IX86_BUILTIN_COMORDSS,
17926 IX86_BUILTIN_COMUNORDSS,
17927 IX86_BUILTIN_COMFALSESS,
17928 IX86_BUILTIN_COMTRUESS,
17930 IX86_BUILTIN_COMEQSD,
17931 IX86_BUILTIN_COMNESD,
17932 IX86_BUILTIN_COMLTSD,
17933 IX86_BUILTIN_COMLESD,
17934 IX86_BUILTIN_COMGTSD,
17935 IX86_BUILTIN_COMGESD,
17936 IX86_BUILTIN_COMUEQSD,
17937 IX86_BUILTIN_COMUNESD,
17938 IX86_BUILTIN_COMULTSD,
17939 IX86_BUILTIN_COMULESD,
17940 IX86_BUILTIN_COMUGTSD,
17941 IX86_BUILTIN_COMUGESD,
17942 IX86_BUILTIN_COMORDSD,
17943 IX86_BUILTIN_COMUNORDSD,
17944 IX86_BUILTIN_COMFALSESD,
17945 IX86_BUILTIN_COMTRUESD,
17947 IX86_BUILTIN_COMEQPS,
17948 IX86_BUILTIN_COMNEPS,
17949 IX86_BUILTIN_COMLTPS,
17950 IX86_BUILTIN_COMLEPS,
17951 IX86_BUILTIN_COMGTPS,
17952 IX86_BUILTIN_COMGEPS,
17953 IX86_BUILTIN_COMUEQPS,
17954 IX86_BUILTIN_COMUNEPS,
17955 IX86_BUILTIN_COMULTPS,
17956 IX86_BUILTIN_COMULEPS,
17957 IX86_BUILTIN_COMUGTPS,
17958 IX86_BUILTIN_COMUGEPS,
17959 IX86_BUILTIN_COMORDPS,
17960 IX86_BUILTIN_COMUNORDPS,
17961 IX86_BUILTIN_COMFALSEPS,
17962 IX86_BUILTIN_COMTRUEPS,
17964 IX86_BUILTIN_COMEQPD,
17965 IX86_BUILTIN_COMNEPD,
17966 IX86_BUILTIN_COMLTPD,
17967 IX86_BUILTIN_COMLEPD,
17968 IX86_BUILTIN_COMGTPD,
17969 IX86_BUILTIN_COMGEPD,
17970 IX86_BUILTIN_COMUEQPD,
17971 IX86_BUILTIN_COMUNEPD,
17972 IX86_BUILTIN_COMULTPD,
17973 IX86_BUILTIN_COMULEPD,
17974 IX86_BUILTIN_COMUGTPD,
17975 IX86_BUILTIN_COMUGEPD,
17976 IX86_BUILTIN_COMORDPD,
17977 IX86_BUILTIN_COMUNORDPD,
17978 IX86_BUILTIN_COMFALSEPD,
17979 IX86_BUILTIN_COMTRUEPD,
17981 IX86_BUILTIN_PCOMEQUB,
17982 IX86_BUILTIN_PCOMNEUB,
17983 IX86_BUILTIN_PCOMLTUB,
17984 IX86_BUILTIN_PCOMLEUB,
17985 IX86_BUILTIN_PCOMGTUB,
17986 IX86_BUILTIN_PCOMGEUB,
17987 IX86_BUILTIN_PCOMFALSEUB,
17988 IX86_BUILTIN_PCOMTRUEUB,
17989 IX86_BUILTIN_PCOMEQUW,
17990 IX86_BUILTIN_PCOMNEUW,
17991 IX86_BUILTIN_PCOMLTUW,
17992 IX86_BUILTIN_PCOMLEUW,
17993 IX86_BUILTIN_PCOMGTUW,
17994 IX86_BUILTIN_PCOMGEUW,
17995 IX86_BUILTIN_PCOMFALSEUW,
17996 IX86_BUILTIN_PCOMTRUEUW,
17997 IX86_BUILTIN_PCOMEQUD,
17998 IX86_BUILTIN_PCOMNEUD,
17999 IX86_BUILTIN_PCOMLTUD,
18000 IX86_BUILTIN_PCOMLEUD,
18001 IX86_BUILTIN_PCOMGTUD,
18002 IX86_BUILTIN_PCOMGEUD,
18003 IX86_BUILTIN_PCOMFALSEUD,
18004 IX86_BUILTIN_PCOMTRUEUD,
18005 IX86_BUILTIN_PCOMEQUQ,
18006 IX86_BUILTIN_PCOMNEUQ,
18007 IX86_BUILTIN_PCOMLTUQ,
18008 IX86_BUILTIN_PCOMLEUQ,
18009 IX86_BUILTIN_PCOMGTUQ,
18010 IX86_BUILTIN_PCOMGEUQ,
18011 IX86_BUILTIN_PCOMFALSEUQ,
18012 IX86_BUILTIN_PCOMTRUEUQ,
18014 IX86_BUILTIN_PCOMEQB,
18015 IX86_BUILTIN_PCOMNEB,
18016 IX86_BUILTIN_PCOMLTB,
18017 IX86_BUILTIN_PCOMLEB,
18018 IX86_BUILTIN_PCOMGTB,
18019 IX86_BUILTIN_PCOMGEB,
18020 IX86_BUILTIN_PCOMFALSEB,
18021 IX86_BUILTIN_PCOMTRUEB,
18022 IX86_BUILTIN_PCOMEQW,
18023 IX86_BUILTIN_PCOMNEW,
18024 IX86_BUILTIN_PCOMLTW,
18025 IX86_BUILTIN_PCOMLEW,
18026 IX86_BUILTIN_PCOMGTW,
18027 IX86_BUILTIN_PCOMGEW,
18028 IX86_BUILTIN_PCOMFALSEW,
18029 IX86_BUILTIN_PCOMTRUEW,
18030 IX86_BUILTIN_PCOMEQD,
18031 IX86_BUILTIN_PCOMNED,
18032 IX86_BUILTIN_PCOMLTD,
18033 IX86_BUILTIN_PCOMLED,
18034 IX86_BUILTIN_PCOMGTD,
18035 IX86_BUILTIN_PCOMGED,
18036 IX86_BUILTIN_PCOMFALSED,
18037 IX86_BUILTIN_PCOMTRUED,
18038 IX86_BUILTIN_PCOMEQQ,
18039 IX86_BUILTIN_PCOMNEQ,
18040 IX86_BUILTIN_PCOMLTQ,
18041 IX86_BUILTIN_PCOMLEQ,
18042 IX86_BUILTIN_PCOMGTQ,
18043 IX86_BUILTIN_PCOMGEQ,
18044 IX86_BUILTIN_PCOMFALSEQ,
18045 IX86_BUILTIN_PCOMTRUEQ,
18047 IX86_BUILTIN_MAX
18050 /* Table for the ix86 builtin decls. */
18051 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
18053 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
18054 * if the target_flags include one of MASK. Stores the function decl
18055 * in the ix86_builtins array.
18056 * Returns the function decl or NULL_TREE, if the builtin was not added. */
18058 static inline tree
18059 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
18061 tree decl = NULL_TREE;
18063 if (mask & ix86_isa_flags
18064 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
18066 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
18067 NULL, NULL_TREE);
18068 ix86_builtins[(int) code] = decl;
18071 return decl;
18074 /* Like def_builtin, but also marks the function decl "const". */
18076 static inline tree
18077 def_builtin_const (int mask, const char *name, tree type,
18078 enum ix86_builtins code)
18080 tree decl = def_builtin (mask, name, type, code);
18081 if (decl)
18082 TREE_READONLY (decl) = 1;
18083 return decl;
18086 /* Bits for builtin_description.flag. */
18088 /* Set when we don't support the comparison natively, and should
18089 swap_comparison in order to support it. */
18090 #define BUILTIN_DESC_SWAP_OPERANDS 1
18092 struct builtin_description
18094 const unsigned int mask;
18095 const enum insn_code icode;
18096 const char *const name;
18097 const enum ix86_builtins code;
18098 const enum rtx_code comparison;
18099 const int flag;
18102 static const struct builtin_description bdesc_comi[] =
18104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
18105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
18106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
18107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
18108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
18109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
18110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
18111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
18112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
18113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
18114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
18115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
18116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
18117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
18118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
18119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
18120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
18121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
18122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
18123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
18124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
18125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
18126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
18127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
18130 static const struct builtin_description bdesc_pcmpestr[] =
18132 /* SSE4.2 */
18133 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
18134 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
18135 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
18136 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
18137 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
18138 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
18139 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
18142 static const struct builtin_description bdesc_pcmpistr[] =
18144 /* SSE4.2 */
18145 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
18146 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
18147 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
18148 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
18149 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
18150 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
18151 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
18154 /* Special builtin types */
18155 enum ix86_special_builtin_type
18157 SPECIAL_FTYPE_UNKNOWN,
18158 VOID_FTYPE_VOID,
18159 V16QI_FTYPE_PCCHAR,
18160 V4SF_FTYPE_PCFLOAT,
18161 V2DF_FTYPE_PCDOUBLE,
18162 V4SF_FTYPE_V4SF_PCV2SF,
18163 V2DF_FTYPE_V2DF_PCDOUBLE,
18164 V2DI_FTYPE_PV2DI,
18165 VOID_FTYPE_PV2SF_V4SF,
18166 VOID_FTYPE_PV2DI_V2DI,
18167 VOID_FTYPE_PCHAR_V16QI,
18168 VOID_FTYPE_PFLOAT_V4SF,
18169 VOID_FTYPE_PDOUBLE_V2DF,
18170 VOID_FTYPE_PDI_DI,
18171 VOID_FTYPE_PINT_INT
18174 /* Builtin types */
18175 enum ix86_builtin_type
18177 FTYPE_UNKNOWN,
18178 FLOAT128_FTYPE_FLOAT128,
18179 FLOAT_FTYPE_FLOAT,
18180 FLOAT128_FTYPE_FLOAT128_FLOAT128,
18181 INT_FTYPE_V2DI_V2DI_PTEST,
18182 INT64_FTYPE_V4SF,
18183 INT64_FTYPE_V2DF,
18184 INT_FTYPE_V16QI,
18185 INT_FTYPE_V8QI,
18186 INT_FTYPE_V4SF,
18187 INT_FTYPE_V2DF,
18188 V16QI_FTYPE_V16QI,
18189 V8HI_FTYPE_V8HI,
18190 V8HI_FTYPE_V16QI,
18191 V8QI_FTYPE_V8QI,
18192 V4SI_FTYPE_V4SI,
18193 V4SI_FTYPE_V16QI,
18194 V4SI_FTYPE_V8HI,
18195 V4SI_FTYPE_V4SF,
18196 V4SI_FTYPE_V2DF,
18197 V4HI_FTYPE_V4HI,
18198 V4SF_FTYPE_V4SF,
18199 V4SF_FTYPE_V4SF_VEC_MERGE,
18200 V4SF_FTYPE_V4SI,
18201 V4SF_FTYPE_V2DF,
18202 V2DI_FTYPE_V2DI,
18203 V2DI_FTYPE_V16QI,
18204 V2DI_FTYPE_V8HI,
18205 V2DI_FTYPE_V4SI,
18206 V2DF_FTYPE_V2DF,
18207 V2DF_FTYPE_V2DF_VEC_MERGE,
18208 V2DF_FTYPE_V4SI,
18209 V2DF_FTYPE_V4SF,
18210 V2DF_FTYPE_V2SI,
18211 V2SI_FTYPE_V2SI,
18212 V2SI_FTYPE_V4SF,
18213 V2SI_FTYPE_V2SF,
18214 V2SI_FTYPE_V2DF,
18215 V2SF_FTYPE_V2SF,
18216 V2SF_FTYPE_V2SI,
18217 V16QI_FTYPE_V16QI_V16QI,
18218 V16QI_FTYPE_V8HI_V8HI,
18219 V8QI_FTYPE_V8QI_V8QI,
18220 V8QI_FTYPE_V4HI_V4HI,
18221 V8HI_FTYPE_V8HI_V8HI,
18222 V8HI_FTYPE_V8HI_V8HI_COUNT,
18223 V8HI_FTYPE_V16QI_V16QI,
18224 V8HI_FTYPE_V4SI_V4SI,
18225 V8HI_FTYPE_V8HI_SI_COUNT,
18226 V4SI_FTYPE_V4SI_V4SI,
18227 V4SI_FTYPE_V4SI_V4SI_COUNT,
18228 V4SI_FTYPE_V8HI_V8HI,
18229 V4SI_FTYPE_V4SF_V4SF,
18230 V4SI_FTYPE_V2DF_V2DF,
18231 V4SI_FTYPE_V4SI_SI_COUNT,
18232 V4HI_FTYPE_V4HI_V4HI,
18233 V4HI_FTYPE_V4HI_V4HI_COUNT,
18234 V4HI_FTYPE_V8QI_V8QI,
18235 V4HI_FTYPE_V2SI_V2SI,
18236 V4HI_FTYPE_V4HI_SI_COUNT,
18237 V4SF_FTYPE_V4SF_V4SF,
18238 V4SF_FTYPE_V4SF_V4SF_SWAP,
18239 V4SF_FTYPE_V4SF_V2SI,
18240 V4SF_FTYPE_V4SF_V2DF,
18241 V4SF_FTYPE_V4SF_DI,
18242 V4SF_FTYPE_V4SF_SI,
18243 V2DI_FTYPE_V2DI_V2DI,
18244 V2DI_FTYPE_V2DI_V2DI_COUNT,
18245 V2DI_FTYPE_V16QI_V16QI,
18246 V2DI_FTYPE_V4SI_V4SI,
18247 V2DI_FTYPE_V2DI_V16QI,
18248 V2DI_FTYPE_V2DF_V2DF,
18249 V2DI_FTYPE_V2DI_SI_COUNT,
18250 V2SI_FTYPE_V2SI_V2SI,
18251 V2SI_FTYPE_V2SI_V2SI_COUNT,
18252 V2SI_FTYPE_V4HI_V4HI,
18253 V2SI_FTYPE_V2SF_V2SF,
18254 V2SI_FTYPE_V2SI_SI_COUNT,
18255 V2DF_FTYPE_V2DF_V2DF,
18256 V2DF_FTYPE_V2DF_V2DF_SWAP,
18257 V2DF_FTYPE_V2DF_V4SF,
18258 V2DF_FTYPE_V2DF_DI,
18259 V2DF_FTYPE_V2DF_SI,
18260 V2SF_FTYPE_V2SF_V2SF,
18261 V1DI_FTYPE_V1DI_V1DI,
18262 V1DI_FTYPE_V1DI_V1DI_COUNT,
18263 V1DI_FTYPE_V8QI_V8QI,
18264 V1DI_FTYPE_V2SI_V2SI,
18265 V1DI_FTYPE_V1DI_SI_COUNT,
18266 UINT64_FTYPE_UINT64_UINT64,
18267 UINT_FTYPE_UINT_UINT,
18268 UINT_FTYPE_UINT_USHORT,
18269 UINT_FTYPE_UINT_UCHAR,
18270 V8HI_FTYPE_V8HI_INT,
18271 V4SI_FTYPE_V4SI_INT,
18272 V4HI_FTYPE_V4HI_INT,
18273 V4SF_FTYPE_V4SF_INT,
18274 V2DI_FTYPE_V2DI_INT,
18275 V2DI2TI_FTYPE_V2DI_INT,
18276 V2DF_FTYPE_V2DF_INT,
18277 V16QI_FTYPE_V16QI_V16QI_V16QI,
18278 V4SF_FTYPE_V4SF_V4SF_V4SF,
18279 V2DF_FTYPE_V2DF_V2DF_V2DF,
18280 V16QI_FTYPE_V16QI_V16QI_INT,
18281 V8HI_FTYPE_V8HI_V8HI_INT,
18282 V4SI_FTYPE_V4SI_V4SI_INT,
18283 V4SF_FTYPE_V4SF_V4SF_INT,
18284 V2DI_FTYPE_V2DI_V2DI_INT,
18285 V2DI2TI_FTYPE_V2DI_V2DI_INT,
18286 V1DI2DI_FTYPE_V1DI_V1DI_INT,
18287 V2DF_FTYPE_V2DF_V2DF_INT,
18288 V2DI_FTYPE_V2DI_UINT_UINT,
18289 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
18292 /* Special builtins with variable number of arguments. */
18293 static const struct builtin_description bdesc_special_args[] =
18295 /* MMX */
18296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18298 /* 3DNow! */
18299 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18301 /* SSE */
18302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
18306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18311 /* SSE or 3DNow!A */
18312 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18313 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
18315 /* SSE2 */
18316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
18320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
18322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
18323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
18324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18329 /* SSE3 */
18330 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18332 /* SSE4.1 */
18333 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
18335 /* SSE4A */
18336 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18337 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18340 /* Builtins with variable number of arguments. */
18341 static const struct builtin_description bdesc_args[] =
18343 /* MMX */
18344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18378 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
18380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
18382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
18384 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
18388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18390 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18391 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18392 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18393 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18397 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18398 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18399 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18400 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18403 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18404 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18405 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18407 /* 3DNow! */
18408 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18409 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18410 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18411 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18413 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18414 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18415 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18416 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18417 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18418 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18419 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18420 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18421 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18422 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18423 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18424 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18425 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18426 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18427 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18429 /* 3DNow!A */
18430 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18431 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18432 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18433 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18434 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18435 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18437 /* SSE */
18438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
18439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18440 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18442 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18446 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18449 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18453 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18454 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18455 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18485 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18486 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18490 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18492 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18493 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18495 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18496 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18497 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18501 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18502 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18503 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
18505 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18511 /* SSE MMX or 3Dnow!A */
18512 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18513 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18514 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18516 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18517 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18518 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18519 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18521 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
18522 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
18524 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
18526 /* SSE2 */
18527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
18530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
18531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
18532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
18533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
18535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
18538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
18543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18545 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18546 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
18550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18552 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18553 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18554 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18555 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18583 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18587 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18589 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18590 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18596 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
18598 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18599 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18600 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18601 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18602 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18603 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18604 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18605 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18616 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18617 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
18619 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18621 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18622 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18634 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18635 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18636 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18652 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
18655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
18656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
18660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
18661 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
18662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
18663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
18665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18666 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18667 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18668 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18669 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18670 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18671 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18674 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18675 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18676 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18677 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18678 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18679 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18681 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18682 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18683 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18684 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
18687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
18692 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
18693 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
18695 /* SSE2 MMX */
18696 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18697 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18699 /* SSE3 */
18700 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
18701 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18703 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18704 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18705 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18706 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18707 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18708 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18710 /* SSSE3 */
18711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
18712 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
18713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18714 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
18715 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
18716 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18718 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18719 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18720 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18721 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18722 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18723 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18724 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18728 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18729 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18730 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
18731 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
18732 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18733 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18734 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18735 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18736 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18737 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18738 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18739 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18740 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18741 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18743 /* SSSE3. */
18744 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
18745 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
18747 /* SSE4.1 */
18748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18757 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18759 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18760 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18767 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18769 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18773 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18774 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18780 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18782 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18783 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18784 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18786 /* SSE4.1 and SSE5 */
18787 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18788 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18789 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18790 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18792 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18793 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18794 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18796 /* SSE4.2 */
18797 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18798 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
18799 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
18800 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
18801 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
18803 /* SSE4A */
18804 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
18805 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
18806 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
18807 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18809 /* AES */
18810 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
18811 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
18813 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18814 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18815 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18816 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18818 /* PCLMUL */
18819 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
18822 /* SSE5 */
18823 enum multi_arg_type {
18824 MULTI_ARG_UNKNOWN,
18825 MULTI_ARG_3_SF,
18826 MULTI_ARG_3_DF,
18827 MULTI_ARG_3_DI,
18828 MULTI_ARG_3_SI,
18829 MULTI_ARG_3_SI_DI,
18830 MULTI_ARG_3_HI,
18831 MULTI_ARG_3_HI_SI,
18832 MULTI_ARG_3_QI,
18833 MULTI_ARG_3_PERMPS,
18834 MULTI_ARG_3_PERMPD,
18835 MULTI_ARG_2_SF,
18836 MULTI_ARG_2_DF,
18837 MULTI_ARG_2_DI,
18838 MULTI_ARG_2_SI,
18839 MULTI_ARG_2_HI,
18840 MULTI_ARG_2_QI,
18841 MULTI_ARG_2_DI_IMM,
18842 MULTI_ARG_2_SI_IMM,
18843 MULTI_ARG_2_HI_IMM,
18844 MULTI_ARG_2_QI_IMM,
18845 MULTI_ARG_2_SF_CMP,
18846 MULTI_ARG_2_DF_CMP,
18847 MULTI_ARG_2_DI_CMP,
18848 MULTI_ARG_2_SI_CMP,
18849 MULTI_ARG_2_HI_CMP,
18850 MULTI_ARG_2_QI_CMP,
18851 MULTI_ARG_2_DI_TF,
18852 MULTI_ARG_2_SI_TF,
18853 MULTI_ARG_2_HI_TF,
18854 MULTI_ARG_2_QI_TF,
18855 MULTI_ARG_2_SF_TF,
18856 MULTI_ARG_2_DF_TF,
18857 MULTI_ARG_1_SF,
18858 MULTI_ARG_1_DF,
18859 MULTI_ARG_1_DI,
18860 MULTI_ARG_1_SI,
18861 MULTI_ARG_1_HI,
18862 MULTI_ARG_1_QI,
18863 MULTI_ARG_1_SI_DI,
18864 MULTI_ARG_1_HI_DI,
18865 MULTI_ARG_1_HI_SI,
18866 MULTI_ARG_1_QI_DI,
18867 MULTI_ARG_1_QI_SI,
18868 MULTI_ARG_1_QI_HI,
18869 MULTI_ARG_1_PH2PS,
18870 MULTI_ARG_1_PS2PH
18873 static const struct builtin_description bdesc_multi_arg[] =
18875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
19000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
19002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
19003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
19006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
19007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
19008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
19009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
19010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
19013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
19014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
19015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
19016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
19017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
19020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
19023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
19024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
19025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
19027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
19028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
19031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
19032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
19033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
19035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
19036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
19039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
19040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
19041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
19043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
19047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
19048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
19049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
19051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
19052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
19055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
19056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
19057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
19059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
19060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
19063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
19064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
19065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
19067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
19068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
19071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
19072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
19073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
19075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
19079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
19080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
19081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
19083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
19084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
19085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
19086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
19087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
19088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
19089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
19090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
19092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19111 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
19112 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
19113 builtins. */
19114 static void
19115 ix86_init_mmx_sse_builtins (void)
19117 const struct builtin_description * d;
19118 size_t i;
19120 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
19121 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19122 tree V1DI_type_node
19123 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
19124 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
19125 tree V2DI_type_node
19126 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
19127 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
19128 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
19129 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
19130 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19131 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
19132 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
19134 tree pchar_type_node = build_pointer_type (char_type_node);
19135 tree pcchar_type_node
19136 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
19137 tree pfloat_type_node = build_pointer_type (float_type_node);
19138 tree pcfloat_type_node
19139 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
19140 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
19141 tree pcv2sf_type_node
19142 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
19143 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
19144 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
19146 /* Comparisons. */
19147 tree int_ftype_v4sf_v4sf
19148 = build_function_type_list (integer_type_node,
19149 V4SF_type_node, V4SF_type_node, NULL_TREE);
19150 tree v4si_ftype_v4sf_v4sf
19151 = build_function_type_list (V4SI_type_node,
19152 V4SF_type_node, V4SF_type_node, NULL_TREE);
19153 /* MMX/SSE/integer conversions. */
19154 tree int_ftype_v4sf
19155 = build_function_type_list (integer_type_node,
19156 V4SF_type_node, NULL_TREE);
19157 tree int64_ftype_v4sf
19158 = build_function_type_list (long_long_integer_type_node,
19159 V4SF_type_node, NULL_TREE);
19160 tree int_ftype_v8qi
19161 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
19162 tree v4sf_ftype_v4sf_int
19163 = build_function_type_list (V4SF_type_node,
19164 V4SF_type_node, integer_type_node, NULL_TREE);
19165 tree v4sf_ftype_v4sf_int64
19166 = build_function_type_list (V4SF_type_node,
19167 V4SF_type_node, long_long_integer_type_node,
19168 NULL_TREE);
19169 tree v4sf_ftype_v4sf_v2si
19170 = build_function_type_list (V4SF_type_node,
19171 V4SF_type_node, V2SI_type_node, NULL_TREE);
19173 /* Miscellaneous. */
19174 tree v8qi_ftype_v4hi_v4hi
19175 = build_function_type_list (V8QI_type_node,
19176 V4HI_type_node, V4HI_type_node, NULL_TREE);
19177 tree v4hi_ftype_v2si_v2si
19178 = build_function_type_list (V4HI_type_node,
19179 V2SI_type_node, V2SI_type_node, NULL_TREE);
19180 tree v4sf_ftype_v4sf_v4sf_int
19181 = build_function_type_list (V4SF_type_node,
19182 V4SF_type_node, V4SF_type_node,
19183 integer_type_node, NULL_TREE);
19184 tree v2si_ftype_v4hi_v4hi
19185 = build_function_type_list (V2SI_type_node,
19186 V4HI_type_node, V4HI_type_node, NULL_TREE);
19187 tree v4hi_ftype_v4hi_int
19188 = build_function_type_list (V4HI_type_node,
19189 V4HI_type_node, integer_type_node, NULL_TREE);
19190 tree v2si_ftype_v2si_int
19191 = build_function_type_list (V2SI_type_node,
19192 V2SI_type_node, integer_type_node, NULL_TREE);
19193 tree v1di_ftype_v1di_int
19194 = build_function_type_list (V1DI_type_node,
19195 V1DI_type_node, integer_type_node, NULL_TREE);
19197 tree void_ftype_void
19198 = build_function_type (void_type_node, void_list_node);
19199 tree void_ftype_unsigned
19200 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
19201 tree void_ftype_unsigned_unsigned
19202 = build_function_type_list (void_type_node, unsigned_type_node,
19203 unsigned_type_node, NULL_TREE);
19204 tree void_ftype_pcvoid_unsigned_unsigned
19205 = build_function_type_list (void_type_node, const_ptr_type_node,
19206 unsigned_type_node, unsigned_type_node,
19207 NULL_TREE);
19208 tree unsigned_ftype_void
19209 = build_function_type (unsigned_type_node, void_list_node);
19210 tree v2si_ftype_v4sf
19211 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
19212 /* Loads/stores. */
19213 tree void_ftype_v8qi_v8qi_pchar
19214 = build_function_type_list (void_type_node,
19215 V8QI_type_node, V8QI_type_node,
19216 pchar_type_node, NULL_TREE);
19217 tree v4sf_ftype_pcfloat
19218 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
19219 tree v4sf_ftype_v4sf_pcv2sf
19220 = build_function_type_list (V4SF_type_node,
19221 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19222 tree void_ftype_pv2sf_v4sf
19223 = build_function_type_list (void_type_node,
19224 pv2sf_type_node, V4SF_type_node, NULL_TREE);
19225 tree void_ftype_pfloat_v4sf
19226 = build_function_type_list (void_type_node,
19227 pfloat_type_node, V4SF_type_node, NULL_TREE);
19228 tree void_ftype_pdi_di
19229 = build_function_type_list (void_type_node,
19230 pdi_type_node, long_long_unsigned_type_node,
19231 NULL_TREE);
19232 tree void_ftype_pv2di_v2di
19233 = build_function_type_list (void_type_node,
19234 pv2di_type_node, V2DI_type_node, NULL_TREE);
19235 /* Normal vector unops. */
19236 tree v4sf_ftype_v4sf
19237 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
19238 tree v16qi_ftype_v16qi
19239 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19240 tree v8hi_ftype_v8hi
19241 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19242 tree v4si_ftype_v4si
19243 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19244 tree v8qi_ftype_v8qi
19245 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19246 tree v4hi_ftype_v4hi
19247 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
19249 /* Normal vector binops. */
19250 tree v4sf_ftype_v4sf_v4sf
19251 = build_function_type_list (V4SF_type_node,
19252 V4SF_type_node, V4SF_type_node, NULL_TREE);
19253 tree v8qi_ftype_v8qi_v8qi
19254 = build_function_type_list (V8QI_type_node,
19255 V8QI_type_node, V8QI_type_node, NULL_TREE);
19256 tree v4hi_ftype_v4hi_v4hi
19257 = build_function_type_list (V4HI_type_node,
19258 V4HI_type_node, V4HI_type_node, NULL_TREE);
19259 tree v2si_ftype_v2si_v2si
19260 = build_function_type_list (V2SI_type_node,
19261 V2SI_type_node, V2SI_type_node, NULL_TREE);
19262 tree v1di_ftype_v1di_v1di
19263 = build_function_type_list (V1DI_type_node,
19264 V1DI_type_node, V1DI_type_node, NULL_TREE);
19265 tree v1di_ftype_v1di_v1di_int
19266 = build_function_type_list (V1DI_type_node,
19267 V1DI_type_node, V1DI_type_node,
19268 integer_type_node, NULL_TREE);
19269 tree v2si_ftype_v2sf
19270 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
19271 tree v2sf_ftype_v2si
19272 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
19273 tree v2si_ftype_v2si
19274 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
19275 tree v2sf_ftype_v2sf
19276 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
19277 tree v2sf_ftype_v2sf_v2sf
19278 = build_function_type_list (V2SF_type_node,
19279 V2SF_type_node, V2SF_type_node, NULL_TREE);
19280 tree v2si_ftype_v2sf_v2sf
19281 = build_function_type_list (V2SI_type_node,
19282 V2SF_type_node, V2SF_type_node, NULL_TREE);
19283 tree pint_type_node = build_pointer_type (integer_type_node);
19284 tree pdouble_type_node = build_pointer_type (double_type_node);
19285 tree pcdouble_type_node = build_pointer_type (
19286 build_type_variant (double_type_node, 1, 0));
19287 tree int_ftype_v2df_v2df
19288 = build_function_type_list (integer_type_node,
19289 V2DF_type_node, V2DF_type_node, NULL_TREE);
19291 tree void_ftype_pcvoid
19292 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
19293 tree v4sf_ftype_v4si
19294 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
19295 tree v4si_ftype_v4sf
19296 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
19297 tree v2df_ftype_v4si
19298 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
19299 tree v4si_ftype_v2df
19300 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
19301 tree v4si_ftype_v2df_v2df
19302 = build_function_type_list (V4SI_type_node,
19303 V2DF_type_node, V2DF_type_node, NULL_TREE);
19304 tree v2si_ftype_v2df
19305 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
19306 tree v4sf_ftype_v2df
19307 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
19308 tree v2df_ftype_v2si
19309 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
19310 tree v2df_ftype_v4sf
19311 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
19312 tree int_ftype_v2df
19313 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
19314 tree int64_ftype_v2df
19315 = build_function_type_list (long_long_integer_type_node,
19316 V2DF_type_node, NULL_TREE);
19317 tree v2df_ftype_v2df_int
19318 = build_function_type_list (V2DF_type_node,
19319 V2DF_type_node, integer_type_node, NULL_TREE);
19320 tree v2df_ftype_v2df_int64
19321 = build_function_type_list (V2DF_type_node,
19322 V2DF_type_node, long_long_integer_type_node,
19323 NULL_TREE);
19324 tree v4sf_ftype_v4sf_v2df
19325 = build_function_type_list (V4SF_type_node,
19326 V4SF_type_node, V2DF_type_node, NULL_TREE);
19327 tree v2df_ftype_v2df_v4sf
19328 = build_function_type_list (V2DF_type_node,
19329 V2DF_type_node, V4SF_type_node, NULL_TREE);
19330 tree v2df_ftype_v2df_v2df_int
19331 = build_function_type_list (V2DF_type_node,
19332 V2DF_type_node, V2DF_type_node,
19333 integer_type_node,
19334 NULL_TREE);
19335 tree v2df_ftype_v2df_pcdouble
19336 = build_function_type_list (V2DF_type_node,
19337 V2DF_type_node, pcdouble_type_node, NULL_TREE);
19338 tree void_ftype_pdouble_v2df
19339 = build_function_type_list (void_type_node,
19340 pdouble_type_node, V2DF_type_node, NULL_TREE);
19341 tree void_ftype_pint_int
19342 = build_function_type_list (void_type_node,
19343 pint_type_node, integer_type_node, NULL_TREE);
19344 tree void_ftype_v16qi_v16qi_pchar
19345 = build_function_type_list (void_type_node,
19346 V16QI_type_node, V16QI_type_node,
19347 pchar_type_node, NULL_TREE);
19348 tree v2df_ftype_pcdouble
19349 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
19350 tree v2df_ftype_v2df_v2df
19351 = build_function_type_list (V2DF_type_node,
19352 V2DF_type_node, V2DF_type_node, NULL_TREE);
19353 tree v16qi_ftype_v16qi_v16qi
19354 = build_function_type_list (V16QI_type_node,
19355 V16QI_type_node, V16QI_type_node, NULL_TREE);
19356 tree v8hi_ftype_v8hi_v8hi
19357 = build_function_type_list (V8HI_type_node,
19358 V8HI_type_node, V8HI_type_node, NULL_TREE);
19359 tree v4si_ftype_v4si_v4si
19360 = build_function_type_list (V4SI_type_node,
19361 V4SI_type_node, V4SI_type_node, NULL_TREE);
19362 tree v2di_ftype_v2di_v2di
19363 = build_function_type_list (V2DI_type_node,
19364 V2DI_type_node, V2DI_type_node, NULL_TREE);
19365 tree v2di_ftype_v2df_v2df
19366 = build_function_type_list (V2DI_type_node,
19367 V2DF_type_node, V2DF_type_node, NULL_TREE);
19368 tree v2df_ftype_v2df
19369 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
19370 tree v2di_ftype_v2di_int
19371 = build_function_type_list (V2DI_type_node,
19372 V2DI_type_node, integer_type_node, NULL_TREE);
19373 tree v2di_ftype_v2di_v2di_int
19374 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19375 V2DI_type_node, integer_type_node, NULL_TREE);
19376 tree v4si_ftype_v4si_int
19377 = build_function_type_list (V4SI_type_node,
19378 V4SI_type_node, integer_type_node, NULL_TREE);
19379 tree v8hi_ftype_v8hi_int
19380 = build_function_type_list (V8HI_type_node,
19381 V8HI_type_node, integer_type_node, NULL_TREE);
19382 tree v4si_ftype_v8hi_v8hi
19383 = build_function_type_list (V4SI_type_node,
19384 V8HI_type_node, V8HI_type_node, NULL_TREE);
19385 tree v1di_ftype_v8qi_v8qi
19386 = build_function_type_list (V1DI_type_node,
19387 V8QI_type_node, V8QI_type_node, NULL_TREE);
19388 tree v1di_ftype_v2si_v2si
19389 = build_function_type_list (V1DI_type_node,
19390 V2SI_type_node, V2SI_type_node, NULL_TREE);
19391 tree v2di_ftype_v16qi_v16qi
19392 = build_function_type_list (V2DI_type_node,
19393 V16QI_type_node, V16QI_type_node, NULL_TREE);
19394 tree v2di_ftype_v4si_v4si
19395 = build_function_type_list (V2DI_type_node,
19396 V4SI_type_node, V4SI_type_node, NULL_TREE);
19397 tree int_ftype_v16qi
19398 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
19399 tree v16qi_ftype_pcchar
19400 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
19401 tree void_ftype_pchar_v16qi
19402 = build_function_type_list (void_type_node,
19403 pchar_type_node, V16QI_type_node, NULL_TREE);
19405 tree v2di_ftype_v2di_unsigned_unsigned
19406 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19407 unsigned_type_node, unsigned_type_node,
19408 NULL_TREE);
19409 tree v2di_ftype_v2di_v2di_unsigned_unsigned
19410 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19411 unsigned_type_node, unsigned_type_node,
19412 NULL_TREE);
19413 tree v2di_ftype_v2di_v16qi
19414 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19415 NULL_TREE);
19416 tree v2df_ftype_v2df_v2df_v2df
19417 = build_function_type_list (V2DF_type_node,
19418 V2DF_type_node, V2DF_type_node,
19419 V2DF_type_node, NULL_TREE);
19420 tree v4sf_ftype_v4sf_v4sf_v4sf
19421 = build_function_type_list (V4SF_type_node,
19422 V4SF_type_node, V4SF_type_node,
19423 V4SF_type_node, NULL_TREE);
19424 tree v8hi_ftype_v16qi
19425 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19426 NULL_TREE);
19427 tree v4si_ftype_v16qi
19428 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19429 NULL_TREE);
19430 tree v2di_ftype_v16qi
19431 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19432 NULL_TREE);
19433 tree v4si_ftype_v8hi
19434 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19435 NULL_TREE);
19436 tree v2di_ftype_v8hi
19437 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19438 NULL_TREE);
19439 tree v2di_ftype_v4si
19440 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19441 NULL_TREE);
19442 tree v2di_ftype_pv2di
19443 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19444 NULL_TREE);
19445 tree v16qi_ftype_v16qi_v16qi_int
19446 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19447 V16QI_type_node, integer_type_node,
19448 NULL_TREE);
19449 tree v16qi_ftype_v16qi_v16qi_v16qi
19450 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19451 V16QI_type_node, V16QI_type_node,
19452 NULL_TREE);
19453 tree v8hi_ftype_v8hi_v8hi_int
19454 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19455 V8HI_type_node, integer_type_node,
19456 NULL_TREE);
19457 tree v4si_ftype_v4si_v4si_int
19458 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19459 V4SI_type_node, integer_type_node,
19460 NULL_TREE);
19461 tree int_ftype_v2di_v2di
19462 = build_function_type_list (integer_type_node,
19463 V2DI_type_node, V2DI_type_node,
19464 NULL_TREE);
19465 tree int_ftype_v16qi_int_v16qi_int_int
19466 = build_function_type_list (integer_type_node,
19467 V16QI_type_node,
19468 integer_type_node,
19469 V16QI_type_node,
19470 integer_type_node,
19471 integer_type_node,
19472 NULL_TREE);
19473 tree v16qi_ftype_v16qi_int_v16qi_int_int
19474 = build_function_type_list (V16QI_type_node,
19475 V16QI_type_node,
19476 integer_type_node,
19477 V16QI_type_node,
19478 integer_type_node,
19479 integer_type_node,
19480 NULL_TREE);
19481 tree int_ftype_v16qi_v16qi_int
19482 = build_function_type_list (integer_type_node,
19483 V16QI_type_node,
19484 V16QI_type_node,
19485 integer_type_node,
19486 NULL_TREE);
19488 /* SSE5 instructions */
19489 tree v2di_ftype_v2di_v2di_v2di
19490 = build_function_type_list (V2DI_type_node,
19491 V2DI_type_node,
19492 V2DI_type_node,
19493 V2DI_type_node,
19494 NULL_TREE);
19496 tree v4si_ftype_v4si_v4si_v4si
19497 = build_function_type_list (V4SI_type_node,
19498 V4SI_type_node,
19499 V4SI_type_node,
19500 V4SI_type_node,
19501 NULL_TREE);
19503 tree v4si_ftype_v4si_v4si_v2di
19504 = build_function_type_list (V4SI_type_node,
19505 V4SI_type_node,
19506 V4SI_type_node,
19507 V2DI_type_node,
19508 NULL_TREE);
19510 tree v8hi_ftype_v8hi_v8hi_v8hi
19511 = build_function_type_list (V8HI_type_node,
19512 V8HI_type_node,
19513 V8HI_type_node,
19514 V8HI_type_node,
19515 NULL_TREE);
19517 tree v8hi_ftype_v8hi_v8hi_v4si
19518 = build_function_type_list (V8HI_type_node,
19519 V8HI_type_node,
19520 V8HI_type_node,
19521 V4SI_type_node,
19522 NULL_TREE);
19524 tree v2df_ftype_v2df_v2df_v16qi
19525 = build_function_type_list (V2DF_type_node,
19526 V2DF_type_node,
19527 V2DF_type_node,
19528 V16QI_type_node,
19529 NULL_TREE);
19531 tree v4sf_ftype_v4sf_v4sf_v16qi
19532 = build_function_type_list (V4SF_type_node,
19533 V4SF_type_node,
19534 V4SF_type_node,
19535 V16QI_type_node,
19536 NULL_TREE);
19538 tree v2di_ftype_v2di_si
19539 = build_function_type_list (V2DI_type_node,
19540 V2DI_type_node,
19541 integer_type_node,
19542 NULL_TREE);
19544 tree v4si_ftype_v4si_si
19545 = build_function_type_list (V4SI_type_node,
19546 V4SI_type_node,
19547 integer_type_node,
19548 NULL_TREE);
19550 tree v8hi_ftype_v8hi_si
19551 = build_function_type_list (V8HI_type_node,
19552 V8HI_type_node,
19553 integer_type_node,
19554 NULL_TREE);
19556 tree v16qi_ftype_v16qi_si
19557 = build_function_type_list (V16QI_type_node,
19558 V16QI_type_node,
19559 integer_type_node,
19560 NULL_TREE);
19561 tree v4sf_ftype_v4hi
19562 = build_function_type_list (V4SF_type_node,
19563 V4HI_type_node,
19564 NULL_TREE);
19566 tree v4hi_ftype_v4sf
19567 = build_function_type_list (V4HI_type_node,
19568 V4SF_type_node,
19569 NULL_TREE);
19571 tree v2di_ftype_v2di
19572 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19574 tree v16qi_ftype_v8hi_v8hi
19575 = build_function_type_list (V16QI_type_node,
19576 V8HI_type_node, V8HI_type_node,
19577 NULL_TREE);
19578 tree v8hi_ftype_v4si_v4si
19579 = build_function_type_list (V8HI_type_node,
19580 V4SI_type_node, V4SI_type_node,
19581 NULL_TREE);
19582 tree v8hi_ftype_v16qi_v16qi
19583 = build_function_type_list (V8HI_type_node,
19584 V16QI_type_node, V16QI_type_node,
19585 NULL_TREE);
19586 tree v4hi_ftype_v8qi_v8qi
19587 = build_function_type_list (V4HI_type_node,
19588 V8QI_type_node, V8QI_type_node,
19589 NULL_TREE);
19590 tree unsigned_ftype_unsigned_uchar
19591 = build_function_type_list (unsigned_type_node,
19592 unsigned_type_node,
19593 unsigned_char_type_node,
19594 NULL_TREE);
19595 tree unsigned_ftype_unsigned_ushort
19596 = build_function_type_list (unsigned_type_node,
19597 unsigned_type_node,
19598 short_unsigned_type_node,
19599 NULL_TREE);
19600 tree unsigned_ftype_unsigned_unsigned
19601 = build_function_type_list (unsigned_type_node,
19602 unsigned_type_node,
19603 unsigned_type_node,
19604 NULL_TREE);
19605 tree uint64_ftype_uint64_uint64
19606 = build_function_type_list (long_long_unsigned_type_node,
19607 long_long_unsigned_type_node,
19608 long_long_unsigned_type_node,
19609 NULL_TREE);
19610 tree float_ftype_float
19611 = build_function_type_list (float_type_node,
19612 float_type_node,
19613 NULL_TREE);
19615 tree ftype;
19617 /* Add all special builtins with variable number of operands. */
19618 for (i = 0, d = bdesc_special_args;
19619 i < ARRAY_SIZE (bdesc_special_args);
19620 i++, d++)
19622 tree type;
19624 if (d->name == 0)
19625 continue;
19627 switch ((enum ix86_special_builtin_type) d->flag)
19629 case VOID_FTYPE_VOID:
19630 type = void_ftype_void;
19631 break;
19632 case V16QI_FTYPE_PCCHAR:
19633 type = v16qi_ftype_pcchar;
19634 break;
19635 case V4SF_FTYPE_PCFLOAT:
19636 type = v4sf_ftype_pcfloat;
19637 break;
19638 case V2DI_FTYPE_PV2DI:
19639 type = v2di_ftype_pv2di;
19640 break;
19641 case V2DF_FTYPE_PCDOUBLE:
19642 type = v2df_ftype_pcdouble;
19643 break;
19644 case V4SF_FTYPE_V4SF_PCV2SF:
19645 type = v4sf_ftype_v4sf_pcv2sf;
19646 break;
19647 case V2DF_FTYPE_V2DF_PCDOUBLE:
19648 type = v2df_ftype_v2df_pcdouble;
19649 break;
19650 case VOID_FTYPE_PV2SF_V4SF:
19651 type = void_ftype_pv2sf_v4sf;
19652 break;
19653 case VOID_FTYPE_PV2DI_V2DI:
19654 type = void_ftype_pv2di_v2di;
19655 break;
19656 case VOID_FTYPE_PCHAR_V16QI:
19657 type = void_ftype_pchar_v16qi;
19658 break;
19659 case VOID_FTYPE_PFLOAT_V4SF:
19660 type = void_ftype_pfloat_v4sf;
19661 break;
19662 case VOID_FTYPE_PDOUBLE_V2DF:
19663 type = void_ftype_pdouble_v2df;
19664 break;
19665 case VOID_FTYPE_PDI_DI:
19666 type = void_ftype_pdi_di;
19667 break;
19668 case VOID_FTYPE_PINT_INT:
19669 type = void_ftype_pint_int;
19670 break;
19671 default:
19672 gcc_unreachable ();
19675 def_builtin (d->mask, d->name, type, d->code);
19678 /* Add all builtins with variable number of operands. */
19679 for (i = 0, d = bdesc_args;
19680 i < ARRAY_SIZE (bdesc_args);
19681 i++, d++)
19683 tree type;
19685 if (d->name == 0)
19686 continue;
19688 switch ((enum ix86_builtin_type) d->flag)
19690 case FLOAT_FTYPE_FLOAT:
19691 type = float_ftype_float;
19692 break;
19693 case INT_FTYPE_V2DI_V2DI_PTEST:
19694 type = int_ftype_v2di_v2di;
19695 break;
19696 case INT64_FTYPE_V4SF:
19697 type = int64_ftype_v4sf;
19698 break;
19699 case INT64_FTYPE_V2DF:
19700 type = int64_ftype_v2df;
19701 break;
19702 case INT_FTYPE_V16QI:
19703 type = int_ftype_v16qi;
19704 break;
19705 case INT_FTYPE_V8QI:
19706 type = int_ftype_v8qi;
19707 break;
19708 case INT_FTYPE_V4SF:
19709 type = int_ftype_v4sf;
19710 break;
19711 case INT_FTYPE_V2DF:
19712 type = int_ftype_v2df;
19713 break;
19714 case V16QI_FTYPE_V16QI:
19715 type = v16qi_ftype_v16qi;
19716 break;
19717 case V8HI_FTYPE_V8HI:
19718 type = v8hi_ftype_v8hi;
19719 break;
19720 case V8HI_FTYPE_V16QI:
19721 type = v8hi_ftype_v16qi;
19722 break;
19723 case V8QI_FTYPE_V8QI:
19724 type = v8qi_ftype_v8qi;
19725 break;
19726 case V4SI_FTYPE_V4SI:
19727 type = v4si_ftype_v4si;
19728 break;
19729 case V4SI_FTYPE_V16QI:
19730 type = v4si_ftype_v16qi;
19731 break;
19732 case V4SI_FTYPE_V8HI:
19733 type = v4si_ftype_v8hi;
19734 break;
19735 case V4SI_FTYPE_V4SF:
19736 type = v4si_ftype_v4sf;
19737 break;
19738 case V4SI_FTYPE_V2DF:
19739 type = v4si_ftype_v2df;
19740 break;
19741 case V4HI_FTYPE_V4HI:
19742 type = v4hi_ftype_v4hi;
19743 break;
19744 case V4SF_FTYPE_V4SF:
19745 case V4SF_FTYPE_V4SF_VEC_MERGE:
19746 type = v4sf_ftype_v4sf;
19747 break;
19748 case V4SF_FTYPE_V4SI:
19749 type = v4sf_ftype_v4si;
19750 break;
19751 case V4SF_FTYPE_V2DF:
19752 type = v4sf_ftype_v2df;
19753 break;
19754 case V2DI_FTYPE_V2DI:
19755 type = v2di_ftype_v2di;
19756 break;
19757 case V2DI_FTYPE_V16QI:
19758 type = v2di_ftype_v16qi;
19759 break;
19760 case V2DI_FTYPE_V8HI:
19761 type = v2di_ftype_v8hi;
19762 break;
19763 case V2DI_FTYPE_V4SI:
19764 type = v2di_ftype_v4si;
19765 break;
19766 case V2SI_FTYPE_V2SI:
19767 type = v2si_ftype_v2si;
19768 break;
19769 case V2SI_FTYPE_V4SF:
19770 type = v2si_ftype_v4sf;
19771 break;
19772 case V2SI_FTYPE_V2DF:
19773 type = v2si_ftype_v2df;
19774 break;
19775 case V2SI_FTYPE_V2SF:
19776 type = v2si_ftype_v2sf;
19777 break;
19778 case V2DF_FTYPE_V4SF:
19779 type = v2df_ftype_v4sf;
19780 break;
19781 case V2DF_FTYPE_V2DF:
19782 case V2DF_FTYPE_V2DF_VEC_MERGE:
19783 type = v2df_ftype_v2df;
19784 break;
19785 case V2DF_FTYPE_V2SI:
19786 type = v2df_ftype_v2si;
19787 break;
19788 case V2DF_FTYPE_V4SI:
19789 type = v2df_ftype_v4si;
19790 break;
19791 case V2SF_FTYPE_V2SF:
19792 type = v2sf_ftype_v2sf;
19793 break;
19794 case V2SF_FTYPE_V2SI:
19795 type = v2sf_ftype_v2si;
19796 break;
19797 case V16QI_FTYPE_V16QI_V16QI:
19798 type = v16qi_ftype_v16qi_v16qi;
19799 break;
19800 case V16QI_FTYPE_V8HI_V8HI:
19801 type = v16qi_ftype_v8hi_v8hi;
19802 break;
19803 case V8QI_FTYPE_V8QI_V8QI:
19804 type = v8qi_ftype_v8qi_v8qi;
19805 break;
19806 case V8QI_FTYPE_V4HI_V4HI:
19807 type = v8qi_ftype_v4hi_v4hi;
19808 break;
19809 case V8HI_FTYPE_V8HI_V8HI:
19810 case V8HI_FTYPE_V8HI_V8HI_COUNT:
19811 type = v8hi_ftype_v8hi_v8hi;
19812 break;
19813 case V8HI_FTYPE_V16QI_V16QI:
19814 type = v8hi_ftype_v16qi_v16qi;
19815 break;
19816 case V8HI_FTYPE_V4SI_V4SI:
19817 type = v8hi_ftype_v4si_v4si;
19818 break;
19819 case V8HI_FTYPE_V8HI_SI_COUNT:
19820 type = v8hi_ftype_v8hi_int;
19821 break;
19822 case V4SI_FTYPE_V4SI_V4SI:
19823 case V4SI_FTYPE_V4SI_V4SI_COUNT:
19824 type = v4si_ftype_v4si_v4si;
19825 break;
19826 case V4SI_FTYPE_V8HI_V8HI:
19827 type = v4si_ftype_v8hi_v8hi;
19828 break;
19829 case V4SI_FTYPE_V4SF_V4SF:
19830 type = v4si_ftype_v4sf_v4sf;
19831 break;
19832 case V4SI_FTYPE_V2DF_V2DF:
19833 type = v4si_ftype_v2df_v2df;
19834 break;
19835 case V4SI_FTYPE_V4SI_SI_COUNT:
19836 type = v4si_ftype_v4si_int;
19837 break;
19838 case V4HI_FTYPE_V4HI_V4HI:
19839 case V4HI_FTYPE_V4HI_V4HI_COUNT:
19840 type = v4hi_ftype_v4hi_v4hi;
19841 break;
19842 case V4HI_FTYPE_V8QI_V8QI:
19843 type = v4hi_ftype_v8qi_v8qi;
19844 break;
19845 case V4HI_FTYPE_V2SI_V2SI:
19846 type = v4hi_ftype_v2si_v2si;
19847 break;
19848 case V4HI_FTYPE_V4HI_SI_COUNT:
19849 type = v4hi_ftype_v4hi_int;
19850 break;
19851 case V4SF_FTYPE_V4SF_V4SF:
19852 case V4SF_FTYPE_V4SF_V4SF_SWAP:
19853 type = v4sf_ftype_v4sf_v4sf;
19854 break;
19855 case V4SF_FTYPE_V4SF_V2SI:
19856 type = v4sf_ftype_v4sf_v2si;
19857 break;
19858 case V4SF_FTYPE_V4SF_V2DF:
19859 type = v4sf_ftype_v4sf_v2df;
19860 break;
19861 case V4SF_FTYPE_V4SF_DI:
19862 type = v4sf_ftype_v4sf_int64;
19863 break;
19864 case V4SF_FTYPE_V4SF_SI:
19865 type = v4sf_ftype_v4sf_int;
19866 break;
19867 case V2DI_FTYPE_V2DI_V2DI:
19868 case V2DI_FTYPE_V2DI_V2DI_COUNT:
19869 type = v2di_ftype_v2di_v2di;
19870 break;
19871 case V2DI_FTYPE_V16QI_V16QI:
19872 type = v2di_ftype_v16qi_v16qi;
19873 break;
19874 case V2DI_FTYPE_V4SI_V4SI:
19875 type = v2di_ftype_v4si_v4si;
19876 break;
19877 case V2DI_FTYPE_V2DI_V16QI:
19878 type = v2di_ftype_v2di_v16qi;
19879 break;
19880 case V2DI_FTYPE_V2DF_V2DF:
19881 type = v2di_ftype_v2df_v2df;
19882 break;
19883 case V2DI_FTYPE_V2DI_SI_COUNT:
19884 type = v2di_ftype_v2di_int;
19885 break;
19886 case V2SI_FTYPE_V2SI_V2SI:
19887 case V2SI_FTYPE_V2SI_V2SI_COUNT:
19888 type = v2si_ftype_v2si_v2si;
19889 break;
19890 case V2SI_FTYPE_V4HI_V4HI:
19891 type = v2si_ftype_v4hi_v4hi;
19892 break;
19893 case V2SI_FTYPE_V2SF_V2SF:
19894 type = v2si_ftype_v2sf_v2sf;
19895 break;
19896 case V2SI_FTYPE_V2SI_SI_COUNT:
19897 type = v2si_ftype_v2si_int;
19898 break;
19899 case V2DF_FTYPE_V2DF_V2DF:
19900 case V2DF_FTYPE_V2DF_V2DF_SWAP:
19901 type = v2df_ftype_v2df_v2df;
19902 break;
19903 case V2DF_FTYPE_V2DF_V4SF:
19904 type = v2df_ftype_v2df_v4sf;
19905 break;
19906 case V2DF_FTYPE_V2DF_DI:
19907 type = v2df_ftype_v2df_int64;
19908 break;
19909 case V2DF_FTYPE_V2DF_SI:
19910 type = v2df_ftype_v2df_int;
19911 break;
19912 case V2SF_FTYPE_V2SF_V2SF:
19913 type = v2sf_ftype_v2sf_v2sf;
19914 break;
19915 case V1DI_FTYPE_V1DI_V1DI:
19916 case V1DI_FTYPE_V1DI_V1DI_COUNT:
19917 type = v1di_ftype_v1di_v1di;
19918 break;
19919 case V1DI_FTYPE_V8QI_V8QI:
19920 type = v1di_ftype_v8qi_v8qi;
19921 break;
19922 case V1DI_FTYPE_V2SI_V2SI:
19923 type = v1di_ftype_v2si_v2si;
19924 break;
19925 case V1DI_FTYPE_V1DI_SI_COUNT:
19926 type = v1di_ftype_v1di_int;
19927 break;
19928 case UINT64_FTYPE_UINT64_UINT64:
19929 type = uint64_ftype_uint64_uint64;
19930 break;
19931 case UINT_FTYPE_UINT_UINT:
19932 type = unsigned_ftype_unsigned_unsigned;
19933 break;
19934 case UINT_FTYPE_UINT_USHORT:
19935 type = unsigned_ftype_unsigned_ushort;
19936 break;
19937 case UINT_FTYPE_UINT_UCHAR:
19938 type = unsigned_ftype_unsigned_uchar;
19939 break;
19940 case V8HI_FTYPE_V8HI_INT:
19941 type = v8hi_ftype_v8hi_int;
19942 break;
19943 case V4SI_FTYPE_V4SI_INT:
19944 type = v4si_ftype_v4si_int;
19945 break;
19946 case V4HI_FTYPE_V4HI_INT:
19947 type = v4hi_ftype_v4hi_int;
19948 break;
19949 case V4SF_FTYPE_V4SF_INT:
19950 type = v4sf_ftype_v4sf_int;
19951 break;
19952 case V2DI_FTYPE_V2DI_INT:
19953 case V2DI2TI_FTYPE_V2DI_INT:
19954 type = v2di_ftype_v2di_int;
19955 break;
19956 case V2DF_FTYPE_V2DF_INT:
19957 type = v2df_ftype_v2df_int;
19958 break;
19959 case V16QI_FTYPE_V16QI_V16QI_V16QI:
19960 type = v16qi_ftype_v16qi_v16qi_v16qi;
19961 break;
19962 case V4SF_FTYPE_V4SF_V4SF_V4SF:
19963 type = v4sf_ftype_v4sf_v4sf_v4sf;
19964 break;
19965 case V2DF_FTYPE_V2DF_V2DF_V2DF:
19966 type = v2df_ftype_v2df_v2df_v2df;
19967 break;
19968 case V16QI_FTYPE_V16QI_V16QI_INT:
19969 type = v16qi_ftype_v16qi_v16qi_int;
19970 break;
19971 case V8HI_FTYPE_V8HI_V8HI_INT:
19972 type = v8hi_ftype_v8hi_v8hi_int;
19973 break;
19974 case V4SI_FTYPE_V4SI_V4SI_INT:
19975 type = v4si_ftype_v4si_v4si_int;
19976 break;
19977 case V4SF_FTYPE_V4SF_V4SF_INT:
19978 type = v4sf_ftype_v4sf_v4sf_int;
19979 break;
19980 case V2DI_FTYPE_V2DI_V2DI_INT:
19981 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
19982 type = v2di_ftype_v2di_v2di_int;
19983 break;
19984 case V2DF_FTYPE_V2DF_V2DF_INT:
19985 type = v2df_ftype_v2df_v2df_int;
19986 break;
19987 case V2DI_FTYPE_V2DI_UINT_UINT:
19988 type = v2di_ftype_v2di_unsigned_unsigned;
19989 break;
19990 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
19991 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
19992 break;
19993 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
19994 type = v1di_ftype_v1di_v1di_int;
19995 break;
19996 default:
19997 gcc_unreachable ();
20000 def_builtin_const (d->mask, d->name, type, d->code);
20003 /* pcmpestr[im] insns. */
20004 for (i = 0, d = bdesc_pcmpestr;
20005 i < ARRAY_SIZE (bdesc_pcmpestr);
20006 i++, d++)
20008 if (d->code == IX86_BUILTIN_PCMPESTRM128)
20009 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
20010 else
20011 ftype = int_ftype_v16qi_int_v16qi_int_int;
20012 def_builtin_const (d->mask, d->name, ftype, d->code);
20015 /* pcmpistr[im] insns. */
20016 for (i = 0, d = bdesc_pcmpistr;
20017 i < ARRAY_SIZE (bdesc_pcmpistr);
20018 i++, d++)
20020 if (d->code == IX86_BUILTIN_PCMPISTRM128)
20021 ftype = v16qi_ftype_v16qi_v16qi_int;
20022 else
20023 ftype = int_ftype_v16qi_v16qi_int;
20024 def_builtin_const (d->mask, d->name, ftype, d->code);
20027 /* comi/ucomi insns. */
20028 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
20029 if (d->mask == OPTION_MASK_ISA_SSE2)
20030 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
20031 else
20032 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
20034 /* SSE */
20035 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
20036 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
20038 /* SSE or 3DNow!A */
20039 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
20041 /* SSE2 */
20042 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
20044 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
20045 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
20047 /* SSE3. */
20048 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
20049 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
20051 /* AES */
20052 if (TARGET_AES)
20054 /* Define AES built-in functions only if AES is enabled. */
20055 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
20056 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
20057 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
20058 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
20059 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
20060 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
20063 /* PCLMUL */
20064 if (TARGET_PCLMUL)
20066 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
20067 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
20070 /* Access to the vec_init patterns. */
20071 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
20072 integer_type_node, NULL_TREE);
20073 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
20075 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
20076 short_integer_type_node,
20077 short_integer_type_node,
20078 short_integer_type_node, NULL_TREE);
20079 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
20081 ftype = build_function_type_list (V8QI_type_node, char_type_node,
20082 char_type_node, char_type_node,
20083 char_type_node, char_type_node,
20084 char_type_node, char_type_node,
20085 char_type_node, NULL_TREE);
20086 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
20088 /* Access to the vec_extract patterns. */
20089 ftype = build_function_type_list (double_type_node, V2DF_type_node,
20090 integer_type_node, NULL_TREE);
20091 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
20093 ftype = build_function_type_list (long_long_integer_type_node,
20094 V2DI_type_node, integer_type_node,
20095 NULL_TREE);
20096 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
20098 ftype = build_function_type_list (float_type_node, V4SF_type_node,
20099 integer_type_node, NULL_TREE);
20100 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
20102 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
20103 integer_type_node, NULL_TREE);
20104 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
20106 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20107 integer_type_node, NULL_TREE);
20108 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
20110 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20111 integer_type_node, NULL_TREE);
20112 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
20114 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20115 integer_type_node, NULL_TREE);
20116 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
20118 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20119 integer_type_node, NULL_TREE);
20120 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
20122 /* Access to the vec_set patterns. */
20123 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20124 intDI_type_node,
20125 integer_type_node, NULL_TREE);
20126 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
20128 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20129 float_type_node,
20130 integer_type_node, NULL_TREE);
20131 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
20133 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20134 intSI_type_node,
20135 integer_type_node, NULL_TREE);
20136 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
20138 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20139 intHI_type_node,
20140 integer_type_node, NULL_TREE);
20141 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
20143 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20144 intHI_type_node,
20145 integer_type_node, NULL_TREE);
20146 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
20148 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20149 intQI_type_node,
20150 integer_type_node, NULL_TREE);
20151 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
20153 /* Add SSE5 multi-arg argument instructions */
20154 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20156 tree mtype = NULL_TREE;
20158 if (d->name == 0)
20159 continue;
20161 switch ((enum multi_arg_type)d->flag)
20163 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
20164 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
20165 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
20166 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
20167 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
20168 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
20169 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
20170 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20171 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
20172 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
20173 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
20174 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
20175 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
20176 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
20177 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
20178 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
20179 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
20180 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
20181 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
20182 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
20183 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
20184 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
20185 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
20186 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
20187 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
20188 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
20189 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
20190 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
20191 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
20192 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
20193 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
20194 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
20195 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
20196 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
20197 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
20198 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
20199 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
20200 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
20201 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
20202 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
20203 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
20204 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
20205 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
20206 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
20207 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
20208 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
20209 case MULTI_ARG_UNKNOWN:
20210 default:
20211 gcc_unreachable ();
20214 if (mtype)
20215 def_builtin_const (d->mask, d->name, mtype, d->code);
20219 static void
20220 ix86_init_builtins (void)
20222 tree float128_type_node = make_node (REAL_TYPE);
20223 tree ftype, decl;
20225 /* The __float80 type. */
20226 if (TYPE_MODE (long_double_type_node) == XFmode)
20227 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
20228 "__float80");
20229 else
20231 /* The __float80 type. */
20232 tree float80_type_node = make_node (REAL_TYPE);
20234 TYPE_PRECISION (float80_type_node) = 80;
20235 layout_type (float80_type_node);
20236 (*lang_hooks.types.register_builtin_type) (float80_type_node,
20237 "__float80");
20240 /* The __float128 type. */
20241 TYPE_PRECISION (float128_type_node) = 128;
20242 layout_type (float128_type_node);
20243 (*lang_hooks.types.register_builtin_type) (float128_type_node,
20244 "__float128");
20246 /* TFmode support builtins. */
20247 ftype = build_function_type (float128_type_node, void_list_node);
20248 decl = add_builtin_function ("__builtin_infq", ftype,
20249 IX86_BUILTIN_INFQ, BUILT_IN_MD,
20250 NULL, NULL_TREE);
20251 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
20253 /* We will expand them to normal call if SSE2 isn't available since
20254 they are used by libgcc. */
20255 ftype = build_function_type_list (float128_type_node,
20256 float128_type_node,
20257 NULL_TREE);
20258 decl = add_builtin_function ("__builtin_fabsq", ftype,
20259 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
20260 "__fabstf2", NULL_TREE);
20261 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
20262 TREE_READONLY (decl) = 1;
20264 ftype = build_function_type_list (float128_type_node,
20265 float128_type_node,
20266 float128_type_node,
20267 NULL_TREE);
20268 decl = add_builtin_function ("__builtin_copysignq", ftype,
20269 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
20270 "__copysigntf3", NULL_TREE);
20271 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
20272 TREE_READONLY (decl) = 1;
20274 if (TARGET_MMX)
20275 ix86_init_mmx_sse_builtins ();
20278 /* Errors in the source file can cause expand_expr to return const0_rtx
20279 where we expect a vector. To avoid crashing, use one of the vector
20280 clear instructions. */
20281 static rtx
20282 safe_vector_operand (rtx x, enum machine_mode mode)
20284 if (x == const0_rtx)
20285 x = CONST0_RTX (mode);
20286 return x;
20289 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
20291 static rtx
20292 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
20294 rtx pat;
20295 tree arg0 = CALL_EXPR_ARG (exp, 0);
20296 tree arg1 = CALL_EXPR_ARG (exp, 1);
20297 rtx op0 = expand_normal (arg0);
20298 rtx op1 = expand_normal (arg1);
20299 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20300 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20301 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20303 if (VECTOR_MODE_P (mode0))
20304 op0 = safe_vector_operand (op0, mode0);
20305 if (VECTOR_MODE_P (mode1))
20306 op1 = safe_vector_operand (op1, mode1);
20308 if (optimize || !target
20309 || GET_MODE (target) != tmode
20310 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20311 target = gen_reg_rtx (tmode);
20313 if (GET_MODE (op1) == SImode && mode1 == TImode)
20315 rtx x = gen_reg_rtx (V4SImode);
20316 emit_insn (gen_sse2_loadd (x, op1));
20317 op1 = gen_lowpart (TImode, x);
20320 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20321 op0 = copy_to_mode_reg (mode0, op0);
20322 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20323 op1 = copy_to_mode_reg (mode1, op1);
20325 pat = GEN_FCN (icode) (target, op0, op1);
20326 if (! pat)
20327 return 0;
20329 emit_insn (pat);
20331 return target;
20334 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20336 static rtx
20337 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20338 enum multi_arg_type m_type,
20339 enum insn_code sub_code)
20341 rtx pat;
20342 int i;
20343 int nargs;
20344 bool comparison_p = false;
20345 bool tf_p = false;
20346 bool last_arg_constant = false;
20347 int num_memory = 0;
20348 struct {
20349 rtx op;
20350 enum machine_mode mode;
20351 } args[4];
20353 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20355 switch (m_type)
20357 case MULTI_ARG_3_SF:
20358 case MULTI_ARG_3_DF:
20359 case MULTI_ARG_3_DI:
20360 case MULTI_ARG_3_SI:
20361 case MULTI_ARG_3_SI_DI:
20362 case MULTI_ARG_3_HI:
20363 case MULTI_ARG_3_HI_SI:
20364 case MULTI_ARG_3_QI:
20365 case MULTI_ARG_3_PERMPS:
20366 case MULTI_ARG_3_PERMPD:
20367 nargs = 3;
20368 break;
20370 case MULTI_ARG_2_SF:
20371 case MULTI_ARG_2_DF:
20372 case MULTI_ARG_2_DI:
20373 case MULTI_ARG_2_SI:
20374 case MULTI_ARG_2_HI:
20375 case MULTI_ARG_2_QI:
20376 nargs = 2;
20377 break;
20379 case MULTI_ARG_2_DI_IMM:
20380 case MULTI_ARG_2_SI_IMM:
20381 case MULTI_ARG_2_HI_IMM:
20382 case MULTI_ARG_2_QI_IMM:
20383 nargs = 2;
20384 last_arg_constant = true;
20385 break;
20387 case MULTI_ARG_1_SF:
20388 case MULTI_ARG_1_DF:
20389 case MULTI_ARG_1_DI:
20390 case MULTI_ARG_1_SI:
20391 case MULTI_ARG_1_HI:
20392 case MULTI_ARG_1_QI:
20393 case MULTI_ARG_1_SI_DI:
20394 case MULTI_ARG_1_HI_DI:
20395 case MULTI_ARG_1_HI_SI:
20396 case MULTI_ARG_1_QI_DI:
20397 case MULTI_ARG_1_QI_SI:
20398 case MULTI_ARG_1_QI_HI:
20399 case MULTI_ARG_1_PH2PS:
20400 case MULTI_ARG_1_PS2PH:
20401 nargs = 1;
20402 break;
20404 case MULTI_ARG_2_SF_CMP:
20405 case MULTI_ARG_2_DF_CMP:
20406 case MULTI_ARG_2_DI_CMP:
20407 case MULTI_ARG_2_SI_CMP:
20408 case MULTI_ARG_2_HI_CMP:
20409 case MULTI_ARG_2_QI_CMP:
20410 nargs = 2;
20411 comparison_p = true;
20412 break;
20414 case MULTI_ARG_2_SF_TF:
20415 case MULTI_ARG_2_DF_TF:
20416 case MULTI_ARG_2_DI_TF:
20417 case MULTI_ARG_2_SI_TF:
20418 case MULTI_ARG_2_HI_TF:
20419 case MULTI_ARG_2_QI_TF:
20420 nargs = 2;
20421 tf_p = true;
20422 break;
20424 case MULTI_ARG_UNKNOWN:
20425 default:
20426 gcc_unreachable ();
20429 if (optimize || !target
20430 || GET_MODE (target) != tmode
20431 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20432 target = gen_reg_rtx (tmode);
20434 gcc_assert (nargs <= 4);
20436 for (i = 0; i < nargs; i++)
20438 tree arg = CALL_EXPR_ARG (exp, i);
20439 rtx op = expand_normal (arg);
20440 int adjust = (comparison_p) ? 1 : 0;
20441 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20443 if (last_arg_constant && i == nargs-1)
20445 if (GET_CODE (op) != CONST_INT)
20447 error ("last argument must be an immediate");
20448 return gen_reg_rtx (tmode);
20451 else
20453 if (VECTOR_MODE_P (mode))
20454 op = safe_vector_operand (op, mode);
20456 /* If we aren't optimizing, only allow one memory operand to be
20457 generated. */
20458 if (memory_operand (op, mode))
20459 num_memory++;
20461 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20463 if (optimize
20464 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20465 || num_memory > 1)
20466 op = force_reg (mode, op);
20469 args[i].op = op;
20470 args[i].mode = mode;
20473 switch (nargs)
20475 case 1:
20476 pat = GEN_FCN (icode) (target, args[0].op);
20477 break;
20479 case 2:
20480 if (tf_p)
20481 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20482 GEN_INT ((int)sub_code));
20483 else if (! comparison_p)
20484 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20485 else
20487 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20488 args[0].op,
20489 args[1].op);
20491 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20493 break;
20495 case 3:
20496 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20497 break;
20499 default:
20500 gcc_unreachable ();
20503 if (! pat)
20504 return 0;
20506 emit_insn (pat);
20507 return target;
20510 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20511 insns with vec_merge. */
20513 static rtx
20514 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
20515 rtx target)
20517 rtx pat;
20518 tree arg0 = CALL_EXPR_ARG (exp, 0);
20519 rtx op1, op0 = expand_normal (arg0);
20520 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20521 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20523 if (optimize || !target
20524 || GET_MODE (target) != tmode
20525 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20526 target = gen_reg_rtx (tmode);
20528 if (VECTOR_MODE_P (mode0))
20529 op0 = safe_vector_operand (op0, mode0);
20531 if ((optimize && !register_operand (op0, mode0))
20532 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20533 op0 = copy_to_mode_reg (mode0, op0);
20535 op1 = op0;
20536 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20537 op1 = copy_to_mode_reg (mode0, op1);
20539 pat = GEN_FCN (icode) (target, op0, op1);
20540 if (! pat)
20541 return 0;
20542 emit_insn (pat);
20543 return target;
20546 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20548 static rtx
20549 ix86_expand_sse_compare (const struct builtin_description *d,
20550 tree exp, rtx target, bool swap)
20552 rtx pat;
20553 tree arg0 = CALL_EXPR_ARG (exp, 0);
20554 tree arg1 = CALL_EXPR_ARG (exp, 1);
20555 rtx op0 = expand_normal (arg0);
20556 rtx op1 = expand_normal (arg1);
20557 rtx op2;
20558 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20559 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20560 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20561 enum rtx_code comparison = d->comparison;
20563 if (VECTOR_MODE_P (mode0))
20564 op0 = safe_vector_operand (op0, mode0);
20565 if (VECTOR_MODE_P (mode1))
20566 op1 = safe_vector_operand (op1, mode1);
20568 /* Swap operands if we have a comparison that isn't available in
20569 hardware. */
20570 if (swap)
20572 rtx tmp = gen_reg_rtx (mode1);
20573 emit_move_insn (tmp, op1);
20574 op1 = op0;
20575 op0 = tmp;
20578 if (optimize || !target
20579 || GET_MODE (target) != tmode
20580 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20581 target = gen_reg_rtx (tmode);
20583 if ((optimize && !register_operand (op0, mode0))
20584 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20585 op0 = copy_to_mode_reg (mode0, op0);
20586 if ((optimize && !register_operand (op1, mode1))
20587 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20588 op1 = copy_to_mode_reg (mode1, op1);
20590 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20591 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20592 if (! pat)
20593 return 0;
20594 emit_insn (pat);
20595 return target;
20598 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20600 static rtx
20601 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20602 rtx target)
20604 rtx pat;
20605 tree arg0 = CALL_EXPR_ARG (exp, 0);
20606 tree arg1 = CALL_EXPR_ARG (exp, 1);
20607 rtx op0 = expand_normal (arg0);
20608 rtx op1 = expand_normal (arg1);
20609 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20610 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20611 enum rtx_code comparison = d->comparison;
20613 if (VECTOR_MODE_P (mode0))
20614 op0 = safe_vector_operand (op0, mode0);
20615 if (VECTOR_MODE_P (mode1))
20616 op1 = safe_vector_operand (op1, mode1);
20618 /* Swap operands if we have a comparison that isn't available in
20619 hardware. */
20620 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20622 rtx tmp = op1;
20623 op1 = op0;
20624 op0 = tmp;
20627 target = gen_reg_rtx (SImode);
20628 emit_move_insn (target, const0_rtx);
20629 target = gen_rtx_SUBREG (QImode, target, 0);
20631 if ((optimize && !register_operand (op0, mode0))
20632 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20633 op0 = copy_to_mode_reg (mode0, op0);
20634 if ((optimize && !register_operand (op1, mode1))
20635 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20636 op1 = copy_to_mode_reg (mode1, op1);
20638 pat = GEN_FCN (d->icode) (op0, op1);
20639 if (! pat)
20640 return 0;
20641 emit_insn (pat);
20642 emit_insn (gen_rtx_SET (VOIDmode,
20643 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20644 gen_rtx_fmt_ee (comparison, QImode,
20645 SET_DEST (pat),
20646 const0_rtx)));
20648 return SUBREG_REG (target);
20651 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20653 static rtx
20654 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20655 rtx target)
20657 rtx pat;
20658 tree arg0 = CALL_EXPR_ARG (exp, 0);
20659 tree arg1 = CALL_EXPR_ARG (exp, 1);
20660 rtx op0 = expand_normal (arg0);
20661 rtx op1 = expand_normal (arg1);
20662 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20663 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20664 enum rtx_code comparison = d->comparison;
20666 if (VECTOR_MODE_P (mode0))
20667 op0 = safe_vector_operand (op0, mode0);
20668 if (VECTOR_MODE_P (mode1))
20669 op1 = safe_vector_operand (op1, mode1);
20671 target = gen_reg_rtx (SImode);
20672 emit_move_insn (target, const0_rtx);
20673 target = gen_rtx_SUBREG (QImode, target, 0);
20675 if ((optimize && !register_operand (op0, mode0))
20676 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20677 op0 = copy_to_mode_reg (mode0, op0);
20678 if ((optimize && !register_operand (op1, mode1))
20679 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20680 op1 = copy_to_mode_reg (mode1, op1);
20682 pat = GEN_FCN (d->icode) (op0, op1);
20683 if (! pat)
20684 return 0;
20685 emit_insn (pat);
20686 emit_insn (gen_rtx_SET (VOIDmode,
20687 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20688 gen_rtx_fmt_ee (comparison, QImode,
20689 SET_DEST (pat),
20690 const0_rtx)));
20692 return SUBREG_REG (target);
20695 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20697 static rtx
20698 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20699 tree exp, rtx target)
20701 rtx pat;
20702 tree arg0 = CALL_EXPR_ARG (exp, 0);
20703 tree arg1 = CALL_EXPR_ARG (exp, 1);
20704 tree arg2 = CALL_EXPR_ARG (exp, 2);
20705 tree arg3 = CALL_EXPR_ARG (exp, 3);
20706 tree arg4 = CALL_EXPR_ARG (exp, 4);
20707 rtx scratch0, scratch1;
20708 rtx op0 = expand_normal (arg0);
20709 rtx op1 = expand_normal (arg1);
20710 rtx op2 = expand_normal (arg2);
20711 rtx op3 = expand_normal (arg3);
20712 rtx op4 = expand_normal (arg4);
20713 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20715 tmode0 = insn_data[d->icode].operand[0].mode;
20716 tmode1 = insn_data[d->icode].operand[1].mode;
20717 modev2 = insn_data[d->icode].operand[2].mode;
20718 modei3 = insn_data[d->icode].operand[3].mode;
20719 modev4 = insn_data[d->icode].operand[4].mode;
20720 modei5 = insn_data[d->icode].operand[5].mode;
20721 modeimm = insn_data[d->icode].operand[6].mode;
20723 if (VECTOR_MODE_P (modev2))
20724 op0 = safe_vector_operand (op0, modev2);
20725 if (VECTOR_MODE_P (modev4))
20726 op2 = safe_vector_operand (op2, modev4);
20728 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20729 op0 = copy_to_mode_reg (modev2, op0);
20730 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20731 op1 = copy_to_mode_reg (modei3, op1);
20732 if ((optimize && !register_operand (op2, modev4))
20733 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20734 op2 = copy_to_mode_reg (modev4, op2);
20735 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20736 op3 = copy_to_mode_reg (modei5, op3);
20738 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20740 error ("the fifth argument must be a 8-bit immediate");
20741 return const0_rtx;
20744 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20746 if (optimize || !target
20747 || GET_MODE (target) != tmode0
20748 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20749 target = gen_reg_rtx (tmode0);
20751 scratch1 = gen_reg_rtx (tmode1);
20753 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20755 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20757 if (optimize || !target
20758 || GET_MODE (target) != tmode1
20759 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20760 target = gen_reg_rtx (tmode1);
20762 scratch0 = gen_reg_rtx (tmode0);
20764 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20766 else
20768 gcc_assert (d->flag);
20770 scratch0 = gen_reg_rtx (tmode0);
20771 scratch1 = gen_reg_rtx (tmode1);
20773 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20776 if (! pat)
20777 return 0;
20779 emit_insn (pat);
20781 if (d->flag)
20783 target = gen_reg_rtx (SImode);
20784 emit_move_insn (target, const0_rtx);
20785 target = gen_rtx_SUBREG (QImode, target, 0);
20787 emit_insn
20788 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20789 gen_rtx_fmt_ee (EQ, QImode,
20790 gen_rtx_REG ((enum machine_mode) d->flag,
20791 FLAGS_REG),
20792 const0_rtx)));
20793 return SUBREG_REG (target);
20795 else
20796 return target;
20800 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20802 static rtx
20803 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20804 tree exp, rtx target)
20806 rtx pat;
20807 tree arg0 = CALL_EXPR_ARG (exp, 0);
20808 tree arg1 = CALL_EXPR_ARG (exp, 1);
20809 tree arg2 = CALL_EXPR_ARG (exp, 2);
20810 rtx scratch0, scratch1;
20811 rtx op0 = expand_normal (arg0);
20812 rtx op1 = expand_normal (arg1);
20813 rtx op2 = expand_normal (arg2);
20814 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20816 tmode0 = insn_data[d->icode].operand[0].mode;
20817 tmode1 = insn_data[d->icode].operand[1].mode;
20818 modev2 = insn_data[d->icode].operand[2].mode;
20819 modev3 = insn_data[d->icode].operand[3].mode;
20820 modeimm = insn_data[d->icode].operand[4].mode;
20822 if (VECTOR_MODE_P (modev2))
20823 op0 = safe_vector_operand (op0, modev2);
20824 if (VECTOR_MODE_P (modev3))
20825 op1 = safe_vector_operand (op1, modev3);
20827 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20828 op0 = copy_to_mode_reg (modev2, op0);
20829 if ((optimize && !register_operand (op1, modev3))
20830 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20831 op1 = copy_to_mode_reg (modev3, op1);
20833 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20835 error ("the third argument must be a 8-bit immediate");
20836 return const0_rtx;
20839 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20841 if (optimize || !target
20842 || GET_MODE (target) != tmode0
20843 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20844 target = gen_reg_rtx (tmode0);
20846 scratch1 = gen_reg_rtx (tmode1);
20848 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20850 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20852 if (optimize || !target
20853 || GET_MODE (target) != tmode1
20854 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20855 target = gen_reg_rtx (tmode1);
20857 scratch0 = gen_reg_rtx (tmode0);
20859 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20861 else
20863 gcc_assert (d->flag);
20865 scratch0 = gen_reg_rtx (tmode0);
20866 scratch1 = gen_reg_rtx (tmode1);
20868 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20871 if (! pat)
20872 return 0;
20874 emit_insn (pat);
20876 if (d->flag)
20878 target = gen_reg_rtx (SImode);
20879 emit_move_insn (target, const0_rtx);
20880 target = gen_rtx_SUBREG (QImode, target, 0);
20882 emit_insn
20883 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20884 gen_rtx_fmt_ee (EQ, QImode,
20885 gen_rtx_REG ((enum machine_mode) d->flag,
20886 FLAGS_REG),
20887 const0_rtx)));
20888 return SUBREG_REG (target);
20890 else
20891 return target;
20894 /* Subroutine of ix86_expand_builtin to take care of insns with
20895 variable number of operands. */
20897 static rtx
20898 ix86_expand_args_builtin (const struct builtin_description *d,
20899 tree exp, rtx target)
20901 rtx pat, real_target;
20902 unsigned int i, nargs;
20903 unsigned int nargs_constant = 0;
20904 int num_memory = 0;
20905 struct
20907 rtx op;
20908 enum machine_mode mode;
20909 } args[4];
20910 bool last_arg_count = false;
20911 enum insn_code icode = d->icode;
20912 const struct insn_data *insn_p = &insn_data[icode];
20913 enum machine_mode tmode = insn_p->operand[0].mode;
20914 enum machine_mode rmode = VOIDmode;
20915 bool swap = false;
20916 enum rtx_code comparison = d->comparison;
20918 switch ((enum ix86_builtin_type) d->flag)
20920 case INT_FTYPE_V2DI_V2DI_PTEST:
20921 return ix86_expand_sse_ptest (d, exp, target);
20922 case FLOAT128_FTYPE_FLOAT128:
20923 case FLOAT_FTYPE_FLOAT:
20924 case INT64_FTYPE_V4SF:
20925 case INT64_FTYPE_V2DF:
20926 case INT_FTYPE_V16QI:
20927 case INT_FTYPE_V8QI:
20928 case INT_FTYPE_V4SF:
20929 case INT_FTYPE_V2DF:
20930 case V16QI_FTYPE_V16QI:
20931 case V8HI_FTYPE_V8HI:
20932 case V8HI_FTYPE_V16QI:
20933 case V8QI_FTYPE_V8QI:
20934 case V4SI_FTYPE_V4SI:
20935 case V4SI_FTYPE_V16QI:
20936 case V4SI_FTYPE_V4SF:
20937 case V4SI_FTYPE_V8HI:
20938 case V4SI_FTYPE_V2DF:
20939 case V4HI_FTYPE_V4HI:
20940 case V4SF_FTYPE_V4SF:
20941 case V4SF_FTYPE_V4SI:
20942 case V4SF_FTYPE_V2DF:
20943 case V2DI_FTYPE_V2DI:
20944 case V2DI_FTYPE_V16QI:
20945 case V2DI_FTYPE_V8HI:
20946 case V2DI_FTYPE_V4SI:
20947 case V2DF_FTYPE_V2DF:
20948 case V2DF_FTYPE_V4SI:
20949 case V2DF_FTYPE_V4SF:
20950 case V2DF_FTYPE_V2SI:
20951 case V2SI_FTYPE_V2SI:
20952 case V2SI_FTYPE_V4SF:
20953 case V2SI_FTYPE_V2SF:
20954 case V2SI_FTYPE_V2DF:
20955 case V2SF_FTYPE_V2SF:
20956 case V2SF_FTYPE_V2SI:
20957 nargs = 1;
20958 break;
20959 case V4SF_FTYPE_V4SF_VEC_MERGE:
20960 case V2DF_FTYPE_V2DF_VEC_MERGE:
20961 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
20962 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
20963 case V16QI_FTYPE_V16QI_V16QI:
20964 case V16QI_FTYPE_V8HI_V8HI:
20965 case V8QI_FTYPE_V8QI_V8QI:
20966 case V8QI_FTYPE_V4HI_V4HI:
20967 case V8HI_FTYPE_V8HI_V8HI:
20968 case V8HI_FTYPE_V16QI_V16QI:
20969 case V8HI_FTYPE_V4SI_V4SI:
20970 case V4SI_FTYPE_V4SI_V4SI:
20971 case V4SI_FTYPE_V8HI_V8HI:
20972 case V4SI_FTYPE_V4SF_V4SF:
20973 case V4SI_FTYPE_V2DF_V2DF:
20974 case V4HI_FTYPE_V4HI_V4HI:
20975 case V4HI_FTYPE_V8QI_V8QI:
20976 case V4HI_FTYPE_V2SI_V2SI:
20977 case V4SF_FTYPE_V4SF_V4SF:
20978 case V4SF_FTYPE_V4SF_V2SI:
20979 case V4SF_FTYPE_V4SF_V2DF:
20980 case V4SF_FTYPE_V4SF_DI:
20981 case V4SF_FTYPE_V4SF_SI:
20982 case V2DI_FTYPE_V2DI_V2DI:
20983 case V2DI_FTYPE_V16QI_V16QI:
20984 case V2DI_FTYPE_V4SI_V4SI:
20985 case V2DI_FTYPE_V2DI_V16QI:
20986 case V2DI_FTYPE_V2DF_V2DF:
20987 case V2SI_FTYPE_V2SI_V2SI:
20988 case V2SI_FTYPE_V4HI_V4HI:
20989 case V2SI_FTYPE_V2SF_V2SF:
20990 case V2DF_FTYPE_V2DF_V2DF:
20991 case V2DF_FTYPE_V2DF_V4SF:
20992 case V2DF_FTYPE_V2DF_DI:
20993 case V2DF_FTYPE_V2DF_SI:
20994 case V2SF_FTYPE_V2SF_V2SF:
20995 case V1DI_FTYPE_V1DI_V1DI:
20996 case V1DI_FTYPE_V8QI_V8QI:
20997 case V1DI_FTYPE_V2SI_V2SI:
20998 if (comparison == UNKNOWN)
20999 return ix86_expand_binop_builtin (icode, exp, target);
21000 nargs = 2;
21001 break;
21002 case V4SF_FTYPE_V4SF_V4SF_SWAP:
21003 case V2DF_FTYPE_V2DF_V2DF_SWAP:
21004 gcc_assert (comparison != UNKNOWN);
21005 nargs = 2;
21006 swap = true;
21007 break;
21008 case V8HI_FTYPE_V8HI_V8HI_COUNT:
21009 case V8HI_FTYPE_V8HI_SI_COUNT:
21010 case V4SI_FTYPE_V4SI_V4SI_COUNT:
21011 case V4SI_FTYPE_V4SI_SI_COUNT:
21012 case V4HI_FTYPE_V4HI_V4HI_COUNT:
21013 case V4HI_FTYPE_V4HI_SI_COUNT:
21014 case V2DI_FTYPE_V2DI_V2DI_COUNT:
21015 case V2DI_FTYPE_V2DI_SI_COUNT:
21016 case V2SI_FTYPE_V2SI_V2SI_COUNT:
21017 case V2SI_FTYPE_V2SI_SI_COUNT:
21018 case V1DI_FTYPE_V1DI_V1DI_COUNT:
21019 case V1DI_FTYPE_V1DI_SI_COUNT:
21020 nargs = 2;
21021 last_arg_count = true;
21022 break;
21023 case UINT64_FTYPE_UINT64_UINT64:
21024 case UINT_FTYPE_UINT_UINT:
21025 case UINT_FTYPE_UINT_USHORT:
21026 case UINT_FTYPE_UINT_UCHAR:
21027 nargs = 2;
21028 break;
21029 case V2DI2TI_FTYPE_V2DI_INT:
21030 nargs = 2;
21031 rmode = V2DImode;
21032 nargs_constant = 1;
21033 break;
21034 case V8HI_FTYPE_V8HI_INT:
21035 case V4SI_FTYPE_V4SI_INT:
21036 case V4HI_FTYPE_V4HI_INT:
21037 case V4SF_FTYPE_V4SF_INT:
21038 case V2DI_FTYPE_V2DI_INT:
21039 case V2DF_FTYPE_V2DF_INT:
21040 nargs = 2;
21041 nargs_constant = 1;
21042 break;
21043 case V16QI_FTYPE_V16QI_V16QI_V16QI:
21044 case V4SF_FTYPE_V4SF_V4SF_V4SF:
21045 case V2DF_FTYPE_V2DF_V2DF_V2DF:
21046 nargs = 3;
21047 break;
21048 case V16QI_FTYPE_V16QI_V16QI_INT:
21049 case V8HI_FTYPE_V8HI_V8HI_INT:
21050 case V4SI_FTYPE_V4SI_V4SI_INT:
21051 case V4SF_FTYPE_V4SF_V4SF_INT:
21052 case V2DI_FTYPE_V2DI_V2DI_INT:
21053 case V2DF_FTYPE_V2DF_V2DF_INT:
21054 nargs = 3;
21055 nargs_constant = 1;
21056 break;
21057 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
21058 nargs = 3;
21059 rmode = V2DImode;
21060 nargs_constant = 1;
21061 break;
21062 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
21063 nargs = 3;
21064 rmode = DImode;
21065 nargs_constant = 1;
21066 break;
21067 case V2DI_FTYPE_V2DI_UINT_UINT:
21068 nargs = 3;
21069 nargs_constant = 2;
21070 break;
21071 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
21072 nargs = 4;
21073 nargs_constant = 2;
21074 break;
21075 default:
21076 gcc_unreachable ();
21079 gcc_assert (nargs <= ARRAY_SIZE (args));
21081 if (comparison != UNKNOWN)
21083 gcc_assert (nargs == 2);
21084 return ix86_expand_sse_compare (d, exp, target, swap);
21087 if (rmode == VOIDmode || rmode == tmode)
21089 if (optimize
21090 || target == 0
21091 || GET_MODE (target) != tmode
21092 || ! (*insn_p->operand[0].predicate) (target, tmode))
21093 target = gen_reg_rtx (tmode);
21094 real_target = target;
21096 else
21098 target = gen_reg_rtx (rmode);
21099 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
21102 for (i = 0; i < nargs; i++)
21104 tree arg = CALL_EXPR_ARG (exp, i);
21105 rtx op = expand_normal (arg);
21106 enum machine_mode mode = insn_p->operand[i + 1].mode;
21107 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
21109 if (last_arg_count && (i + 1) == nargs)
21111 /* SIMD shift insns take either an 8-bit immediate or
21112 register as count. But builtin functions take int as
21113 count. If count doesn't match, we put it in register. */
21114 if (!match)
21116 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
21117 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
21118 op = copy_to_reg (op);
21121 else if ((nargs - i) <= nargs_constant)
21123 if (!match)
21124 switch (icode)
21126 case CODE_FOR_sse4_1_roundpd:
21127 case CODE_FOR_sse4_1_roundps:
21128 case CODE_FOR_sse4_1_roundsd:
21129 case CODE_FOR_sse4_1_roundss:
21130 case CODE_FOR_sse4_1_blendps:
21131 error ("the last argument must be a 4-bit immediate");
21132 return const0_rtx;
21134 case CODE_FOR_sse4_1_blendpd:
21135 error ("the last argument must be a 2-bit immediate");
21136 return const0_rtx;
21138 default:
21139 switch (nargs_constant)
21141 case 2:
21142 if ((nargs - i) == nargs_constant)
21144 error ("the next to last argument must be an 8-bit immediate");
21145 break;
21147 case 1:
21148 error ("the last argument must be an 8-bit immediate");
21149 break;
21150 default:
21151 gcc_unreachable ();
21153 return const0_rtx;
21156 else
21158 if (VECTOR_MODE_P (mode))
21159 op = safe_vector_operand (op, mode);
21161 /* If we aren't optimizing, only allow one memory operand to
21162 be generated. */
21163 if (memory_operand (op, mode))
21164 num_memory++;
21166 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
21168 if (optimize || !match || num_memory > 1)
21169 op = copy_to_mode_reg (mode, op);
21171 else
21173 op = copy_to_reg (op);
21174 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
21178 args[i].op = op;
21179 args[i].mode = mode;
21182 switch (nargs)
21184 case 1:
21185 pat = GEN_FCN (icode) (real_target, args[0].op);
21186 break;
21187 case 2:
21188 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
21189 break;
21190 case 3:
21191 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21192 args[2].op);
21193 break;
21194 case 4:
21195 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21196 args[2].op, args[3].op);
21197 break;
21198 default:
21199 gcc_unreachable ();
21202 if (! pat)
21203 return 0;
21205 emit_insn (pat);
21206 return target;
21209 /* Subroutine of ix86_expand_builtin to take care of special insns
21210 with variable number of operands. */
21212 static rtx
21213 ix86_expand_special_args_builtin (const struct builtin_description *d,
21214 tree exp, rtx target)
21216 tree arg;
21217 rtx pat, op;
21218 unsigned int i, nargs, arg_adjust, memory;
21219 struct
21221 rtx op;
21222 enum machine_mode mode;
21223 } args[2];
21224 enum insn_code icode = d->icode;
21225 bool last_arg_constant = false;
21226 const struct insn_data *insn_p = &insn_data[icode];
21227 enum machine_mode tmode = insn_p->operand[0].mode;
21228 enum { load, store } class;
21230 switch ((enum ix86_special_builtin_type) d->flag)
21232 case VOID_FTYPE_VOID:
21233 emit_insn (GEN_FCN (icode) (target));
21234 return 0;
21235 case V2DI_FTYPE_PV2DI:
21236 case V16QI_FTYPE_PCCHAR:
21237 case V4SF_FTYPE_PCFLOAT:
21238 case V2DF_FTYPE_PCDOUBLE:
21239 nargs = 1;
21240 class = load;
21241 memory = 0;
21242 break;
21243 case VOID_FTYPE_PV2SF_V4SF:
21244 case VOID_FTYPE_PV2DI_V2DI:
21245 case VOID_FTYPE_PCHAR_V16QI:
21246 case VOID_FTYPE_PFLOAT_V4SF:
21247 case VOID_FTYPE_PDOUBLE_V2DF:
21248 case VOID_FTYPE_PDI_DI:
21249 case VOID_FTYPE_PINT_INT:
21250 nargs = 1;
21251 class = store;
21252 /* Reserve memory operand for target. */
21253 memory = ARRAY_SIZE (args);
21254 break;
21255 case V4SF_FTYPE_V4SF_PCV2SF:
21256 case V2DF_FTYPE_V2DF_PCDOUBLE:
21257 nargs = 2;
21258 class = load;
21259 memory = 1;
21260 break;
21261 default:
21262 gcc_unreachable ();
21265 gcc_assert (nargs <= ARRAY_SIZE (args));
21267 if (class == store)
21269 arg = CALL_EXPR_ARG (exp, 0);
21270 op = expand_normal (arg);
21271 gcc_assert (target == 0);
21272 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
21273 arg_adjust = 1;
21275 else
21277 arg_adjust = 0;
21278 if (optimize
21279 || target == 0
21280 || GET_MODE (target) != tmode
21281 || ! (*insn_p->operand[0].predicate) (target, tmode))
21282 target = gen_reg_rtx (tmode);
21285 for (i = 0; i < nargs; i++)
21287 enum machine_mode mode = insn_p->operand[i + 1].mode;
21288 bool match;
21290 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
21291 op = expand_normal (arg);
21292 match = (*insn_p->operand[i + 1].predicate) (op, mode);
21294 if (last_arg_constant && (i + 1) == nargs)
21296 if (!match)
21297 switch (icode)
21299 default:
21300 error ("the last argument must be an 8-bit immediate");
21301 return const0_rtx;
21304 else
21306 if (i == memory)
21308 /* This must be the memory operand. */
21309 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
21310 gcc_assert (GET_MODE (op) == mode
21311 || GET_MODE (op) == VOIDmode);
21313 else
21315 /* This must be register. */
21316 if (VECTOR_MODE_P (mode))
21317 op = safe_vector_operand (op, mode);
21319 gcc_assert (GET_MODE (op) == mode
21320 || GET_MODE (op) == VOIDmode);
21321 op = copy_to_mode_reg (mode, op);
21325 args[i].op = op;
21326 args[i].mode = mode;
21329 switch (nargs)
21331 case 1:
21332 pat = GEN_FCN (icode) (target, args[0].op);
21333 break;
21334 case 2:
21335 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21336 break;
21337 default:
21338 gcc_unreachable ();
21341 if (! pat)
21342 return 0;
21343 emit_insn (pat);
21344 return class == store ? 0 : target;
21347 /* Return the integer constant in ARG. Constrain it to be in the range
21348 of the subparts of VEC_TYPE; issue an error if not. */
21350 static int
21351 get_element_number (tree vec_type, tree arg)
21353 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
21355 if (!host_integerp (arg, 1)
21356 || (elt = tree_low_cst (arg, 1), elt > max))
21358 error ("selector must be an integer constant in the range 0..%wi", max);
21359 return 0;
21362 return elt;
21365 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21366 ix86_expand_vector_init. We DO have language-level syntax for this, in
21367 the form of (type){ init-list }. Except that since we can't place emms
21368 instructions from inside the compiler, we can't allow the use of MMX
21369 registers unless the user explicitly asks for it. So we do *not* define
21370 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
21371 we have builtins invoked by mmintrin.h that gives us license to emit
21372 these sorts of instructions. */
21374 static rtx
21375 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
21377 enum machine_mode tmode = TYPE_MODE (type);
21378 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
21379 int i, n_elt = GET_MODE_NUNITS (tmode);
21380 rtvec v = rtvec_alloc (n_elt);
21382 gcc_assert (VECTOR_MODE_P (tmode));
21383 gcc_assert (call_expr_nargs (exp) == n_elt);
21385 for (i = 0; i < n_elt; ++i)
21387 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
21388 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
21391 if (!target || !register_operand (target, tmode))
21392 target = gen_reg_rtx (tmode);
21394 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
21395 return target;
21398 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21399 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
21400 had a language-level syntax for referencing vector elements. */
21402 static rtx
21403 ix86_expand_vec_ext_builtin (tree exp, rtx target)
21405 enum machine_mode tmode, mode0;
21406 tree arg0, arg1;
21407 int elt;
21408 rtx op0;
21410 arg0 = CALL_EXPR_ARG (exp, 0);
21411 arg1 = CALL_EXPR_ARG (exp, 1);
21413 op0 = expand_normal (arg0);
21414 elt = get_element_number (TREE_TYPE (arg0), arg1);
21416 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21417 mode0 = TYPE_MODE (TREE_TYPE (arg0));
21418 gcc_assert (VECTOR_MODE_P (mode0));
21420 op0 = force_reg (mode0, op0);
21422 if (optimize || !target || !register_operand (target, tmode))
21423 target = gen_reg_rtx (tmode);
21425 ix86_expand_vector_extract (true, target, op0, elt);
21427 return target;
21430 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21431 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
21432 a language-level syntax for referencing vector elements. */
21434 static rtx
21435 ix86_expand_vec_set_builtin (tree exp)
21437 enum machine_mode tmode, mode1;
21438 tree arg0, arg1, arg2;
21439 int elt;
21440 rtx op0, op1, target;
21442 arg0 = CALL_EXPR_ARG (exp, 0);
21443 arg1 = CALL_EXPR_ARG (exp, 1);
21444 arg2 = CALL_EXPR_ARG (exp, 2);
21446 tmode = TYPE_MODE (TREE_TYPE (arg0));
21447 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21448 gcc_assert (VECTOR_MODE_P (tmode));
21450 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
21451 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
21452 elt = get_element_number (TREE_TYPE (arg0), arg2);
21454 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
21455 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
21457 op0 = force_reg (tmode, op0);
21458 op1 = force_reg (mode1, op1);
21460 /* OP0 is the source of these builtin functions and shouldn't be
21461 modified. Create a copy, use it and return it as target. */
21462 target = gen_reg_rtx (tmode);
21463 emit_move_insn (target, op0);
21464 ix86_expand_vector_set (true, target, op1, elt);
21466 return target;
21469 /* Expand an expression EXP that calls a built-in function,
21470 with result going to TARGET if that's convenient
21471 (and in mode MODE if that's convenient).
21472 SUBTARGET may be used as the target for computing one of EXP's operands.
21473 IGNORE is nonzero if the value is to be ignored. */
21475 static rtx
21476 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
21477 enum machine_mode mode ATTRIBUTE_UNUSED,
21478 int ignore ATTRIBUTE_UNUSED)
21480 const struct builtin_description *d;
21481 size_t i;
21482 enum insn_code icode;
21483 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21484 tree arg0, arg1, arg2;
21485 rtx op0, op1, op2, pat;
21486 enum machine_mode mode0, mode1, mode2;
21487 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
21489 switch (fcode)
21491 case IX86_BUILTIN_MASKMOVQ:
21492 case IX86_BUILTIN_MASKMOVDQU:
21493 icode = (fcode == IX86_BUILTIN_MASKMOVQ
21494 ? CODE_FOR_mmx_maskmovq
21495 : CODE_FOR_sse2_maskmovdqu);
21496 /* Note the arg order is different from the operand order. */
21497 arg1 = CALL_EXPR_ARG (exp, 0);
21498 arg2 = CALL_EXPR_ARG (exp, 1);
21499 arg0 = CALL_EXPR_ARG (exp, 2);
21500 op0 = expand_normal (arg0);
21501 op1 = expand_normal (arg1);
21502 op2 = expand_normal (arg2);
21503 mode0 = insn_data[icode].operand[0].mode;
21504 mode1 = insn_data[icode].operand[1].mode;
21505 mode2 = insn_data[icode].operand[2].mode;
21507 op0 = force_reg (Pmode, op0);
21508 op0 = gen_rtx_MEM (mode1, op0);
21510 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
21511 op0 = copy_to_mode_reg (mode0, op0);
21512 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
21513 op1 = copy_to_mode_reg (mode1, op1);
21514 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
21515 op2 = copy_to_mode_reg (mode2, op2);
21516 pat = GEN_FCN (icode) (op0, op1, op2);
21517 if (! pat)
21518 return 0;
21519 emit_insn (pat);
21520 return 0;
21522 case IX86_BUILTIN_LDMXCSR:
21523 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
21524 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21525 emit_move_insn (target, op0);
21526 emit_insn (gen_sse_ldmxcsr (target));
21527 return 0;
21529 case IX86_BUILTIN_STMXCSR:
21530 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21531 emit_insn (gen_sse_stmxcsr (target));
21532 return copy_to_mode_reg (SImode, target);
21534 case IX86_BUILTIN_CLFLUSH:
21535 arg0 = CALL_EXPR_ARG (exp, 0);
21536 op0 = expand_normal (arg0);
21537 icode = CODE_FOR_sse2_clflush;
21538 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21539 op0 = copy_to_mode_reg (Pmode, op0);
21541 emit_insn (gen_sse2_clflush (op0));
21542 return 0;
21544 case IX86_BUILTIN_MONITOR:
21545 arg0 = CALL_EXPR_ARG (exp, 0);
21546 arg1 = CALL_EXPR_ARG (exp, 1);
21547 arg2 = CALL_EXPR_ARG (exp, 2);
21548 op0 = expand_normal (arg0);
21549 op1 = expand_normal (arg1);
21550 op2 = expand_normal (arg2);
21551 if (!REG_P (op0))
21552 op0 = copy_to_mode_reg (Pmode, op0);
21553 if (!REG_P (op1))
21554 op1 = copy_to_mode_reg (SImode, op1);
21555 if (!REG_P (op2))
21556 op2 = copy_to_mode_reg (SImode, op2);
21557 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
21558 return 0;
21560 case IX86_BUILTIN_MWAIT:
21561 arg0 = CALL_EXPR_ARG (exp, 0);
21562 arg1 = CALL_EXPR_ARG (exp, 1);
21563 op0 = expand_normal (arg0);
21564 op1 = expand_normal (arg1);
21565 if (!REG_P (op0))
21566 op0 = copy_to_mode_reg (SImode, op0);
21567 if (!REG_P (op1))
21568 op1 = copy_to_mode_reg (SImode, op1);
21569 emit_insn (gen_sse3_mwait (op0, op1));
21570 return 0;
21572 case IX86_BUILTIN_VEC_INIT_V2SI:
21573 case IX86_BUILTIN_VEC_INIT_V4HI:
21574 case IX86_BUILTIN_VEC_INIT_V8QI:
21575 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21577 case IX86_BUILTIN_VEC_EXT_V2DF:
21578 case IX86_BUILTIN_VEC_EXT_V2DI:
21579 case IX86_BUILTIN_VEC_EXT_V4SF:
21580 case IX86_BUILTIN_VEC_EXT_V4SI:
21581 case IX86_BUILTIN_VEC_EXT_V8HI:
21582 case IX86_BUILTIN_VEC_EXT_V2SI:
21583 case IX86_BUILTIN_VEC_EXT_V4HI:
21584 case IX86_BUILTIN_VEC_EXT_V16QI:
21585 return ix86_expand_vec_ext_builtin (exp, target);
21587 case IX86_BUILTIN_VEC_SET_V2DI:
21588 case IX86_BUILTIN_VEC_SET_V4SF:
21589 case IX86_BUILTIN_VEC_SET_V4SI:
21590 case IX86_BUILTIN_VEC_SET_V8HI:
21591 case IX86_BUILTIN_VEC_SET_V4HI:
21592 case IX86_BUILTIN_VEC_SET_V16QI:
21593 return ix86_expand_vec_set_builtin (exp);
21595 case IX86_BUILTIN_INFQ:
21597 REAL_VALUE_TYPE inf;
21598 rtx tmp;
21600 real_inf (&inf);
21601 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21603 tmp = validize_mem (force_const_mem (mode, tmp));
21605 if (target == 0)
21606 target = gen_reg_rtx (mode);
21608 emit_move_insn (target, tmp);
21609 return target;
21612 default:
21613 break;
21616 for (i = 0, d = bdesc_special_args;
21617 i < ARRAY_SIZE (bdesc_special_args);
21618 i++, d++)
21619 if (d->code == fcode)
21620 return ix86_expand_special_args_builtin (d, exp, target);
21622 for (i = 0, d = bdesc_args;
21623 i < ARRAY_SIZE (bdesc_args);
21624 i++, d++)
21625 if (d->code == fcode)
21626 switch (fcode)
21628 case IX86_BUILTIN_FABSQ:
21629 case IX86_BUILTIN_COPYSIGNQ:
21630 if (!TARGET_SSE2)
21631 /* Emit a normal call if SSE2 isn't available. */
21632 return expand_call (exp, target, ignore);
21633 default:
21634 return ix86_expand_args_builtin (d, exp, target);
21637 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21638 if (d->code == fcode)
21639 return ix86_expand_sse_comi (d, exp, target);
21641 for (i = 0, d = bdesc_pcmpestr;
21642 i < ARRAY_SIZE (bdesc_pcmpestr);
21643 i++, d++)
21644 if (d->code == fcode)
21645 return ix86_expand_sse_pcmpestr (d, exp, target);
21647 for (i = 0, d = bdesc_pcmpistr;
21648 i < ARRAY_SIZE (bdesc_pcmpistr);
21649 i++, d++)
21650 if (d->code == fcode)
21651 return ix86_expand_sse_pcmpistr (d, exp, target);
21653 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21654 if (d->code == fcode)
21655 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21656 (enum multi_arg_type)d->flag,
21657 d->comparison);
21659 gcc_unreachable ();
21662 /* Returns a function decl for a vectorized version of the builtin function
21663 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21664 if it is not available. */
21666 static tree
21667 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21668 tree type_in)
21670 enum machine_mode in_mode, out_mode;
21671 int in_n, out_n;
21673 if (TREE_CODE (type_out) != VECTOR_TYPE
21674 || TREE_CODE (type_in) != VECTOR_TYPE)
21675 return NULL_TREE;
21677 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21678 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21679 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21680 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21682 switch (fn)
21684 case BUILT_IN_SQRT:
21685 if (out_mode == DFmode && out_n == 2
21686 && in_mode == DFmode && in_n == 2)
21687 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21688 break;
21690 case BUILT_IN_SQRTF:
21691 if (out_mode == SFmode && out_n == 4
21692 && in_mode == SFmode && in_n == 4)
21693 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21694 break;
21696 case BUILT_IN_LRINT:
21697 if (out_mode == SImode && out_n == 4
21698 && in_mode == DFmode && in_n == 2)
21699 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21700 break;
21702 case BUILT_IN_LRINTF:
21703 if (out_mode == SImode && out_n == 4
21704 && in_mode == SFmode && in_n == 4)
21705 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21706 break;
21708 default:
21712 /* Dispatch to a handler for a vectorization library. */
21713 if (ix86_veclib_handler)
21714 return (*ix86_veclib_handler)(fn, type_out, type_in);
21716 return NULL_TREE;
21719 /* Handler for an SVML-style interface to
21720 a library with vectorized intrinsics. */
21722 static tree
21723 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21725 char name[20];
21726 tree fntype, new_fndecl, args;
21727 unsigned arity;
21728 const char *bname;
21729 enum machine_mode el_mode, in_mode;
21730 int n, in_n;
21732 /* The SVML is suitable for unsafe math only. */
21733 if (!flag_unsafe_math_optimizations)
21734 return NULL_TREE;
21736 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21737 n = TYPE_VECTOR_SUBPARTS (type_out);
21738 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21739 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21740 if (el_mode != in_mode
21741 || n != in_n)
21742 return NULL_TREE;
21744 switch (fn)
21746 case BUILT_IN_EXP:
21747 case BUILT_IN_LOG:
21748 case BUILT_IN_LOG10:
21749 case BUILT_IN_POW:
21750 case BUILT_IN_TANH:
21751 case BUILT_IN_TAN:
21752 case BUILT_IN_ATAN:
21753 case BUILT_IN_ATAN2:
21754 case BUILT_IN_ATANH:
21755 case BUILT_IN_CBRT:
21756 case BUILT_IN_SINH:
21757 case BUILT_IN_SIN:
21758 case BUILT_IN_ASINH:
21759 case BUILT_IN_ASIN:
21760 case BUILT_IN_COSH:
21761 case BUILT_IN_COS:
21762 case BUILT_IN_ACOSH:
21763 case BUILT_IN_ACOS:
21764 if (el_mode != DFmode || n != 2)
21765 return NULL_TREE;
21766 break;
21768 case BUILT_IN_EXPF:
21769 case BUILT_IN_LOGF:
21770 case BUILT_IN_LOG10F:
21771 case BUILT_IN_POWF:
21772 case BUILT_IN_TANHF:
21773 case BUILT_IN_TANF:
21774 case BUILT_IN_ATANF:
21775 case BUILT_IN_ATAN2F:
21776 case BUILT_IN_ATANHF:
21777 case BUILT_IN_CBRTF:
21778 case BUILT_IN_SINHF:
21779 case BUILT_IN_SINF:
21780 case BUILT_IN_ASINHF:
21781 case BUILT_IN_ASINF:
21782 case BUILT_IN_COSHF:
21783 case BUILT_IN_COSF:
21784 case BUILT_IN_ACOSHF:
21785 case BUILT_IN_ACOSF:
21786 if (el_mode != SFmode || n != 4)
21787 return NULL_TREE;
21788 break;
21790 default:
21791 return NULL_TREE;
21794 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21796 if (fn == BUILT_IN_LOGF)
21797 strcpy (name, "vmlsLn4");
21798 else if (fn == BUILT_IN_LOG)
21799 strcpy (name, "vmldLn2");
21800 else if (n == 4)
21802 sprintf (name, "vmls%s", bname+10);
21803 name[strlen (name)-1] = '4';
21805 else
21806 sprintf (name, "vmld%s2", bname+10);
21808 /* Convert to uppercase. */
21809 name[4] &= ~0x20;
21811 arity = 0;
21812 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21813 args = TREE_CHAIN (args))
21814 arity++;
21816 if (arity == 1)
21817 fntype = build_function_type_list (type_out, type_in, NULL);
21818 else
21819 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21821 /* Build a function declaration for the vectorized function. */
21822 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21823 TREE_PUBLIC (new_fndecl) = 1;
21824 DECL_EXTERNAL (new_fndecl) = 1;
21825 DECL_IS_NOVOPS (new_fndecl) = 1;
21826 TREE_READONLY (new_fndecl) = 1;
21828 return new_fndecl;
21831 /* Handler for an ACML-style interface to
21832 a library with vectorized intrinsics. */
21834 static tree
21835 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21837 char name[20] = "__vr.._";
21838 tree fntype, new_fndecl, args;
21839 unsigned arity;
21840 const char *bname;
21841 enum machine_mode el_mode, in_mode;
21842 int n, in_n;
21844 /* The ACML is 64bits only and suitable for unsafe math only as
21845 it does not correctly support parts of IEEE with the required
21846 precision such as denormals. */
21847 if (!TARGET_64BIT
21848 || !flag_unsafe_math_optimizations)
21849 return NULL_TREE;
21851 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21852 n = TYPE_VECTOR_SUBPARTS (type_out);
21853 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21854 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21855 if (el_mode != in_mode
21856 || n != in_n)
21857 return NULL_TREE;
21859 switch (fn)
21861 case BUILT_IN_SIN:
21862 case BUILT_IN_COS:
21863 case BUILT_IN_EXP:
21864 case BUILT_IN_LOG:
21865 case BUILT_IN_LOG2:
21866 case BUILT_IN_LOG10:
21867 name[4] = 'd';
21868 name[5] = '2';
21869 if (el_mode != DFmode
21870 || n != 2)
21871 return NULL_TREE;
21872 break;
21874 case BUILT_IN_SINF:
21875 case BUILT_IN_COSF:
21876 case BUILT_IN_EXPF:
21877 case BUILT_IN_POWF:
21878 case BUILT_IN_LOGF:
21879 case BUILT_IN_LOG2F:
21880 case BUILT_IN_LOG10F:
21881 name[4] = 's';
21882 name[5] = '4';
21883 if (el_mode != SFmode
21884 || n != 4)
21885 return NULL_TREE;
21886 break;
21888 default:
21889 return NULL_TREE;
21892 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21893 sprintf (name + 7, "%s", bname+10);
21895 arity = 0;
21896 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21897 args = TREE_CHAIN (args))
21898 arity++;
21900 if (arity == 1)
21901 fntype = build_function_type_list (type_out, type_in, NULL);
21902 else
21903 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21905 /* Build a function declaration for the vectorized function. */
21906 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21907 TREE_PUBLIC (new_fndecl) = 1;
21908 DECL_EXTERNAL (new_fndecl) = 1;
21909 DECL_IS_NOVOPS (new_fndecl) = 1;
21910 TREE_READONLY (new_fndecl) = 1;
21912 return new_fndecl;
21916 /* Returns a decl of a function that implements conversion of the
21917 input vector of type TYPE, or NULL_TREE if it is not available. */
21919 static tree
21920 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21922 if (TREE_CODE (type) != VECTOR_TYPE)
21923 return NULL_TREE;
21925 switch (code)
21927 case FLOAT_EXPR:
21928 switch (TYPE_MODE (type))
21930 case V4SImode:
21931 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21932 default:
21933 return NULL_TREE;
21936 case FIX_TRUNC_EXPR:
21937 switch (TYPE_MODE (type))
21939 case V4SFmode:
21940 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21941 default:
21942 return NULL_TREE;
21944 default:
21945 return NULL_TREE;
21950 /* Returns a code for a target-specific builtin that implements
21951 reciprocal of the function, or NULL_TREE if not available. */
21953 static tree
21954 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21955 bool sqrt ATTRIBUTE_UNUSED)
21957 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21958 && flag_finite_math_only && !flag_trapping_math
21959 && flag_unsafe_math_optimizations))
21960 return NULL_TREE;
21962 if (md_fn)
21963 /* Machine dependent builtins. */
21964 switch (fn)
21966 /* Vectorized version of sqrt to rsqrt conversion. */
21967 case IX86_BUILTIN_SQRTPS_NR:
21968 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21970 default:
21971 return NULL_TREE;
21973 else
21974 /* Normal builtins. */
21975 switch (fn)
21977 /* Sqrt to rsqrt conversion. */
21978 case BUILT_IN_SQRTF:
21979 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21981 default:
21982 return NULL_TREE;
21986 /* Store OPERAND to the memory after reload is completed. This means
21987 that we can't easily use assign_stack_local. */
21989 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21991 rtx result;
21993 gcc_assert (reload_completed);
21994 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
21996 result = gen_rtx_MEM (mode,
21997 gen_rtx_PLUS (Pmode,
21998 stack_pointer_rtx,
21999 GEN_INT (-RED_ZONE_SIZE)));
22000 emit_move_insn (result, operand);
22002 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
22004 switch (mode)
22006 case HImode:
22007 case SImode:
22008 operand = gen_lowpart (DImode, operand);
22009 /* FALLTHRU */
22010 case DImode:
22011 emit_insn (
22012 gen_rtx_SET (VOIDmode,
22013 gen_rtx_MEM (DImode,
22014 gen_rtx_PRE_DEC (DImode,
22015 stack_pointer_rtx)),
22016 operand));
22017 break;
22018 default:
22019 gcc_unreachable ();
22021 result = gen_rtx_MEM (mode, stack_pointer_rtx);
22023 else
22025 switch (mode)
22027 case DImode:
22029 rtx operands[2];
22030 split_di (&operand, 1, operands, operands + 1);
22031 emit_insn (
22032 gen_rtx_SET (VOIDmode,
22033 gen_rtx_MEM (SImode,
22034 gen_rtx_PRE_DEC (Pmode,
22035 stack_pointer_rtx)),
22036 operands[1]));
22037 emit_insn (
22038 gen_rtx_SET (VOIDmode,
22039 gen_rtx_MEM (SImode,
22040 gen_rtx_PRE_DEC (Pmode,
22041 stack_pointer_rtx)),
22042 operands[0]));
22044 break;
22045 case HImode:
22046 /* Store HImodes as SImodes. */
22047 operand = gen_lowpart (SImode, operand);
22048 /* FALLTHRU */
22049 case SImode:
22050 emit_insn (
22051 gen_rtx_SET (VOIDmode,
22052 gen_rtx_MEM (GET_MODE (operand),
22053 gen_rtx_PRE_DEC (SImode,
22054 stack_pointer_rtx)),
22055 operand));
22056 break;
22057 default:
22058 gcc_unreachable ();
22060 result = gen_rtx_MEM (mode, stack_pointer_rtx);
22062 return result;
22065 /* Free operand from the memory. */
22066 void
22067 ix86_free_from_memory (enum machine_mode mode)
22069 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
22071 int size;
22073 if (mode == DImode || TARGET_64BIT)
22074 size = 8;
22075 else
22076 size = 4;
22077 /* Use LEA to deallocate stack space. In peephole2 it will be converted
22078 to pop or add instruction if registers are available. */
22079 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22080 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
22081 GEN_INT (size))));
22085 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
22086 QImode must go into class Q_REGS.
22087 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
22088 movdf to do mem-to-mem moves through integer regs. */
22089 enum reg_class
22090 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
22092 enum machine_mode mode = GET_MODE (x);
22094 /* We're only allowed to return a subclass of CLASS. Many of the
22095 following checks fail for NO_REGS, so eliminate that early. */
22096 if (regclass == NO_REGS)
22097 return NO_REGS;
22099 /* All classes can load zeros. */
22100 if (x == CONST0_RTX (mode))
22101 return regclass;
22103 /* Force constants into memory if we are loading a (nonzero) constant into
22104 an MMX or SSE register. This is because there are no MMX/SSE instructions
22105 to load from a constant. */
22106 if (CONSTANT_P (x)
22107 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
22108 return NO_REGS;
22110 /* Prefer SSE regs only, if we can use them for math. */
22111 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
22112 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
22114 /* Floating-point constants need more complex checks. */
22115 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
22117 /* General regs can load everything. */
22118 if (reg_class_subset_p (regclass, GENERAL_REGS))
22119 return regclass;
22121 /* Floats can load 0 and 1 plus some others. Note that we eliminated
22122 zero above. We only want to wind up preferring 80387 registers if
22123 we plan on doing computation with them. */
22124 if (TARGET_80387
22125 && standard_80387_constant_p (x))
22127 /* Limit class to non-sse. */
22128 if (regclass == FLOAT_SSE_REGS)
22129 return FLOAT_REGS;
22130 if (regclass == FP_TOP_SSE_REGS)
22131 return FP_TOP_REG;
22132 if (regclass == FP_SECOND_SSE_REGS)
22133 return FP_SECOND_REG;
22134 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
22135 return regclass;
22138 return NO_REGS;
22141 /* Generally when we see PLUS here, it's the function invariant
22142 (plus soft-fp const_int). Which can only be computed into general
22143 regs. */
22144 if (GET_CODE (x) == PLUS)
22145 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
22147 /* QImode constants are easy to load, but non-constant QImode data
22148 must go into Q_REGS. */
22149 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
22151 if (reg_class_subset_p (regclass, Q_REGS))
22152 return regclass;
22153 if (reg_class_subset_p (Q_REGS, regclass))
22154 return Q_REGS;
22155 return NO_REGS;
22158 return regclass;
22161 /* Discourage putting floating-point values in SSE registers unless
22162 SSE math is being used, and likewise for the 387 registers. */
22163 enum reg_class
22164 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22166 enum machine_mode mode = GET_MODE (x);
22168 /* Restrict the output reload class to the register bank that we are doing
22169 math on. If we would like not to return a subset of CLASS, reject this
22170 alternative: if reload cannot do this, it will still use its choice. */
22171 mode = GET_MODE (x);
22172 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22173 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
22175 if (X87_FLOAT_MODE_P (mode))
22177 if (regclass == FP_TOP_SSE_REGS)
22178 return FP_TOP_REG;
22179 else if (regclass == FP_SECOND_SSE_REGS)
22180 return FP_SECOND_REG;
22181 else
22182 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
22185 return regclass;
22188 static enum reg_class
22189 ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
22190 enum machine_mode mode,
22191 secondary_reload_info *sri ATTRIBUTE_UNUSED)
22193 /* QImode spills from non-QI registers require
22194 intermediate register on 32bit targets. */
22195 if (!in_p && mode == QImode && !TARGET_64BIT
22196 && (class == GENERAL_REGS
22197 || class == LEGACY_REGS
22198 || class == INDEX_REGS))
22200 int regno;
22202 if (REG_P (x))
22203 regno = REGNO (x);
22204 else
22205 regno = -1;
22207 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
22208 regno = true_regnum (x);
22210 /* Return Q_REGS if the operand is in memory. */
22211 if (regno == -1)
22212 return Q_REGS;
22215 return NO_REGS;
22218 /* If we are copying between general and FP registers, we need a memory
22219 location. The same is true for SSE and MMX registers.
22221 To optimize register_move_cost performance, allow inline variant.
22223 The macro can't work reliably when one of the CLASSES is class containing
22224 registers from multiple units (SSE, MMX, integer). We avoid this by never
22225 combining those units in single alternative in the machine description.
22226 Ensure that this constraint holds to avoid unexpected surprises.
22228 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22229 enforce these sanity checks. */
22231 static inline int
22232 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22233 enum machine_mode mode, int strict)
22235 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22236 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22237 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22238 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22239 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22240 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22242 gcc_assert (!strict);
22243 return true;
22246 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22247 return true;
22249 /* ??? This is a lie. We do have moves between mmx/general, and for
22250 mmx/sse2. But by saying we need secondary memory we discourage the
22251 register allocator from using the mmx registers unless needed. */
22252 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22253 return true;
22255 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22257 /* SSE1 doesn't have any direct moves from other classes. */
22258 if (!TARGET_SSE2)
22259 return true;
22261 /* If the target says that inter-unit moves are more expensive
22262 than moving through memory, then don't generate them. */
22263 if (!TARGET_INTER_UNIT_MOVES)
22264 return true;
22266 /* Between SSE and general, we have moves no larger than word size. */
22267 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22268 return true;
22271 return false;
22275 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22276 enum machine_mode mode, int strict)
22278 return inline_secondary_memory_needed (class1, class2, mode, strict);
22281 /* Return true if the registers in CLASS cannot represent the change from
22282 modes FROM to TO. */
22284 bool
22285 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22286 enum reg_class regclass)
22288 if (from == to)
22289 return false;
22291 /* x87 registers can't do subreg at all, as all values are reformatted
22292 to extended precision. */
22293 if (MAYBE_FLOAT_CLASS_P (regclass))
22294 return true;
22296 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22298 /* Vector registers do not support QI or HImode loads. If we don't
22299 disallow a change to these modes, reload will assume it's ok to
22300 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22301 the vec_dupv4hi pattern. */
22302 if (GET_MODE_SIZE (from) < 4)
22303 return true;
22305 /* Vector registers do not support subreg with nonzero offsets, which
22306 are otherwise valid for integer registers. Since we can't see
22307 whether we have a nonzero offset from here, prohibit all
22308 nonparadoxical subregs changing size. */
22309 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22310 return true;
22313 return false;
22316 /* Return the cost of moving data of mode M between a
22317 register and memory. A value of 2 is the default; this cost is
22318 relative to those in `REGISTER_MOVE_COST'.
22320 This function is used extensively by register_move_cost that is used to
22321 build tables at startup. Make it inline in this case.
22322 When IN is 2, return maximum of in and out move cost.
22324 If moving between registers and memory is more expensive than
22325 between two registers, you should define this macro to express the
22326 relative cost.
22328 Model also increased moving costs of QImode registers in non
22329 Q_REGS classes.
22331 static inline int
22332 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22333 int in)
22335 int cost;
22336 if (FLOAT_CLASS_P (regclass))
22338 int index;
22339 switch (mode)
22341 case SFmode:
22342 index = 0;
22343 break;
22344 case DFmode:
22345 index = 1;
22346 break;
22347 case XFmode:
22348 index = 2;
22349 break;
22350 default:
22351 return 100;
22353 if (in == 2)
22354 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22355 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22357 if (SSE_CLASS_P (regclass))
22359 int index;
22360 switch (GET_MODE_SIZE (mode))
22362 case 4:
22363 index = 0;
22364 break;
22365 case 8:
22366 index = 1;
22367 break;
22368 case 16:
22369 index = 2;
22370 break;
22371 default:
22372 return 100;
22374 if (in == 2)
22375 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22376 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22378 if (MMX_CLASS_P (regclass))
22380 int index;
22381 switch (GET_MODE_SIZE (mode))
22383 case 4:
22384 index = 0;
22385 break;
22386 case 8:
22387 index = 1;
22388 break;
22389 default:
22390 return 100;
22392 if (in)
22393 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22394 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22396 switch (GET_MODE_SIZE (mode))
22398 case 1:
22399 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22401 if (!in)
22402 return ix86_cost->int_store[0];
22403 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22404 cost = ix86_cost->movzbl_load;
22405 else
22406 cost = ix86_cost->int_load[0];
22407 if (in == 2)
22408 return MAX (cost, ix86_cost->int_store[0]);
22409 return cost;
22411 else
22413 if (in == 2)
22414 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22415 if (in)
22416 return ix86_cost->movzbl_load;
22417 else
22418 return ix86_cost->int_store[0] + 4;
22420 break;
22421 case 2:
22422 if (in == 2)
22423 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22424 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22425 default:
22426 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
22427 if (mode == TFmode)
22428 mode = XFmode;
22429 if (in == 2)
22430 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22431 else if (in)
22432 cost = ix86_cost->int_load[2];
22433 else
22434 cost = ix86_cost->int_store[2];
22435 return (cost * (((int) GET_MODE_SIZE (mode)
22436 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
22441 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22443 return inline_memory_move_cost (mode, regclass, in);
22447 /* Return the cost of moving data from a register in class CLASS1 to
22448 one in class CLASS2.
22450 It is not required that the cost always equal 2 when FROM is the same as TO;
22451 on some machines it is expensive to move between registers if they are not
22452 general registers. */
22455 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22456 enum reg_class class2)
22458 /* In case we require secondary memory, compute cost of the store followed
22459 by load. In order to avoid bad register allocation choices, we need
22460 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22462 if (inline_secondary_memory_needed (class1, class2, mode, 0))
22464 int cost = 1;
22466 cost += inline_memory_move_cost (mode, class1, 2);
22467 cost += inline_memory_move_cost (mode, class2, 2);
22469 /* In case of copying from general_purpose_register we may emit multiple
22470 stores followed by single load causing memory size mismatch stall.
22471 Count this as arbitrarily high cost of 20. */
22472 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22473 cost += 20;
22475 /* In the case of FP/MMX moves, the registers actually overlap, and we
22476 have to switch modes in order to treat them differently. */
22477 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22478 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22479 cost += 20;
22481 return cost;
22484 /* Moves between SSE/MMX and integer unit are expensive. */
22485 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22486 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22488 /* ??? By keeping returned value relatively high, we limit the number
22489 of moves between integer and MMX/SSE registers for all targets.
22490 Additionally, high value prevents problem with x86_modes_tieable_p(),
22491 where integer modes in MMX/SSE registers are not tieable
22492 because of missing QImode and HImode moves to, from or between
22493 MMX/SSE registers. */
22494 return MAX (8, ix86_cost->mmxsse_to_integer);
22496 if (MAYBE_FLOAT_CLASS_P (class1))
22497 return ix86_cost->fp_move;
22498 if (MAYBE_SSE_CLASS_P (class1))
22499 return ix86_cost->sse_move;
22500 if (MAYBE_MMX_CLASS_P (class1))
22501 return ix86_cost->mmx_move;
22502 return 2;
22505 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22507 bool
22508 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22510 /* Flags and only flags can only hold CCmode values. */
22511 if (CC_REGNO_P (regno))
22512 return GET_MODE_CLASS (mode) == MODE_CC;
22513 if (GET_MODE_CLASS (mode) == MODE_CC
22514 || GET_MODE_CLASS (mode) == MODE_RANDOM
22515 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22516 return 0;
22517 if (FP_REGNO_P (regno))
22518 return VALID_FP_MODE_P (mode);
22519 if (SSE_REGNO_P (regno))
22521 /* We implement the move patterns for all vector modes into and
22522 out of SSE registers, even when no operation instructions
22523 are available. */
22524 return (VALID_SSE_REG_MODE (mode)
22525 || VALID_SSE2_REG_MODE (mode)
22526 || VALID_MMX_REG_MODE (mode)
22527 || VALID_MMX_REG_MODE_3DNOW (mode));
22529 if (MMX_REGNO_P (regno))
22531 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22532 so if the register is available at all, then we can move data of
22533 the given mode into or out of it. */
22534 return (VALID_MMX_REG_MODE (mode)
22535 || VALID_MMX_REG_MODE_3DNOW (mode));
22538 if (mode == QImode)
22540 /* Take care for QImode values - they can be in non-QI regs,
22541 but then they do cause partial register stalls. */
22542 if (regno < 4 || TARGET_64BIT)
22543 return 1;
22544 if (!TARGET_PARTIAL_REG_STALL)
22545 return 1;
22546 return reload_in_progress || reload_completed;
22548 /* We handle both integer and floats in the general purpose registers. */
22549 else if (VALID_INT_MODE_P (mode))
22550 return 1;
22551 else if (VALID_FP_MODE_P (mode))
22552 return 1;
22553 else if (VALID_DFP_MODE_P (mode))
22554 return 1;
22555 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22556 on to use that value in smaller contexts, this can easily force a
22557 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22558 supporting DImode, allow it. */
22559 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22560 return 1;
22562 return 0;
22565 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22566 tieable integer mode. */
22568 static bool
22569 ix86_tieable_integer_mode_p (enum machine_mode mode)
22571 switch (mode)
22573 case HImode:
22574 case SImode:
22575 return true;
22577 case QImode:
22578 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22580 case DImode:
22581 return TARGET_64BIT;
22583 default:
22584 return false;
22588 /* Return true if MODE1 is accessible in a register that can hold MODE2
22589 without copying. That is, all register classes that can hold MODE2
22590 can also hold MODE1. */
22592 bool
22593 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22595 if (mode1 == mode2)
22596 return true;
22598 if (ix86_tieable_integer_mode_p (mode1)
22599 && ix86_tieable_integer_mode_p (mode2))
22600 return true;
22602 /* MODE2 being XFmode implies fp stack or general regs, which means we
22603 can tie any smaller floating point modes to it. Note that we do not
22604 tie this with TFmode. */
22605 if (mode2 == XFmode)
22606 return mode1 == SFmode || mode1 == DFmode;
22608 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22609 that we can tie it with SFmode. */
22610 if (mode2 == DFmode)
22611 return mode1 == SFmode;
22613 /* If MODE2 is only appropriate for an SSE register, then tie with
22614 any other mode acceptable to SSE registers. */
22615 if (GET_MODE_SIZE (mode2) == 16
22616 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22617 return (GET_MODE_SIZE (mode1) == 16
22618 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22620 /* If MODE2 is appropriate for an MMX register, then tie
22621 with any other mode acceptable to MMX registers. */
22622 if (GET_MODE_SIZE (mode2) == 8
22623 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22624 return (GET_MODE_SIZE (mode1) == 8
22625 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22627 return false;
22630 /* Compute a (partial) cost for rtx X. Return true if the complete
22631 cost has been computed, and false if subexpressions should be
22632 scanned. In either case, *TOTAL contains the cost result. */
22634 static bool
22635 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22637 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22638 enum machine_mode mode = GET_MODE (x);
22640 switch (code)
22642 case CONST_INT:
22643 case CONST:
22644 case LABEL_REF:
22645 case SYMBOL_REF:
22646 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22647 *total = 3;
22648 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22649 *total = 2;
22650 else if (flag_pic && SYMBOLIC_CONST (x)
22651 && (!TARGET_64BIT
22652 || (!GET_CODE (x) != LABEL_REF
22653 && (GET_CODE (x) != SYMBOL_REF
22654 || !SYMBOL_REF_LOCAL_P (x)))))
22655 *total = 1;
22656 else
22657 *total = 0;
22658 return true;
22660 case CONST_DOUBLE:
22661 if (mode == VOIDmode)
22662 *total = 0;
22663 else
22664 switch (standard_80387_constant_p (x))
22666 case 1: /* 0.0 */
22667 *total = 1;
22668 break;
22669 default: /* Other constants */
22670 *total = 2;
22671 break;
22672 case 0:
22673 case -1:
22674 /* Start with (MEM (SYMBOL_REF)), since that's where
22675 it'll probably end up. Add a penalty for size. */
22676 *total = (COSTS_N_INSNS (1)
22677 + (flag_pic != 0 && !TARGET_64BIT)
22678 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22679 break;
22681 return true;
22683 case ZERO_EXTEND:
22684 /* The zero extensions is often completely free on x86_64, so make
22685 it as cheap as possible. */
22686 if (TARGET_64BIT && mode == DImode
22687 && GET_MODE (XEXP (x, 0)) == SImode)
22688 *total = 1;
22689 else if (TARGET_ZERO_EXTEND_WITH_AND)
22690 *total = ix86_cost->add;
22691 else
22692 *total = ix86_cost->movzx;
22693 return false;
22695 case SIGN_EXTEND:
22696 *total = ix86_cost->movsx;
22697 return false;
22699 case ASHIFT:
22700 if (CONST_INT_P (XEXP (x, 1))
22701 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22703 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22704 if (value == 1)
22706 *total = ix86_cost->add;
22707 return false;
22709 if ((value == 2 || value == 3)
22710 && ix86_cost->lea <= ix86_cost->shift_const)
22712 *total = ix86_cost->lea;
22713 return false;
22716 /* FALLTHRU */
22718 case ROTATE:
22719 case ASHIFTRT:
22720 case LSHIFTRT:
22721 case ROTATERT:
22722 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22724 if (CONST_INT_P (XEXP (x, 1)))
22726 if (INTVAL (XEXP (x, 1)) > 32)
22727 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22728 else
22729 *total = ix86_cost->shift_const * 2;
22731 else
22733 if (GET_CODE (XEXP (x, 1)) == AND)
22734 *total = ix86_cost->shift_var * 2;
22735 else
22736 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22739 else
22741 if (CONST_INT_P (XEXP (x, 1)))
22742 *total = ix86_cost->shift_const;
22743 else
22744 *total = ix86_cost->shift_var;
22746 return false;
22748 case MULT:
22749 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22751 /* ??? SSE scalar cost should be used here. */
22752 *total = ix86_cost->fmul;
22753 return false;
22755 else if (X87_FLOAT_MODE_P (mode))
22757 *total = ix86_cost->fmul;
22758 return false;
22760 else if (FLOAT_MODE_P (mode))
22762 /* ??? SSE vector cost should be used here. */
22763 *total = ix86_cost->fmul;
22764 return false;
22766 else
22768 rtx op0 = XEXP (x, 0);
22769 rtx op1 = XEXP (x, 1);
22770 int nbits;
22771 if (CONST_INT_P (XEXP (x, 1)))
22773 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22774 for (nbits = 0; value != 0; value &= value - 1)
22775 nbits++;
22777 else
22778 /* This is arbitrary. */
22779 nbits = 7;
22781 /* Compute costs correctly for widening multiplication. */
22782 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22783 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22784 == GET_MODE_SIZE (mode))
22786 int is_mulwiden = 0;
22787 enum machine_mode inner_mode = GET_MODE (op0);
22789 if (GET_CODE (op0) == GET_CODE (op1))
22790 is_mulwiden = 1, op1 = XEXP (op1, 0);
22791 else if (CONST_INT_P (op1))
22793 if (GET_CODE (op0) == SIGN_EXTEND)
22794 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22795 == INTVAL (op1);
22796 else
22797 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22800 if (is_mulwiden)
22801 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22804 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22805 + nbits * ix86_cost->mult_bit
22806 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22808 return true;
22811 case DIV:
22812 case UDIV:
22813 case MOD:
22814 case UMOD:
22815 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22816 /* ??? SSE cost should be used here. */
22817 *total = ix86_cost->fdiv;
22818 else if (X87_FLOAT_MODE_P (mode))
22819 *total = ix86_cost->fdiv;
22820 else if (FLOAT_MODE_P (mode))
22821 /* ??? SSE vector cost should be used here. */
22822 *total = ix86_cost->fdiv;
22823 else
22824 *total = ix86_cost->divide[MODE_INDEX (mode)];
22825 return false;
22827 case PLUS:
22828 if (GET_MODE_CLASS (mode) == MODE_INT
22829 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22831 if (GET_CODE (XEXP (x, 0)) == PLUS
22832 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22833 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22834 && CONSTANT_P (XEXP (x, 1)))
22836 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22837 if (val == 2 || val == 4 || val == 8)
22839 *total = ix86_cost->lea;
22840 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22841 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22842 outer_code);
22843 *total += rtx_cost (XEXP (x, 1), outer_code);
22844 return true;
22847 else if (GET_CODE (XEXP (x, 0)) == MULT
22848 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22850 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22851 if (val == 2 || val == 4 || val == 8)
22853 *total = ix86_cost->lea;
22854 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22855 *total += rtx_cost (XEXP (x, 1), outer_code);
22856 return true;
22859 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22861 *total = ix86_cost->lea;
22862 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22863 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22864 *total += rtx_cost (XEXP (x, 1), outer_code);
22865 return true;
22868 /* FALLTHRU */
22870 case MINUS:
22871 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22873 /* ??? SSE cost should be used here. */
22874 *total = ix86_cost->fadd;
22875 return false;
22877 else if (X87_FLOAT_MODE_P (mode))
22879 *total = ix86_cost->fadd;
22880 return false;
22882 else if (FLOAT_MODE_P (mode))
22884 /* ??? SSE vector cost should be used here. */
22885 *total = ix86_cost->fadd;
22886 return false;
22888 /* FALLTHRU */
22890 case AND:
22891 case IOR:
22892 case XOR:
22893 if (!TARGET_64BIT && mode == DImode)
22895 *total = (ix86_cost->add * 2
22896 + (rtx_cost (XEXP (x, 0), outer_code)
22897 << (GET_MODE (XEXP (x, 0)) != DImode))
22898 + (rtx_cost (XEXP (x, 1), outer_code)
22899 << (GET_MODE (XEXP (x, 1)) != DImode)));
22900 return true;
22902 /* FALLTHRU */
22904 case NEG:
22905 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22907 /* ??? SSE cost should be used here. */
22908 *total = ix86_cost->fchs;
22909 return false;
22911 else if (X87_FLOAT_MODE_P (mode))
22913 *total = ix86_cost->fchs;
22914 return false;
22916 else if (FLOAT_MODE_P (mode))
22918 /* ??? SSE vector cost should be used here. */
22919 *total = ix86_cost->fchs;
22920 return false;
22922 /* FALLTHRU */
22924 case NOT:
22925 if (!TARGET_64BIT && mode == DImode)
22926 *total = ix86_cost->add * 2;
22927 else
22928 *total = ix86_cost->add;
22929 return false;
22931 case COMPARE:
22932 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22933 && XEXP (XEXP (x, 0), 1) == const1_rtx
22934 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22935 && XEXP (x, 1) == const0_rtx)
22937 /* This kind of construct is implemented using test[bwl].
22938 Treat it as if we had an AND. */
22939 *total = (ix86_cost->add
22940 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22941 + rtx_cost (const1_rtx, outer_code));
22942 return true;
22944 return false;
22946 case FLOAT_EXTEND:
22947 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22948 *total = 0;
22949 return false;
22951 case ABS:
22952 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22953 /* ??? SSE cost should be used here. */
22954 *total = ix86_cost->fabs;
22955 else if (X87_FLOAT_MODE_P (mode))
22956 *total = ix86_cost->fabs;
22957 else if (FLOAT_MODE_P (mode))
22958 /* ??? SSE vector cost should be used here. */
22959 *total = ix86_cost->fabs;
22960 return false;
22962 case SQRT:
22963 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22964 /* ??? SSE cost should be used here. */
22965 *total = ix86_cost->fsqrt;
22966 else if (X87_FLOAT_MODE_P (mode))
22967 *total = ix86_cost->fsqrt;
22968 else if (FLOAT_MODE_P (mode))
22969 /* ??? SSE vector cost should be used here. */
22970 *total = ix86_cost->fsqrt;
22971 return false;
22973 case UNSPEC:
22974 if (XINT (x, 1) == UNSPEC_TP)
22975 *total = 0;
22976 return false;
22978 default:
22979 return false;
22983 #if TARGET_MACHO
22985 static int current_machopic_label_num;
22987 /* Given a symbol name and its associated stub, write out the
22988 definition of the stub. */
22990 void
22991 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22993 unsigned int length;
22994 char *binder_name, *symbol_name, lazy_ptr_name[32];
22995 int label = ++current_machopic_label_num;
22997 /* For 64-bit we shouldn't get here. */
22998 gcc_assert (!TARGET_64BIT);
23000 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
23001 symb = (*targetm.strip_name_encoding) (symb);
23003 length = strlen (stub);
23004 binder_name = XALLOCAVEC (char, length + 32);
23005 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
23007 length = strlen (symb);
23008 symbol_name = XALLOCAVEC (char, length + 32);
23009 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
23011 sprintf (lazy_ptr_name, "L%d$lz", label);
23013 if (MACHOPIC_PURE)
23014 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
23015 else
23016 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
23018 fprintf (file, "%s:\n", stub);
23019 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23021 if (MACHOPIC_PURE)
23023 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
23024 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
23025 fprintf (file, "\tjmp\t*%%edx\n");
23027 else
23028 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
23030 fprintf (file, "%s:\n", binder_name);
23032 if (MACHOPIC_PURE)
23034 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
23035 fprintf (file, "\tpushl\t%%eax\n");
23037 else
23038 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
23040 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
23042 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
23043 fprintf (file, "%s:\n", lazy_ptr_name);
23044 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23045 fprintf (file, "\t.long %s\n", binder_name);
23048 void
23049 darwin_x86_file_end (void)
23051 darwin_file_end ();
23052 ix86_file_end ();
23054 #endif /* TARGET_MACHO */
23056 /* Order the registers for register allocator. */
23058 void
23059 x86_order_regs_for_local_alloc (void)
23061 int pos = 0;
23062 int i;
23064 /* First allocate the local general purpose registers. */
23065 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23066 if (GENERAL_REGNO_P (i) && call_used_regs[i])
23067 reg_alloc_order [pos++] = i;
23069 /* Global general purpose registers. */
23070 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23071 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
23072 reg_alloc_order [pos++] = i;
23074 /* x87 registers come first in case we are doing FP math
23075 using them. */
23076 if (!TARGET_SSE_MATH)
23077 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23078 reg_alloc_order [pos++] = i;
23080 /* SSE registers. */
23081 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23082 reg_alloc_order [pos++] = i;
23083 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23084 reg_alloc_order [pos++] = i;
23086 /* x87 registers. */
23087 if (TARGET_SSE_MATH)
23088 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23089 reg_alloc_order [pos++] = i;
23091 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23092 reg_alloc_order [pos++] = i;
23094 /* Initialize the rest of array as we do not allocate some registers
23095 at all. */
23096 while (pos < FIRST_PSEUDO_REGISTER)
23097 reg_alloc_order [pos++] = 0;
23100 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
23101 struct attribute_spec.handler. */
23102 static tree
23103 ix86_handle_struct_attribute (tree *node, tree name,
23104 tree args ATTRIBUTE_UNUSED,
23105 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
23107 tree *type = NULL;
23108 if (DECL_P (*node))
23110 if (TREE_CODE (*node) == TYPE_DECL)
23111 type = &TREE_TYPE (*node);
23113 else
23114 type = node;
23116 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
23117 || TREE_CODE (*type) == UNION_TYPE)))
23119 warning (OPT_Wattributes, "%qs attribute ignored",
23120 IDENTIFIER_POINTER (name));
23121 *no_add_attrs = true;
23124 else if ((is_attribute_p ("ms_struct", name)
23125 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
23126 || ((is_attribute_p ("gcc_struct", name)
23127 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
23129 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
23130 IDENTIFIER_POINTER (name));
23131 *no_add_attrs = true;
23134 return NULL_TREE;
23137 static bool
23138 ix86_ms_bitfield_layout_p (const_tree record_type)
23140 return (TARGET_MS_BITFIELD_LAYOUT &&
23141 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23142 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
23145 /* Returns an expression indicating where the this parameter is
23146 located on entry to the FUNCTION. */
23148 static rtx
23149 x86_this_parameter (tree function)
23151 tree type = TREE_TYPE (function);
23152 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23153 int nregs;
23155 if (TARGET_64BIT)
23157 const int *parm_regs;
23159 if (ix86_function_type_abi (type) == MS_ABI)
23160 parm_regs = x86_64_ms_abi_int_parameter_registers;
23161 else
23162 parm_regs = x86_64_int_parameter_registers;
23163 return gen_rtx_REG (DImode, parm_regs[aggr]);
23166 nregs = ix86_function_regparm (type, function);
23168 if (nregs > 0 && !stdarg_p (type))
23170 int regno;
23172 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
23173 regno = aggr ? DX_REG : CX_REG;
23174 else
23176 regno = AX_REG;
23177 if (aggr)
23179 regno = DX_REG;
23180 if (nregs == 1)
23181 return gen_rtx_MEM (SImode,
23182 plus_constant (stack_pointer_rtx, 4));
23185 return gen_rtx_REG (SImode, regno);
23188 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
23191 /* Determine whether x86_output_mi_thunk can succeed. */
23193 static bool
23194 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23195 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23196 HOST_WIDE_INT vcall_offset, const_tree function)
23198 /* 64-bit can handle anything. */
23199 if (TARGET_64BIT)
23200 return true;
23202 /* For 32-bit, everything's fine if we have one free register. */
23203 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23204 return true;
23206 /* Need a free register for vcall_offset. */
23207 if (vcall_offset)
23208 return false;
23210 /* Need a free register for GOT references. */
23211 if (flag_pic && !(*targetm.binds_local_p) (function))
23212 return false;
23214 /* Otherwise ok. */
23215 return true;
23218 /* Output the assembler code for a thunk function. THUNK_DECL is the
23219 declaration for the thunk function itself, FUNCTION is the decl for
23220 the target function. DELTA is an immediate constant offset to be
23221 added to THIS. If VCALL_OFFSET is nonzero, the word at
23222 *(*this + vcall_offset) should be added to THIS. */
23224 static void
23225 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23226 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23227 HOST_WIDE_INT vcall_offset, tree function)
23229 rtx xops[3];
23230 rtx this_param = x86_this_parameter (function);
23231 rtx this_reg, tmp;
23233 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23234 pull it in now and let DELTA benefit. */
23235 if (REG_P (this_param))
23236 this_reg = this_param;
23237 else if (vcall_offset)
23239 /* Put the this parameter into %eax. */
23240 xops[0] = this_param;
23241 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23242 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23244 else
23245 this_reg = NULL_RTX;
23247 /* Adjust the this parameter by a fixed constant. */
23248 if (delta)
23250 xops[0] = GEN_INT (delta);
23251 xops[1] = this_reg ? this_reg : this_param;
23252 if (TARGET_64BIT)
23254 if (!x86_64_general_operand (xops[0], DImode))
23256 tmp = gen_rtx_REG (DImode, R10_REG);
23257 xops[1] = tmp;
23258 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23259 xops[0] = tmp;
23260 xops[1] = this_param;
23262 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23264 else
23265 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23268 /* Adjust the this parameter by a value stored in the vtable. */
23269 if (vcall_offset)
23271 if (TARGET_64BIT)
23272 tmp = gen_rtx_REG (DImode, R10_REG);
23273 else
23275 int tmp_regno = CX_REG;
23276 if (lookup_attribute ("fastcall",
23277 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23278 tmp_regno = AX_REG;
23279 tmp = gen_rtx_REG (SImode, tmp_regno);
23282 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23283 xops[1] = tmp;
23284 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23286 /* Adjust the this parameter. */
23287 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23288 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23290 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23291 xops[0] = GEN_INT (vcall_offset);
23292 xops[1] = tmp2;
23293 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23294 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23296 xops[1] = this_reg;
23297 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
23300 /* If necessary, drop THIS back to its stack slot. */
23301 if (this_reg && this_reg != this_param)
23303 xops[0] = this_reg;
23304 xops[1] = this_param;
23305 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23308 xops[0] = XEXP (DECL_RTL (function), 0);
23309 if (TARGET_64BIT)
23311 if (!flag_pic || (*targetm.binds_local_p) (function))
23312 output_asm_insn ("jmp\t%P0", xops);
23313 /* All thunks should be in the same object as their target,
23314 and thus binds_local_p should be true. */
23315 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
23316 gcc_unreachable ();
23317 else
23319 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23320 tmp = gen_rtx_CONST (Pmode, tmp);
23321 tmp = gen_rtx_MEM (QImode, tmp);
23322 xops[0] = tmp;
23323 output_asm_insn ("jmp\t%A0", xops);
23326 else
23328 if (!flag_pic || (*targetm.binds_local_p) (function))
23329 output_asm_insn ("jmp\t%P0", xops);
23330 else
23331 #if TARGET_MACHO
23332 if (TARGET_MACHO)
23334 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23335 tmp = (gen_rtx_SYMBOL_REF
23336 (Pmode,
23337 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23338 tmp = gen_rtx_MEM (QImode, tmp);
23339 xops[0] = tmp;
23340 output_asm_insn ("jmp\t%0", xops);
23342 else
23343 #endif /* TARGET_MACHO */
23345 tmp = gen_rtx_REG (SImode, CX_REG);
23346 output_set_got (tmp, NULL_RTX);
23348 xops[1] = tmp;
23349 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23350 output_asm_insn ("jmp\t{*}%1", xops);
23355 static void
23356 x86_file_start (void)
23358 default_file_start ();
23359 #if TARGET_MACHO
23360 darwin_file_start ();
23361 #endif
23362 if (X86_FILE_START_VERSION_DIRECTIVE)
23363 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23364 if (X86_FILE_START_FLTUSED)
23365 fputs ("\t.global\t__fltused\n", asm_out_file);
23366 if (ix86_asm_dialect == ASM_INTEL)
23367 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23371 x86_field_alignment (tree field, int computed)
23373 enum machine_mode mode;
23374 tree type = TREE_TYPE (field);
23376 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23377 return computed;
23378 mode = TYPE_MODE (strip_array_types (type));
23379 if (mode == DFmode || mode == DCmode
23380 || GET_MODE_CLASS (mode) == MODE_INT
23381 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23382 return MIN (32, computed);
23383 return computed;
23386 /* Output assembler code to FILE to increment profiler label # LABELNO
23387 for profiling a function entry. */
23388 void
23389 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23391 if (TARGET_64BIT)
23393 #ifndef NO_PROFILE_COUNTERS
23394 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23395 #endif
23397 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
23398 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23399 else
23400 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23402 else if (flag_pic)
23404 #ifndef NO_PROFILE_COUNTERS
23405 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23406 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23407 #endif
23408 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23410 else
23412 #ifndef NO_PROFILE_COUNTERS
23413 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23414 PROFILE_COUNT_REGISTER);
23415 #endif
23416 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23420 /* We don't have exact information about the insn sizes, but we may assume
23421 quite safely that we are informed about all 1 byte insns and memory
23422 address sizes. This is enough to eliminate unnecessary padding in
23423 99% of cases. */
23425 static int
23426 min_insn_size (rtx insn)
23428 int l = 0;
23430 if (!INSN_P (insn) || !active_insn_p (insn))
23431 return 0;
23433 /* Discard alignments we've emit and jump instructions. */
23434 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23435 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23436 return 0;
23437 if (JUMP_P (insn)
23438 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23439 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23440 return 0;
23442 /* Important case - calls are always 5 bytes.
23443 It is common to have many calls in the row. */
23444 if (CALL_P (insn)
23445 && symbolic_reference_mentioned_p (PATTERN (insn))
23446 && !SIBLING_CALL_P (insn))
23447 return 5;
23448 if (get_attr_length (insn) <= 1)
23449 return 1;
23451 /* For normal instructions we may rely on the sizes of addresses
23452 and the presence of symbol to require 4 bytes of encoding.
23453 This is not the case for jumps where references are PC relative. */
23454 if (!JUMP_P (insn))
23456 l = get_attr_length_address (insn);
23457 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23458 l = 4;
23460 if (l)
23461 return 1+l;
23462 else
23463 return 2;
23466 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
23467 window. */
23469 static void
23470 ix86_avoid_jump_misspredicts (void)
23472 rtx insn, start = get_insns ();
23473 int nbytes = 0, njumps = 0;
23474 int isjump = 0;
23476 /* Look for all minimal intervals of instructions containing 4 jumps.
23477 The intervals are bounded by START and INSN. NBYTES is the total
23478 size of instructions in the interval including INSN and not including
23479 START. When the NBYTES is smaller than 16 bytes, it is possible
23480 that the end of START and INSN ends up in the same 16byte page.
23482 The smallest offset in the page INSN can start is the case where START
23483 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
23484 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
23486 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23489 nbytes += min_insn_size (insn);
23490 if (dump_file)
23491 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23492 INSN_UID (insn), min_insn_size (insn));
23493 if ((JUMP_P (insn)
23494 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23495 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
23496 || CALL_P (insn))
23497 njumps++;
23498 else
23499 continue;
23501 while (njumps > 3)
23503 start = NEXT_INSN (start);
23504 if ((JUMP_P (start)
23505 && GET_CODE (PATTERN (start)) != ADDR_VEC
23506 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23507 || CALL_P (start))
23508 njumps--, isjump = 1;
23509 else
23510 isjump = 0;
23511 nbytes -= min_insn_size (start);
23513 gcc_assert (njumps >= 0);
23514 if (dump_file)
23515 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23516 INSN_UID (start), INSN_UID (insn), nbytes);
23518 if (njumps == 3 && isjump && nbytes < 16)
23520 int padsize = 15 - nbytes + min_insn_size (insn);
23522 if (dump_file)
23523 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23524 INSN_UID (insn), padsize);
23525 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23530 /* AMD Athlon works faster
23531 when RET is not destination of conditional jump or directly preceded
23532 by other jump instruction. We avoid the penalty by inserting NOP just
23533 before the RET instructions in such cases. */
23534 static void
23535 ix86_pad_returns (void)
23537 edge e;
23538 edge_iterator ei;
23540 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23542 basic_block bb = e->src;
23543 rtx ret = BB_END (bb);
23544 rtx prev;
23545 bool replace = false;
23547 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23548 || !maybe_hot_bb_p (bb))
23549 continue;
23550 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23551 if (active_insn_p (prev) || LABEL_P (prev))
23552 break;
23553 if (prev && LABEL_P (prev))
23555 edge e;
23556 edge_iterator ei;
23558 FOR_EACH_EDGE (e, ei, bb->preds)
23559 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23560 && !(e->flags & EDGE_FALLTHRU))
23561 replace = true;
23563 if (!replace)
23565 prev = prev_active_insn (ret);
23566 if (prev
23567 && ((JUMP_P (prev) && any_condjump_p (prev))
23568 || CALL_P (prev)))
23569 replace = true;
23570 /* Empty functions get branch mispredict even when the jump destination
23571 is not visible to us. */
23572 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23573 replace = true;
23575 if (replace)
23577 emit_insn_before (gen_return_internal_long (), ret);
23578 delete_insn (ret);
23583 /* Implement machine specific optimizations. We implement padding of returns
23584 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23585 static void
23586 ix86_reorg (void)
23588 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23589 ix86_pad_returns ();
23590 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23591 ix86_avoid_jump_misspredicts ();
23594 /* Return nonzero when QImode register that must be represented via REX prefix
23595 is used. */
23596 bool
23597 x86_extended_QIreg_mentioned_p (rtx insn)
23599 int i;
23600 extract_insn_cached (insn);
23601 for (i = 0; i < recog_data.n_operands; i++)
23602 if (REG_P (recog_data.operand[i])
23603 && REGNO (recog_data.operand[i]) >= 4)
23604 return true;
23605 return false;
23608 /* Return nonzero when P points to register encoded via REX prefix.
23609 Called via for_each_rtx. */
23610 static int
23611 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23613 unsigned int regno;
23614 if (!REG_P (*p))
23615 return 0;
23616 regno = REGNO (*p);
23617 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23620 /* Return true when INSN mentions register that must be encoded using REX
23621 prefix. */
23622 bool
23623 x86_extended_reg_mentioned_p (rtx insn)
23625 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23628 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23629 optabs would emit if we didn't have TFmode patterns. */
23631 void
23632 x86_emit_floatuns (rtx operands[2])
23634 rtx neglab, donelab, i0, i1, f0, in, out;
23635 enum machine_mode mode, inmode;
23637 inmode = GET_MODE (operands[1]);
23638 gcc_assert (inmode == SImode || inmode == DImode);
23640 out = operands[0];
23641 in = force_reg (inmode, operands[1]);
23642 mode = GET_MODE (out);
23643 neglab = gen_label_rtx ();
23644 donelab = gen_label_rtx ();
23645 f0 = gen_reg_rtx (mode);
23647 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23649 expand_float (out, in, 0);
23651 emit_jump_insn (gen_jump (donelab));
23652 emit_barrier ();
23654 emit_label (neglab);
23656 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23657 1, OPTAB_DIRECT);
23658 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23659 1, OPTAB_DIRECT);
23660 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23662 expand_float (f0, i0, 0);
23664 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23666 emit_label (donelab);
23669 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23670 with all elements equal to VAR. Return true if successful. */
23672 static bool
23673 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23674 rtx target, rtx val)
23676 enum machine_mode smode, wsmode, wvmode;
23677 rtx x;
23679 switch (mode)
23681 case V2SImode:
23682 case V2SFmode:
23683 if (!mmx_ok)
23684 return false;
23685 /* FALLTHRU */
23687 case V2DFmode:
23688 case V2DImode:
23689 case V4SFmode:
23690 case V4SImode:
23691 val = force_reg (GET_MODE_INNER (mode), val);
23692 x = gen_rtx_VEC_DUPLICATE (mode, val);
23693 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23694 return true;
23696 case V4HImode:
23697 if (!mmx_ok)
23698 return false;
23699 if (TARGET_SSE || TARGET_3DNOW_A)
23701 val = gen_lowpart (SImode, val);
23702 x = gen_rtx_TRUNCATE (HImode, val);
23703 x = gen_rtx_VEC_DUPLICATE (mode, x);
23704 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23705 return true;
23707 else
23709 smode = HImode;
23710 wsmode = SImode;
23711 wvmode = V2SImode;
23712 goto widen;
23715 case V8QImode:
23716 if (!mmx_ok)
23717 return false;
23718 smode = QImode;
23719 wsmode = HImode;
23720 wvmode = V4HImode;
23721 goto widen;
23722 case V8HImode:
23723 if (TARGET_SSE2)
23725 rtx tmp1, tmp2;
23726 /* Extend HImode to SImode using a paradoxical SUBREG. */
23727 tmp1 = gen_reg_rtx (SImode);
23728 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23729 /* Insert the SImode value as low element of V4SImode vector. */
23730 tmp2 = gen_reg_rtx (V4SImode);
23731 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23732 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23733 CONST0_RTX (V4SImode),
23734 const1_rtx);
23735 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23736 /* Cast the V4SImode vector back to a V8HImode vector. */
23737 tmp1 = gen_reg_rtx (V8HImode);
23738 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23739 /* Duplicate the low short through the whole low SImode word. */
23740 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23741 /* Cast the V8HImode vector back to a V4SImode vector. */
23742 tmp2 = gen_reg_rtx (V4SImode);
23743 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23744 /* Replicate the low element of the V4SImode vector. */
23745 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23746 /* Cast the V2SImode back to V8HImode, and store in target. */
23747 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23748 return true;
23750 smode = HImode;
23751 wsmode = SImode;
23752 wvmode = V4SImode;
23753 goto widen;
23754 case V16QImode:
23755 if (TARGET_SSE2)
23757 rtx tmp1, tmp2;
23758 /* Extend QImode to SImode using a paradoxical SUBREG. */
23759 tmp1 = gen_reg_rtx (SImode);
23760 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23761 /* Insert the SImode value as low element of V4SImode vector. */
23762 tmp2 = gen_reg_rtx (V4SImode);
23763 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23764 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23765 CONST0_RTX (V4SImode),
23766 const1_rtx);
23767 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23768 /* Cast the V4SImode vector back to a V16QImode vector. */
23769 tmp1 = gen_reg_rtx (V16QImode);
23770 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23771 /* Duplicate the low byte through the whole low SImode word. */
23772 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23773 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23774 /* Cast the V16QImode vector back to a V4SImode vector. */
23775 tmp2 = gen_reg_rtx (V4SImode);
23776 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23777 /* Replicate the low element of the V4SImode vector. */
23778 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23779 /* Cast the V2SImode back to V16QImode, and store in target. */
23780 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23781 return true;
23783 smode = QImode;
23784 wsmode = HImode;
23785 wvmode = V8HImode;
23786 goto widen;
23787 widen:
23788 /* Replicate the value once into the next wider mode and recurse. */
23789 val = convert_modes (wsmode, smode, val, true);
23790 x = expand_simple_binop (wsmode, ASHIFT, val,
23791 GEN_INT (GET_MODE_BITSIZE (smode)),
23792 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23793 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23795 x = gen_reg_rtx (wvmode);
23796 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23797 gcc_unreachable ();
23798 emit_move_insn (target, gen_lowpart (mode, x));
23799 return true;
23801 default:
23802 return false;
23806 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23807 whose ONE_VAR element is VAR, and other elements are zero. Return true
23808 if successful. */
23810 static bool
23811 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23812 rtx target, rtx var, int one_var)
23814 enum machine_mode vsimode;
23815 rtx new_target;
23816 rtx x, tmp;
23817 bool use_vector_set = false;
23819 switch (mode)
23821 case V2DImode:
23822 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
23823 break;
23824 case V16QImode:
23825 case V4SImode:
23826 case V4SFmode:
23827 use_vector_set = TARGET_SSE4_1;
23828 break;
23829 case V8HImode:
23830 use_vector_set = TARGET_SSE2;
23831 break;
23832 case V4HImode:
23833 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
23834 break;
23835 default:
23836 break;
23839 if (use_vector_set)
23841 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
23842 var = force_reg (GET_MODE_INNER (mode), var);
23843 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23844 return true;
23847 switch (mode)
23849 case V2SFmode:
23850 case V2SImode:
23851 if (!mmx_ok)
23852 return false;
23853 /* FALLTHRU */
23855 case V2DFmode:
23856 case V2DImode:
23857 if (one_var != 0)
23858 return false;
23859 var = force_reg (GET_MODE_INNER (mode), var);
23860 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23861 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23862 return true;
23864 case V4SFmode:
23865 case V4SImode:
23866 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23867 new_target = gen_reg_rtx (mode);
23868 else
23869 new_target = target;
23870 var = force_reg (GET_MODE_INNER (mode), var);
23871 x = gen_rtx_VEC_DUPLICATE (mode, var);
23872 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23873 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23874 if (one_var != 0)
23876 /* We need to shuffle the value to the correct position, so
23877 create a new pseudo to store the intermediate result. */
23879 /* With SSE2, we can use the integer shuffle insns. */
23880 if (mode != V4SFmode && TARGET_SSE2)
23882 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23883 GEN_INT (1),
23884 GEN_INT (one_var == 1 ? 0 : 1),
23885 GEN_INT (one_var == 2 ? 0 : 1),
23886 GEN_INT (one_var == 3 ? 0 : 1)));
23887 if (target != new_target)
23888 emit_move_insn (target, new_target);
23889 return true;
23892 /* Otherwise convert the intermediate result to V4SFmode and
23893 use the SSE1 shuffle instructions. */
23894 if (mode != V4SFmode)
23896 tmp = gen_reg_rtx (V4SFmode);
23897 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23899 else
23900 tmp = new_target;
23902 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23903 GEN_INT (1),
23904 GEN_INT (one_var == 1 ? 0 : 1),
23905 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23906 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23908 if (mode != V4SFmode)
23909 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23910 else if (tmp != target)
23911 emit_move_insn (target, tmp);
23913 else if (target != new_target)
23914 emit_move_insn (target, new_target);
23915 return true;
23917 case V8HImode:
23918 case V16QImode:
23919 vsimode = V4SImode;
23920 goto widen;
23921 case V4HImode:
23922 case V8QImode:
23923 if (!mmx_ok)
23924 return false;
23925 vsimode = V2SImode;
23926 goto widen;
23927 widen:
23928 if (one_var != 0)
23929 return false;
23931 /* Zero extend the variable element to SImode and recurse. */
23932 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23934 x = gen_reg_rtx (vsimode);
23935 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23936 var, one_var))
23937 gcc_unreachable ();
23939 emit_move_insn (target, gen_lowpart (mode, x));
23940 return true;
23942 default:
23943 return false;
23947 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23948 consisting of the values in VALS. It is known that all elements
23949 except ONE_VAR are constants. Return true if successful. */
23951 static bool
23952 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23953 rtx target, rtx vals, int one_var)
23955 rtx var = XVECEXP (vals, 0, one_var);
23956 enum machine_mode wmode;
23957 rtx const_vec, x;
23959 const_vec = copy_rtx (vals);
23960 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23961 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23963 switch (mode)
23965 case V2DFmode:
23966 case V2DImode:
23967 case V2SFmode:
23968 case V2SImode:
23969 /* For the two element vectors, it's just as easy to use
23970 the general case. */
23971 return false;
23973 case V4SFmode:
23974 case V4SImode:
23975 case V8HImode:
23976 case V4HImode:
23977 break;
23979 case V16QImode:
23980 if (TARGET_SSE4_1)
23981 break;
23982 wmode = V8HImode;
23983 goto widen;
23984 case V8QImode:
23985 wmode = V4HImode;
23986 goto widen;
23987 widen:
23988 /* There's no way to set one QImode entry easily. Combine
23989 the variable value with its adjacent constant value, and
23990 promote to an HImode set. */
23991 x = XVECEXP (vals, 0, one_var ^ 1);
23992 if (one_var & 1)
23994 var = convert_modes (HImode, QImode, var, true);
23995 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23996 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23997 x = GEN_INT (INTVAL (x) & 0xff);
23999 else
24001 var = convert_modes (HImode, QImode, var, true);
24002 x = gen_int_mode (INTVAL (x) << 8, HImode);
24004 if (x != const0_rtx)
24005 var = expand_simple_binop (HImode, IOR, var, x, var,
24006 1, OPTAB_LIB_WIDEN);
24008 x = gen_reg_rtx (wmode);
24009 emit_move_insn (x, gen_lowpart (wmode, const_vec));
24010 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
24012 emit_move_insn (target, gen_lowpart (mode, x));
24013 return true;
24015 default:
24016 return false;
24019 emit_move_insn (target, const_vec);
24020 ix86_expand_vector_set (mmx_ok, target, var, one_var);
24021 return true;
24024 /* A subroutine of ix86_expand_vector_init_general. Use vector
24025 concatenate to handle the most general case: all values variable,
24026 and none identical. */
24028 static void
24029 ix86_expand_vector_init_concat (enum machine_mode mode,
24030 rtx target, rtx *ops, int n)
24032 enum machine_mode cmode, hmode = VOIDmode;
24033 rtx first[4], second[2];
24034 rtvec v;
24035 int i, j;
24037 switch (n)
24039 case 2:
24040 switch (mode)
24042 case V4SImode:
24043 cmode = V2SImode;
24044 break;
24045 case V4SFmode:
24046 cmode = V2SFmode;
24047 break;
24048 case V2DImode:
24049 cmode = DImode;
24050 break;
24051 case V2SImode:
24052 cmode = SImode;
24053 break;
24054 case V2DFmode:
24055 cmode = DFmode;
24056 break;
24057 case V2SFmode:
24058 cmode = SFmode;
24059 break;
24060 default:
24061 gcc_unreachable ();
24064 if (!register_operand (ops[1], cmode))
24065 ops[1] = force_reg (cmode, ops[1]);
24066 if (!register_operand (ops[0], cmode))
24067 ops[0] = force_reg (cmode, ops[0]);
24068 emit_insn (gen_rtx_SET (VOIDmode, target,
24069 gen_rtx_VEC_CONCAT (mode, ops[0],
24070 ops[1])));
24071 break;
24073 case 4:
24074 switch (mode)
24076 case V4SImode:
24077 cmode = V2SImode;
24078 break;
24079 case V4SFmode:
24080 cmode = V2SFmode;
24081 break;
24082 default:
24083 gcc_unreachable ();
24085 goto half;
24087 half:
24088 /* FIXME: We process inputs backward to help RA. PR 36222. */
24089 i = n - 1;
24090 j = (n >> 1) - 1;
24091 for (; i > 0; i -= 2, j--)
24093 first[j] = gen_reg_rtx (cmode);
24094 v = gen_rtvec (2, ops[i - 1], ops[i]);
24095 ix86_expand_vector_init (false, first[j],
24096 gen_rtx_PARALLEL (cmode, v));
24099 n >>= 1;
24100 if (n > 2)
24102 gcc_assert (hmode != VOIDmode);
24103 for (i = j = 0; i < n; i += 2, j++)
24105 second[j] = gen_reg_rtx (hmode);
24106 ix86_expand_vector_init_concat (hmode, second [j],
24107 &first [i], 2);
24109 n >>= 1;
24110 ix86_expand_vector_init_concat (mode, target, second, n);
24112 else
24113 ix86_expand_vector_init_concat (mode, target, first, n);
24114 break;
24116 default:
24117 gcc_unreachable ();
24121 /* A subroutine of ix86_expand_vector_init_general. Use vector
24122 interleave to handle the most general case: all values variable,
24123 and none identical. */
24125 static void
24126 ix86_expand_vector_init_interleave (enum machine_mode mode,
24127 rtx target, rtx *ops, int n)
24129 enum machine_mode first_imode, second_imode, third_imode;
24130 int i, j;
24131 rtx op0, op1;
24132 rtx (*gen_load_even) (rtx, rtx, rtx);
24133 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
24134 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
24136 switch (mode)
24138 case V8HImode:
24139 gen_load_even = gen_vec_setv8hi;
24140 gen_interleave_first_low = gen_vec_interleave_lowv4si;
24141 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24142 first_imode = V4SImode;
24143 second_imode = V2DImode;
24144 third_imode = VOIDmode;
24145 break;
24146 case V16QImode:
24147 gen_load_even = gen_vec_setv16qi;
24148 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
24149 gen_interleave_second_low = gen_vec_interleave_lowv4si;
24150 first_imode = V8HImode;
24151 second_imode = V4SImode;
24152 third_imode = V2DImode;
24153 break;
24154 default:
24155 gcc_unreachable ();
24158 for (i = 0; i < n; i++)
24160 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
24161 op0 = gen_reg_rtx (SImode);
24162 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
24164 /* Insert the SImode value as low element of V4SImode vector. */
24165 op1 = gen_reg_rtx (V4SImode);
24166 op0 = gen_rtx_VEC_MERGE (V4SImode,
24167 gen_rtx_VEC_DUPLICATE (V4SImode,
24168 op0),
24169 CONST0_RTX (V4SImode),
24170 const1_rtx);
24171 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
24173 /* Cast the V4SImode vector back to a vector in orignal mode. */
24174 op0 = gen_reg_rtx (mode);
24175 emit_move_insn (op0, gen_lowpart (mode, op1));
24177 /* Load even elements into the second positon. */
24178 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
24179 const1_rtx));
24181 /* Cast vector to FIRST_IMODE vector. */
24182 ops[i] = gen_reg_rtx (first_imode);
24183 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
24186 /* Interleave low FIRST_IMODE vectors. */
24187 for (i = j = 0; i < n; i += 2, j++)
24189 op0 = gen_reg_rtx (first_imode);
24190 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
24192 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
24193 ops[j] = gen_reg_rtx (second_imode);
24194 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
24197 /* Interleave low SECOND_IMODE vectors. */
24198 switch (second_imode)
24200 case V4SImode:
24201 for (i = j = 0; i < n / 2; i += 2, j++)
24203 op0 = gen_reg_rtx (second_imode);
24204 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
24205 ops[i + 1]));
24207 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
24208 vector. */
24209 ops[j] = gen_reg_rtx (third_imode);
24210 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
24212 second_imode = V2DImode;
24213 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24214 /* FALLTHRU */
24216 case V2DImode:
24217 op0 = gen_reg_rtx (second_imode);
24218 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
24219 ops[1]));
24221 /* Cast the SECOND_IMODE vector back to a vector on original
24222 mode. */
24223 emit_insn (gen_rtx_SET (VOIDmode, target,
24224 gen_lowpart (mode, op0)));
24225 break;
24227 default:
24228 gcc_unreachable ();
24232 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
24233 all values variable, and none identical. */
24235 static void
24236 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
24237 rtx target, rtx vals)
24239 rtx ops[16];
24240 int n, i;
24242 switch (mode)
24244 case V2SFmode:
24245 case V2SImode:
24246 if (!mmx_ok && !TARGET_SSE)
24247 break;
24248 /* FALLTHRU */
24250 case V4SFmode:
24251 case V4SImode:
24252 case V2DFmode:
24253 case V2DImode:
24254 n = GET_MODE_NUNITS (mode);
24255 for (i = 0; i < n; i++)
24256 ops[i] = XVECEXP (vals, 0, i);
24257 ix86_expand_vector_init_concat (mode, target, ops, n);
24258 return;
24260 case V16QImode:
24261 if (!TARGET_SSE4_1)
24262 break;
24263 /* FALLTHRU */
24265 case V8HImode:
24266 if (!TARGET_SSE2)
24267 break;
24269 n = GET_MODE_NUNITS (mode);
24270 for (i = 0; i < n; i++)
24271 ops[i] = XVECEXP (vals, 0, i);
24272 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
24273 return;
24275 case V4HImode:
24276 case V8QImode:
24277 break;
24279 default:
24280 gcc_unreachable ();
24284 int i, j, n_elts, n_words, n_elt_per_word;
24285 enum machine_mode inner_mode;
24286 rtx words[4], shift;
24288 inner_mode = GET_MODE_INNER (mode);
24289 n_elts = GET_MODE_NUNITS (mode);
24290 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
24291 n_elt_per_word = n_elts / n_words;
24292 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
24294 for (i = 0; i < n_words; ++i)
24296 rtx word = NULL_RTX;
24298 for (j = 0; j < n_elt_per_word; ++j)
24300 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
24301 elt = convert_modes (word_mode, inner_mode, elt, true);
24303 if (j == 0)
24304 word = elt;
24305 else
24307 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
24308 word, 1, OPTAB_LIB_WIDEN);
24309 word = expand_simple_binop (word_mode, IOR, word, elt,
24310 word, 1, OPTAB_LIB_WIDEN);
24314 words[i] = word;
24317 if (n_words == 1)
24318 emit_move_insn (target, gen_lowpart (mode, words[0]));
24319 else if (n_words == 2)
24321 rtx tmp = gen_reg_rtx (mode);
24322 emit_clobber (tmp);
24323 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
24324 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
24325 emit_move_insn (target, tmp);
24327 else if (n_words == 4)
24329 rtx tmp = gen_reg_rtx (V4SImode);
24330 gcc_assert (word_mode == SImode);
24331 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
24332 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
24333 emit_move_insn (target, gen_lowpart (mode, tmp));
24335 else
24336 gcc_unreachable ();
24340 /* Initialize vector TARGET via VALS. Suppress the use of MMX
24341 instructions unless MMX_OK is true. */
24343 void
24344 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
24346 enum machine_mode mode = GET_MODE (target);
24347 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24348 int n_elts = GET_MODE_NUNITS (mode);
24349 int n_var = 0, one_var = -1;
24350 bool all_same = true, all_const_zero = true;
24351 int i;
24352 rtx x;
24354 for (i = 0; i < n_elts; ++i)
24356 x = XVECEXP (vals, 0, i);
24357 if (!(CONST_INT_P (x)
24358 || GET_CODE (x) == CONST_DOUBLE
24359 || GET_CODE (x) == CONST_FIXED))
24360 n_var++, one_var = i;
24361 else if (x != CONST0_RTX (inner_mode))
24362 all_const_zero = false;
24363 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
24364 all_same = false;
24367 /* Constants are best loaded from the constant pool. */
24368 if (n_var == 0)
24370 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
24371 return;
24374 /* If all values are identical, broadcast the value. */
24375 if (all_same
24376 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
24377 XVECEXP (vals, 0, 0)))
24378 return;
24380 /* Values where only one field is non-constant are best loaded from
24381 the pool and overwritten via move later. */
24382 if (n_var == 1)
24384 if (all_const_zero
24385 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24386 XVECEXP (vals, 0, one_var),
24387 one_var))
24388 return;
24390 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
24391 return;
24394 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
24397 void
24398 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24400 enum machine_mode mode = GET_MODE (target);
24401 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24402 bool use_vec_merge = false;
24403 rtx tmp;
24405 switch (mode)
24407 case V2SFmode:
24408 case V2SImode:
24409 if (mmx_ok)
24411 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24412 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24413 if (elt == 0)
24414 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24415 else
24416 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24417 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24418 return;
24420 break;
24422 case V2DImode:
24423 use_vec_merge = TARGET_SSE4_1;
24424 if (use_vec_merge)
24425 break;
24427 case V2DFmode:
24429 rtx op0, op1;
24431 /* For the two element vectors, we implement a VEC_CONCAT with
24432 the extraction of the other element. */
24434 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24435 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24437 if (elt == 0)
24438 op0 = val, op1 = tmp;
24439 else
24440 op0 = tmp, op1 = val;
24442 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24443 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24445 return;
24447 case V4SFmode:
24448 use_vec_merge = TARGET_SSE4_1;
24449 if (use_vec_merge)
24450 break;
24452 switch (elt)
24454 case 0:
24455 use_vec_merge = true;
24456 break;
24458 case 1:
24459 /* tmp = target = A B C D */
24460 tmp = copy_to_reg (target);
24461 /* target = A A B B */
24462 emit_insn (gen_sse_unpcklps (target, target, target));
24463 /* target = X A B B */
24464 ix86_expand_vector_set (false, target, val, 0);
24465 /* target = A X C D */
24466 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24467 GEN_INT (1), GEN_INT (0),
24468 GEN_INT (2+4), GEN_INT (3+4)));
24469 return;
24471 case 2:
24472 /* tmp = target = A B C D */
24473 tmp = copy_to_reg (target);
24474 /* tmp = X B C D */
24475 ix86_expand_vector_set (false, tmp, val, 0);
24476 /* target = A B X D */
24477 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24478 GEN_INT (0), GEN_INT (1),
24479 GEN_INT (0+4), GEN_INT (3+4)));
24480 return;
24482 case 3:
24483 /* tmp = target = A B C D */
24484 tmp = copy_to_reg (target);
24485 /* tmp = X B C D */
24486 ix86_expand_vector_set (false, tmp, val, 0);
24487 /* target = A B X D */
24488 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24489 GEN_INT (0), GEN_INT (1),
24490 GEN_INT (2+4), GEN_INT (0+4)));
24491 return;
24493 default:
24494 gcc_unreachable ();
24496 break;
24498 case V4SImode:
24499 use_vec_merge = TARGET_SSE4_1;
24500 if (use_vec_merge)
24501 break;
24503 /* Element 0 handled by vec_merge below. */
24504 if (elt == 0)
24506 use_vec_merge = true;
24507 break;
24510 if (TARGET_SSE2)
24512 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24513 store into element 0, then shuffle them back. */
24515 rtx order[4];
24517 order[0] = GEN_INT (elt);
24518 order[1] = const1_rtx;
24519 order[2] = const2_rtx;
24520 order[3] = GEN_INT (3);
24521 order[elt] = const0_rtx;
24523 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24524 order[1], order[2], order[3]));
24526 ix86_expand_vector_set (false, target, val, 0);
24528 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24529 order[1], order[2], order[3]));
24531 else
24533 /* For SSE1, we have to reuse the V4SF code. */
24534 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24535 gen_lowpart (SFmode, val), elt);
24537 return;
24539 case V8HImode:
24540 use_vec_merge = TARGET_SSE2;
24541 break;
24542 case V4HImode:
24543 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24544 break;
24546 case V16QImode:
24547 use_vec_merge = TARGET_SSE4_1;
24548 break;
24550 case V8QImode:
24551 default:
24552 break;
24555 if (use_vec_merge)
24557 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24558 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24559 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24561 else
24563 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24565 emit_move_insn (mem, target);
24567 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24568 emit_move_insn (tmp, val);
24570 emit_move_insn (target, mem);
24574 void
24575 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24577 enum machine_mode mode = GET_MODE (vec);
24578 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24579 bool use_vec_extr = false;
24580 rtx tmp;
24582 switch (mode)
24584 case V2SImode:
24585 case V2SFmode:
24586 if (!mmx_ok)
24587 break;
24588 /* FALLTHRU */
24590 case V2DFmode:
24591 case V2DImode:
24592 use_vec_extr = true;
24593 break;
24595 case V4SFmode:
24596 use_vec_extr = TARGET_SSE4_1;
24597 if (use_vec_extr)
24598 break;
24600 switch (elt)
24602 case 0:
24603 tmp = vec;
24604 break;
24606 case 1:
24607 case 3:
24608 tmp = gen_reg_rtx (mode);
24609 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24610 GEN_INT (elt), GEN_INT (elt),
24611 GEN_INT (elt+4), GEN_INT (elt+4)));
24612 break;
24614 case 2:
24615 tmp = gen_reg_rtx (mode);
24616 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24617 break;
24619 default:
24620 gcc_unreachable ();
24622 vec = tmp;
24623 use_vec_extr = true;
24624 elt = 0;
24625 break;
24627 case V4SImode:
24628 use_vec_extr = TARGET_SSE4_1;
24629 if (use_vec_extr)
24630 break;
24632 if (TARGET_SSE2)
24634 switch (elt)
24636 case 0:
24637 tmp = vec;
24638 break;
24640 case 1:
24641 case 3:
24642 tmp = gen_reg_rtx (mode);
24643 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24644 GEN_INT (elt), GEN_INT (elt),
24645 GEN_INT (elt), GEN_INT (elt)));
24646 break;
24648 case 2:
24649 tmp = gen_reg_rtx (mode);
24650 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24651 break;
24653 default:
24654 gcc_unreachable ();
24656 vec = tmp;
24657 use_vec_extr = true;
24658 elt = 0;
24660 else
24662 /* For SSE1, we have to reuse the V4SF code. */
24663 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24664 gen_lowpart (V4SFmode, vec), elt);
24665 return;
24667 break;
24669 case V8HImode:
24670 use_vec_extr = TARGET_SSE2;
24671 break;
24672 case V4HImode:
24673 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24674 break;
24676 case V16QImode:
24677 use_vec_extr = TARGET_SSE4_1;
24678 break;
24680 case V8QImode:
24681 /* ??? Could extract the appropriate HImode element and shift. */
24682 default:
24683 break;
24686 if (use_vec_extr)
24688 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24689 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24691 /* Let the rtl optimizers know about the zero extension performed. */
24692 if (inner_mode == QImode || inner_mode == HImode)
24694 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24695 target = gen_lowpart (SImode, target);
24698 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24700 else
24702 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24704 emit_move_insn (mem, vec);
24706 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24707 emit_move_insn (target, tmp);
24711 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
24712 pattern to reduce; DEST is the destination; IN is the input vector. */
24714 void
24715 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24717 rtx tmp1, tmp2, tmp3;
24719 tmp1 = gen_reg_rtx (V4SFmode);
24720 tmp2 = gen_reg_rtx (V4SFmode);
24721 tmp3 = gen_reg_rtx (V4SFmode);
24723 emit_insn (gen_sse_movhlps (tmp1, in, in));
24724 emit_insn (fn (tmp2, tmp1, in));
24726 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24727 GEN_INT (1), GEN_INT (1),
24728 GEN_INT (1+4), GEN_INT (1+4)));
24729 emit_insn (fn (dest, tmp2, tmp3));
24732 /* Target hook for scalar_mode_supported_p. */
24733 static bool
24734 ix86_scalar_mode_supported_p (enum machine_mode mode)
24736 if (DECIMAL_FLOAT_MODE_P (mode))
24737 return true;
24738 else if (mode == TFmode)
24739 return true;
24740 else
24741 return default_scalar_mode_supported_p (mode);
24744 /* Implements target hook vector_mode_supported_p. */
24745 static bool
24746 ix86_vector_mode_supported_p (enum machine_mode mode)
24748 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24749 return true;
24750 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24751 return true;
24752 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24753 return true;
24754 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24755 return true;
24756 return false;
24759 /* Target hook for c_mode_for_suffix. */
24760 static enum machine_mode
24761 ix86_c_mode_for_suffix (char suffix)
24763 if (suffix == 'q')
24764 return TFmode;
24765 if (suffix == 'w')
24766 return XFmode;
24768 return VOIDmode;
24771 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24773 We do this in the new i386 backend to maintain source compatibility
24774 with the old cc0-based compiler. */
24776 static tree
24777 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24778 tree inputs ATTRIBUTE_UNUSED,
24779 tree clobbers)
24781 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24782 clobbers);
24783 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24784 clobbers);
24785 return clobbers;
24788 /* Implements target vector targetm.asm.encode_section_info. This
24789 is not used by netware. */
24791 static void ATTRIBUTE_UNUSED
24792 ix86_encode_section_info (tree decl, rtx rtl, int first)
24794 default_encode_section_info (decl, rtl, first);
24796 if (TREE_CODE (decl) == VAR_DECL
24797 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24798 && ix86_in_large_data_p (decl))
24799 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24802 /* Worker function for REVERSE_CONDITION. */
24804 enum rtx_code
24805 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24807 return (mode != CCFPmode && mode != CCFPUmode
24808 ? reverse_condition (code)
24809 : reverse_condition_maybe_unordered (code));
24812 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24813 to OPERANDS[0]. */
24815 const char *
24816 output_387_reg_move (rtx insn, rtx *operands)
24818 if (REG_P (operands[0]))
24820 if (REG_P (operands[1])
24821 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24823 if (REGNO (operands[0]) == FIRST_STACK_REG)
24824 return output_387_ffreep (operands, 0);
24825 return "fstp\t%y0";
24827 if (STACK_TOP_P (operands[0]))
24828 return "fld%z1\t%y1";
24829 return "fst\t%y0";
24831 else if (MEM_P (operands[0]))
24833 gcc_assert (REG_P (operands[1]));
24834 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24835 return "fstp%z0\t%y0";
24836 else
24838 /* There is no non-popping store to memory for XFmode.
24839 So if we need one, follow the store with a load. */
24840 if (GET_MODE (operands[0]) == XFmode)
24841 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24842 else
24843 return "fst%z0\t%y0";
24846 else
24847 gcc_unreachable();
24850 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24851 FP status register is set. */
24853 void
24854 ix86_emit_fp_unordered_jump (rtx label)
24856 rtx reg = gen_reg_rtx (HImode);
24857 rtx temp;
24859 emit_insn (gen_x86_fnstsw_1 (reg));
24861 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24863 emit_insn (gen_x86_sahf_1 (reg));
24865 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24866 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24868 else
24870 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24872 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24873 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24876 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24877 gen_rtx_LABEL_REF (VOIDmode, label),
24878 pc_rtx);
24879 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24881 emit_jump_insn (temp);
24882 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24885 /* Output code to perform a log1p XFmode calculation. */
24887 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24889 rtx label1 = gen_label_rtx ();
24890 rtx label2 = gen_label_rtx ();
24892 rtx tmp = gen_reg_rtx (XFmode);
24893 rtx tmp2 = gen_reg_rtx (XFmode);
24895 emit_insn (gen_absxf2 (tmp, op1));
24896 emit_insn (gen_cmpxf (tmp,
24897 CONST_DOUBLE_FROM_REAL_VALUE (
24898 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24899 XFmode)));
24900 emit_jump_insn (gen_bge (label1));
24902 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24903 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24904 emit_jump (label2);
24906 emit_label (label1);
24907 emit_move_insn (tmp, CONST1_RTX (XFmode));
24908 emit_insn (gen_addxf3 (tmp, op1, tmp));
24909 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24910 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24912 emit_label (label2);
24915 /* Output code to perform a Newton-Rhapson approximation of a single precision
24916 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24918 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24920 rtx x0, x1, e0, e1, two;
24922 x0 = gen_reg_rtx (mode);
24923 e0 = gen_reg_rtx (mode);
24924 e1 = gen_reg_rtx (mode);
24925 x1 = gen_reg_rtx (mode);
24927 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24929 if (VECTOR_MODE_P (mode))
24930 two = ix86_build_const_vector (SFmode, true, two);
24932 two = force_reg (mode, two);
24934 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24936 /* x0 = rcp(b) estimate */
24937 emit_insn (gen_rtx_SET (VOIDmode, x0,
24938 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24939 UNSPEC_RCP)));
24940 /* e0 = x0 * b */
24941 emit_insn (gen_rtx_SET (VOIDmode, e0,
24942 gen_rtx_MULT (mode, x0, b)));
24943 /* e1 = 2. - e0 */
24944 emit_insn (gen_rtx_SET (VOIDmode, e1,
24945 gen_rtx_MINUS (mode, two, e0)));
24946 /* x1 = x0 * e1 */
24947 emit_insn (gen_rtx_SET (VOIDmode, x1,
24948 gen_rtx_MULT (mode, x0, e1)));
24949 /* res = a * x1 */
24950 emit_insn (gen_rtx_SET (VOIDmode, res,
24951 gen_rtx_MULT (mode, a, x1)));
24954 /* Output code to perform a Newton-Rhapson approximation of a
24955 single precision floating point [reciprocal] square root. */
24957 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24958 bool recip)
24960 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24961 REAL_VALUE_TYPE r;
24963 x0 = gen_reg_rtx (mode);
24964 e0 = gen_reg_rtx (mode);
24965 e1 = gen_reg_rtx (mode);
24966 e2 = gen_reg_rtx (mode);
24967 e3 = gen_reg_rtx (mode);
24969 real_from_integer (&r, VOIDmode, -3, -1, 0);
24970 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24972 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24973 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24975 if (VECTOR_MODE_P (mode))
24977 mthree = ix86_build_const_vector (SFmode, true, mthree);
24978 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24981 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24982 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24984 /* x0 = rsqrt(a) estimate */
24985 emit_insn (gen_rtx_SET (VOIDmode, x0,
24986 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24987 UNSPEC_RSQRT)));
24989 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
24990 if (!recip)
24992 rtx zero, mask;
24994 zero = gen_reg_rtx (mode);
24995 mask = gen_reg_rtx (mode);
24997 zero = force_reg (mode, CONST0_RTX(mode));
24998 emit_insn (gen_rtx_SET (VOIDmode, mask,
24999 gen_rtx_NE (mode, zero, a)));
25001 emit_insn (gen_rtx_SET (VOIDmode, x0,
25002 gen_rtx_AND (mode, x0, mask)));
25005 /* e0 = x0 * a */
25006 emit_insn (gen_rtx_SET (VOIDmode, e0,
25007 gen_rtx_MULT (mode, x0, a)));
25008 /* e1 = e0 * x0 */
25009 emit_insn (gen_rtx_SET (VOIDmode, e1,
25010 gen_rtx_MULT (mode, e0, x0)));
25012 /* e2 = e1 - 3. */
25013 mthree = force_reg (mode, mthree);
25014 emit_insn (gen_rtx_SET (VOIDmode, e2,
25015 gen_rtx_PLUS (mode, e1, mthree)));
25017 mhalf = force_reg (mode, mhalf);
25018 if (recip)
25019 /* e3 = -.5 * x0 */
25020 emit_insn (gen_rtx_SET (VOIDmode, e3,
25021 gen_rtx_MULT (mode, x0, mhalf)));
25022 else
25023 /* e3 = -.5 * e0 */
25024 emit_insn (gen_rtx_SET (VOIDmode, e3,
25025 gen_rtx_MULT (mode, e0, mhalf)));
25026 /* ret = e2 * e3 */
25027 emit_insn (gen_rtx_SET (VOIDmode, res,
25028 gen_rtx_MULT (mode, e2, e3)));
25031 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
25033 static void ATTRIBUTE_UNUSED
25034 i386_solaris_elf_named_section (const char *name, unsigned int flags,
25035 tree decl)
25037 /* With Binutils 2.15, the "@unwind" marker must be specified on
25038 every occurrence of the ".eh_frame" section, not just the first
25039 one. */
25040 if (TARGET_64BIT
25041 && strcmp (name, ".eh_frame") == 0)
25043 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
25044 flags & SECTION_WRITE ? "aw" : "a");
25045 return;
25047 default_elf_asm_named_section (name, flags, decl);
25050 /* Return the mangling of TYPE if it is an extended fundamental type. */
25052 static const char *
25053 ix86_mangle_type (const_tree type)
25055 type = TYPE_MAIN_VARIANT (type);
25057 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25058 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25059 return NULL;
25061 switch (TYPE_MODE (type))
25063 case TFmode:
25064 /* __float128 is "g". */
25065 return "g";
25066 case XFmode:
25067 /* "long double" or __float80 is "e". */
25068 return "e";
25069 default:
25070 return NULL;
25074 /* For 32-bit code we can save PIC register setup by using
25075 __stack_chk_fail_local hidden function instead of calling
25076 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25077 register, so it is better to call __stack_chk_fail directly. */
25079 static tree
25080 ix86_stack_protect_fail (void)
25082 return TARGET_64BIT
25083 ? default_external_stack_protect_fail ()
25084 : default_hidden_stack_protect_fail ();
25087 /* Select a format to encode pointers in exception handling data. CODE
25088 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25089 true if the symbol may be affected by dynamic relocations.
25091 ??? All x86 object file formats are capable of representing this.
25092 After all, the relocation needed is the same as for the call insn.
25093 Whether or not a particular assembler allows us to enter such, I
25094 guess we'll have to see. */
25096 asm_preferred_eh_data_format (int code, int global)
25098 if (flag_pic)
25100 int type = DW_EH_PE_sdata8;
25101 if (!TARGET_64BIT
25102 || ix86_cmodel == CM_SMALL_PIC
25103 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25104 type = DW_EH_PE_sdata4;
25105 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25107 if (ix86_cmodel == CM_SMALL
25108 || (ix86_cmodel == CM_MEDIUM && code))
25109 return DW_EH_PE_udata4;
25110 return DW_EH_PE_absptr;
25113 /* Expand copysign from SIGN to the positive value ABS_VALUE
25114 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
25115 the sign-bit. */
25116 static void
25117 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
25119 enum machine_mode mode = GET_MODE (sign);
25120 rtx sgn = gen_reg_rtx (mode);
25121 if (mask == NULL_RTX)
25123 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
25124 if (!VECTOR_MODE_P (mode))
25126 /* We need to generate a scalar mode mask in this case. */
25127 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25128 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25129 mask = gen_reg_rtx (mode);
25130 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25133 else
25134 mask = gen_rtx_NOT (mode, mask);
25135 emit_insn (gen_rtx_SET (VOIDmode, sgn,
25136 gen_rtx_AND (mode, mask, sign)));
25137 emit_insn (gen_rtx_SET (VOIDmode, result,
25138 gen_rtx_IOR (mode, abs_value, sgn)));
25141 /* Expand fabs (OP0) and return a new rtx that holds the result. The
25142 mask for masking out the sign-bit is stored in *SMASK, if that is
25143 non-null. */
25144 static rtx
25145 ix86_expand_sse_fabs (rtx op0, rtx *smask)
25147 enum machine_mode mode = GET_MODE (op0);
25148 rtx xa, mask;
25150 xa = gen_reg_rtx (mode);
25151 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
25152 if (!VECTOR_MODE_P (mode))
25154 /* We need to generate a scalar mode mask in this case. */
25155 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25156 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25157 mask = gen_reg_rtx (mode);
25158 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25160 emit_insn (gen_rtx_SET (VOIDmode, xa,
25161 gen_rtx_AND (mode, op0, mask)));
25163 if (smask)
25164 *smask = mask;
25166 return xa;
25169 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
25170 swapping the operands if SWAP_OPERANDS is true. The expanded
25171 code is a forward jump to a newly created label in case the
25172 comparison is true. The generated label rtx is returned. */
25173 static rtx
25174 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
25175 bool swap_operands)
25177 rtx label, tmp;
25179 if (swap_operands)
25181 tmp = op0;
25182 op0 = op1;
25183 op1 = tmp;
25186 label = gen_label_rtx ();
25187 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
25188 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25189 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
25190 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
25191 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25192 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
25193 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25194 JUMP_LABEL (tmp) = label;
25196 return label;
25199 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
25200 using comparison code CODE. Operands are swapped for the comparison if
25201 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
25202 static rtx
25203 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
25204 bool swap_operands)
25206 enum machine_mode mode = GET_MODE (op0);
25207 rtx mask = gen_reg_rtx (mode);
25209 if (swap_operands)
25211 rtx tmp = op0;
25212 op0 = op1;
25213 op1 = tmp;
25216 if (mode == DFmode)
25217 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
25218 gen_rtx_fmt_ee (code, mode, op0, op1)));
25219 else
25220 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
25221 gen_rtx_fmt_ee (code, mode, op0, op1)));
25223 return mask;
25226 /* Generate and return a rtx of mode MODE for 2**n where n is the number
25227 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
25228 static rtx
25229 ix86_gen_TWO52 (enum machine_mode mode)
25231 REAL_VALUE_TYPE TWO52r;
25232 rtx TWO52;
25234 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
25235 TWO52 = const_double_from_real_value (TWO52r, mode);
25236 TWO52 = force_reg (mode, TWO52);
25238 return TWO52;
25241 /* Expand SSE sequence for computing lround from OP1 storing
25242 into OP0. */
25243 void
25244 ix86_expand_lround (rtx op0, rtx op1)
25246 /* C code for the stuff we're doing below:
25247 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
25248 return (long)tmp;
25250 enum machine_mode mode = GET_MODE (op1);
25251 const struct real_format *fmt;
25252 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25253 rtx adj;
25255 /* load nextafter (0.5, 0.0) */
25256 fmt = REAL_MODE_FORMAT (mode);
25257 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25258 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25260 /* adj = copysign (0.5, op1) */
25261 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
25262 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
25264 /* adj = op1 + adj */
25265 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
25267 /* op0 = (imode)adj */
25268 expand_fix (op0, adj, 0);
25271 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
25272 into OPERAND0. */
25273 void
25274 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
25276 /* C code for the stuff we're doing below (for do_floor):
25277 xi = (long)op1;
25278 xi -= (double)xi > op1 ? 1 : 0;
25279 return xi;
25281 enum machine_mode fmode = GET_MODE (op1);
25282 enum machine_mode imode = GET_MODE (op0);
25283 rtx ireg, freg, label, tmp;
25285 /* reg = (long)op1 */
25286 ireg = gen_reg_rtx (imode);
25287 expand_fix (ireg, op1, 0);
25289 /* freg = (double)reg */
25290 freg = gen_reg_rtx (fmode);
25291 expand_float (freg, ireg, 0);
25293 /* ireg = (freg > op1) ? ireg - 1 : ireg */
25294 label = ix86_expand_sse_compare_and_jump (UNLE,
25295 freg, op1, !do_floor);
25296 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
25297 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
25298 emit_move_insn (ireg, tmp);
25300 emit_label (label);
25301 LABEL_NUSES (label) = 1;
25303 emit_move_insn (op0, ireg);
25306 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
25307 result in OPERAND0. */
25308 void
25309 ix86_expand_rint (rtx operand0, rtx operand1)
25311 /* C code for the stuff we're doing below:
25312 xa = fabs (operand1);
25313 if (!isless (xa, 2**52))
25314 return operand1;
25315 xa = xa + 2**52 - 2**52;
25316 return copysign (xa, operand1);
25318 enum machine_mode mode = GET_MODE (operand0);
25319 rtx res, xa, label, TWO52, mask;
25321 res = gen_reg_rtx (mode);
25322 emit_move_insn (res, operand1);
25324 /* xa = abs (operand1) */
25325 xa = ix86_expand_sse_fabs (res, &mask);
25327 /* if (!isless (xa, TWO52)) goto label; */
25328 TWO52 = ix86_gen_TWO52 (mode);
25329 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25331 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25332 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25334 ix86_sse_copysign_to_positive (res, xa, res, mask);
25336 emit_label (label);
25337 LABEL_NUSES (label) = 1;
25339 emit_move_insn (operand0, res);
25342 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25343 into OPERAND0. */
25344 void
25345 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
25347 /* C code for the stuff we expand below.
25348 double xa = fabs (x), x2;
25349 if (!isless (xa, TWO52))
25350 return x;
25351 xa = xa + TWO52 - TWO52;
25352 x2 = copysign (xa, x);
25353 Compensate. Floor:
25354 if (x2 > x)
25355 x2 -= 1;
25356 Compensate. Ceil:
25357 if (x2 < x)
25358 x2 -= -1;
25359 return x2;
25361 enum machine_mode mode = GET_MODE (operand0);
25362 rtx xa, TWO52, tmp, label, one, res, mask;
25364 TWO52 = ix86_gen_TWO52 (mode);
25366 /* Temporary for holding the result, initialized to the input
25367 operand to ease control flow. */
25368 res = gen_reg_rtx (mode);
25369 emit_move_insn (res, operand1);
25371 /* xa = abs (operand1) */
25372 xa = ix86_expand_sse_fabs (res, &mask);
25374 /* if (!isless (xa, TWO52)) goto label; */
25375 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25377 /* xa = xa + TWO52 - TWO52; */
25378 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25379 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25381 /* xa = copysign (xa, operand1) */
25382 ix86_sse_copysign_to_positive (xa, xa, res, mask);
25384 /* generate 1.0 or -1.0 */
25385 one = force_reg (mode,
25386 const_double_from_real_value (do_floor
25387 ? dconst1 : dconstm1, mode));
25389 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25390 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25391 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25392 gen_rtx_AND (mode, one, tmp)));
25393 /* We always need to subtract here to preserve signed zero. */
25394 tmp = expand_simple_binop (mode, MINUS,
25395 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25396 emit_move_insn (res, tmp);
25398 emit_label (label);
25399 LABEL_NUSES (label) = 1;
25401 emit_move_insn (operand0, res);
25404 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25405 into OPERAND0. */
25406 void
25407 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25409 /* C code for the stuff we expand below.
25410 double xa = fabs (x), x2;
25411 if (!isless (xa, TWO52))
25412 return x;
25413 x2 = (double)(long)x;
25414 Compensate. Floor:
25415 if (x2 > x)
25416 x2 -= 1;
25417 Compensate. Ceil:
25418 if (x2 < x)
25419 x2 += 1;
25420 if (HONOR_SIGNED_ZEROS (mode))
25421 return copysign (x2, x);
25422 return x2;
25424 enum machine_mode mode = GET_MODE (operand0);
25425 rtx xa, xi, TWO52, tmp, label, one, res, mask;
25427 TWO52 = ix86_gen_TWO52 (mode);
25429 /* Temporary for holding the result, initialized to the input
25430 operand to ease control flow. */
25431 res = gen_reg_rtx (mode);
25432 emit_move_insn (res, operand1);
25434 /* xa = abs (operand1) */
25435 xa = ix86_expand_sse_fabs (res, &mask);
25437 /* if (!isless (xa, TWO52)) goto label; */
25438 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25440 /* xa = (double)(long)x */
25441 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25442 expand_fix (xi, res, 0);
25443 expand_float (xa, xi, 0);
25445 /* generate 1.0 */
25446 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25448 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25449 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25450 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25451 gen_rtx_AND (mode, one, tmp)));
25452 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25453 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25454 emit_move_insn (res, tmp);
25456 if (HONOR_SIGNED_ZEROS (mode))
25457 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25459 emit_label (label);
25460 LABEL_NUSES (label) = 1;
25462 emit_move_insn (operand0, res);
25465 /* Expand SSE sequence for computing round from OPERAND1 storing
25466 into OPERAND0. Sequence that works without relying on DImode truncation
25467 via cvttsd2siq that is only available on 64bit targets. */
25468 void
25469 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25471 /* C code for the stuff we expand below.
25472 double xa = fabs (x), xa2, x2;
25473 if (!isless (xa, TWO52))
25474 return x;
25475 Using the absolute value and copying back sign makes
25476 -0.0 -> -0.0 correct.
25477 xa2 = xa + TWO52 - TWO52;
25478 Compensate.
25479 dxa = xa2 - xa;
25480 if (dxa <= -0.5)
25481 xa2 += 1;
25482 else if (dxa > 0.5)
25483 xa2 -= 1;
25484 x2 = copysign (xa2, x);
25485 return x2;
25487 enum machine_mode mode = GET_MODE (operand0);
25488 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25490 TWO52 = ix86_gen_TWO52 (mode);
25492 /* Temporary for holding the result, initialized to the input
25493 operand to ease control flow. */
25494 res = gen_reg_rtx (mode);
25495 emit_move_insn (res, operand1);
25497 /* xa = abs (operand1) */
25498 xa = ix86_expand_sse_fabs (res, &mask);
25500 /* if (!isless (xa, TWO52)) goto label; */
25501 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25503 /* xa2 = xa + TWO52 - TWO52; */
25504 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25505 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25507 /* dxa = xa2 - xa; */
25508 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25510 /* generate 0.5, 1.0 and -0.5 */
25511 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25512 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25513 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25514 0, OPTAB_DIRECT);
25516 /* Compensate. */
25517 tmp = gen_reg_rtx (mode);
25518 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25519 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25520 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25521 gen_rtx_AND (mode, one, tmp)));
25522 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25523 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25524 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25525 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25526 gen_rtx_AND (mode, one, tmp)));
25527 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25529 /* res = copysign (xa2, operand1) */
25530 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25532 emit_label (label);
25533 LABEL_NUSES (label) = 1;
25535 emit_move_insn (operand0, res);
25538 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25539 into OPERAND0. */
25540 void
25541 ix86_expand_trunc (rtx operand0, rtx operand1)
25543 /* C code for SSE variant we expand below.
25544 double xa = fabs (x), x2;
25545 if (!isless (xa, TWO52))
25546 return x;
25547 x2 = (double)(long)x;
25548 if (HONOR_SIGNED_ZEROS (mode))
25549 return copysign (x2, x);
25550 return x2;
25552 enum machine_mode mode = GET_MODE (operand0);
25553 rtx xa, xi, TWO52, label, res, mask;
25555 TWO52 = ix86_gen_TWO52 (mode);
25557 /* Temporary for holding the result, initialized to the input
25558 operand to ease control flow. */
25559 res = gen_reg_rtx (mode);
25560 emit_move_insn (res, operand1);
25562 /* xa = abs (operand1) */
25563 xa = ix86_expand_sse_fabs (res, &mask);
25565 /* if (!isless (xa, TWO52)) goto label; */
25566 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25568 /* x = (double)(long)x */
25569 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25570 expand_fix (xi, res, 0);
25571 expand_float (res, xi, 0);
25573 if (HONOR_SIGNED_ZEROS (mode))
25574 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25576 emit_label (label);
25577 LABEL_NUSES (label) = 1;
25579 emit_move_insn (operand0, res);
25582 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25583 into OPERAND0. */
25584 void
25585 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25587 enum machine_mode mode = GET_MODE (operand0);
25588 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25590 /* C code for SSE variant we expand below.
25591 double xa = fabs (x), x2;
25592 if (!isless (xa, TWO52))
25593 return x;
25594 xa2 = xa + TWO52 - TWO52;
25595 Compensate:
25596 if (xa2 > xa)
25597 xa2 -= 1.0;
25598 x2 = copysign (xa2, x);
25599 return x2;
25602 TWO52 = ix86_gen_TWO52 (mode);
25604 /* Temporary for holding the result, initialized to the input
25605 operand to ease control flow. */
25606 res = gen_reg_rtx (mode);
25607 emit_move_insn (res, operand1);
25609 /* xa = abs (operand1) */
25610 xa = ix86_expand_sse_fabs (res, &smask);
25612 /* if (!isless (xa, TWO52)) goto label; */
25613 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25615 /* res = xa + TWO52 - TWO52; */
25616 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25617 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25618 emit_move_insn (res, tmp);
25620 /* generate 1.0 */
25621 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25623 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
25624 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25625 emit_insn (gen_rtx_SET (VOIDmode, mask,
25626 gen_rtx_AND (mode, mask, one)));
25627 tmp = expand_simple_binop (mode, MINUS,
25628 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25629 emit_move_insn (res, tmp);
25631 /* res = copysign (res, operand1) */
25632 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25634 emit_label (label);
25635 LABEL_NUSES (label) = 1;
25637 emit_move_insn (operand0, res);
25640 /* Expand SSE sequence for computing round from OPERAND1 storing
25641 into OPERAND0. */
25642 void
25643 ix86_expand_round (rtx operand0, rtx operand1)
25645 /* C code for the stuff we're doing below:
25646 double xa = fabs (x);
25647 if (!isless (xa, TWO52))
25648 return x;
25649 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25650 return copysign (xa, x);
25652 enum machine_mode mode = GET_MODE (operand0);
25653 rtx res, TWO52, xa, label, xi, half, mask;
25654 const struct real_format *fmt;
25655 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25657 /* Temporary for holding the result, initialized to the input
25658 operand to ease control flow. */
25659 res = gen_reg_rtx (mode);
25660 emit_move_insn (res, operand1);
25662 TWO52 = ix86_gen_TWO52 (mode);
25663 xa = ix86_expand_sse_fabs (res, &mask);
25664 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25666 /* load nextafter (0.5, 0.0) */
25667 fmt = REAL_MODE_FORMAT (mode);
25668 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25669 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25671 /* xa = xa + 0.5 */
25672 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25673 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25675 /* xa = (double)(int64_t)xa */
25676 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25677 expand_fix (xi, xa, 0);
25678 expand_float (xa, xi, 0);
25680 /* res = copysign (xa, operand1) */
25681 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25683 emit_label (label);
25684 LABEL_NUSES (label) = 1;
25686 emit_move_insn (operand0, res);
25690 /* Validate whether a SSE5 instruction is valid or not.
25691 OPERANDS is the array of operands.
25692 NUM is the number of operands.
25693 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25694 NUM_MEMORY is the maximum number of memory operands to accept. */
25696 bool
25697 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
25698 bool uses_oc0, int num_memory)
25700 int mem_mask;
25701 int mem_count;
25702 int i;
25704 /* Count the number of memory arguments */
25705 mem_mask = 0;
25706 mem_count = 0;
25707 for (i = 0; i < num; i++)
25709 enum machine_mode mode = GET_MODE (operands[i]);
25710 if (register_operand (operands[i], mode))
25713 else if (memory_operand (operands[i], mode))
25715 mem_mask |= (1 << i);
25716 mem_count++;
25719 else
25721 rtx pattern = PATTERN (insn);
25723 /* allow 0 for pcmov */
25724 if (GET_CODE (pattern) != SET
25725 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25726 || i < 2
25727 || operands[i] != CONST0_RTX (mode))
25728 return false;
25732 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
25733 a memory operation. */
25734 if (num_memory < 0)
25736 num_memory = -num_memory;
25737 if ((mem_mask & (1 << (num-1))) != 0)
25739 mem_mask &= ~(1 << (num-1));
25740 mem_count--;
25744 /* If there were no memory operations, allow the insn */
25745 if (mem_mask == 0)
25746 return true;
25748 /* Do not allow the destination register to be a memory operand. */
25749 else if (mem_mask & (1 << 0))
25750 return false;
25752 /* If there are too many memory operations, disallow the instruction. While
25753 the hardware only allows 1 memory reference, before register allocation
25754 for some insns, we allow two memory operations sometimes in order to allow
25755 code like the following to be optimized:
25757 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25759 or similar cases that are vectorized into using the fmaddss
25760 instruction. */
25761 else if (mem_count > num_memory)
25762 return false;
25764 /* Don't allow more than one memory operation if not optimizing. */
25765 else if (mem_count > 1 && !optimize)
25766 return false;
25768 else if (num == 4 && mem_count == 1)
25770 /* formats (destination is the first argument), example fmaddss:
25771 xmm1, xmm1, xmm2, xmm3/mem
25772 xmm1, xmm1, xmm2/mem, xmm3
25773 xmm1, xmm2, xmm3/mem, xmm1
25774 xmm1, xmm2/mem, xmm3, xmm1 */
25775 if (uses_oc0)
25776 return ((mem_mask == (1 << 1))
25777 || (mem_mask == (1 << 2))
25778 || (mem_mask == (1 << 3)));
25780 /* format, example pmacsdd:
25781 xmm1, xmm2, xmm3/mem, xmm1 */
25782 else
25783 return (mem_mask == (1 << 2));
25786 else if (num == 4 && num_memory == 2)
25788 /* If there are two memory operations, we can load one of the memory ops
25789 into the destination register. This is for optimizing the
25790 multiply/add ops, which the combiner has optimized both the multiply
25791 and the add insns to have a memory operation. We have to be careful
25792 that the destination doesn't overlap with the inputs. */
25793 rtx op0 = operands[0];
25795 if (reg_mentioned_p (op0, operands[1])
25796 || reg_mentioned_p (op0, operands[2])
25797 || reg_mentioned_p (op0, operands[3]))
25798 return false;
25800 /* formats (destination is the first argument), example fmaddss:
25801 xmm1, xmm1, xmm2, xmm3/mem
25802 xmm1, xmm1, xmm2/mem, xmm3
25803 xmm1, xmm2, xmm3/mem, xmm1
25804 xmm1, xmm2/mem, xmm3, xmm1
25806 For the oc0 case, we will load either operands[1] or operands[3] into
25807 operands[0], so any combination of 2 memory operands is ok. */
25808 if (uses_oc0)
25809 return true;
25811 /* format, example pmacsdd:
25812 xmm1, xmm2, xmm3/mem, xmm1
25814 For the integer multiply/add instructions be more restrictive and
25815 require operands[2] and operands[3] to be the memory operands. */
25816 else
25817 return (mem_mask == ((1 << 2) | (1 << 3)));
25820 else if (num == 3 && num_memory == 1)
25822 /* formats, example protb:
25823 xmm1, xmm2, xmm3/mem
25824 xmm1, xmm2/mem, xmm3 */
25825 if (uses_oc0)
25826 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25828 /* format, example comeq:
25829 xmm1, xmm2, xmm3/mem */
25830 else
25831 return (mem_mask == (1 << 2));
25834 else
25835 gcc_unreachable ();
25837 return false;
25841 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25842 hardware will allow by using the destination register to load one of the
25843 memory operations. Presently this is used by the multiply/add routines to
25844 allow 2 memory references. */
25846 void
25847 ix86_expand_sse5_multiple_memory (rtx operands[],
25848 int num,
25849 enum machine_mode mode)
25851 rtx op0 = operands[0];
25852 if (num != 4
25853 || memory_operand (op0, mode)
25854 || reg_mentioned_p (op0, operands[1])
25855 || reg_mentioned_p (op0, operands[2])
25856 || reg_mentioned_p (op0, operands[3]))
25857 gcc_unreachable ();
25859 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25860 the destination register. */
25861 if (memory_operand (operands[1], mode))
25863 emit_move_insn (op0, operands[1]);
25864 operands[1] = op0;
25866 else if (memory_operand (operands[3], mode))
25868 emit_move_insn (op0, operands[3]);
25869 operands[3] = op0;
25871 else
25872 gcc_unreachable ();
25874 return;
25878 /* Table of valid machine attributes. */
25879 static const struct attribute_spec ix86_attribute_table[] =
25881 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25882 /* Stdcall attribute says callee is responsible for popping arguments
25883 if they are not variable. */
25884 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25885 /* Fastcall attribute says callee is responsible for popping arguments
25886 if they are not variable. */
25887 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25888 /* Cdecl attribute says the callee is a normal C declaration */
25889 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25890 /* Regparm attribute specifies how many integer arguments are to be
25891 passed in registers. */
25892 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25893 /* Sseregparm attribute says we are using x86_64 calling conventions
25894 for FP arguments. */
25895 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25896 /* force_align_arg_pointer says this function realigns the stack at entry. */
25897 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25898 false, true, true, ix86_handle_cconv_attribute },
25899 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25900 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25901 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25902 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25903 #endif
25904 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25905 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25906 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25907 SUBTARGET_ATTRIBUTE_TABLE,
25908 #endif
25909 { NULL, 0, 0, false, false, false, NULL }
25912 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25913 static int
25914 x86_builtin_vectorization_cost (bool runtime_test)
25916 /* If the branch of the runtime test is taken - i.e. - the vectorized
25917 version is skipped - this incurs a misprediction cost (because the
25918 vectorized version is expected to be the fall-through). So we subtract
25919 the latency of a mispredicted branch from the costs that are incured
25920 when the vectorized version is executed.
25922 TODO: The values in individual target tables have to be tuned or new
25923 fields may be needed. For eg. on K8, the default branch path is the
25924 not-taken path. If the taken path is predicted correctly, the minimum
25925 penalty of going down the taken-path is 1 cycle. If the taken-path is
25926 not predicted correctly, then the minimum penalty is 10 cycles. */
25928 if (runtime_test)
25930 return (-(ix86_cost->cond_taken_branch_cost));
25932 else
25933 return 0;
25936 /* Initialize the GCC target structure. */
25937 #undef TARGET_RETURN_IN_MEMORY
25938 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
25940 #undef TARGET_ATTRIBUTE_TABLE
25941 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25942 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25943 # undef TARGET_MERGE_DECL_ATTRIBUTES
25944 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25945 #endif
25947 #undef TARGET_COMP_TYPE_ATTRIBUTES
25948 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25950 #undef TARGET_INIT_BUILTINS
25951 #define TARGET_INIT_BUILTINS ix86_init_builtins
25952 #undef TARGET_EXPAND_BUILTIN
25953 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25955 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25956 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25957 ix86_builtin_vectorized_function
25959 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25960 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25962 #undef TARGET_BUILTIN_RECIPROCAL
25963 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25965 #undef TARGET_ASM_FUNCTION_EPILOGUE
25966 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25968 #undef TARGET_ENCODE_SECTION_INFO
25969 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25970 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25971 #else
25972 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25973 #endif
25975 #undef TARGET_ASM_OPEN_PAREN
25976 #define TARGET_ASM_OPEN_PAREN ""
25977 #undef TARGET_ASM_CLOSE_PAREN
25978 #define TARGET_ASM_CLOSE_PAREN ""
25980 #undef TARGET_ASM_ALIGNED_HI_OP
25981 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25982 #undef TARGET_ASM_ALIGNED_SI_OP
25983 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25984 #ifdef ASM_QUAD
25985 #undef TARGET_ASM_ALIGNED_DI_OP
25986 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25987 #endif
25989 #undef TARGET_ASM_UNALIGNED_HI_OP
25990 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25991 #undef TARGET_ASM_UNALIGNED_SI_OP
25992 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25993 #undef TARGET_ASM_UNALIGNED_DI_OP
25994 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25996 #undef TARGET_SCHED_ADJUST_COST
25997 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25998 #undef TARGET_SCHED_ISSUE_RATE
25999 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
26000 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
26001 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
26002 ia32_multipass_dfa_lookahead
26004 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
26005 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
26007 #ifdef HAVE_AS_TLS
26008 #undef TARGET_HAVE_TLS
26009 #define TARGET_HAVE_TLS true
26010 #endif
26011 #undef TARGET_CANNOT_FORCE_CONST_MEM
26012 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
26013 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
26014 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
26016 #undef TARGET_DELEGITIMIZE_ADDRESS
26017 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
26019 #undef TARGET_MS_BITFIELD_LAYOUT_P
26020 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
26022 #if TARGET_MACHO
26023 #undef TARGET_BINDS_LOCAL_P
26024 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
26025 #endif
26026 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26027 #undef TARGET_BINDS_LOCAL_P
26028 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
26029 #endif
26031 #undef TARGET_ASM_OUTPUT_MI_THUNK
26032 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26033 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26034 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26036 #undef TARGET_ASM_FILE_START
26037 #define TARGET_ASM_FILE_START x86_file_start
26039 #undef TARGET_DEFAULT_TARGET_FLAGS
26040 #define TARGET_DEFAULT_TARGET_FLAGS \
26041 (TARGET_DEFAULT \
26042 | TARGET_SUBTARGET_DEFAULT \
26043 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
26045 #undef TARGET_HANDLE_OPTION
26046 #define TARGET_HANDLE_OPTION ix86_handle_option
26048 #undef TARGET_RTX_COSTS
26049 #define TARGET_RTX_COSTS ix86_rtx_costs
26050 #undef TARGET_ADDRESS_COST
26051 #define TARGET_ADDRESS_COST ix86_address_cost
26053 #undef TARGET_FIXED_CONDITION_CODE_REGS
26054 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26055 #undef TARGET_CC_MODES_COMPATIBLE
26056 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26058 #undef TARGET_MACHINE_DEPENDENT_REORG
26059 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26061 #undef TARGET_BUILD_BUILTIN_VA_LIST
26062 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26064 #undef TARGET_EXPAND_BUILTIN_VA_START
26065 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26067 #undef TARGET_MD_ASM_CLOBBERS
26068 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
26070 #undef TARGET_PROMOTE_PROTOTYPES
26071 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26072 #undef TARGET_STRUCT_VALUE_RTX
26073 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
26074 #undef TARGET_SETUP_INCOMING_VARARGS
26075 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26076 #undef TARGET_MUST_PASS_IN_STACK
26077 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26078 #undef TARGET_PASS_BY_REFERENCE
26079 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26080 #undef TARGET_INTERNAL_ARG_POINTER
26081 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26082 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
26083 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
26084 #undef TARGET_STRICT_ARGUMENT_NAMING
26085 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26087 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
26088 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26090 #undef TARGET_SCALAR_MODE_SUPPORTED_P
26091 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26093 #undef TARGET_VECTOR_MODE_SUPPORTED_P
26094 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26096 #undef TARGET_C_MODE_FOR_SUFFIX
26097 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26099 #ifdef HAVE_AS_TLS
26100 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26101 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26102 #endif
26104 #ifdef SUBTARGET_INSERT_ATTRIBUTES
26105 #undef TARGET_INSERT_ATTRIBUTES
26106 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26107 #endif
26109 #undef TARGET_MANGLE_TYPE
26110 #define TARGET_MANGLE_TYPE ix86_mangle_type
26112 #undef TARGET_STACK_PROTECT_FAIL
26113 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26115 #undef TARGET_FUNCTION_VALUE
26116 #define TARGET_FUNCTION_VALUE ix86_function_value
26118 #undef TARGET_SECONDARY_RELOAD
26119 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26121 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26122 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
26124 struct gcc_target targetm = TARGET_INITIALIZER;
26126 #include "gt-i386.h"