/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "insn-config.h"
30 #include "conditions.h"
32 #include "insn-codes.h"
33 #include "insn-attr.h"
40 #include "diagnostic-core.h"
42 #include "basic-block.h"
45 #include "target-def.h"
46 #include "common/common-target.h"
47 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "sched-int.h"
61 #include "diagnostic.h"
63 #include "tree-pass.h"
64 #include "tree-flow.h"
66 static rtx
legitimize_dllimport_symbol (rtx
, bool);
/* Default stack-probe limit when the target configuration does not
   provide one; -1 means "no limit".  The #endif was missing, leaving
   the conditional unterminated.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   Index 4 is the catch-all for any other mode; the final ": 4)" arm
   was truncated, leaving a dangling line continuation.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy: always fall back to a libcall.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
87 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
88 COSTS_N_BYTES (2), /* cost of an add instruction */
89 COSTS_N_BYTES (3), /* cost of a lea instruction */
90 COSTS_N_BYTES (2), /* variable shift costs */
91 COSTS_N_BYTES (3), /* constant shift costs */
92 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
93 COSTS_N_BYTES (3), /* HI */
94 COSTS_N_BYTES (3), /* SI */
95 COSTS_N_BYTES (3), /* DI */
96 COSTS_N_BYTES (5)}, /* other */
97 0, /* cost of multiply per each bit set */
98 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
99 COSTS_N_BYTES (3), /* HI */
100 COSTS_N_BYTES (3), /* SI */
101 COSTS_N_BYTES (3), /* DI */
102 COSTS_N_BYTES (5)}, /* other */
103 COSTS_N_BYTES (3), /* cost of movsx */
104 COSTS_N_BYTES (3), /* cost of movzx */
105 0, /* "large" insn */
107 2, /* cost for loading QImode using movzbl */
108 {2, 2, 2}, /* cost of loading integer registers
109 in QImode, HImode and SImode.
110 Relative to reg-reg move (2). */
111 {2, 2, 2}, /* cost of storing integer registers */
112 2, /* cost of reg,reg fld/fst */
113 {2, 2, 2}, /* cost of loading fp registers
114 in SFmode, DFmode and XFmode */
115 {2, 2, 2}, /* cost of storing fp registers
116 in SFmode, DFmode and XFmode */
117 3, /* cost of moving MMX register */
118 {3, 3}, /* cost of loading MMX registers
119 in SImode and DImode */
120 {3, 3}, /* cost of storing MMX registers
121 in SImode and DImode */
122 3, /* cost of moving SSE register */
123 {3, 3, 3}, /* cost of loading SSE registers
124 in SImode, DImode and TImode */
125 {3, 3, 3}, /* cost of storing SSE registers
126 in SImode, DImode and TImode */
127 3, /* MMX or SSE register to integer */
128 0, /* size of l1 cache */
129 0, /* size of l2 cache */
130 0, /* size of prefetch block */
131 0, /* number of parallel prefetches */
133 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
134 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
135 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
136 COSTS_N_BYTES (2), /* cost of FABS instruction. */
137 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
138 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
139 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
140 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}},
141 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
142 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}},
143 1, /* scalar_stmt_cost. */
144 1, /* scalar load_cost. */
145 1, /* scalar_store_cost. */
146 1, /* vec_stmt_cost. */
147 1, /* vec_to_scalar_cost. */
148 1, /* scalar_to_vec_cost. */
149 1, /* vec_align_load_cost. */
150 1, /* vec_unalign_load_cost. */
151 1, /* vec_store_cost. */
152 1, /* cond_taken_branch_cost. */
153 1, /* cond_not_taken_branch_cost. */
156 /* Processor costs (relative to an add) */
158 struct processor_costs i386_cost
= { /* 386 specific costs */
159 COSTS_N_INSNS (1), /* cost of an add instruction */
160 COSTS_N_INSNS (1), /* cost of a lea instruction */
161 COSTS_N_INSNS (3), /* variable shift costs */
162 COSTS_N_INSNS (2), /* constant shift costs */
163 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
164 COSTS_N_INSNS (6), /* HI */
165 COSTS_N_INSNS (6), /* SI */
166 COSTS_N_INSNS (6), /* DI */
167 COSTS_N_INSNS (6)}, /* other */
168 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
169 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
170 COSTS_N_INSNS (23), /* HI */
171 COSTS_N_INSNS (23), /* SI */
172 COSTS_N_INSNS (23), /* DI */
173 COSTS_N_INSNS (23)}, /* other */
174 COSTS_N_INSNS (3), /* cost of movsx */
175 COSTS_N_INSNS (2), /* cost of movzx */
176 15, /* "large" insn */
178 4, /* cost for loading QImode using movzbl */
179 {2, 4, 2}, /* cost of loading integer registers
180 in QImode, HImode and SImode.
181 Relative to reg-reg move (2). */
182 {2, 4, 2}, /* cost of storing integer registers */
183 2, /* cost of reg,reg fld/fst */
184 {8, 8, 8}, /* cost of loading fp registers
185 in SFmode, DFmode and XFmode */
186 {8, 8, 8}, /* cost of storing fp registers
187 in SFmode, DFmode and XFmode */
188 2, /* cost of moving MMX register */
189 {4, 8}, /* cost of loading MMX registers
190 in SImode and DImode */
191 {4, 8}, /* cost of storing MMX registers
192 in SImode and DImode */
193 2, /* cost of moving SSE register */
194 {4, 8, 16}, /* cost of loading SSE registers
195 in SImode, DImode and TImode */
196 {4, 8, 16}, /* cost of storing SSE registers
197 in SImode, DImode and TImode */
198 3, /* MMX or SSE register to integer */
199 0, /* size of l1 cache */
200 0, /* size of l2 cache */
201 0, /* size of prefetch block */
202 0, /* number of parallel prefetches */
204 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
205 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
206 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
207 COSTS_N_INSNS (22), /* cost of FABS instruction. */
208 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
209 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
210 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
211 DUMMY_STRINGOP_ALGS
},
212 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
213 DUMMY_STRINGOP_ALGS
},
214 1, /* scalar_stmt_cost. */
215 1, /* scalar load_cost. */
216 1, /* scalar_store_cost. */
217 1, /* vec_stmt_cost. */
218 1, /* vec_to_scalar_cost. */
219 1, /* scalar_to_vec_cost. */
220 1, /* vec_align_load_cost. */
221 2, /* vec_unalign_load_cost. */
222 1, /* vec_store_cost. */
223 3, /* cond_taken_branch_cost. */
224 1, /* cond_not_taken_branch_cost. */
228 struct processor_costs i486_cost
= { /* 486 specific costs */
229 COSTS_N_INSNS (1), /* cost of an add instruction */
230 COSTS_N_INSNS (1), /* cost of a lea instruction */
231 COSTS_N_INSNS (3), /* variable shift costs */
232 COSTS_N_INSNS (2), /* constant shift costs */
233 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
234 COSTS_N_INSNS (12), /* HI */
235 COSTS_N_INSNS (12), /* SI */
236 COSTS_N_INSNS (12), /* DI */
237 COSTS_N_INSNS (12)}, /* other */
238 1, /* cost of multiply per each bit set */
239 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
240 COSTS_N_INSNS (40), /* HI */
241 COSTS_N_INSNS (40), /* SI */
242 COSTS_N_INSNS (40), /* DI */
243 COSTS_N_INSNS (40)}, /* other */
244 COSTS_N_INSNS (3), /* cost of movsx */
245 COSTS_N_INSNS (2), /* cost of movzx */
246 15, /* "large" insn */
248 4, /* cost for loading QImode using movzbl */
249 {2, 4, 2}, /* cost of loading integer registers
250 in QImode, HImode and SImode.
251 Relative to reg-reg move (2). */
252 {2, 4, 2}, /* cost of storing integer registers */
253 2, /* cost of reg,reg fld/fst */
254 {8, 8, 8}, /* cost of loading fp registers
255 in SFmode, DFmode and XFmode */
256 {8, 8, 8}, /* cost of storing fp registers
257 in SFmode, DFmode and XFmode */
258 2, /* cost of moving MMX register */
259 {4, 8}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {4, 8}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {4, 8, 16}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {4, 8, 16}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
268 3, /* MMX or SSE register to integer */
269 4, /* size of l1 cache. 486 has 8kB cache
270 shared for code and data, so 4kB is
271 not really precise. */
272 4, /* size of l2 cache */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
276 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (3), /* cost of FABS instruction. */
280 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
282 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
283 DUMMY_STRINGOP_ALGS
},
284 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
285 DUMMY_STRINGOP_ALGS
},
286 1, /* scalar_stmt_cost. */
287 1, /* scalar load_cost. */
288 1, /* scalar_store_cost. */
289 1, /* vec_stmt_cost. */
290 1, /* vec_to_scalar_cost. */
291 1, /* scalar_to_vec_cost. */
292 1, /* vec_align_load_cost. */
293 2, /* vec_unalign_load_cost. */
294 1, /* vec_store_cost. */
295 3, /* cond_taken_branch_cost. */
296 1, /* cond_not_taken_branch_cost. */
300 struct processor_costs pentium_cost
= {
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (4), /* variable shift costs */
304 COSTS_N_INSNS (1), /* constant shift costs */
305 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (11), /* HI */
307 COSTS_N_INSNS (11), /* SI */
308 COSTS_N_INSNS (11), /* DI */
309 COSTS_N_INSNS (11)}, /* other */
310 0, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (25), /* HI */
313 COSTS_N_INSNS (25), /* SI */
314 COSTS_N_INSNS (25), /* DI */
315 COSTS_N_INSNS (25)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 8, /* "large" insn */
320 6, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {2, 2, 6}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {4, 4, 6}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 8, /* cost of moving MMX register */
331 {8, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {8, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 8, /* size of l1 cache. */
342 8, /* size of l2 cache */
343 0, /* size of prefetch block */
344 0, /* number of parallel prefetches */
346 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
347 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
348 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
349 COSTS_N_INSNS (1), /* cost of FABS instruction. */
350 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
351 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
352 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
353 DUMMY_STRINGOP_ALGS
},
354 {{libcall
, {{-1, rep_prefix_4_byte
, false}}},
355 DUMMY_STRINGOP_ALGS
},
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
370 struct processor_costs pentiumpro_cost
= {
371 COSTS_N_INSNS (1), /* cost of an add instruction */
372 COSTS_N_INSNS (1), /* cost of a lea instruction */
373 COSTS_N_INSNS (1), /* variable shift costs */
374 COSTS_N_INSNS (1), /* constant shift costs */
375 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
376 COSTS_N_INSNS (4), /* HI */
377 COSTS_N_INSNS (4), /* SI */
378 COSTS_N_INSNS (4), /* DI */
379 COSTS_N_INSNS (4)}, /* other */
380 0, /* cost of multiply per each bit set */
381 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
382 COSTS_N_INSNS (17), /* HI */
383 COSTS_N_INSNS (17), /* SI */
384 COSTS_N_INSNS (17), /* DI */
385 COSTS_N_INSNS (17)}, /* other */
386 COSTS_N_INSNS (1), /* cost of movsx */
387 COSTS_N_INSNS (1), /* cost of movzx */
388 8, /* "large" insn */
390 2, /* cost for loading QImode using movzbl */
391 {4, 4, 4}, /* cost of loading integer registers
392 in QImode, HImode and SImode.
393 Relative to reg-reg move (2). */
394 {2, 2, 2}, /* cost of storing integer registers */
395 2, /* cost of reg,reg fld/fst */
396 {2, 2, 6}, /* cost of loading fp registers
397 in SFmode, DFmode and XFmode */
398 {4, 4, 6}, /* cost of storing fp registers
399 in SFmode, DFmode and XFmode */
400 2, /* cost of moving MMX register */
401 {2, 2}, /* cost of loading MMX registers
402 in SImode and DImode */
403 {2, 2}, /* cost of storing MMX registers
404 in SImode and DImode */
405 2, /* cost of moving SSE register */
406 {2, 2, 8}, /* cost of loading SSE registers
407 in SImode, DImode and TImode */
408 {2, 2, 8}, /* cost of storing SSE registers
409 in SImode, DImode and TImode */
410 3, /* MMX or SSE register to integer */
411 8, /* size of l1 cache. */
412 256, /* size of l2 cache */
413 32, /* size of prefetch block */
414 6, /* number of parallel prefetches */
416 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
417 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
418 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
419 COSTS_N_INSNS (2), /* cost of FABS instruction. */
420 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
421 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
422 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
423 (we ensure the alignment). For small blocks inline loop is still a
424 noticeable win, for bigger blocks either rep movsl or rep movsb is
425 way to go. Rep movsb has apparently more expensive startup time in CPU,
426 but after 4K the difference is down in the noise. */
427 {{rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
428 {8192, rep_prefix_4_byte
, false},
429 {-1, rep_prefix_1_byte
, false}}},
430 DUMMY_STRINGOP_ALGS
},
431 {{rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
432 {8192, rep_prefix_4_byte
, false},
433 {-1, libcall
, false}}},
434 DUMMY_STRINGOP_ALGS
},
435 1, /* scalar_stmt_cost. */
436 1, /* scalar load_cost. */
437 1, /* scalar_store_cost. */
438 1, /* vec_stmt_cost. */
439 1, /* vec_to_scalar_cost. */
440 1, /* scalar_to_vec_cost. */
441 1, /* vec_align_load_cost. */
442 2, /* vec_unalign_load_cost. */
443 1, /* vec_store_cost. */
444 3, /* cond_taken_branch_cost. */
445 1, /* cond_not_taken_branch_cost. */
449 struct processor_costs geode_cost
= {
450 COSTS_N_INSNS (1), /* cost of an add instruction */
451 COSTS_N_INSNS (1), /* cost of a lea instruction */
452 COSTS_N_INSNS (2), /* variable shift costs */
453 COSTS_N_INSNS (1), /* constant shift costs */
454 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
455 COSTS_N_INSNS (4), /* HI */
456 COSTS_N_INSNS (7), /* SI */
457 COSTS_N_INSNS (7), /* DI */
458 COSTS_N_INSNS (7)}, /* other */
459 0, /* cost of multiply per each bit set */
460 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
461 COSTS_N_INSNS (23), /* HI */
462 COSTS_N_INSNS (39), /* SI */
463 COSTS_N_INSNS (39), /* DI */
464 COSTS_N_INSNS (39)}, /* other */
465 COSTS_N_INSNS (1), /* cost of movsx */
466 COSTS_N_INSNS (1), /* cost of movzx */
467 8, /* "large" insn */
469 1, /* cost for loading QImode using movzbl */
470 {1, 1, 1}, /* cost of loading integer registers
471 in QImode, HImode and SImode.
472 Relative to reg-reg move (2). */
473 {1, 1, 1}, /* cost of storing integer registers */
474 1, /* cost of reg,reg fld/fst */
475 {1, 1, 1}, /* cost of loading fp registers
476 in SFmode, DFmode and XFmode */
477 {4, 6, 6}, /* cost of storing fp registers
478 in SFmode, DFmode and XFmode */
480 1, /* cost of moving MMX register */
481 {1, 1}, /* cost of loading MMX registers
482 in SImode and DImode */
483 {1, 1}, /* cost of storing MMX registers
484 in SImode and DImode */
485 1, /* cost of moving SSE register */
486 {1, 1, 1}, /* cost of loading SSE registers
487 in SImode, DImode and TImode */
488 {1, 1, 1}, /* cost of storing SSE registers
489 in SImode, DImode and TImode */
490 1, /* MMX or SSE register to integer */
491 64, /* size of l1 cache. */
492 128, /* size of l2 cache. */
493 32, /* size of prefetch block */
494 1, /* number of parallel prefetches */
496 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
497 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
498 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
499 COSTS_N_INSNS (1), /* cost of FABS instruction. */
500 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
501 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
502 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
503 DUMMY_STRINGOP_ALGS
},
504 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
505 DUMMY_STRINGOP_ALGS
},
506 1, /* scalar_stmt_cost. */
507 1, /* scalar load_cost. */
508 1, /* scalar_store_cost. */
509 1, /* vec_stmt_cost. */
510 1, /* vec_to_scalar_cost. */
511 1, /* scalar_to_vec_cost. */
512 1, /* vec_align_load_cost. */
513 2, /* vec_unalign_load_cost. */
514 1, /* vec_store_cost. */
515 3, /* cond_taken_branch_cost. */
516 1, /* cond_not_taken_branch_cost. */
520 struct processor_costs k6_cost
= {
521 COSTS_N_INSNS (1), /* cost of an add instruction */
522 COSTS_N_INSNS (2), /* cost of a lea instruction */
523 COSTS_N_INSNS (1), /* variable shift costs */
524 COSTS_N_INSNS (1), /* constant shift costs */
525 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
526 COSTS_N_INSNS (3), /* HI */
527 COSTS_N_INSNS (3), /* SI */
528 COSTS_N_INSNS (3), /* DI */
529 COSTS_N_INSNS (3)}, /* other */
530 0, /* cost of multiply per each bit set */
531 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
532 COSTS_N_INSNS (18), /* HI */
533 COSTS_N_INSNS (18), /* SI */
534 COSTS_N_INSNS (18), /* DI */
535 COSTS_N_INSNS (18)}, /* other */
536 COSTS_N_INSNS (2), /* cost of movsx */
537 COSTS_N_INSNS (2), /* cost of movzx */
538 8, /* "large" insn */
540 3, /* cost for loading QImode using movzbl */
541 {4, 5, 4}, /* cost of loading integer registers
542 in QImode, HImode and SImode.
543 Relative to reg-reg move (2). */
544 {2, 3, 2}, /* cost of storing integer registers */
545 4, /* cost of reg,reg fld/fst */
546 {6, 6, 6}, /* cost of loading fp registers
547 in SFmode, DFmode and XFmode */
548 {4, 4, 4}, /* cost of storing fp registers
549 in SFmode, DFmode and XFmode */
550 2, /* cost of moving MMX register */
551 {2, 2}, /* cost of loading MMX registers
552 in SImode and DImode */
553 {2, 2}, /* cost of storing MMX registers
554 in SImode and DImode */
555 2, /* cost of moving SSE register */
556 {2, 2, 8}, /* cost of loading SSE registers
557 in SImode, DImode and TImode */
558 {2, 2, 8}, /* cost of storing SSE registers
559 in SImode, DImode and TImode */
560 6, /* MMX or SSE register to integer */
561 32, /* size of l1 cache. */
562 32, /* size of l2 cache. Some models
563 have integrated l2 cache, but
564 optimizing for k6 is not important
565 enough to worry about that. */
566 32, /* size of prefetch block */
567 1, /* number of parallel prefetches */
569 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
570 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
571 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
572 COSTS_N_INSNS (2), /* cost of FABS instruction. */
573 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
574 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
575 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
576 DUMMY_STRINGOP_ALGS
},
577 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
578 DUMMY_STRINGOP_ALGS
},
579 1, /* scalar_stmt_cost. */
580 1, /* scalar load_cost. */
581 1, /* scalar_store_cost. */
582 1, /* vec_stmt_cost. */
583 1, /* vec_to_scalar_cost. */
584 1, /* scalar_to_vec_cost. */
585 1, /* vec_align_load_cost. */
586 2, /* vec_unalign_load_cost. */
587 1, /* vec_store_cost. */
588 3, /* cond_taken_branch_cost. */
589 1, /* cond_not_taken_branch_cost. */
593 struct processor_costs athlon_cost
= {
594 COSTS_N_INSNS (1), /* cost of an add instruction */
595 COSTS_N_INSNS (2), /* cost of a lea instruction */
596 COSTS_N_INSNS (1), /* variable shift costs */
597 COSTS_N_INSNS (1), /* constant shift costs */
598 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
599 COSTS_N_INSNS (5), /* HI */
600 COSTS_N_INSNS (5), /* SI */
601 COSTS_N_INSNS (5), /* DI */
602 COSTS_N_INSNS (5)}, /* other */
603 0, /* cost of multiply per each bit set */
604 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
605 COSTS_N_INSNS (26), /* HI */
606 COSTS_N_INSNS (42), /* SI */
607 COSTS_N_INSNS (74), /* DI */
608 COSTS_N_INSNS (74)}, /* other */
609 COSTS_N_INSNS (1), /* cost of movsx */
610 COSTS_N_INSNS (1), /* cost of movzx */
611 8, /* "large" insn */
613 4, /* cost for loading QImode using movzbl */
614 {3, 4, 3}, /* cost of loading integer registers
615 in QImode, HImode and SImode.
616 Relative to reg-reg move (2). */
617 {3, 4, 3}, /* cost of storing integer registers */
618 4, /* cost of reg,reg fld/fst */
619 {4, 4, 12}, /* cost of loading fp registers
620 in SFmode, DFmode and XFmode */
621 {6, 6, 8}, /* cost of storing fp registers
622 in SFmode, DFmode and XFmode */
623 2, /* cost of moving MMX register */
624 {4, 4}, /* cost of loading MMX registers
625 in SImode and DImode */
626 {4, 4}, /* cost of storing MMX registers
627 in SImode and DImode */
628 2, /* cost of moving SSE register */
629 {4, 4, 6}, /* cost of loading SSE registers
630 in SImode, DImode and TImode */
631 {4, 4, 5}, /* cost of storing SSE registers
632 in SImode, DImode and TImode */
633 5, /* MMX or SSE register to integer */
634 64, /* size of l1 cache. */
635 256, /* size of l2 cache. */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
639 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
640 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
641 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
642 COSTS_N_INSNS (2), /* cost of FABS instruction. */
643 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
644 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
645 /* For some reason, Athlon deals better with REP prefix (relative to loops)
646 compared to K8. Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset. */
648 {{libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
649 DUMMY_STRINGOP_ALGS
},
650 {{libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
651 DUMMY_STRINGOP_ALGS
},
652 1, /* scalar_stmt_cost. */
653 1, /* scalar load_cost. */
654 1, /* scalar_store_cost. */
655 1, /* vec_stmt_cost. */
656 1, /* vec_to_scalar_cost. */
657 1, /* scalar_to_vec_cost. */
658 1, /* vec_align_load_cost. */
659 2, /* vec_unalign_load_cost. */
660 1, /* vec_store_cost. */
661 3, /* cond_taken_branch_cost. */
662 1, /* cond_not_taken_branch_cost. */
666 struct processor_costs k8_cost
= {
667 COSTS_N_INSNS (1), /* cost of an add instruction */
668 COSTS_N_INSNS (2), /* cost of a lea instruction */
669 COSTS_N_INSNS (1), /* variable shift costs */
670 COSTS_N_INSNS (1), /* constant shift costs */
671 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
672 COSTS_N_INSNS (4), /* HI */
673 COSTS_N_INSNS (3), /* SI */
674 COSTS_N_INSNS (4), /* DI */
675 COSTS_N_INSNS (5)}, /* other */
676 0, /* cost of multiply per each bit set */
677 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
678 COSTS_N_INSNS (26), /* HI */
679 COSTS_N_INSNS (42), /* SI */
680 COSTS_N_INSNS (74), /* DI */
681 COSTS_N_INSNS (74)}, /* other */
682 COSTS_N_INSNS (1), /* cost of movsx */
683 COSTS_N_INSNS (1), /* cost of movzx */
684 8, /* "large" insn */
686 4, /* cost for loading QImode using movzbl */
687 {3, 4, 3}, /* cost of loading integer registers
688 in QImode, HImode and SImode.
689 Relative to reg-reg move (2). */
690 {3, 4, 3}, /* cost of storing integer registers */
691 4, /* cost of reg,reg fld/fst */
692 {4, 4, 12}, /* cost of loading fp registers
693 in SFmode, DFmode and XFmode */
694 {6, 6, 8}, /* cost of storing fp registers
695 in SFmode, DFmode and XFmode */
696 2, /* cost of moving MMX register */
697 {3, 3}, /* cost of loading MMX registers
698 in SImode and DImode */
699 {4, 4}, /* cost of storing MMX registers
700 in SImode and DImode */
701 2, /* cost of moving SSE register */
702 {4, 3, 6}, /* cost of loading SSE registers
703 in SImode, DImode and TImode */
704 {4, 4, 5}, /* cost of storing SSE registers
705 in SImode, DImode and TImode */
706 5, /* MMX or SSE register to integer */
707 64, /* size of l1 cache. */
708 512, /* size of l2 cache. */
709 64, /* size of prefetch block */
710 /* New AMD processors never drop prefetches; if they cannot be performed
711 immediately, they are queued. We set number of simultaneous prefetches
712 to a large constant to reflect this (it probably is not a good idea not
713 to limit number of prefetches at all, as their execution also takes some
715 100, /* number of parallel prefetches */
717 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
718 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
719 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
720 COSTS_N_INSNS (2), /* cost of FABS instruction. */
721 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
722 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
723 /* K8 has optimized REP instruction for medium sized blocks, but for very
724 small blocks it is better to use loop. For large blocks, libcall can
725 do nontemporary accesses and beat inline considerably. */
726 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
727 {-1, rep_prefix_4_byte
, false}}},
728 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
729 {-1, libcall
, false}}}},
730 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
731 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
732 {libcall
, {{48, unrolled_loop
, false},
733 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
734 4, /* scalar_stmt_cost. */
735 2, /* scalar load_cost. */
736 2, /* scalar_store_cost. */
737 5, /* vec_stmt_cost. */
738 0, /* vec_to_scalar_cost. */
739 2, /* scalar_to_vec_cost. */
740 2, /* vec_align_load_cost. */
741 3, /* vec_unalign_load_cost. */
742 3, /* vec_store_cost. */
743 3, /* cond_taken_branch_cost. */
744 2, /* cond_not_taken_branch_cost. */
747 struct processor_costs amdfam10_cost
= {
748 COSTS_N_INSNS (1), /* cost of an add instruction */
749 COSTS_N_INSNS (2), /* cost of a lea instruction */
750 COSTS_N_INSNS (1), /* variable shift costs */
751 COSTS_N_INSNS (1), /* constant shift costs */
752 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
753 COSTS_N_INSNS (4), /* HI */
754 COSTS_N_INSNS (3), /* SI */
755 COSTS_N_INSNS (4), /* DI */
756 COSTS_N_INSNS (5)}, /* other */
757 0, /* cost of multiply per each bit set */
758 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
759 COSTS_N_INSNS (35), /* HI */
760 COSTS_N_INSNS (51), /* SI */
761 COSTS_N_INSNS (83), /* DI */
762 COSTS_N_INSNS (83)}, /* other */
763 COSTS_N_INSNS (1), /* cost of movsx */
764 COSTS_N_INSNS (1), /* cost of movzx */
765 8, /* "large" insn */
767 4, /* cost for loading QImode using movzbl */
768 {3, 4, 3}, /* cost of loading integer registers
769 in QImode, HImode and SImode.
770 Relative to reg-reg move (2). */
771 {3, 4, 3}, /* cost of storing integer registers */
772 4, /* cost of reg,reg fld/fst */
773 {4, 4, 12}, /* cost of loading fp registers
774 in SFmode, DFmode and XFmode */
775 {6, 6, 8}, /* cost of storing fp registers
776 in SFmode, DFmode and XFmode */
777 2, /* cost of moving MMX register */
778 {3, 3}, /* cost of loading MMX registers
779 in SImode and DImode */
780 {4, 4}, /* cost of storing MMX registers
781 in SImode and DImode */
782 2, /* cost of moving SSE register */
783 {4, 4, 3}, /* cost of loading SSE registers
784 in SImode, DImode and TImode */
785 {4, 4, 5}, /* cost of storing SSE registers
786 in SImode, DImode and TImode */
787 3, /* MMX or SSE register to integer */
789 MOVD reg64, xmmreg Double FSTORE 4
790 MOVD reg32, xmmreg Double FSTORE 4
792 MOVD reg64, xmmreg Double FADD 3
794 MOVD reg32, xmmreg Double FADD 3
796 64, /* size of l1 cache. */
797 512, /* size of l2 cache. */
798 64, /* size of prefetch block */
799 /* New AMD processors never drop prefetches; if they cannot be performed
800 immediately, they are queued. We set number of simultaneous prefetches
801 to a large constant to reflect this (it probably is not a good idea not
802 to limit number of prefetches at all, as their execution also takes some
804 100, /* number of parallel prefetches */
806 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
807 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
808 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
809 COSTS_N_INSNS (2), /* cost of FABS instruction. */
810 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
811 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
813 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
814 very small blocks it is better to use loop. For large blocks, libcall can
815 do nontemporary accesses and beat inline considerably. */
816 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
817 {-1, rep_prefix_4_byte
, false}}},
818 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
819 {-1, libcall
, false}}}},
820 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
821 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
822 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
823 {-1, libcall
, false}}}},
824 4, /* scalar_stmt_cost. */
825 2, /* scalar load_cost. */
826 2, /* scalar_store_cost. */
827 6, /* vec_stmt_cost. */
828 0, /* vec_to_scalar_cost. */
829 2, /* scalar_to_vec_cost. */
830 2, /* vec_align_load_cost. */
831 2, /* vec_unalign_load_cost. */
832 2, /* vec_store_cost. */
833 2, /* cond_taken_branch_cost. */
834 1, /* cond_not_taken_branch_cost. */
837 struct processor_costs bdver1_cost
= {
838 COSTS_N_INSNS (1), /* cost of an add instruction */
839 COSTS_N_INSNS (1), /* cost of a lea instruction */
840 COSTS_N_INSNS (1), /* variable shift costs */
841 COSTS_N_INSNS (1), /* constant shift costs */
842 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
843 COSTS_N_INSNS (4), /* HI */
844 COSTS_N_INSNS (4), /* SI */
845 COSTS_N_INSNS (6), /* DI */
846 COSTS_N_INSNS (6)}, /* other */
847 0, /* cost of multiply per each bit set */
848 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
849 COSTS_N_INSNS (35), /* HI */
850 COSTS_N_INSNS (51), /* SI */
851 COSTS_N_INSNS (83), /* DI */
852 COSTS_N_INSNS (83)}, /* other */
853 COSTS_N_INSNS (1), /* cost of movsx */
854 COSTS_N_INSNS (1), /* cost of movzx */
855 8, /* "large" insn */
857 4, /* cost for loading QImode using movzbl */
858 {5, 5, 4}, /* cost of loading integer registers
859 in QImode, HImode and SImode.
860 Relative to reg-reg move (2). */
861 {4, 4, 4}, /* cost of storing integer registers */
862 2, /* cost of reg,reg fld/fst */
863 {5, 5, 12}, /* cost of loading fp registers
864 in SFmode, DFmode and XFmode */
865 {4, 4, 8}, /* cost of storing fp registers
866 in SFmode, DFmode and XFmode */
867 2, /* cost of moving MMX register */
868 {4, 4}, /* cost of loading MMX registers
869 in SImode and DImode */
870 {4, 4}, /* cost of storing MMX registers
871 in SImode and DImode */
872 2, /* cost of moving SSE register */
873 {4, 4, 4}, /* cost of loading SSE registers
874 in SImode, DImode and TImode */
875 {4, 4, 4}, /* cost of storing SSE registers
876 in SImode, DImode and TImode */
877 2, /* MMX or SSE register to integer */
879 MOVD reg64, xmmreg Double FSTORE 4
880 MOVD reg32, xmmreg Double FSTORE 4
882 MOVD reg64, xmmreg Double FADD 3
884 MOVD reg32, xmmreg Double FADD 3
886 16, /* size of l1 cache. */
887 2048, /* size of l2 cache. */
888 64, /* size of prefetch block */
889 /* New AMD processors never drop prefetches; if they cannot be performed
890 immediately, they are queued. We set number of simultaneous prefetches
891 to a large constant to reflect this (it probably is not a good idea not
892 to limit number of prefetches at all, as their execution also takes some
894 100, /* number of parallel prefetches */
896 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
897 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
898 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
899 COSTS_N_INSNS (2), /* cost of FABS instruction. */
900 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
901 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
903 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
904 very small blocks it is better to use loop. For large blocks, libcall
905 can do nontemporary accesses and beat inline considerably. */
906 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
907 {-1, rep_prefix_4_byte
, false}}},
908 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
909 {-1, libcall
, false}}}},
910 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
911 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
912 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
913 {-1, libcall
, false}}}},
914 6, /* scalar_stmt_cost. */
915 4, /* scalar load_cost. */
916 4, /* scalar_store_cost. */
917 6, /* vec_stmt_cost. */
918 0, /* vec_to_scalar_cost. */
919 2, /* scalar_to_vec_cost. */
920 4, /* vec_align_load_cost. */
921 4, /* vec_unalign_load_cost. */
922 4, /* vec_store_cost. */
923 2, /* cond_taken_branch_cost. */
924 1, /* cond_not_taken_branch_cost. */
927 struct processor_costs bdver2_cost
= {
928 COSTS_N_INSNS (1), /* cost of an add instruction */
929 COSTS_N_INSNS (1), /* cost of a lea instruction */
930 COSTS_N_INSNS (1), /* variable shift costs */
931 COSTS_N_INSNS (1), /* constant shift costs */
932 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
933 COSTS_N_INSNS (4), /* HI */
934 COSTS_N_INSNS (4), /* SI */
935 COSTS_N_INSNS (6), /* DI */
936 COSTS_N_INSNS (6)}, /* other */
937 0, /* cost of multiply per each bit set */
938 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
939 COSTS_N_INSNS (35), /* HI */
940 COSTS_N_INSNS (51), /* SI */
941 COSTS_N_INSNS (83), /* DI */
942 COSTS_N_INSNS (83)}, /* other */
943 COSTS_N_INSNS (1), /* cost of movsx */
944 COSTS_N_INSNS (1), /* cost of movzx */
945 8, /* "large" insn */
947 4, /* cost for loading QImode using movzbl */
948 {5, 5, 4}, /* cost of loading integer registers
949 in QImode, HImode and SImode.
950 Relative to reg-reg move (2). */
951 {4, 4, 4}, /* cost of storing integer registers */
952 2, /* cost of reg,reg fld/fst */
953 {5, 5, 12}, /* cost of loading fp registers
954 in SFmode, DFmode and XFmode */
955 {4, 4, 8}, /* cost of storing fp registers
956 in SFmode, DFmode and XFmode */
957 2, /* cost of moving MMX register */
958 {4, 4}, /* cost of loading MMX registers
959 in SImode and DImode */
960 {4, 4}, /* cost of storing MMX registers
961 in SImode and DImode */
962 2, /* cost of moving SSE register */
963 {4, 4, 4}, /* cost of loading SSE registers
964 in SImode, DImode and TImode */
965 {4, 4, 4}, /* cost of storing SSE registers
966 in SImode, DImode and TImode */
967 2, /* MMX or SSE register to integer */
969 MOVD reg64, xmmreg Double FSTORE 4
970 MOVD reg32, xmmreg Double FSTORE 4
972 MOVD reg64, xmmreg Double FADD 3
974 MOVD reg32, xmmreg Double FADD 3
976 16, /* size of l1 cache. */
977 2048, /* size of l2 cache. */
978 64, /* size of prefetch block */
979 /* New AMD processors never drop prefetches; if they cannot be performed
980 immediately, they are queued. We set number of simultaneous prefetches
981 to a large constant to reflect this (it probably is not a good idea not
982 to limit number of prefetches at all, as their execution also takes some
984 100, /* number of parallel prefetches */
986 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
987 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
988 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
989 COSTS_N_INSNS (2), /* cost of FABS instruction. */
990 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
991 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
993 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
994 very small blocks it is better to use loop. For large blocks, libcall
995 can do nontemporary accesses and beat inline considerably. */
996 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
997 {-1, rep_prefix_4_byte
, false}}},
998 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
999 {-1, libcall
, false}}}},
1000 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1001 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1002 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1003 {-1, libcall
, false}}}},
1004 6, /* scalar_stmt_cost. */
1005 4, /* scalar load_cost. */
1006 4, /* scalar_store_cost. */
1007 6, /* vec_stmt_cost. */
1008 0, /* vec_to_scalar_cost. */
1009 2, /* scalar_to_vec_cost. */
1010 4, /* vec_align_load_cost. */
1011 4, /* vec_unalign_load_cost. */
1012 4, /* vec_store_cost. */
1013 2, /* cond_taken_branch_cost. */
1014 1, /* cond_not_taken_branch_cost. */
1017 struct processor_costs bdver3_cost
= {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (1), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (4), /* HI */
1024 COSTS_N_INSNS (4), /* SI */
1025 COSTS_N_INSNS (6), /* DI */
1026 COSTS_N_INSNS (6)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (35), /* HI */
1030 COSTS_N_INSNS (51), /* SI */
1031 COSTS_N_INSNS (83), /* DI */
1032 COSTS_N_INSNS (83)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1037 4, /* cost for loading QImode using movzbl */
1038 {5, 5, 4}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {4, 4, 4}, /* cost of storing integer registers */
1042 2, /* cost of reg,reg fld/fst */
1043 {5, 5, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {4, 4, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 4}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 4}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 2, /* MMX or SSE register to integer */
1058 16, /* size of l1 cache. */
1059 2048, /* size of l2 cache. */
1060 64, /* size of prefetch block */
1061 /* New AMD processors never drop prefetches; if they cannot be performed
1062 immediately, they are queued. We set number of simultaneous prefetches
1063 to a large constant to reflect this (it probably is not a good idea not
1064 to limit number of prefetches at all, as their execution also takes some
1066 100, /* number of parallel prefetches */
1067 2, /* Branch cost */
1068 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1069 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1070 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1071 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1072 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1073 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1075 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1076 very small blocks it is better to use loop. For large blocks, libcall
1077 can do nontemporary accesses and beat inline considerably. */
1078 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1079 {-1, rep_prefix_4_byte
, false}}},
1080 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1081 {-1, libcall
, false}}}},
1082 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1083 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1084 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1085 {-1, libcall
, false}}}},
1086 6, /* scalar_stmt_cost. */
1087 4, /* scalar load_cost. */
1088 4, /* scalar_store_cost. */
1089 6, /* vec_stmt_cost. */
1090 0, /* vec_to_scalar_cost. */
1091 2, /* scalar_to_vec_cost. */
1092 4, /* vec_align_load_cost. */
1093 4, /* vec_unalign_load_cost. */
1094 4, /* vec_store_cost. */
1095 2, /* cond_taken_branch_cost. */
1096 1, /* cond_not_taken_branch_cost. */
1099 struct processor_costs btver1_cost
= {
1100 COSTS_N_INSNS (1), /* cost of an add instruction */
1101 COSTS_N_INSNS (2), /* cost of a lea instruction */
1102 COSTS_N_INSNS (1), /* variable shift costs */
1103 COSTS_N_INSNS (1), /* constant shift costs */
1104 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1105 COSTS_N_INSNS (4), /* HI */
1106 COSTS_N_INSNS (3), /* SI */
1107 COSTS_N_INSNS (4), /* DI */
1108 COSTS_N_INSNS (5)}, /* other */
1109 0, /* cost of multiply per each bit set */
1110 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1111 COSTS_N_INSNS (35), /* HI */
1112 COSTS_N_INSNS (51), /* SI */
1113 COSTS_N_INSNS (83), /* DI */
1114 COSTS_N_INSNS (83)}, /* other */
1115 COSTS_N_INSNS (1), /* cost of movsx */
1116 COSTS_N_INSNS (1), /* cost of movzx */
1117 8, /* "large" insn */
1119 4, /* cost for loading QImode using movzbl */
1120 {3, 4, 3}, /* cost of loading integer registers
1121 in QImode, HImode and SImode.
1122 Relative to reg-reg move (2). */
1123 {3, 4, 3}, /* cost of storing integer registers */
1124 4, /* cost of reg,reg fld/fst */
1125 {4, 4, 12}, /* cost of loading fp registers
1126 in SFmode, DFmode and XFmode */
1127 {6, 6, 8}, /* cost of storing fp registers
1128 in SFmode, DFmode and XFmode */
1129 2, /* cost of moving MMX register */
1130 {3, 3}, /* cost of loading MMX registers
1131 in SImode and DImode */
1132 {4, 4}, /* cost of storing MMX registers
1133 in SImode and DImode */
1134 2, /* cost of moving SSE register */
1135 {4, 4, 3}, /* cost of loading SSE registers
1136 in SImode, DImode and TImode */
1137 {4, 4, 5}, /* cost of storing SSE registers
1138 in SImode, DImode and TImode */
1139 3, /* MMX or SSE register to integer */
1141 MOVD reg64, xmmreg Double FSTORE 4
1142 MOVD reg32, xmmreg Double FSTORE 4
1144 MOVD reg64, xmmreg Double FADD 3
1146 MOVD reg32, xmmreg Double FADD 3
1148 32, /* size of l1 cache. */
1149 512, /* size of l2 cache. */
1150 64, /* size of prefetch block */
1151 100, /* number of parallel prefetches */
1152 2, /* Branch cost */
1153 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1154 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1155 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1156 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1157 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1158 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1160 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1161 very small blocks it is better to use loop. For large blocks, libcall can
1162 do nontemporary accesses and beat inline considerably. */
1163 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1164 {-1, rep_prefix_4_byte
, false}}},
1165 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1166 {-1, libcall
, false}}}},
1167 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1168 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1169 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1170 {-1, libcall
, false}}}},
1171 4, /* scalar_stmt_cost. */
1172 2, /* scalar load_cost. */
1173 2, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 2, /* vec_align_load_cost. */
1178 2, /* vec_unalign_load_cost. */
1179 2, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1184 struct processor_costs btver2_cost
= {
1185 COSTS_N_INSNS (1), /* cost of an add instruction */
1186 COSTS_N_INSNS (2), /* cost of a lea instruction */
1187 COSTS_N_INSNS (1), /* variable shift costs */
1188 COSTS_N_INSNS (1), /* constant shift costs */
1189 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1190 COSTS_N_INSNS (4), /* HI */
1191 COSTS_N_INSNS (3), /* SI */
1192 COSTS_N_INSNS (4), /* DI */
1193 COSTS_N_INSNS (5)}, /* other */
1194 0, /* cost of multiply per each bit set */
1195 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1196 COSTS_N_INSNS (35), /* HI */
1197 COSTS_N_INSNS (51), /* SI */
1198 COSTS_N_INSNS (83), /* DI */
1199 COSTS_N_INSNS (83)}, /* other */
1200 COSTS_N_INSNS (1), /* cost of movsx */
1201 COSTS_N_INSNS (1), /* cost of movzx */
1202 8, /* "large" insn */
1204 4, /* cost for loading QImode using movzbl */
1205 {3, 4, 3}, /* cost of loading integer registers
1206 in QImode, HImode and SImode.
1207 Relative to reg-reg move (2). */
1208 {3, 4, 3}, /* cost of storing integer registers */
1209 4, /* cost of reg,reg fld/fst */
1210 {4, 4, 12}, /* cost of loading fp registers
1211 in SFmode, DFmode and XFmode */
1212 {6, 6, 8}, /* cost of storing fp registers
1213 in SFmode, DFmode and XFmode */
1214 2, /* cost of moving MMX register */
1215 {3, 3}, /* cost of loading MMX registers
1216 in SImode and DImode */
1217 {4, 4}, /* cost of storing MMX registers
1218 in SImode and DImode */
1219 2, /* cost of moving SSE register */
1220 {4, 4, 3}, /* cost of loading SSE registers
1221 in SImode, DImode and TImode */
1222 {4, 4, 5}, /* cost of storing SSE registers
1223 in SImode, DImode and TImode */
1224 3, /* MMX or SSE register to integer */
1226 MOVD reg64, xmmreg Double FSTORE 4
1227 MOVD reg32, xmmreg Double FSTORE 4
1229 MOVD reg64, xmmreg Double FADD 3
1231 MOVD reg32, xmmreg Double FADD 3
1233 32, /* size of l1 cache. */
1234 2048, /* size of l2 cache. */
1235 64, /* size of prefetch block */
1236 100, /* number of parallel prefetches */
1237 2, /* Branch cost */
1238 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1239 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1240 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1241 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1242 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1243 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1245 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1246 {-1, rep_prefix_4_byte
, false}}},
1247 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1248 {-1, libcall
, false}}}},
1249 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1250 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1251 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1252 {-1, libcall
, false}}}},
1253 4, /* scalar_stmt_cost. */
1254 2, /* scalar load_cost. */
1255 2, /* scalar_store_cost. */
1256 6, /* vec_stmt_cost. */
1257 0, /* vec_to_scalar_cost. */
1258 2, /* scalar_to_vec_cost. */
1259 2, /* vec_align_load_cost. */
1260 2, /* vec_unalign_load_cost. */
1261 2, /* vec_store_cost. */
1262 2, /* cond_taken_branch_cost. */
1263 1, /* cond_not_taken_branch_cost. */
1267 struct processor_costs pentium4_cost
= {
1268 COSTS_N_INSNS (1), /* cost of an add instruction */
1269 COSTS_N_INSNS (3), /* cost of a lea instruction */
1270 COSTS_N_INSNS (4), /* variable shift costs */
1271 COSTS_N_INSNS (4), /* constant shift costs */
1272 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1273 COSTS_N_INSNS (15), /* HI */
1274 COSTS_N_INSNS (15), /* SI */
1275 COSTS_N_INSNS (15), /* DI */
1276 COSTS_N_INSNS (15)}, /* other */
1277 0, /* cost of multiply per each bit set */
1278 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1279 COSTS_N_INSNS (56), /* HI */
1280 COSTS_N_INSNS (56), /* SI */
1281 COSTS_N_INSNS (56), /* DI */
1282 COSTS_N_INSNS (56)}, /* other */
1283 COSTS_N_INSNS (1), /* cost of movsx */
1284 COSTS_N_INSNS (1), /* cost of movzx */
1285 16, /* "large" insn */
1287 2, /* cost for loading QImode using movzbl */
1288 {4, 5, 4}, /* cost of loading integer registers
1289 in QImode, HImode and SImode.
1290 Relative to reg-reg move (2). */
1291 {2, 3, 2}, /* cost of storing integer registers */
1292 2, /* cost of reg,reg fld/fst */
1293 {2, 2, 6}, /* cost of loading fp registers
1294 in SFmode, DFmode and XFmode */
1295 {4, 4, 6}, /* cost of storing fp registers
1296 in SFmode, DFmode and XFmode */
1297 2, /* cost of moving MMX register */
1298 {2, 2}, /* cost of loading MMX registers
1299 in SImode and DImode */
1300 {2, 2}, /* cost of storing MMX registers
1301 in SImode and DImode */
1302 12, /* cost of moving SSE register */
1303 {12, 12, 12}, /* cost of loading SSE registers
1304 in SImode, DImode and TImode */
1305 {2, 2, 8}, /* cost of storing SSE registers
1306 in SImode, DImode and TImode */
1307 10, /* MMX or SSE register to integer */
1308 8, /* size of l1 cache. */
1309 256, /* size of l2 cache. */
1310 64, /* size of prefetch block */
1311 6, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1319 {{libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1320 DUMMY_STRINGOP_ALGS
},
1321 {{libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1322 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1323 DUMMY_STRINGOP_ALGS
},
1324 1, /* scalar_stmt_cost. */
1325 1, /* scalar load_cost. */
1326 1, /* scalar_store_cost. */
1327 1, /* vec_stmt_cost. */
1328 1, /* vec_to_scalar_cost. */
1329 1, /* scalar_to_vec_cost. */
1330 1, /* vec_align_load_cost. */
1331 2, /* vec_unalign_load_cost. */
1332 1, /* vec_store_cost. */
1333 3, /* cond_taken_branch_cost. */
1334 1, /* cond_not_taken_branch_cost. */
1338 struct processor_costs nocona_cost
= {
1339 COSTS_N_INSNS (1), /* cost of an add instruction */
1340 COSTS_N_INSNS (1), /* cost of a lea instruction */
1341 COSTS_N_INSNS (1), /* variable shift costs */
1342 COSTS_N_INSNS (1), /* constant shift costs */
1343 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1344 COSTS_N_INSNS (10), /* HI */
1345 COSTS_N_INSNS (10), /* SI */
1346 COSTS_N_INSNS (10), /* DI */
1347 COSTS_N_INSNS (10)}, /* other */
1348 0, /* cost of multiply per each bit set */
1349 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1350 COSTS_N_INSNS (66), /* HI */
1351 COSTS_N_INSNS (66), /* SI */
1352 COSTS_N_INSNS (66), /* DI */
1353 COSTS_N_INSNS (66)}, /* other */
1354 COSTS_N_INSNS (1), /* cost of movsx */
1355 COSTS_N_INSNS (1), /* cost of movzx */
1356 16, /* "large" insn */
1357 17, /* MOVE_RATIO */
1358 4, /* cost for loading QImode using movzbl */
1359 {4, 4, 4}, /* cost of loading integer registers
1360 in QImode, HImode and SImode.
1361 Relative to reg-reg move (2). */
1362 {4, 4, 4}, /* cost of storing integer registers */
1363 3, /* cost of reg,reg fld/fst */
1364 {12, 12, 12}, /* cost of loading fp registers
1365 in SFmode, DFmode and XFmode */
1366 {4, 4, 4}, /* cost of storing fp registers
1367 in SFmode, DFmode and XFmode */
1368 6, /* cost of moving MMX register */
1369 {12, 12}, /* cost of loading MMX registers
1370 in SImode and DImode */
1371 {12, 12}, /* cost of storing MMX registers
1372 in SImode and DImode */
1373 6, /* cost of moving SSE register */
1374 {12, 12, 12}, /* cost of loading SSE registers
1375 in SImode, DImode and TImode */
1376 {12, 12, 12}, /* cost of storing SSE registers
1377 in SImode, DImode and TImode */
1378 8, /* MMX or SSE register to integer */
1379 8, /* size of l1 cache. */
1380 1024, /* size of l2 cache. */
1381 128, /* size of prefetch block */
1382 8, /* number of parallel prefetches */
1383 1, /* Branch cost */
1384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1385 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1386 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1387 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1388 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1389 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1390 {{libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1391 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1392 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}},
1393 {{libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1394 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1395 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1396 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1397 1, /* scalar_stmt_cost. */
1398 1, /* scalar load_cost. */
1399 1, /* scalar_store_cost. */
1400 1, /* vec_stmt_cost. */
1401 1, /* vec_to_scalar_cost. */
1402 1, /* scalar_to_vec_cost. */
1403 1, /* vec_align_load_cost. */
1404 2, /* vec_unalign_load_cost. */
1405 1, /* vec_store_cost. */
1406 3, /* cond_taken_branch_cost. */
1407 1, /* cond_not_taken_branch_cost. */
1411 struct processor_costs atom_cost
= {
1412 COSTS_N_INSNS (1), /* cost of an add instruction */
1413 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1414 COSTS_N_INSNS (1), /* variable shift costs */
1415 COSTS_N_INSNS (1), /* constant shift costs */
1416 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1417 COSTS_N_INSNS (4), /* HI */
1418 COSTS_N_INSNS (3), /* SI */
1419 COSTS_N_INSNS (4), /* DI */
1420 COSTS_N_INSNS (2)}, /* other */
1421 0, /* cost of multiply per each bit set */
1422 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1423 COSTS_N_INSNS (26), /* HI */
1424 COSTS_N_INSNS (42), /* SI */
1425 COSTS_N_INSNS (74), /* DI */
1426 COSTS_N_INSNS (74)}, /* other */
1427 COSTS_N_INSNS (1), /* cost of movsx */
1428 COSTS_N_INSNS (1), /* cost of movzx */
1429 8, /* "large" insn */
1430 17, /* MOVE_RATIO */
1431 4, /* cost for loading QImode using movzbl */
1432 {4, 4, 4}, /* cost of loading integer registers
1433 in QImode, HImode and SImode.
1434 Relative to reg-reg move (2). */
1435 {4, 4, 4}, /* cost of storing integer registers */
1436 4, /* cost of reg,reg fld/fst */
1437 {12, 12, 12}, /* cost of loading fp registers
1438 in SFmode, DFmode and XFmode */
1439 {6, 6, 8}, /* cost of storing fp registers
1440 in SFmode, DFmode and XFmode */
1441 2, /* cost of moving MMX register */
1442 {8, 8}, /* cost of loading MMX registers
1443 in SImode and DImode */
1444 {8, 8}, /* cost of storing MMX registers
1445 in SImode and DImode */
1446 2, /* cost of moving SSE register */
1447 {8, 8, 8}, /* cost of loading SSE registers
1448 in SImode, DImode and TImode */
1449 {8, 8, 8}, /* cost of storing SSE registers
1450 in SImode, DImode and TImode */
1451 5, /* MMX or SSE register to integer */
1452 32, /* size of l1 cache. */
1453 256, /* size of l2 cache. */
1454 64, /* size of prefetch block */
1455 6, /* number of parallel prefetches */
1456 3, /* Branch cost */
1457 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1458 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1459 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1460 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1461 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1462 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1463 {{libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1464 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1465 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1466 {{libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1467 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1468 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1469 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1470 1, /* scalar_stmt_cost. */
1471 1, /* scalar load_cost. */
1472 1, /* scalar_store_cost. */
1473 1, /* vec_stmt_cost. */
1474 1, /* vec_to_scalar_cost. */
1475 1, /* scalar_to_vec_cost. */
1476 1, /* vec_align_load_cost. */
1477 2, /* vec_unalign_load_cost. */
1478 1, /* vec_store_cost. */
1479 3, /* cond_taken_branch_cost. */
1480 1, /* cond_not_taken_branch_cost. */
1483 /* Generic64 should produce code tuned for Nocona and K8. */
1485 struct processor_costs generic64_cost
= {
1486 COSTS_N_INSNS (1), /* cost of an add instruction */
1487 /* On all chips taken into consideration lea is 2 cycles and more. With
1488 this cost however our current implementation of synth_mult results in
1489 use of unnecessary temporary registers causing regression on several
1490 SPECfp benchmarks. */
1491 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1492 COSTS_N_INSNS (1), /* variable shift costs */
1493 COSTS_N_INSNS (1), /* constant shift costs */
1494 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1495 COSTS_N_INSNS (4), /* HI */
1496 COSTS_N_INSNS (3), /* SI */
1497 COSTS_N_INSNS (4), /* DI */
1498 COSTS_N_INSNS (2)}, /* other */
1499 0, /* cost of multiply per each bit set */
1500 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1501 COSTS_N_INSNS (26), /* HI */
1502 COSTS_N_INSNS (42), /* SI */
1503 COSTS_N_INSNS (74), /* DI */
1504 COSTS_N_INSNS (74)}, /* other */
1505 COSTS_N_INSNS (1), /* cost of movsx */
1506 COSTS_N_INSNS (1), /* cost of movzx */
1507 8, /* "large" insn */
1508 17, /* MOVE_RATIO */
1509 4, /* cost for loading QImode using movzbl */
1510 {4, 4, 4}, /* cost of loading integer registers
1511 in QImode, HImode and SImode.
1512 Relative to reg-reg move (2). */
1513 {4, 4, 4}, /* cost of storing integer registers */
1514 4, /* cost of reg,reg fld/fst */
1515 {12, 12, 12}, /* cost of loading fp registers
1516 in SFmode, DFmode and XFmode */
1517 {6, 6, 8}, /* cost of storing fp registers
1518 in SFmode, DFmode and XFmode */
1519 2, /* cost of moving MMX register */
1520 {8, 8}, /* cost of loading MMX registers
1521 in SImode and DImode */
1522 {8, 8}, /* cost of storing MMX registers
1523 in SImode and DImode */
1524 2, /* cost of moving SSE register */
1525 {8, 8, 8}, /* cost of loading SSE registers
1526 in SImode, DImode and TImode */
1527 {8, 8, 8}, /* cost of storing SSE registers
1528 in SImode, DImode and TImode */
1529 5, /* MMX or SSE register to integer */
1530 32, /* size of l1 cache. */
1531 512, /* size of l2 cache. */
1532 64, /* size of prefetch block */
1533 6, /* number of parallel prefetches */
1534 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1535 value is increased to perhaps more appropriate value of 5. */
1536 3, /* Branch cost */
1537 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1538 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1539 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1540 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1541 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1542 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1543 {DUMMY_STRINGOP_ALGS
,
1544 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1545 {-1, libcall
, false}}}},
1546 {DUMMY_STRINGOP_ALGS
,
1547 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1548 {-1, libcall
, false}}}},
1549 1, /* scalar_stmt_cost. */
1550 1, /* scalar load_cost. */
1551 1, /* scalar_store_cost. */
1552 1, /* vec_stmt_cost. */
1553 1, /* vec_to_scalar_cost. */
1554 1, /* scalar_to_vec_cost. */
1555 1, /* vec_align_load_cost. */
1556 2, /* vec_unalign_load_cost. */
1557 1, /* vec_store_cost. */
1558 3, /* cond_taken_branch_cost. */
1559 1, /* cond_not_taken_branch_cost. */
1562 /* core_cost should produce code tuned for Core familly of CPUs. */
1564 struct processor_costs core_cost
= {
1565 COSTS_N_INSNS (1), /* cost of an add instruction */
1566 /* On all chips taken into consideration lea is 2 cycles and more. With
1567 this cost however our current implementation of synth_mult results in
1568 use of unnecessary temporary registers causing regression on several
1569 SPECfp benchmarks. */
1570 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1571 COSTS_N_INSNS (1), /* variable shift costs */
1572 COSTS_N_INSNS (1), /* constant shift costs */
1573 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1574 COSTS_N_INSNS (4), /* HI */
1575 COSTS_N_INSNS (3), /* SI */
1576 COSTS_N_INSNS (4), /* DI */
1577 COSTS_N_INSNS (2)}, /* other */
1578 0, /* cost of multiply per each bit set */
1579 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1580 COSTS_N_INSNS (26), /* HI */
1581 COSTS_N_INSNS (42), /* SI */
1582 COSTS_N_INSNS (74), /* DI */
1583 COSTS_N_INSNS (74)}, /* other */
1584 COSTS_N_INSNS (1), /* cost of movsx */
1585 COSTS_N_INSNS (1), /* cost of movzx */
1586 8, /* "large" insn */
1587 17, /* MOVE_RATIO */
1588 4, /* cost for loading QImode using movzbl */
1589 {4, 4, 4}, /* cost of loading integer registers
1590 in QImode, HImode and SImode.
1591 Relative to reg-reg move (2). */
1592 {4, 4, 4}, /* cost of storing integer registers */
1593 4, /* cost of reg,reg fld/fst */
1594 {12, 12, 12}, /* cost of loading fp registers
1595 in SFmode, DFmode and XFmode */
1596 {6, 6, 8}, /* cost of storing fp registers
1597 in SFmode, DFmode and XFmode */
1598 2, /* cost of moving MMX register */
1599 {8, 8}, /* cost of loading MMX registers
1600 in SImode and DImode */
1601 {8, 8}, /* cost of storing MMX registers
1602 in SImode and DImode */
1603 2, /* cost of moving SSE register */
1604 {8, 8, 8}, /* cost of loading SSE registers
1605 in SImode, DImode and TImode */
1606 {8, 8, 8}, /* cost of storing SSE registers
1607 in SImode, DImode and TImode */
1608 5, /* MMX or SSE register to integer */
1609 64, /* size of l1 cache. */
1610 512, /* size of l2 cache. */
1611 64, /* size of prefetch block */
1612 6, /* number of parallel prefetches */
1613 /* FIXME perhaps more appropriate value is 5. */
1614 3, /* Branch cost */
1615 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1616 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1617 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1618 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1619 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1620 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1621 {{libcall
, {{1024, rep_prefix_4_byte
, true}, {-1, libcall
, false}}},
1622 {libcall
, {{24, loop
, true}, {128, rep_prefix_8_byte
, true},
1623 {-1, libcall
, false}}}},
1624 {{libcall
, {{6, loop_1_byte
, true},
1626 {8192, rep_prefix_4_byte
, true},
1627 {-1, libcall
, false}}},
1628 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1629 {-1, libcall
, false}}}},
1630 1, /* scalar_stmt_cost. */
1631 1, /* scalar load_cost. */
1632 1, /* scalar_store_cost. */
1633 1, /* vec_stmt_cost. */
1634 1, /* vec_to_scalar_cost. */
1635 1, /* scalar_to_vec_cost. */
1636 1, /* vec_align_load_cost. */
1637 2, /* vec_unalign_load_cost. */
1638 1, /* vec_store_cost. */
1639 3, /* cond_taken_branch_cost. */
1640 1, /* cond_not_taken_branch_cost. */
1643 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1646 struct processor_costs generic32_cost
= {
1647 COSTS_N_INSNS (1), /* cost of an add instruction */
1648 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1649 COSTS_N_INSNS (1), /* variable shift costs */
1650 COSTS_N_INSNS (1), /* constant shift costs */
1651 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1652 COSTS_N_INSNS (4), /* HI */
1653 COSTS_N_INSNS (3), /* SI */
1654 COSTS_N_INSNS (4), /* DI */
1655 COSTS_N_INSNS (2)}, /* other */
1656 0, /* cost of multiply per each bit set */
1657 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1658 COSTS_N_INSNS (26), /* HI */
1659 COSTS_N_INSNS (42), /* SI */
1660 COSTS_N_INSNS (74), /* DI */
1661 COSTS_N_INSNS (74)}, /* other */
1662 COSTS_N_INSNS (1), /* cost of movsx */
1663 COSTS_N_INSNS (1), /* cost of movzx */
1664 8, /* "large" insn */
1665 17, /* MOVE_RATIO */
1666 4, /* cost for loading QImode using movzbl */
1667 {4, 4, 4}, /* cost of loading integer registers
1668 in QImode, HImode and SImode.
1669 Relative to reg-reg move (2). */
1670 {4, 4, 4}, /* cost of storing integer registers */
1671 4, /* cost of reg,reg fld/fst */
1672 {12, 12, 12}, /* cost of loading fp registers
1673 in SFmode, DFmode and XFmode */
1674 {6, 6, 8}, /* cost of storing fp registers
1675 in SFmode, DFmode and XFmode */
1676 2, /* cost of moving MMX register */
1677 {8, 8}, /* cost of loading MMX registers
1678 in SImode and DImode */
1679 {8, 8}, /* cost of storing MMX registers
1680 in SImode and DImode */
1681 2, /* cost of moving SSE register */
1682 {8, 8, 8}, /* cost of loading SSE registers
1683 in SImode, DImode and TImode */
1684 {8, 8, 8}, /* cost of storing SSE registers
1685 in SImode, DImode and TImode */
1686 5, /* MMX or SSE register to integer */
1687 32, /* size of l1 cache. */
1688 256, /* size of l2 cache. */
1689 64, /* size of prefetch block */
1690 6, /* number of parallel prefetches */
1691 3, /* Branch cost */
1692 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1693 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1694 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1695 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1696 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1697 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1698 {{libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1699 {-1, libcall
, false}}},
1700 DUMMY_STRINGOP_ALGS
},
1701 {{libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1702 {-1, libcall
, false}}},
1703 DUMMY_STRINGOP_ALGS
},
1704 1, /* scalar_stmt_cost. */
1705 1, /* scalar load_cost. */
1706 1, /* scalar_store_cost. */
1707 1, /* vec_stmt_cost. */
1708 1, /* vec_to_scalar_cost. */
1709 1, /* scalar_to_vec_cost. */
1710 1, /* vec_align_load_cost. */
1711 2, /* vec_unalign_load_cost. */
1712 1, /* vec_store_cost. */
1713 3, /* cond_taken_branch_cost. */
1714 1, /* cond_not_taken_branch_cost. */
1717 /* Set by -mtune. */
1718 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1720 /* Set by -mtune or -Os. */
1721 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_COREI7 (1<<PROCESSOR_COREI7)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1760 /* Feature tests against the various tunings. */
1761 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1763 /* Feature tests against the various tunings used to create ix86_tune_features
1764 based on the processor mask. */
1765 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1766 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1767 negatively, so enabling for Generic64 seems like good code size
1768 tradeoff. We can't enable it for 32bit generic because it does not
1769 work well with PPro base chips. */
1770 m_386
| m_CORE_ALL
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
1772 /* X86_TUNE_PUSH_MEMORY */
1773 m_386
| m_P4_NOCONA
| m_CORE_ALL
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1775 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1778 /* X86_TUNE_UNROLL_STRLEN */
1779 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE_ALL
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
1781 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1782 on simulation result. But after P4 was made, no performance benefit
1783 was observed with branch hints. It also increases the code size.
1784 As a result, icc never generates branch hints. */
1787 /* X86_TUNE_DOUBLE_WITH_ADD */
1790 /* X86_TUNE_USE_SAHF */
1791 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC
,
1793 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1794 partial dependencies. */
1795 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1797 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1798 register stalls on Generic32 compilation setting as well. However
1799 in current implementation the partial register stalls are not eliminated
1800 very well - they can be introduced via subregs synthesized by combine
1801 and can happen in caller/callee saving sequences. Because this option
1802 pays back little on PPro based chips and is in conflict with partial reg
1803 dependencies used by Athlon/P4 based chips, it is better to leave it off
1804 for generic32 for now. */
1807 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1808 m_CORE_ALL
| m_GENERIC
,
1810 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1811 * on 16-bit immediate moves into memory on Core2 and Corei7. */
1812 m_CORE_ALL
| m_GENERIC
,
1814 /* X86_TUNE_USE_HIMODE_FIOP */
1815 m_386
| m_486
| m_K6_GEODE
,
1817 /* X86_TUNE_USE_SIMODE_FIOP */
1818 ~(m_PENT
| m_PPRO
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
1820 /* X86_TUNE_USE_MOV0 */
1823 /* X86_TUNE_USE_CLTD */
1824 ~(m_PENT
| m_ATOM
| m_K6
),
1826 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1829 /* X86_TUNE_SPLIT_LONG_MOVES */
1832 /* X86_TUNE_READ_MODIFY_WRITE */
1835 /* X86_TUNE_READ_MODIFY */
1838 /* X86_TUNE_PROMOTE_QIMODE */
1839 m_386
| m_486
| m_PENT
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1841 /* X86_TUNE_FAST_PREFIX */
1842 ~(m_386
| m_486
| m_PENT
),
1844 /* X86_TUNE_SINGLE_STRINGOP */
1845 m_386
| m_P4_NOCONA
,
1847 /* X86_TUNE_QIMODE_MATH */
1850 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1851 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1852 might be considered for Generic32 if our scheme for avoiding partial
1853 stalls was more effective. */
1856 /* X86_TUNE_PROMOTE_QI_REGS */
1859 /* X86_TUNE_PROMOTE_HI_REGS */
1862 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1863 over esp addition. */
1864 m_386
| m_486
| m_PENT
| m_PPRO
,
1866 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1867 over esp addition. */
1870 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1871 over esp subtraction. */
1872 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
1874 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1875 over esp subtraction. */
1876 m_PENT
| m_K6_GEODE
,
1878 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1879 for DFmode copies */
1880 ~(m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
1882 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1883 m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1885 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1886 conflict here in between PPro/Pentium4 based chips that thread 128bit
1887 SSE registers as single units versus K8 based chips that divide SSE
1888 registers to two 64bit halves. This knob promotes all store destinations
1889 to be 128bit to allow register renaming on 128bit SSE units, but usually
1890 results in one extra microop on 64bit SSE units. Experimental results
1891 shows that disabling this option on P4 brings over 20% SPECfp regression,
1892 while enabling it on K8 brings roughly 2.4% regression that can be partly
1893 masked by careful scheduling of moves. */
1894 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
1896 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1897 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER
,
1899 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1902 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1905 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1906 are resolved on SSE register parts instead of whole registers, so we may
1907 maintain just lower part of scalar values in proper format leaving the
1908 upper part undefined. */
1911 /* X86_TUNE_SSE_TYPELESS_STORES */
1914 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1915 m_PPRO
| m_P4_NOCONA
,
1917 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1918 m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1920 /* X86_TUNE_PROLOGUE_USING_MOVE */
1921 m_PPRO
| m_ATHLON_K8
,
1923 /* X86_TUNE_EPILOGUE_USING_MOVE */
1924 m_PPRO
| m_ATHLON_K8
,
1926 /* X86_TUNE_SHIFT1 */
1929 /* X86_TUNE_USE_FFREEP */
1932 /* X86_TUNE_INTER_UNIT_MOVES */
1933 ~(m_AMD_MULTIPLE
| m_GENERIC
),
1935 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1936 ~(m_AMDFAM10
| m_BDVER
),
1938 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1939 than 4 branch instructions in the 16 byte window. */
1940 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1942 /* X86_TUNE_SCHEDULE */
1943 m_PENT
| m_PPRO
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1945 /* X86_TUNE_USE_BT */
1946 m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1948 /* X86_TUNE_USE_INCDEC */
1949 ~(m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_GENERIC
),
1951 /* X86_TUNE_PAD_RETURNS */
1952 m_CORE_ALL
| m_AMD_MULTIPLE
| m_GENERIC
,
1954 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
1957 /* X86_TUNE_EXT_80387_CONSTANTS */
1958 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
1960 /* X86_TUNE_AVOID_VECTOR_DECODE */
1961 m_CORE_ALL
| m_K8
| m_GENERIC64
,
1963 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1964 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1967 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1968 vector path on AMD machines. */
1969 m_CORE_ALL
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1971 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1973 m_CORE_ALL
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1975 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1979 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1980 but one byte longer. */
1983 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1984 operand that cannot be represented using a modRM byte. The XOR
1985 replacement is long decoded, so this split helps here as well. */
1988 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1990 m_CORE_ALL
| m_AMDFAM10
| m_GENERIC
,
1992 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1993 from integer to FP. */
1996 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1997 with a subsequent conditional jump instruction into a single
1998 compare-and-branch uop. */
2001 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2002 will impact LEA instruction selection. */
2005 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2009 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2010 at -O3. For the moment, the prefetching seems badly tuned for Intel
2012 m_K6_GEODE
| m_AMD_MULTIPLE
,
2014 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2015 the auto-vectorizer. */
2018 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2019 during reassociation of integer computation. */
2022 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2023 during reassociation of fp computation. */
2026 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2027 regs instead of memory. */
2030 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
2031 a conditional move. */
2035 /* Feature tests against the various architecture variations. */
2036 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
/* NOTE(review): this listing is incomplete -- the initializer values for the
   X86_ARCH_CMPXCHG, X86_ARCH_CMPXCHG8B, X86_ARCH_XADD and X86_ARCH_BSWAP
   entries, and the closing "};", appear to have been dropped.  Restore them
   from the upstream source before compiling.  */
2038 /* Feature tests against the various architecture variations, used to create
2039 ix86_arch_features based on the processor mask. */
2040 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2041 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2042 ~(m_386
| m_486
| m_PENT
| m_K6
),
2044 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2047 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2050 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2053 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2057 static const unsigned int x86_accumulate_outgoing_args
2058 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE_ALL
| m_AMD_MULTIPLE
| m_GENERIC
;
2060 static const unsigned int x86_arch_always_fancy_math_387
2061 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
2063 static const unsigned int x86_avx256_split_unaligned_load
2064 = m_COREI7
| m_GENERIC
;
2066 static const unsigned int x86_avx256_split_unaligned_store
2067 = m_COREI7
| m_BDVER
| m_GENERIC
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
2074 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2075 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2076 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2077 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2079 /* Array of the smallest class containing reg number REGNO, indexed by
2080 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2082 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2084 /* ax, dx, cx, bx */
2085 AREG
, DREG
, CREG
, BREG
,
2086 /* si, di, bp, sp */
2087 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2089 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2090 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2093 /* flags, fpsr, fpcr, frame */
2094 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2096 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2099 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2102 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2103 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2104 /* SSE REX registers */
2105 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2109 /* The "default" register map used in 32bit mode. */
2111 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2113 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2114 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2115 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2116 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2117 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2118 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2119 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2122 /* The "default" register map used in 64bit mode. */
2124 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2126 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2127 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2128 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2129 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2130 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2131 8,9,10,11,12,13,14,15, /* extended integer registers */
2132 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2135 /* Define the register numbers to be used in Dwarf debugging information.
2136 The SVR4 reference port C compiler uses the following register numbers
2137 in its Dwarf output code:
2138 0 for %eax (gcc regno = 0)
2139 1 for %ecx (gcc regno = 2)
2140 2 for %edx (gcc regno = 1)
2141 3 for %ebx (gcc regno = 3)
2142 4 for %esp (gcc regno = 7)
2143 5 for %ebp (gcc regno = 6)
2144 6 for %esi (gcc regno = 4)
2145 7 for %edi (gcc regno = 5)
2146 The following three DWARF register numbers are never generated by
2147 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2148 believes these numbers have these meanings.
2149 8 for %eip (no gcc equivalent)
2150 9 for %eflags (gcc regno = 17)
2151 10 for %trapno (no gcc equivalent)
2152 It is not at all clear how we should number the FP stack registers
2153 for the x86 architecture. If the version of SDB on x86/svr4 were
2154 a bit less brain dead with respect to floating-point then we would
2155 have a precedent to follow with respect to DWARF register numbers
2156 for x86 FP registers, but the SDB on x86/svr4 is so completely
2157 broken with respect to FP registers that it is hardly worth thinking
2158 of it as something to strive for compatibility with.
2159 The version of x86/svr4 SDB I have at the moment does (partially)
2160 seem to believe that DWARF register number 11 is associated with
2161 the x86 register %st(0), but that's about all. Higher DWARF
2162 register numbers don't seem to be associated with anything in
2163 particular, and even for DWARF regno 11, SDB only seems to under-
2164 stand that it should say that a variable lives in %st(0) (when
2165 asked via an `=' command) if we said it was in DWARF regno 11,
2166 but SDB still prints garbage when asked for the value of the
2167 variable in question (via a `/' command).
2168 (Also note that the labels SDB prints for various FP stack regs
2169 when doing an `x' command are all wrong.)
2170 Note that these problems generally don't affect the native SVR4
2171 C compiler because it doesn't allow the use of -O with -g and
2172 because when it is *not* optimizing, it allocates a memory
2173 location for each floating-point variable, and the memory
2174 location is what gets described in the DWARF AT_location
2175 attribute for the variable in question.
2176 Regardless of the severe mental illness of the x86/svr4 SDB, we
2177 do something sensible here and we use the following DWARF
2178 register numbers. Note that these are all stack-top-relative numbers.
2180 11 for %st(0) (gcc regno = 8)
2181 12 for %st(1) (gcc regno = 9)
2182 13 for %st(2) (gcc regno = 10)
2183 14 for %st(3) (gcc regno = 11)
2184 15 for %st(4) (gcc regno = 12)
2185 16 for %st(5) (gcc regno = 13)
2186 17 for %st(6) (gcc regno = 14)
2187 18 for %st(7) (gcc regno = 15)
2189 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2191 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2192 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2193 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2194 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2195 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2196 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2197 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2200 /* Define parameter passing and return registers. */
2202 static int const x86_64_int_parameter_registers
[6] =
2204 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2207 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2209 CX_REG
, DX_REG
, R8_REG
, R9_REG
2212 static int const x86_64_int_return_registers
[4] =
2214 AX_REG
, DX_REG
, DI_REG
, SI_REG
/* NOTE(review): this struct is incomplete in this listing -- at least one
   field between "mode" and "next" (original lines 2221-2222) and the closing
   "};" were dropped.  Recover them from the upstream source.  */
2217 /* Define the structure for the machine field in struct function. */
2219 struct GTY(()) stack_local_entry
{
2220 unsigned short mode
;
2223 struct stack_local_entry
*next
;
/* NOTE(review): this span is a fragment -- the "struct ix86_frame {" header,
   several leading fields (register counts, padding sizes, etc., per the
   original line-number gaps), the middle of the layout diagram, and the
   closing "};" were dropped from this listing.  Only the trailing offset
   fields survive below; recover the rest from the upstream source.  */
2226 /* Structure describing stack frame layout.
2227 Stack grows downward:
2233 saved static chain if ix86_static_chain_on_stack
2235 saved frame pointer if frame_pointer_needed
2236 <- HARD_FRAME_POINTER
2242 <- sse_regs_save_offset
2245 [va_arg registers] |
2249 [padding2] | = to_allocate
2258 int outgoing_arguments_size
;
2260 /* The offsets relative to ARG_POINTER. */
2261 HOST_WIDE_INT frame_pointer_offset
;
2262 HOST_WIDE_INT hard_frame_pointer_offset
;
2263 HOST_WIDE_INT stack_pointer_offset
;
2264 HOST_WIDE_INT hfp_save_offset
;
2265 HOST_WIDE_INT reg_save_offset
;
2266 HOST_WIDE_INT sse_reg_save_offset
;
2268 /* When save_regs_using_mov is set, emit prologue using
2269 move instead of push instructions. */
2270 bool save_regs_using_mov
;
2273 /* Which cpu are we scheduling for. */
2274 enum attr_cpu ix86_schedule
;
2276 /* Which cpu are we optimizing for. */
2277 enum processor_type ix86_tune
;
2279 /* Which instruction set architecture to use. */
2280 enum processor_type ix86_arch
;
2282 /* True if processor has SSE prefetch instruction. */
2283 unsigned char x86_prefetch_sse
;
2285 /* -mstackrealign option */
2286 static const char ix86_force_align_arg_pointer_string
[]
2287 = "force_align_arg_pointer";
2289 static rtx (*ix86_gen_leave
) (void);
2290 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2291 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2292 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2293 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2294 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2295 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2296 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2297 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2298 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2299 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2300 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2302 /* Preferred alignment for stack boundary in bits. */
2303 unsigned int ix86_preferred_stack_boundary
;
2305 /* Alignment for incoming stack boundary in bits specified at
2307 static unsigned int ix86_user_incoming_stack_boundary
;
2309 /* Default alignment for incoming stack boundary in bits. */
2310 static unsigned int ix86_default_incoming_stack_boundary
;
2312 /* Alignment for incoming stack boundary in bits. */
2313 unsigned int ix86_incoming_stack_boundary
;
2315 /* Calling abi specific va_list type nodes. */
2316 static GTY(()) tree sysv_va_list_type_node
;
2317 static GTY(()) tree ms_va_list_type_node
;
2319 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2320 char internal_label_prefix
[16];
2321 int internal_label_prefix_len
;
2323 /* Fence to use after loop using movnt. */
/* NOTE(review): the x86_64_reg_class enum below is incomplete in this
   listing -- most enumerators (per the gaps in the original line numbers,
   e.g. 2334-2335, 2338-2343, 2345-2347) and the enum's braces were dropped.
   Recover the full enumerator list from the upstream source.  */
2326 /* Register class used for passing given 64bit part of the argument.
2327 These represent classes as documented by the PS ABI, with the exception
2328 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2329 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2331 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2332 whenever possible (upper half does contain padding). */
2333 enum x86_64_reg_class
2336 X86_64_INTEGER_CLASS
,
2337 X86_64_INTEGERSI_CLASS
,
2344 X86_64_COMPLEX_X87_CLASS
,
2348 #define MAX_CLASSES 4
2350 /* Table of constants used by fldpi, fldln2, etc.... */
2351 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2352 static bool ext_80387_constants_init
= 0;
2355 static struct machine_function
* ix86_init_machine_status (void);
2356 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2357 static bool ix86_function_value_regno_p (const unsigned int);
2358 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2360 static rtx
ix86_static_chain (const_tree
, bool);
2361 static int ix86_function_regparm (const_tree
, const_tree
);
2362 static void ix86_compute_frame_layout (struct ix86_frame
*);
2363 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2365 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2366 static tree
ix86_canonical_va_list_type (tree
);
2367 static void predict_jump (int);
2368 static unsigned int split_stack_prologue_scratch_regno (void);
2369 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2371 enum ix86_function_specific_strings
2373 IX86_FUNCTION_SPECIFIC_ARCH
,
2374 IX86_FUNCTION_SPECIFIC_TUNE
,
2375 IX86_FUNCTION_SPECIFIC_MAX
2378 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2379 const char *, enum fpmath_unit
, bool);
2380 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2381 static void ix86_function_specific_save (struct cl_target_option
*);
2382 static void ix86_function_specific_restore (struct cl_target_option
*);
2383 static void ix86_function_specific_print (FILE *, int,
2384 struct cl_target_option
*);
2385 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2386 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2387 struct gcc_options
*);
2388 static bool ix86_can_inline_p (tree
, tree
);
2389 static void ix86_set_current_function (tree
);
2390 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2392 static enum calling_abi
ix86_function_abi (const_tree
);
2395 #ifndef SUBTARGET32_DEFAULT_CPU
2396 #define SUBTARGET32_DEFAULT_CPU "i386"
2399 /* Whether -mtune= or -march= were specified */
2400 static int ix86_tune_defaulted
;
2401 static int ix86_arch_specified
;
2403 /* Vectorization library interface and handlers. */
2404 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2406 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2407 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2409 /* Processor target table, indexed by processor number */
2412 const struct processor_costs
*cost
; /* Processor costs */
2413 const int align_loop
; /* Default alignments. */
2414 const int align_loop_max_skip
;
2415 const int align_jump
;
2416 const int align_jump_max_skip
;
2417 const int align_func
;
2420 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2422 {&i386_cost
, 4, 3, 4, 3, 4},
2423 {&i486_cost
, 16, 15, 16, 15, 16},
2424 {&pentium_cost
, 16, 7, 16, 7, 16},
2425 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2426 {&geode_cost
, 0, 0, 0, 0, 0},
2427 {&k6_cost
, 32, 7, 32, 7, 32},
2428 {&athlon_cost
, 16, 7, 16, 7, 16},
2429 {&pentium4_cost
, 0, 0, 0, 0, 0},
2430 {&k8_cost
, 16, 7, 16, 7, 16},
2431 {&nocona_cost
, 0, 0, 0, 0, 0},
2433 {&core_cost
, 16, 10, 16, 10, 16},
2435 {&core_cost
, 16, 10, 16, 10, 16},
2437 {&core_cost
, 16, 10, 16, 10, 16},
2438 {&generic32_cost
, 16, 7, 16, 7, 16},
2439 {&generic64_cost
, 16, 10, 16, 10, 16},
2440 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2441 {&bdver1_cost
, 32, 24, 32, 7, 32},
2442 {&bdver2_cost
, 32, 24, 32, 7, 32},
2443 {&bdver3_cost
, 32, 24, 32, 7, 32},
2444 {&btver1_cost
, 32, 24, 32, 7, 32},
2445 {&btver2_cost
, 32, 24, 32, 7, 32},
2446 {&atom_cost
, 16, 15, 16, 7, 16}
2449 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2483 gate_insert_vzeroupper (void)
2485 return TARGET_VZEROUPPER
;
2489 rest_of_handle_insert_vzeroupper (void)
2493 /* vzeroupper instructions are inserted immediately after reload to
2494 account for possible spills from 256bit registers. The pass
2495 reuses mode switching infrastructure by re-running mode insertion
2496 pass, so disable entities that have already been processed. */
2497 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2498 ix86_optimize_mode_switching
[i
] = 0;
2500 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2502 /* Call optimize_mode_switching. */
2503 pass_mode_switching
.pass
.execute ();
/* NOTE(review): this pass-descriptor initializer is incomplete in this
   listing -- the opening "{ RTL_PASS," of the inner struct, the sub/next/
   static-pass fields around it (original lines 2508-2510, 2515-2516), and
   the closing braces were dropped.  Recover them from the upstream source.  */
2507 struct rtl_opt_pass pass_insert_vzeroupper
=
2511 "vzeroupper", /* name */
2512 OPTGROUP_NONE
, /* optinfo_flags */
2513 gate_insert_vzeroupper
, /* gate */
2514 rest_of_handle_insert_vzeroupper
, /* execute */
2517 0, /* static_pass_number */
2518 TV_NONE
, /* tv_id */
2519 0, /* properties_required */
2520 0, /* properties_provided */
2521 0, /* properties_destroyed */
2522 0, /* todo_flags_start */
2523 TODO_df_finish
| TODO_verify_rtl_sharing
|
2524 0, /* todo_flags_finish */
2528 /* Return true if a red-zone is in use. */
2531 ix86_using_red_zone (void)
2533 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2536 /* Return a string that documents the current -m options. The caller is
2537 responsible for freeing the string. */
2540 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2541 const char *tune
, enum fpmath_unit fpmath
,
2544 struct ix86_target_opts
2546 const char *option
; /* option string */
2547 HOST_WIDE_INT mask
; /* isa mask options */
2550 /* This table is ordered so that options like -msse4.2 that imply
2551 preceding options while match those first. */
2552 static struct ix86_target_opts isa_opts
[] =
2554 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2555 { "-mfma", OPTION_MASK_ISA_FMA
},
2556 { "-mxop", OPTION_MASK_ISA_XOP
},
2557 { "-mlwp", OPTION_MASK_ISA_LWP
},
2558 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2559 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2560 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2561 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2562 { "-msse3", OPTION_MASK_ISA_SSE3
},
2563 { "-msse2", OPTION_MASK_ISA_SSE2
},
2564 { "-msse", OPTION_MASK_ISA_SSE
},
2565 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2566 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2567 { "-mmmx", OPTION_MASK_ISA_MMX
},
2568 { "-mabm", OPTION_MASK_ISA_ABM
},
2569 { "-mbmi", OPTION_MASK_ISA_BMI
},
2570 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2571 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2572 { "-mhle", OPTION_MASK_ISA_HLE
},
2573 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2574 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2575 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2576 { "-madx", OPTION_MASK_ISA_ADX
},
2577 { "-mtbm", OPTION_MASK_ISA_TBM
},
2578 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2579 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2580 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2581 { "-maes", OPTION_MASK_ISA_AES
},
2582 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2583 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2584 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2585 { "-mf16c", OPTION_MASK_ISA_F16C
},
2586 { "-mrtm", OPTION_MASK_ISA_RTM
},
2587 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2588 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2592 static struct ix86_target_opts flag_opts
[] =
2594 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2595 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2596 { "-m80387", MASK_80387
},
2597 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2598 { "-malign-double", MASK_ALIGN_DOUBLE
},
2599 { "-mcld", MASK_CLD
},
2600 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2601 { "-mieee-fp", MASK_IEEE_FP
},
2602 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2603 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2604 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2605 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2606 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2607 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2608 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2609 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2610 { "-mrecip", MASK_RECIP
},
2611 { "-mrtd", MASK_RTD
},
2612 { "-msseregparm", MASK_SSEREGPARM
},
2613 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2614 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2615 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2616 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2617 { "-mvzeroupper", MASK_VZEROUPPER
},
2618 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2619 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2620 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2623 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2626 char target_other
[40];
2636 memset (opts
, '\0', sizeof (opts
));
2638 /* Add -march= option. */
2641 opts
[num
][0] = "-march=";
2642 opts
[num
++][1] = arch
;
2645 /* Add -mtune= option. */
2648 opts
[num
][0] = "-mtune=";
2649 opts
[num
++][1] = tune
;
2652 /* Add -m32/-m64/-mx32. */
2653 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2655 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2659 isa
&= ~ (OPTION_MASK_ISA_64BIT
2660 | OPTION_MASK_ABI_64
2661 | OPTION_MASK_ABI_X32
);
2665 opts
[num
++][0] = abi
;
2667 /* Pick out the options in isa options. */
2668 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2670 if ((isa
& isa_opts
[i
].mask
) != 0)
2672 opts
[num
++][0] = isa_opts
[i
].option
;
2673 isa
&= ~ isa_opts
[i
].mask
;
2677 if (isa
&& add_nl_p
)
2679 opts
[num
++][0] = isa_other
;
2680 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2684 /* Add flag options. */
2685 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2687 if ((flags
& flag_opts
[i
].mask
) != 0)
2689 opts
[num
++][0] = flag_opts
[i
].option
;
2690 flags
&= ~ flag_opts
[i
].mask
;
2694 if (flags
&& add_nl_p
)
2696 opts
[num
++][0] = target_other
;
2697 sprintf (target_other
, "(other flags: %#x)", flags
);
2700 /* Add -fpmath= option. */
2703 opts
[num
][0] = "-mfpmath=";
2704 switch ((int) fpmath
)
2707 opts
[num
++][1] = "387";
2711 opts
[num
++][1] = "sse";
2714 case FPMATH_387
| FPMATH_SSE
:
2715 opts
[num
++][1] = "sse+387";
2727 gcc_assert (num
< ARRAY_SIZE (opts
));
2729 /* Size the string. */
2731 sep_len
= (add_nl_p
) ? 3 : 1;
2732 for (i
= 0; i
< num
; i
++)
2735 for (j
= 0; j
< 2; j
++)
2737 len
+= strlen (opts
[i
][j
]);
2740 /* Build the string. */
2741 ret
= ptr
= (char *) xmalloc (len
);
2744 for (i
= 0; i
< num
; i
++)
2748 for (j
= 0; j
< 2; j
++)
2749 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2756 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2764 for (j
= 0; j
< 2; j
++)
2767 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2769 line_len
+= len2
[j
];
2774 gcc_assert (ret
+ len
>= ptr
);
2779 /* Return true, if profiling code should be emitted before
2780 prologue. Otherwise it returns false.
2781 Note: For x86 with "hotfix" it is sorried. */
2783 ix86_profile_before_prologue (void)
2785 return flag_fentry
!= 0;
2788 /* Function that is callable from the debugger to print the current
2791 ix86_debug_options (void)
2793 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2794 ix86_arch_string
, ix86_tune_string
,
2799 fprintf (stderr
, "%s\n\n", opts
);
2803 fputs ("<no options>\n\n", stderr
);
2808 /* Override various settings based on options. If MAIN_ARGS_P, the
2809 options are from the command line, otherwise they are from
2813 ix86_option_override_internal (bool main_args_p
)
2816 unsigned int ix86_arch_mask
, ix86_tune_mask
;
2817 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
2822 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2823 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2824 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2825 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2826 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2827 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2828 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2829 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2830 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2831 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2832 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2833 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2834 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2835 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2836 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2837 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2838 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2839 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2840 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2841 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2842 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2843 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2844 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2845 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2846 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2847 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2848 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2849 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2850 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2851 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2852 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2853 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2854 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2855 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2856 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2857 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2858 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2859 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2860 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2861 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2863 /* if this reaches 64, need to widen struct pta flags below */
2867 const char *const name
; /* processor name or nickname. */
2868 const enum processor_type processor
;
2869 const enum attr_cpu schedule
;
2870 const unsigned HOST_WIDE_INT flags
;
2872 const processor_alias_table
[] =
2874 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2875 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2876 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2877 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2878 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2879 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2880 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2881 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2882 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2883 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2884 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2885 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
2886 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2887 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2888 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2889 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2890 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2891 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2892 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2893 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2894 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2895 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2896 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2897 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
2898 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2899 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2900 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
2901 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
2902 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2903 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
2904 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
2905 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2906 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_FXSR
},
2907 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
2908 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2909 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2910 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
2911 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2912 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
2913 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2914 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2915 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2916 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2917 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
2918 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2919 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2920 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2921 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2922 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
2924 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2925 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2926 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
2927 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2928 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2929 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2930 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2931 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2932 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
2933 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2934 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
2935 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2936 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
2937 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2938 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2939 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2940 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2941 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2942 {"x86-64", PROCESSOR_K8
, CPU_K8
,
2943 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
2944 {"k8", PROCESSOR_K8
, CPU_K8
,
2945 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2946 | PTA_SSE2
| PTA_NO_SAHF
},
2947 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
2948 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2949 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2950 {"opteron", PROCESSOR_K8
, CPU_K8
,
2951 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2952 | PTA_SSE2
| PTA_NO_SAHF
},
2953 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
2954 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2955 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2956 {"athlon64", PROCESSOR_K8
, CPU_K8
,
2957 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2958 | PTA_SSE2
| PTA_NO_SAHF
},
2959 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
2960 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2961 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2962 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
2963 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2964 | PTA_SSE2
| PTA_NO_SAHF
},
2965 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2966 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2967 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2968 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2969 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2970 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2971 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
2972 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2973 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2974 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2975 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2976 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
2977 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2978 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2979 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2980 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2981 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2982 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
2983 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2984 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2985 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2986 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2987 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
2989 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
2990 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2991 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
2992 | PTA_FXSR
| PTA_XSAVE
},
2993 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
2994 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2995 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
2996 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2997 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
2998 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3000 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3001 PTA_HLE
/* flags are only used for -march switch. */ },
3002 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3004 | PTA_HLE
/* flags are only used for -march switch. */ },
3007 /* -mrecip options. */
3010 const char *string
; /* option name */
3011 unsigned int mask
; /* mask bits to set */
3013 const recip_options
[] =
3015 { "all", RECIP_MASK_ALL
},
3016 { "none", RECIP_MASK_NONE
},
3017 { "div", RECIP_MASK_DIV
},
3018 { "sqrt", RECIP_MASK_SQRT
},
3019 { "vec-div", RECIP_MASK_VEC_DIV
},
3020 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3023 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3025 /* Set up prefix/suffix so the error messages refer to either the command
3026 line argument, or the attribute(target). */
3035 prefix
= "option(\"";
3040 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3041 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3042 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3043 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3044 #ifdef TARGET_BI_ARCH
3047 #if TARGET_BI_ARCH == 1
3048 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3049 is on and OPTION_MASK_ABI_X32 is off. We turn off
3050 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3053 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3055 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3056 on and OPTION_MASK_ABI_64 is off. We turn off
3057 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3060 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3067 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3068 OPTION_MASK_ABI_64 for TARGET_X32. */
3069 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3070 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3072 else if (TARGET_LP64
)
3074 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3075 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3076 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3077 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3080 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3081 SUBTARGET_OVERRIDE_OPTIONS
;
3084 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3085 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3088 /* -fPIC is the default for x86_64. */
3089 if (TARGET_MACHO
&& TARGET_64BIT
)
3092 /* Need to check -mtune=generic first. */
3093 if (ix86_tune_string
)
3095 if (!strcmp (ix86_tune_string
, "generic")
3096 || !strcmp (ix86_tune_string
, "i686")
3097 /* As special support for cross compilers we read -mtune=native
3098 as -mtune=generic. With native compilers we won't see the
3099 -mtune=native, as it was changed by the driver. */
3100 || !strcmp (ix86_tune_string
, "native"))
3103 ix86_tune_string
= "generic64";
3105 ix86_tune_string
= "generic32";
3107 /* If this call is for setting the option attribute, allow the
3108 generic32/generic64 that was previously set. */
3109 else if (!main_args_p
3110 && (!strcmp (ix86_tune_string
, "generic32")
3111 || !strcmp (ix86_tune_string
, "generic64")))
3113 else if (!strncmp (ix86_tune_string
, "generic", 7))
3114 error ("bad value (%s) for %stune=%s %s",
3115 ix86_tune_string
, prefix
, suffix
, sw
);
3116 else if (!strcmp (ix86_tune_string
, "x86-64"))
3117 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3118 "%stune=k8%s or %stune=generic%s instead as appropriate",
3119 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3123 if (ix86_arch_string
)
3124 ix86_tune_string
= ix86_arch_string
;
3125 if (!ix86_tune_string
)
3127 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3128 ix86_tune_defaulted
= 1;
3131 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3132 need to use a sensible tune option. */
3133 if (!strcmp (ix86_tune_string
, "generic")
3134 || !strcmp (ix86_tune_string
, "x86-64")
3135 || !strcmp (ix86_tune_string
, "i686"))
3138 ix86_tune_string
= "generic64";
3140 ix86_tune_string
= "generic32";
3144 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3146 /* rep; movq isn't available in 32-bit code. */
3147 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3148 ix86_stringop_alg
= no_stringop
;
3151 if (!ix86_arch_string
)
3152 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3154 ix86_arch_specified
= 1;
3156 if (global_options_set
.x_ix86_pmode
)
3158 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3159 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3160 error ("address mode %qs not supported in the %s bit mode",
3161 TARGET_64BIT
? "short" : "long",
3162 TARGET_64BIT
? "64" : "32");
3165 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3167 if (!global_options_set
.x_ix86_abi
)
3168 ix86_abi
= DEFAULT_ABI
;
3170 if (global_options_set
.x_ix86_cmodel
)
3172 switch (ix86_cmodel
)
3177 ix86_cmodel
= CM_SMALL_PIC
;
3179 error ("code model %qs not supported in the %s bit mode",
3186 ix86_cmodel
= CM_MEDIUM_PIC
;
3188 error ("code model %qs not supported in the %s bit mode",
3190 else if (TARGET_X32
)
3191 error ("code model %qs not supported in x32 mode",
3198 ix86_cmodel
= CM_LARGE_PIC
;
3200 error ("code model %qs not supported in the %s bit mode",
3202 else if (TARGET_X32
)
3203 error ("code model %qs not supported in x32 mode",
3209 error ("code model %s does not support PIC mode", "32");
3211 error ("code model %qs not supported in the %s bit mode",
3218 error ("code model %s does not support PIC mode", "kernel");
3219 ix86_cmodel
= CM_32
;
3222 error ("code model %qs not supported in the %s bit mode",
3232 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3233 use of rip-relative addressing. This eliminates fixups that
3234 would otherwise be needed if this object is to be placed in a
3235 DLL, and is essentially just as efficient as direct addressing. */
3236 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3237 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3238 else if (TARGET_64BIT
&& TARGET_RDOS
)
3239 ix86_cmodel
= CM_MEDIUM_PIC
, flag_pic
= 1;
3240 else if (TARGET_64BIT
)
3241 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3243 ix86_cmodel
= CM_32
;
3245 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3247 error ("-masm=intel not supported in this configuration");
3248 ix86_asm_dialect
= ASM_ATT
;
3250 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3251 sorry ("%i-bit mode not compiled in",
3252 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3254 for (i
= 0; i
< pta_size
; i
++)
3255 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3257 ix86_schedule
= processor_alias_table
[i
].schedule
;
3258 ix86_arch
= processor_alias_table
[i
].processor
;
3259 /* Default cpu tuning to the architecture. */
3260 ix86_tune
= ix86_arch
;
3262 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3263 error ("CPU you selected does not support x86-64 "
3266 if (processor_alias_table
[i
].flags
& PTA_MMX
3267 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3268 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3269 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3270 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3271 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3272 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3273 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3274 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3275 if (processor_alias_table
[i
].flags
& PTA_SSE
3276 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3277 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3278 if (processor_alias_table
[i
].flags
& PTA_SSE2
3279 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3280 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3281 if (processor_alias_table
[i
].flags
& PTA_SSE3
3282 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3283 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3284 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3285 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3286 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3287 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3288 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3289 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3290 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3291 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3292 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3293 if (processor_alias_table
[i
].flags
& PTA_AVX
3294 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3295 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3296 if (processor_alias_table
[i
].flags
& PTA_AVX2
3297 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3298 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3299 if (processor_alias_table
[i
].flags
& PTA_FMA
3300 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3301 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3302 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3303 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3304 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3305 if (processor_alias_table
[i
].flags
& PTA_FMA4
3306 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3307 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3308 if (processor_alias_table
[i
].flags
& PTA_XOP
3309 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3310 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3311 if (processor_alias_table
[i
].flags
& PTA_LWP
3312 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3313 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3314 if (processor_alias_table
[i
].flags
& PTA_ABM
3315 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3316 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3317 if (processor_alias_table
[i
].flags
& PTA_BMI
3318 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3319 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3320 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3321 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3322 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3323 if (processor_alias_table
[i
].flags
& PTA_TBM
3324 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3325 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3326 if (processor_alias_table
[i
].flags
& PTA_BMI2
3327 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3328 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3329 if (processor_alias_table
[i
].flags
& PTA_CX16
3330 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3331 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3332 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3333 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3334 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3335 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3336 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3337 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3338 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3339 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3340 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3341 if (processor_alias_table
[i
].flags
& PTA_AES
3342 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3343 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3344 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3345 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3346 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3347 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3348 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3349 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3350 if (processor_alias_table
[i
].flags
& PTA_RDRND
3351 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3352 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3353 if (processor_alias_table
[i
].flags
& PTA_F16C
3354 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3355 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3356 if (processor_alias_table
[i
].flags
& PTA_RTM
3357 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3358 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3359 if (processor_alias_table
[i
].flags
& PTA_HLE
3360 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3361 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3362 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3363 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3364 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3365 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3366 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3367 ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3368 if (processor_alias_table
[i
].flags
& PTA_ADX
3369 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3370 ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3371 if (processor_alias_table
[i
].flags
& PTA_FXSR
3372 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3373 ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3374 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3375 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3376 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3377 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3378 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3379 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3380 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3381 x86_prefetch_sse
= true;
3386 if (!strcmp (ix86_arch_string
, "generic"))
3387 error ("generic CPU can be used only for %stune=%s %s",
3388 prefix
, suffix
, sw
);
3389 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3390 error ("bad value (%s) for %sarch=%s %s",
3391 ix86_arch_string
, prefix
, suffix
, sw
);
3393 ix86_arch_mask
= 1u << ix86_arch
;
3394 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3395 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3397 for (i
= 0; i
< pta_size
; i
++)
3398 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3400 ix86_schedule
= processor_alias_table
[i
].schedule
;
3401 ix86_tune
= processor_alias_table
[i
].processor
;
3404 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3406 if (ix86_tune_defaulted
)
3408 ix86_tune_string
= "x86-64";
3409 for (i
= 0; i
< pta_size
; i
++)
3410 if (! strcmp (ix86_tune_string
,
3411 processor_alias_table
[i
].name
))
3413 ix86_schedule
= processor_alias_table
[i
].schedule
;
3414 ix86_tune
= processor_alias_table
[i
].processor
;
3417 error ("CPU you selected does not support x86-64 "
3423 /* Adjust tuning when compiling for 32-bit ABI. */
3426 case PROCESSOR_GENERIC64
:
3427 ix86_tune
= PROCESSOR_GENERIC32
;
3428 ix86_schedule
= CPU_PENTIUMPRO
;
3435 /* Intel CPUs have always interpreted SSE prefetch instructions as
3436 NOPs; so, we can enable SSE prefetch instructions even when
3437 -mtune (rather than -march) points us to a processor that has them.
3438 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3439 higher processors. */
3441 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3442 x86_prefetch_sse
= true;
3446 if (ix86_tune_specified
&& i
== pta_size
)
3447 error ("bad value (%s) for %stune=%s %s",
3448 ix86_tune_string
, prefix
, suffix
, sw
);
3450 ix86_tune_mask
= 1u << ix86_tune
;
3451 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3452 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3454 #ifndef USE_IX86_FRAME_POINTER
3455 #define USE_IX86_FRAME_POINTER 0
3458 #ifndef USE_X86_64_FRAME_POINTER
3459 #define USE_X86_64_FRAME_POINTER 0
3462 /* Set the default values for switches whose default depends on TARGET_64BIT
3463 in case they weren't overwritten by command line options. */
3466 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3467 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3468 if (flag_asynchronous_unwind_tables
== 2)
3469 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3470 if (flag_pcc_struct_return
== 2)
3471 flag_pcc_struct_return
= 0;
3475 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3476 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3477 if (flag_asynchronous_unwind_tables
== 2)
3478 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3479 if (flag_pcc_struct_return
== 2)
3480 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3483 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3485 ix86_cost
= &ix86_size_cost
;
3487 ix86_cost
= ix86_tune_cost
;
3489 /* Arrange to set up i386_stack_locals for all functions. */
3490 init_machine_status
= ix86_init_machine_status
;
3492 /* Validate -mregparm= value. */
3493 if (global_options_set
.x_ix86_regparm
)
3496 warning (0, "-mregparm is ignored in 64-bit mode");
3497 if (ix86_regparm
> REGPARM_MAX
)
3499 error ("-mregparm=%d is not between 0 and %d",
3500 ix86_regparm
, REGPARM_MAX
);
3505 ix86_regparm
= REGPARM_MAX
;
3507 /* Default align_* from the processor table. */
3508 if (align_loops
== 0)
3510 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3511 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3513 if (align_jumps
== 0)
3515 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3516 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3518 if (align_functions
== 0)
3520 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3523 /* Provide default for -mbranch-cost= value. */
3524 if (!global_options_set
.x_ix86_branch_cost
)
3525 ix86_branch_cost
= ix86_cost
->branch_cost
;
3529 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3531 /* Enable by default the SSE and MMX builtins. Do allow the user to
3532 explicitly disable any of these. In particular, disabling SSE and
3533 MMX for kernel code is extremely useful. */
3534 if (!ix86_arch_specified
)
3536 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3537 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3540 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3544 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3546 if (!ix86_arch_specified
)
3548 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3550 /* i386 ABI does not specify red zone. It still makes sense to use it
3551 when programmer takes care to stack from being destroyed. */
3552 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3553 target_flags
|= MASK_NO_RED_ZONE
;
3556 /* Keep nonleaf frame pointers. */
3557 if (flag_omit_frame_pointer
)
3558 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3559 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3560 flag_omit_frame_pointer
= 1;
3562 /* If we're doing fast math, we don't care about comparison order
3563 wrt NaNs. This lets us use a shorter comparison sequence. */
3564 if (flag_finite_math_only
)
3565 target_flags
&= ~MASK_IEEE_FP
;
3567 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3568 since the insns won't need emulation. */
3569 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3570 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3572 /* Likewise, if the target doesn't have a 387, or we've specified
3573 software floating point, don't use 387 inline intrinsics. */
3575 target_flags
|= MASK_NO_FANCY_MATH_387
;
3577 /* Turn on MMX builtins for -msse. */
3579 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3581 /* Enable SSE prefetch. */
3582 if (TARGET_SSE
|| TARGET_PRFCHW
)
3583 x86_prefetch_sse
= true;
3585 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3586 if (TARGET_SSE4_2
|| TARGET_ABM
)
3587 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3589 /* Turn on lzcnt instruction for -mabm. */
3591 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3593 /* Validate -mpreferred-stack-boundary= value or default it to
3594 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3595 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3596 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3598 int min
= (TARGET_64BIT
? (TARGET_SSE
? 4 : 3) : 2);
3599 int max
= (TARGET_SEH
? 4 : 12);
3601 if (ix86_preferred_stack_boundary_arg
< min
3602 || ix86_preferred_stack_boundary_arg
> max
)
3605 error ("-mpreferred-stack-boundary is not supported "
3608 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3609 ix86_preferred_stack_boundary_arg
, min
, max
);
3612 ix86_preferred_stack_boundary
3613 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3616 /* Set the default value for -mstackrealign. */
3617 if (ix86_force_align_arg_pointer
== -1)
3618 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3620 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3622 /* Validate -mincoming-stack-boundary= value or default it to
3623 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3624 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3625 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3627 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3628 || ix86_incoming_stack_boundary_arg
> 12)
3629 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3630 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3633 ix86_user_incoming_stack_boundary
3634 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3635 ix86_incoming_stack_boundary
3636 = ix86_user_incoming_stack_boundary
;
3640 /* Accept -msseregparm only if at least SSE support is enabled. */
3641 if (TARGET_SSEREGPARM
3643 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3645 if (global_options_set
.x_ix86_fpmath
)
3647 if (ix86_fpmath
& FPMATH_SSE
)
3651 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3652 ix86_fpmath
= FPMATH_387
;
3654 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3656 warning (0, "387 instruction set disabled, using SSE arithmetics");
3657 ix86_fpmath
= FPMATH_SSE
;
3662 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3664 /* If the i387 is disabled, then do not return values in it. */
3666 target_flags
&= ~MASK_FLOAT_RETURNS
;
3668 /* Use external vectorized library in vectorizing intrinsics. */
3669 if (global_options_set
.x_ix86_veclibabi_type
)
3670 switch (ix86_veclibabi_type
)
3672 case ix86_veclibabi_type_svml
:
3673 ix86_veclib_handler
= ix86_veclibabi_svml
;
3676 case ix86_veclibabi_type_acml
:
3677 ix86_veclib_handler
= ix86_veclibabi_acml
;
3684 if ((!USE_IX86_FRAME_POINTER
3685 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3686 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3688 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3690 /* ??? Unwind info is not correct around the CFG unless either a frame
3691 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3692 unwind info generation to be aware of the CFG and propagating states
3694 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3695 || flag_exceptions
|| flag_non_call_exceptions
)
3696 && flag_omit_frame_pointer
3697 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3699 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3700 warning (0, "unwind tables currently require either a frame pointer "
3701 "or %saccumulate-outgoing-args%s for correctness",
3703 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3706 /* If stack probes are required, the space used for large function
3707 arguments on the stack must also be probed, so enable
3708 -maccumulate-outgoing-args so this happens in the prologue. */
3709 if (TARGET_STACK_PROBE
3710 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3712 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3713 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3714 "for correctness", prefix
, suffix
);
3715 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3718 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3721 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3722 p
= strchr (internal_label_prefix
, 'X');
3723 internal_label_prefix_len
= p
- internal_label_prefix
;
3727 /* When scheduling description is not available, disable scheduler pass
3728 so it won't slow down the compilation and make x87 code slower. */
3729 if (!TARGET_SCHEDULE
)
3730 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3732 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3733 ix86_tune_cost
->simultaneous_prefetches
,
3734 global_options
.x_param_values
,
3735 global_options_set
.x_param_values
);
3736 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3737 ix86_tune_cost
->prefetch_block
,
3738 global_options
.x_param_values
,
3739 global_options_set
.x_param_values
);
3740 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3741 ix86_tune_cost
->l1_cache_size
,
3742 global_options
.x_param_values
,
3743 global_options_set
.x_param_values
);
3744 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3745 ix86_tune_cost
->l2_cache_size
,
3746 global_options
.x_param_values
,
3747 global_options_set
.x_param_values
);
3749 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3750 if (flag_prefetch_loop_arrays
< 0
3752 && (optimize
>= 3 || flag_profile_use
)
3753 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3754 flag_prefetch_loop_arrays
= 1;
3756 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3757 can be optimized to ap = __builtin_next_arg (0). */
3758 if (!TARGET_64BIT
&& !flag_split_stack
)
3759 targetm
.expand_builtin_va_start
= NULL
;
3763 ix86_gen_leave
= gen_leave_rex64
;
3764 if (Pmode
== DImode
)
3766 ix86_gen_monitor
= gen_sse3_monitor64_di
;
3767 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3768 ix86_gen_tls_local_dynamic_base_64
3769 = gen_tls_local_dynamic_base_64_di
;
3773 ix86_gen_monitor
= gen_sse3_monitor64_si
;
3774 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3775 ix86_gen_tls_local_dynamic_base_64
3776 = gen_tls_local_dynamic_base_64_si
;
3781 ix86_gen_leave
= gen_leave
;
3782 ix86_gen_monitor
= gen_sse3_monitor
;
3785 if (Pmode
== DImode
)
3787 ix86_gen_add3
= gen_adddi3
;
3788 ix86_gen_sub3
= gen_subdi3
;
3789 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3790 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3791 ix86_gen_andsp
= gen_anddi3
;
3792 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3793 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3794 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3798 ix86_gen_add3
= gen_addsi3
;
3799 ix86_gen_sub3
= gen_subsi3
;
3800 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3801 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3802 ix86_gen_andsp
= gen_andsi3
;
3803 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3804 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3805 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3809 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3811 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3814 if (!TARGET_64BIT
&& flag_pic
)
3816 if (flag_fentry
> 0)
3817 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3821 else if (TARGET_SEH
)
3823 if (flag_fentry
== 0)
3824 sorry ("-mno-fentry isn%'t compatible with SEH");
3827 else if (flag_fentry
< 0)
3829 #if defined(PROFILE_BEFORE_PROLOGUE)
3838 /* When not optimize for size, enable vzeroupper optimization for
3839 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3840 AVX unaligned load/store. */
3843 if (flag_expensive_optimizations
3844 && !(target_flags_explicit
& MASK_VZEROUPPER
))
3845 target_flags
|= MASK_VZEROUPPER
;
3846 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
3847 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
3848 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
3849 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
3850 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
3851 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
3852 /* Enable 128-bit AVX instruction generation
3853 for the auto-vectorizer. */
3854 if (TARGET_AVX128_OPTIMAL
3855 && !(target_flags_explicit
& MASK_PREFER_AVX128
))
3856 target_flags
|= MASK_PREFER_AVX128
;
3861 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3862 target_flags
&= ~MASK_VZEROUPPER
;
3865 if (ix86_recip_name
)
3867 char *p
= ASTRDUP (ix86_recip_name
);
3869 unsigned int mask
, i
;
3872 while ((q
= strtok (p
, ",")) != NULL
)
3883 if (!strcmp (q
, "default"))
3884 mask
= RECIP_MASK_ALL
;
3887 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
3888 if (!strcmp (q
, recip_options
[i
].string
))
3890 mask
= recip_options
[i
].mask
;
3894 if (i
== ARRAY_SIZE (recip_options
))
3896 error ("unknown option for -mrecip=%s", q
);
3898 mask
= RECIP_MASK_NONE
;
3902 recip_mask_explicit
|= mask
;
3904 recip_mask
&= ~mask
;
3911 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
3912 else if (target_flags_explicit
& MASK_RECIP
)
3913 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
3915 /* Default long double to 64-bit for Bionic. */
3916 if (TARGET_HAS_BIONIC
3917 && !(target_flags_explicit
& MASK_LONG_DOUBLE_64
))
3918 target_flags
|= MASK_LONG_DOUBLE_64
;
3920 /* Save the initial options in case the user does function specific
3923 target_option_default_node
= target_option_current_node
3924 = build_target_option_node ();
3927 /* Implement the TARGET_OPTION_OVERRIDE hook. */
/* NOTE(review): this extraction is elided -- the return type, braces and
   parts of the initializer below are missing from the visible text; comments
   annotate only what IS visible.  Confirm details against the full file.  */
3930 ix86_option_override (void)
/* Pass-manager descriptor: insert one instance of the vzeroupper pass
   after "reload" (PASS_POS_INSERT_AFTER).  */
3932 static struct register_pass_info insert_vzeroupper_info
3933 = { &pass_insert_vzeroupper
.pass
, "reload",
3934 1, PASS_POS_INSERT_AFTER
/* Delegate all option processing to the shared worker; the 'true'
   argument distinguishes this main-override call from attribute
   re-processing (which passes false -- see ix86_valid_target_attribute_tree).  */
3937 ix86_option_override_internal (true);
3940 /* This needs to be done at start up. It's convenient to do it here. */
3941 register_pass (&insert_vzeroupper_info
);
3944 /* Update register usage after having seen the compiler flags. */
/* NOTE(review): elided extraction -- the 'static void' line, braces and the
   guarding conditions for several of the loops below (e.g. the 32-bit-only
   REX squashing, the !TARGET_MMX / !TARGET_SSE guards) are missing from the
   visible text.  Comments added only; code untouched.  */
3947 ix86_conditional_register_usage (void)
3952 /* The PIC register, if it exists, is fixed. */
3953 j
= PIC_OFFSET_TABLE_REGNUM
;
3954 if (j
!= INVALID_REGNUM
)
3955 fixed_regs
[j
] = call_used_regs
[j
] = 1;
3957 /* For 32-bit targets, squash the REX registers. */
/* Marking a register fixed + call-used and blanking its name removes it
   from allocation and from assembler output entirely.  */
3960 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
3961 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3962 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
3963 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3966 /* See the definition of CALL_USED_REGISTERS in i386.h. */
/* c_mask selects which conditional bit in the CALL_USED_REGISTERS
   initializer applies to the current ABI (MS 64-bit vs SysV 64-bit vs
   32-bit; the final alternative is elided from this view).  */
3967 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
3968 : TARGET_64BIT
? (1 << 2)
3971 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
3973 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3975 /* Set/reset conditionally defined registers from
3976 CALL_USED_REGISTERS initializer. */
3977 if (call_used_regs
[i
] > 1)
3978 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
3980 /* Calculate registers of CLOBBERED_REGS register set
3981 as call used registers from GENERAL_REGS register set. */
3982 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
3983 && call_used_regs
[i
])
3984 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
3987 /* If MMX is disabled, squash the registers. */
3989 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3990 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
3991 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3993 /* If SSE is disabled, squash the registers. */
3995 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3996 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
3997 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3999 /* If the FPU is disabled, squash the registers. */
4000 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4001 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4002 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4003 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4007 /* Save the current options */
/* Copies the current global i386 option state into *PTR so it can later be
   restored per-function (TARGET_OPTION_SAVE hook).  NOTE(review): elided
   extraction -- return type, braces and possibly other fields are not
   visible here; comments added only.  */
4010 ix86_function_specific_save (struct cl_target_option
*ptr
)
4012 ptr
->arch
= ix86_arch
;
4013 ptr
->schedule
= ix86_schedule
;
4014 ptr
->tune
= ix86_tune
;
4015 ptr
->branch_cost
= ix86_branch_cost
;
4016 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4017 ptr
->arch_specified
= ix86_arch_specified
;
4018 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4019 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4020 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4022 /* The fields are char but the variables are not; make sure the
4023 values fit in the fields. */
/* Round-trip check: if a value was truncated on store, the equality
   comparison below fails and the assert fires.  */
4024 gcc_assert (ptr
->arch
== ix86_arch
);
4025 gcc_assert (ptr
->schedule
== ix86_schedule
);
4026 gcc_assert (ptr
->tune
== ix86_tune
);
4027 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4030 /* Restore the current options */
/* Inverse of ix86_function_specific_save: copies *PTR back into the global
   option variables, then regenerates the derived arch/tune feature tables
   when the arch or tune actually changed (TARGET_OPTION_RESTORE hook).
   NOTE(review): elided extraction -- return type, braces and the
   declaration of loop index 'i' are not visible here.  */
4033 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4035 enum processor_type old_tune
= ix86_tune
;
4036 enum processor_type old_arch
= ix86_arch
;
4037 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4040 ix86_arch
= (enum processor_type
) ptr
->arch
;
4041 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4042 ix86_tune
= (enum processor_type
) ptr
->tune
;
4043 ix86_branch_cost
= ptr
->branch_cost
;
4044 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4045 ix86_arch_specified
= ptr
->arch_specified
;
4046 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4047 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4048 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4050 /* Recreate the arch feature tests if the arch changed */
/* Each feature entry is a bitmask over processors; testing the bit for
   the newly restored arch yields the per-feature 0/1 flag.  */
4051 if (old_arch
!= ix86_arch
)
4053 ix86_arch_mask
= 1u << ix86_arch
;
4054 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4055 ix86_arch_features
[i
]
4056 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4059 /* Recreate the tune optimization tests */
4060 if (old_tune
!= ix86_tune
)
4062 ix86_tune_mask
= 1u << ix86_tune
;
4063 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4064 ix86_tune_features
[i
]
4065 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4069 /* Print the current options */
/* Dumps the target options stored in *PTR to FILE at the given INDENT, for
   debugging (TARGET_OPTION_PRINT hook).  NOTE(review): elided extraction --
   the declaration of 'target_string', several fprintf arguments and the
   fallback arms of the conditional expressions are not visible here.  */
4072 ix86_function_specific_print (FILE *file
, int indent
,
4073 struct cl_target_option
*ptr
)
/* Build a human-readable "-m..." option string from the saved flags;
   freed at the end of the function.  */
4076 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4077 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
/* Print arch/tune both numerically and, when in range, by name from
   the cpu_names table.  */
4079 fprintf (file
, "%*sarch = %d (%s)\n",
4082 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4083 ? cpu_names
[ptr
->arch
]
4086 fprintf (file
, "%*stune = %d (%s)\n",
4089 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4090 ? cpu_names
[ptr
->tune
]
4093 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4097 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4098 free (target_string
);
4103 /* Inner function to process the attribute((target(...))), take an argument and
4104 set the current options from the argument. If we have a list, recursively go
/* NOTE(review): elided extraction -- the return type, the 'attrs' table
   declaration/struct, braces, several locals (orig_p, ch, opt, mask,
   opt_set_p, arg_ok, value, i) and many control-flow lines are missing
   from the visible text.  Comments annotate only what IS visible.  */
4108 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4109 struct gcc_options
*enum_opts_set
)
/* Helper macros building entries of the option-description table:
   name string, its length, option kind, option enum, and (for yes/no
   kinds) the target_flags mask to set or clear.  */
4114 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4115 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4116 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4117 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4118 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4134 enum ix86_opt_type type
;
/* ISA options: each maps an attribute string to the matching -m option.  */
4139 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4140 IX86_ATTR_ISA ("abm", OPT_mabm
),
4141 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4142 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4143 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4144 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4145 IX86_ATTR_ISA ("aes", OPT_maes
),
4146 IX86_ATTR_ISA ("avx", OPT_mavx
),
4147 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4148 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4149 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4150 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4151 IX86_ATTR_ISA ("sse", OPT_msse
),
4152 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4153 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4154 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4155 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4156 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4157 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4158 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4159 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4160 IX86_ATTR_ISA ("fma", OPT_mfma
),
4161 IX86_ATTR_ISA ("xop", OPT_mxop
),
4162 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4163 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4164 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4165 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4166 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4167 IX86_ATTR_ISA ("hle", OPT_mhle
),
4168 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4169 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4170 IX86_ATTR_ISA ("adx", OPT_madx
),
4171 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4172 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4173 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
/* Enum option: fpmath= takes a keyword argument.  */
4176 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4178 /* string options */
4179 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4180 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
/* Flag options: "yes" entries set the mask, "no" entries clear it.  */
4183 IX86_ATTR_YES ("cld",
4187 IX86_ATTR_NO ("fancy-math-387",
4188 OPT_mfancy_math_387
,
4189 MASK_NO_FANCY_MATH_387
),
4191 IX86_ATTR_YES ("ieee-fp",
4195 IX86_ATTR_YES ("inline-all-stringops",
4196 OPT_minline_all_stringops
,
4197 MASK_INLINE_ALL_STRINGOPS
),
4199 IX86_ATTR_YES ("inline-stringops-dynamically",
4200 OPT_minline_stringops_dynamically
,
4201 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4203 IX86_ATTR_NO ("align-stringops",
4204 OPT_mno_align_stringops
,
4205 MASK_NO_ALIGN_STRINGOPS
),
4207 IX86_ATTR_YES ("recip",
4213 /* If this is a list, recurse to get the options. */
4214 if (TREE_CODE (args
) == TREE_LIST
)
4218 for (; args
; args
= TREE_CHAIN (args
))
4219 if (TREE_VALUE (args
)
4220 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4221 p_strings
, enum_opts_set
))
4227 else if (TREE_CODE (args
) != STRING_CST
)
4229 error ("attribute %<target%> argument not a string");
4233 /* Handle multiple arguments separated by commas. */
4234 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4236 while (next_optstr
&& *next_optstr
!= '\0')
4238 char *p
= next_optstr
;
4240 char *comma
= strchr (next_optstr
, ',');
4241 const char *opt_string
;
4242 size_t len
, opt_len
;
4247 enum ix86_opt_type type
= ix86_opt_unknown
;
/* Split off the current comma-separated token and advance.  */
4253 len
= comma
- next_optstr
;
4254 next_optstr
= comma
+ 1;
4262 /* Recognize no-xxx. */
4263 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4272 /* Find the option. */
/* Linear scan of the table above; first-character check before the
   full memcmp keeps the common miss cheap.  */
4275 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4277 type
= attrs
[i
].type
;
4278 opt_len
= attrs
[i
].len
;
4279 if (ch
== attrs
[i
].string
[0]
4280 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4283 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4286 mask
= attrs
[i
].mask
;
4287 opt_string
= attrs
[i
].string
;
4292 /* Process the option. */
4295 error ("attribute(target(\"%s\")) is unknown", orig_p
)
;
/* ISA options are routed through the regular option machinery so that
   implied ISAs are enabled/disabled consistently.  */
4299 else if (type
== ix86_opt_isa
)
4301 struct cl_decoded_option decoded
;
4303 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4304 ix86_handle_option (&global_options
, &global_options_set
,
4305 &decoded
, input_location
);
/* Simple flag options: set or clear the mask in target_flags.  */
4308 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4310 if (type
== ix86_opt_no
)
4311 opt_set_p
= !opt_set_p
;
4314 target_flags
|= mask
;
4316 target_flags
&= ~mask
;
/* String options (arch=/tune=): stash a copy for the caller; a
   duplicate specification is an error.  */
4319 else if (type
== ix86_opt_str
)
4323 error ("option(\"%s\") was already specified", opt_string
)
;
4327 p_strings
[opt
] = xstrdup (p
+ opt_len
);
/* Enum options (fpmath=): translate the argument keyword and record
   it via set_option, tracking it in enum_opts_set.  */
4330 else if (type
== ix86_opt_enum
)
4335 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4337 set_option (&global_options
, enum_opts_set
, opt
, value
,
4338 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4342 error ("attribute(target(\"%s\")) is unknown", orig_p
)
;
4354 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* NOTE(review): elided extraction -- return type, braces, the declarations
   of 't' and 'i', and some condition/brace lines are missing from the
   visible text.  Comments annotate only what IS visible.  */
4357 ix86_valid_target_attribute_tree (tree args
)
/* Snapshot the global option state we are about to perturb so it can be
   restored before returning.  */
4359 const char *orig_arch_string
= ix86_arch_string
;
4360 const char *orig_tune_string
= ix86_tune_string
;
4361 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4362 int orig_tune_defaulted
= ix86_tune_defaulted
;
4363 int orig_arch_specified
= ix86_arch_specified
;
4364 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4367 struct cl_target_option
*def
4368 = TREE_TARGET_OPTION (target_option_default_node
);
4369 struct gcc_options enum_opts_set
;
4371 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4373 /* Process each of the options on the chain. */
4374 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4376 return error_mark_node
;
4378 /* If the changed options are different from the default, rerun
4379 ix86_option_override_internal, and then save the options away.
4380 The string options are are attribute options, and will be undone
4381 when we copy the save structure. */
4382 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4383 || target_flags
!= def
->x_target_flags
4384 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4385 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4386 || enum_opts_set
.x_ix86_fpmath
)
4388 /* If we are using the default tune= or arch=, undo the string assigned,
4389 and use the default. */
4390 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4391 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4392 else if (!orig_arch_specified
)
4393 ix86_arch_string
= NULL
;
4395 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4396 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4397 else if (orig_tune_defaulted
)
4398 ix86_tune_string
= NULL
;
4400 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4401 if (enum_opts_set
.x_ix86_fpmath
)
4402 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4403 else if (!TARGET_64BIT
&& TARGET_SSE
)
4405 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4406 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4409 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
/* 'false' = attribute re-processing path, not the main override.  */
4410 ix86_option_override_internal (false);
4412 /* Add any builtin functions with the new isa if any. */
4413 ix86_add_new_builtins (ix86_isa_flags
);
4415 /* Save the current options unless we are validating options for
4417 t
= build_target_option_node ();
/* Restore the globals saved at entry.  */
4419 ix86_arch_string
= orig_arch_string
;
4420 ix86_tune_string
= orig_tune_string
;
4421 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4423 /* Free up memory allocated to hold the strings */
4424 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4425 free (option_strings
[i
]);
4431 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P implementation: builds a target-option
   node for the attribute arguments, attaches it to FNDECL, and restores
   the global option state afterwards.  NOTE(review): elided extraction --
   return type, braces, the 'args' parameter line and the return values of
   the branches are missing from the visible text.  */
4434 ix86_valid_target_attribute_p (tree fndecl
,
4435 tree
ARG_UNUSED (name
),
4437 int ARG_UNUSED (flags
))
4439 struct cl_target_option cur_target
;
4442 /* attribute((target("default"))) does nothing, beyond
4443 affecting multi-versioning. */
4444 if (TREE_VALUE (args
)
4445 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
4446 && TREE_CHAIN (args
) == NULL_TREE
4447 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
4450 tree old_optimize
= build_optimization_node ();
4451 tree new_target
, new_optimize
;
4452 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4454 /* If the function changed the optimization levels as well as setting target
4455 options, start with the optimizations specified. */
4456 if (func_optimize
&& func_optimize
!= old_optimize
)
4457 cl_optimization_restore (&global_options
,
4458 TREE_OPTIMIZATION (func_optimize
));
4460 /* The target attributes may also change some optimization flags, so update
4461 the optimization options if necessary. */
4462 cl_target_option_save (&cur_target
, &global_options
);
4463 new_target
= ix86_valid_target_attribute_tree (args
);
4464 new_optimize
= build_optimization_node ();
4466 if (new_target
== error_mark_node
)
4469 else if (fndecl
&& new_target
)
4471 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4473 if (old_optimize
!= new_optimize
)
4474 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
/* Undo the temporary global changes made while evaluating the attribute.  */
4477 cl_target_option_restore (&global_options
, &cur_target
);
4479 if (old_optimize
!= new_optimize
)
4480 cl_optimization_restore (&global_options
,
4481 TREE_OPTIMIZATION (old_optimize
));
4487 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P implementation: compares the per-function target
   options of CALLER and CALLEE.  NOTE(review): elided extraction -- return
   type, braces, and the actual true/false return statements of each branch
   are missing from the visible text; only the comparison chain is shown.  */
4490 ix86_can_inline_p (tree caller
, tree callee
)
4493 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4494 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4496 /* If callee has no option attributes, then it is ok to inline. */
4500 /* If caller has no option attributes, but callee does then it is not ok to
4502 else if (!caller_tree
)
4507 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4508 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4510 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4511 can inline a SSE2 function but a SSE2 function can't inline a SSE4
/* Subset test: AND of the two ISA flag sets must reproduce the callee's.  */
4513 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4514 != callee_opts
->x_ix86_isa_flags
)
4517 /* See if we have the same non-isa options. */
4518 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4521 /* See if arch, tune, etc. are the same. */
4522 else if (caller_opts
->arch
!= callee_opts
->arch
)
4525 else if (caller_opts
->tune
!= callee_opts
->tune
)
4528 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4531 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4542 /* Remember the last target of ix86_set_current_function. */
/* Cached across calls (GC-rooted) so repeated invocations for the same
   function are cheap no-ops.  */
4543 static GTY(()) tree ix86_previous_fndecl
;
4545 /* Establish appropriate back-end context for processing the function
4546 FNDECL. The argument might be NULL to indicate processing at top
4547 level, outside of any function scope. */
/* NOTE(review): elided extraction -- return type, braces, and the fallback
   arms of the two conditional expressions (what old_tree/new_tree become
   when there is no per-function target node) are missing from view.  */
4549 ix86_set_current_function (tree fndecl
)
4551 /* Only change the context if the function changes. This hook is called
4552 several times in the course of compiling a function, and we don't want to
4553 slow things down too much or call target_reinit when it isn't safe. */
4554 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4556 tree old_tree
= (ix86_previous_fndecl
4557 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4560 tree new_tree
= (fndecl
4561 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4564 ix86_previous_fndecl
= fndecl
;
4565 if (old_tree
== new_tree
)
/* Switch the global option state to the new function's saved target
   options (or back to the current defaults, below).  */
4570 cl_target_option_restore (&global_options
,
4571 TREE_TARGET_OPTION (new_tree
));
4577 struct cl_target_option
*def
4578 = TREE_TARGET_OPTION (target_option_current_node
);
4580 cl_target_option_restore (&global_options
, def
);
4587 /* Return true if this goes in large data/bss. */
/* Used by the medium code model (CM_MEDIUM / CM_MEDIUM_PIC) to decide
   whether EXP belongs in the .ldata/.lbss large sections.  NOTE(review):
   elided extraction -- return type, braces and the explicit return
   statements of each branch are missing from the visible text.  */
4590 ix86_in_large_data_p (tree exp
)
/* Only the medium code models have large sections at all.  */
4592 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4595 /* Functions are never large data. */
4596 if (TREE_CODE (exp
) == FUNCTION_DECL
)
/* An explicit section attribute naming .ldata/.lbss forces large.  */
4599 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4601 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4602 if (strcmp (section
, ".ldata") == 0
4603 || strcmp (section
, ".lbss") == 0)
4609 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4611 /* If this is an incomplete type with size 0, then we can't put it
4612 in data because it might be too big when completed. */
4613 if (!size
|| size
> ix86_section_threshold
)
4620 /* Switch to the appropriate section for output of DECL.
4621 DECL is either a `VAR_DECL' node or a constant of some sort.
4622 RELOC indicates whether forming the initial value of DECL requires
4623 link-time relocations. */
/* Forward declaration of the hook implementation below.  */
4625 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
/* NOTE(review): elided extraction -- the definition's 'static section *'
   line, braces, 'break' statements and several case bodies are missing
   from the visible text.  Large-model decls get a ".l"-prefixed section
   chosen by category; everything else falls through to the ELF default.  */
4629 x86_64_elf_select_section (tree decl
, int reloc
,
4630 unsigned HOST_WIDE_INT align
)
4632 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4633 && ix86_in_large_data_p (decl
))
4635 const char *sname
= NULL
;
4636 unsigned int flags
= SECTION_WRITE
;
4637 switch (categorize_decl_for_section (decl
, reloc
))
4642 case SECCAT_DATA_REL
:
4643 sname
= ".ldata.rel";
4645 case SECCAT_DATA_REL_LOCAL
:
4646 sname
= ".ldata.rel.local";
4648 case SECCAT_DATA_REL_RO
:
4649 sname
= ".ldata.rel.ro";
4651 case SECCAT_DATA_REL_RO_LOCAL
:
4652 sname
= ".ldata.rel.ro.local";
4656 flags
|= SECTION_BSS
;
4659 case SECCAT_RODATA_MERGE_STR
:
4660 case SECCAT_RODATA_MERGE_STR_INIT
:
4661 case SECCAT_RODATA_MERGE_CONST
:
4665 case SECCAT_SRODATA
:
4672 /* We don't split these for medium model. Place them into
4673 default sections and hope for best. */
4678 /* We might get called with string constants, but get_named_section
4679 doesn't like them as they are not DECLs. Also, we need to set
4680 flags in that case. */
4682 return get_section (sname
, flags
, NULL
);
4683 return get_named_section (decl
, sname
, reloc
);
/* Non-large (or non-medium-model) decls: standard ELF selection.  */
4686 return default_elf_select_section (decl
, reloc
, align
);
4689 /* Build up a unique section name, expressed as a
4690 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4691 RELOC indicates whether the initial value of EXP requires
4692 link-time relocations. */
/* NOTE(review): elided extraction -- braces, 'break' statements, some case
   labels and the declaration of 'string' are missing from the visible
   text.  Mirrors x86_64_elf_select_section: large-model decls get a
   ".l"-prefixed (optionally .gnu.linkonce) unique section name.  */
4694 static void ATTRIBUTE_UNUSED
4695 x86_64_elf_unique_section (tree decl
, int reloc
)
4697 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4698 && ix86_in_large_data_p (decl
))
4700 const char *prefix
= NULL
;
4701 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4702 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4704 switch (categorize_decl_for_section (decl
, reloc
))
4707 case SECCAT_DATA_REL
:
4708 case SECCAT_DATA_REL_LOCAL
:
4709 case SECCAT_DATA_REL_RO
:
4710 case SECCAT_DATA_REL_RO_LOCAL
:
4711 prefix
= one_only
? ".ld" : ".ldata";
4714 prefix
= one_only
? ".lb" : ".lbss";
4717 case SECCAT_RODATA_MERGE_STR
:
4718 case SECCAT_RODATA_MERGE_STR_INIT
:
4719 case SECCAT_RODATA_MERGE_CONST
:
4720 prefix
= one_only
? ".lr" : ".lrodata";
4722 case SECCAT_SRODATA
:
4729 /* We don't split these for medium model. Place them into
4730 default sections and hope for best. */
4735 const char *name
, *linkonce
;
/* Strip any target-specific encoding from the assembler name before
   splicing it into the section name.  */
4738 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4739 name
= targetm
.strip_name_encoding (name
);
4741 /* If we're using one_only, then there needs to be a .gnu.linkonce
4742 prefix to the section name. */
4743 linkonce
= one_only
? ".gnu.linkonce" : "";
4745 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4747 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
/* Not a large-model decl: fall back to the generic implementation.  */
4751 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
4776 /* Utility function for targets to use in implementing
4777 ASM_OUTPUT_ALIGNED_BSS. */
4780 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4781 const char *name
, unsigned HOST_WIDE_INT size
,
4784 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4785 && size
> (unsigned int)ix86_section_threshold
)
4786 switch_to_section (get_named_section (decl
, ".lbss", 0));
4788 switch_to_section (bss_section
);
4789 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4790 #ifdef ASM_DECLARE_OBJECT_NAME
4791 last_assemble_variable_decl
= decl
;
4792 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4794 /* Standard thing is just output label for the object. */
4795 ASM_OUTPUT_LABEL (file
, name
);
4796 #endif /* ASM_DECLARE_OBJECT_NAME */
4797 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4800 /* Decide whether we must probe the stack before any space allocation
4801 on this target. It's essentially TARGET_STACK_PROBE except when
4802 -fstack-check causes the stack to be already probed differently. */
4805 ix86_target_stack_probe (void)
4807 /* Do not probe the stack twice if static stack checking is enabled. */
4808 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4811 return TARGET_STACK_PROBE
;
4814 /* Decide whether we can make a sibling call to a function. DECL is the
4815 declaration of the function being targeted by the call and EXP is the
4816 CALL_EXPR representing the call. */
4819 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4821 tree type
, decl_or_type
;
4824 /* If we are generating position-independent code, we cannot sibcall
4825 optimize any indirect call, or a direct call to a global function,
4826 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4830 && (!decl
|| !targetm
.binds_local_p (decl
)))
4833 /* If we need to align the outgoing stack, then sibcalling would
4834 unalign the stack, which may break the called function. */
4835 if (ix86_minimum_incoming_stack_boundary (true)
4836 < PREFERRED_STACK_BOUNDARY
)
4841 decl_or_type
= decl
;
4842 type
= TREE_TYPE (decl
);
4846 /* We're looking at the CALL_EXPR, we need the type of the function. */
4847 type
= CALL_EXPR_FN (exp
); /* pointer expression */
4848 type
= TREE_TYPE (type
); /* pointer type */
4849 type
= TREE_TYPE (type
); /* function type */
4850 decl_or_type
= type
;
4853 /* Check that the return value locations are the same. Like
4854 if we are returning floats on the 80387 register stack, we cannot
4855 make a sibcall from a function that doesn't return a float to a
4856 function that does or, conversely, from a function that does return
4857 a float to a function that doesn't; the necessary stack adjustment
4858 would not be executed. This is also the place we notice
4859 differences in the return value ABI. Note that it is ok for one
4860 of the functions to have void return type as long as the return
4861 value of the other is passed in a register. */
4862 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
4863 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4865 if (STACK_REG_P (a
) || STACK_REG_P (b
))
4867 if (!rtx_equal_p (a
, b
))
4870 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4872 else if (!rtx_equal_p (a
, b
))
4877 /* The SYSV ABI has more call-clobbered registers;
4878 disallow sibcalls from MS to SYSV. */
4879 if (cfun
->machine
->call_abi
== MS_ABI
4880 && ix86_function_type_abi (type
) == SYSV_ABI
)
4885 /* If this call is indirect, we'll need to be able to use a
4886 call-clobbered register for the address of the target function.
4887 Make sure that all such registers are not used for passing
4888 parameters. Note that DLLIMPORT functions are indirect. */
4890 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
4892 if (ix86_function_regparm (type
, NULL
) >= 3)
4894 /* ??? Need to count the actual number of registers to be used,
4895 not the possible number of registers. Fix later. */
4901 /* Otherwise okay. That also includes certain types of indirect calls. */
4905 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4906 and "sseregparm" calling convention attributes;
4907 arguments as in struct attribute_spec.handler. */
4910 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4912 int flags ATTRIBUTE_UNUSED
,
4915 if (TREE_CODE (*node
) != FUNCTION_TYPE
4916 && TREE_CODE (*node
) != METHOD_TYPE
4917 && TREE_CODE (*node
) != FIELD_DECL
4918 && TREE_CODE (*node
) != TYPE_DECL
)
4920 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4922 *no_add_attrs
= true;
4926 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4927 if (is_attribute_p ("regparm", name
))
4931 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4933 error ("fastcall and regparm attributes are not compatible");
4936 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4938 error ("regparam and thiscall attributes are not compatible");
4941 cst
= TREE_VALUE (args
);
4942 if (TREE_CODE (cst
) != INTEGER_CST
)
4944 warning (OPT_Wattributes
,
4945 "%qE attribute requires an integer constant argument",
4947 *no_add_attrs
= true;
4949 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4951 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4953 *no_add_attrs
= true;
4961 /* Do not warn when emulating the MS ABI. */
4962 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4963 && TREE_CODE (*node
) != METHOD_TYPE
)
4964 || ix86_function_type_abi (*node
) != MS_ABI
)
4965 warning (OPT_Wattributes
, "%qE attribute ignored",
4967 *no_add_attrs
= true;
4971 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4972 if (is_attribute_p ("fastcall", name
))
4974 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4976 error ("fastcall and cdecl attributes are not compatible");
4978 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4980 error ("fastcall and stdcall attributes are not compatible");
4982 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
4984 error ("fastcall and regparm attributes are not compatible");
4986 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4988 error ("fastcall and thiscall attributes are not compatible");
4992 /* Can combine stdcall with fastcall (redundant), regparm and
4994 else if (is_attribute_p ("stdcall", name
))
4996 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4998 error ("stdcall and cdecl attributes are not compatible");
5000 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5002 error ("stdcall and fastcall attributes are not compatible");
5004 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5006 error ("stdcall and thiscall attributes are not compatible");
5010 /* Can combine cdecl with regparm and sseregparm. */
5011 else if (is_attribute_p ("cdecl", name
))
5013 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5015 error ("stdcall and cdecl attributes are not compatible");
5017 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5019 error ("fastcall and cdecl attributes are not compatible");
5021 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5023 error ("cdecl and thiscall attributes are not compatible");
5026 else if (is_attribute_p ("thiscall", name
))
5028 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5029 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5031 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5033 error ("stdcall and thiscall attributes are not compatible");
5035 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5037 error ("fastcall and thiscall attributes are not compatible");
5039 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5041 error ("cdecl and thiscall attributes are not compatible");
5045 /* Can combine sseregparm with all attributes. */
5050 /* The transactional memory builtins are implicitly regparm or fastcall
5051 depending on the ABI. Override the generic do-nothing attribute that
5052 these builtins were declared with, and replace it with one of the two
5053 attributes that we expect elsewhere. */
5056 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5057 tree args ATTRIBUTE_UNUSED
,
5058 int flags ATTRIBUTE_UNUSED
,
5063 /* In no case do we want to add the placeholder attribute. */
5064 *no_add_attrs
= true;
5066 /* The 64-bit ABI is unchanged for transactional memory. */
5070 /* ??? Is there a better way to validate 32-bit windows? We have
5071 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5072 if (CHECK_STACK_LIMIT
> 0)
5073 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5076 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5077 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5079 decl_attributes (node
, alt
, flags
);
5084 /* This function determines from TYPE the calling-convention. */
5087 ix86_get_callcvt (const_tree type
)
5089 unsigned int ret
= 0;
5094 return IX86_CALLCVT_CDECL
;
5096 attrs
= TYPE_ATTRIBUTES (type
);
5097 if (attrs
!= NULL_TREE
)
5099 if (lookup_attribute ("cdecl", attrs
))
5100 ret
|= IX86_CALLCVT_CDECL
;
5101 else if (lookup_attribute ("stdcall", attrs
))
5102 ret
|= IX86_CALLCVT_STDCALL
;
5103 else if (lookup_attribute ("fastcall", attrs
))
5104 ret
|= IX86_CALLCVT_FASTCALL
;
5105 else if (lookup_attribute ("thiscall", attrs
))
5106 ret
|= IX86_CALLCVT_THISCALL
;
5108 /* Regparam isn't allowed for thiscall and fastcall. */
5109 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5111 if (lookup_attribute ("regparm", attrs
))
5112 ret
|= IX86_CALLCVT_REGPARM
;
5113 if (lookup_attribute ("sseregparm", attrs
))
5114 ret
|= IX86_CALLCVT_SSEREGPARM
;
5117 if (IX86_BASE_CALLCVT(ret
) != 0)
5121 is_stdarg
= stdarg_p (type
);
5122 if (TARGET_RTD
&& !is_stdarg
)
5123 return IX86_CALLCVT_STDCALL
| ret
;
5127 || TREE_CODE (type
) != METHOD_TYPE
5128 || ix86_function_type_abi (type
) != MS_ABI
)
5129 return IX86_CALLCVT_CDECL
| ret
;
5131 return IX86_CALLCVT_THISCALL
;
5134 /* Return 0 if the attributes for two types are incompatible, 1 if they
5135 are compatible, and 2 if they are nearly compatible (which causes a
5136 warning to be generated). */
5139 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5141 unsigned int ccvt1
, ccvt2
;
5143 if (TREE_CODE (type1
) != FUNCTION_TYPE
5144 && TREE_CODE (type1
) != METHOD_TYPE
)
5147 ccvt1
= ix86_get_callcvt (type1
);
5148 ccvt2
= ix86_get_callcvt (type2
);
5151 if (ix86_function_regparm (type1
, NULL
)
5152 != ix86_function_regparm (type2
, NULL
))
5158 /* Return the regparm value for a function with the indicated TYPE and DECL.
5159 DECL may be NULL when calling function indirectly
5160 or considering a libcall. */
5163 ix86_function_regparm (const_tree type
, const_tree decl
)
5170 return (ix86_function_type_abi (type
) == SYSV_ABI
5171 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5172 ccvt
= ix86_get_callcvt (type
);
5173 regparm
= ix86_regparm
;
5175 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5177 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5180 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5184 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5186 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5189 /* Use register calling convention for local functions when possible. */
5191 && TREE_CODE (decl
) == FUNCTION_DECL
5193 && !(profile_flag
&& !flag_fentry
))
5195 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5196 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5197 if (i
&& i
->local
&& i
->can_change_signature
)
5199 int local_regparm
, globals
= 0, regno
;
5201 /* Make sure no regparm register is taken by a
5202 fixed register variable. */
5203 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5204 if (fixed_regs
[local_regparm
])
5207 /* We don't want to use regparm(3) for nested functions as
5208 these use a static chain pointer in the third argument. */
5209 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5212 /* In 32-bit mode save a register for the split stack. */
5213 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5216 /* Each fixed register usage increases register pressure,
5217 so less registers should be used for argument passing.
5218 This functionality can be overriden by an explicit
5220 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5221 if (fixed_regs
[regno
])
5225 = globals
< local_regparm
? local_regparm
- globals
: 0;
5227 if (local_regparm
> regparm
)
5228 regparm
= local_regparm
;
5235 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5236 DFmode (2) arguments in SSE registers for a function with the
5237 indicated TYPE and DECL. DECL may be NULL when calling function
5238 indirectly or considering a libcall. Otherwise return 0. */
5241 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5243 gcc_assert (!TARGET_64BIT
);
5245 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5246 by the sseregparm attribute. */
5247 if (TARGET_SSEREGPARM
5248 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5255 error ("calling %qD with attribute sseregparm without "
5256 "SSE/SSE2 enabled", decl
);
5258 error ("calling %qT with attribute sseregparm without "
5259 "SSE/SSE2 enabled", type
);
5267 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5268 (and DFmode for SSE2) arguments in SSE registers. */
5269 if (decl
&& TARGET_SSE_MATH
&& optimize
5270 && !(profile_flag
&& !flag_fentry
))
5272 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5273 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5274 if (i
&& i
->local
&& i
->can_change_signature
)
5275 return TARGET_SSE2
? 2 : 1;
5281 /* Return true if EAX is live at the start of the function. Used by
5282 ix86_expand_prologue to determine if we need special help before
5283 calling allocate_stack_worker. */
5286 ix86_eax_live_at_start_p (void)
5288 /* Cheat. Don't bother working forward from ix86_function_regparm
5289 to the function type to whether an actual argument is located in
5290 eax. Instead just look at cfg info, which is still close enough
5291 to correct at this point. This gives false positives for broken
5292 functions that might use uninitialized data that happens to be
5293 allocated in eax, but who cares? */
5294 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5298 ix86_keep_aggregate_return_pointer (tree fntype
)
5304 attr
= lookup_attribute ("callee_pop_aggregate_return",
5305 TYPE_ATTRIBUTES (fntype
));
5307 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5309 /* For 32-bit MS-ABI the default is to keep aggregate
5311 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5314 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5317 /* Value is the number of bytes of arguments automatically
5318 popped when returning from a subroutine call.
5319 FUNDECL is the declaration node of the function (as a tree),
5320 FUNTYPE is the data type of the function (as a tree),
5321 or for a library call it is an identifier node for the subroutine name.
5322 SIZE is the number of bytes of arguments passed on the stack.
5324 On the 80386, the RTD insn may be used to pop them if the number
5325 of args is fixed, but if the number is variable then the caller
5326 must pop them all. RTD can't be used for library calls now
5327 because the library is compiled with the Unix compiler.
5328 Use of RTD is a selectable option, since it is incompatible with
5329 standard Unix calling sequences. If the option is not selected,
5330 the caller must always pop the args.
5332 The attribute stdcall is equivalent to RTD on a per module basis. */
5335 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5339 /* None of the 64-bit ABIs pop arguments. */
5343 ccvt
= ix86_get_callcvt (funtype
);
5345 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5346 | IX86_CALLCVT_THISCALL
)) != 0
5347 && ! stdarg_p (funtype
))
5350 /* Lose any fake structure return argument if it is passed on the stack. */
5351 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5352 && !ix86_keep_aggregate_return_pointer (funtype
))
5354 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5356 return GET_MODE_SIZE (Pmode
);
5362 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5365 ix86_legitimate_combined_insn (rtx insn
)
5367 /* Check operand constraints in case hard registers were propagated
5368 into insn pattern. This check prevents combine pass from
5369 generating insn patterns with invalid hard register operands.
5370 These invalid insns can eventually confuse reload to error out
5371 with a spill failure. See also PRs 46829 and 46843. */
5372 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5376 extract_insn (insn
);
5377 preprocess_constraints ();
5379 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5381 rtx op
= recog_data
.operand
[i
];
5382 enum machine_mode mode
= GET_MODE (op
);
5383 struct operand_alternative
*op_alt
;
5388 /* A unary operator may be accepted by the predicate, but it
5389 is irrelevant for matching constraints. */
5393 if (GET_CODE (op
) == SUBREG
)
5395 if (REG_P (SUBREG_REG (op
))
5396 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5397 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5398 GET_MODE (SUBREG_REG (op
)),
5401 op
= SUBREG_REG (op
);
5404 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5407 op_alt
= recog_op_alt
[i
];
5409 /* Operand has no constraints, anything is OK. */
5410 win
= !recog_data
.n_alternatives
;
5412 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5414 if (op_alt
[j
].anything_ok
5415 || (op_alt
[j
].matches
!= -1
5417 (recog_data
.operand
[i
],
5418 recog_data
.operand
[op_alt
[j
].matches
]))
5419 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5434 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5436 static unsigned HOST_WIDE_INT
5437 ix86_asan_shadow_offset (void)
5439 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_LP64
? 44 : 29);
5442 /* Argument support functions. */
5444 /* Return true when register may be used to pass function parameters. */
5446 ix86_function_arg_regno_p (int regno
)
5449 const int *parm_regs
;
5454 return (regno
< REGPARM_MAX
5455 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5457 return (regno
< REGPARM_MAX
5458 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5459 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5460 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5461 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5466 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5471 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5472 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5476 /* TODO: The function should depend on current function ABI but
5477 builtins.c would need updating then. Therefore we use the
5480 /* RAX is used as hidden argument to va_arg functions. */
5481 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5484 if (ix86_abi
== MS_ABI
)
5485 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5487 parm_regs
= x86_64_int_parameter_registers
;
5488 for (i
= 0; i
< (ix86_abi
== MS_ABI
5489 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5490 if (regno
== parm_regs
[i
])
5495 /* Return if we do not know how to pass TYPE solely in registers. */
5498 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5500 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5503 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5504 The layout_type routine is crafty and tries to trick us into passing
5505 currently unsupported vector types on the stack by using TImode. */
5506 return (!TARGET_64BIT
&& mode
== TImode
5507 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5510 /* It returns the size, in bytes, of the area reserved for arguments passed
5511 in registers for the function represented by fndecl dependent to the used
5514 ix86_reg_parm_stack_space (const_tree fndecl
)
5516 enum calling_abi call_abi
= SYSV_ABI
;
5517 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5518 call_abi
= ix86_function_abi (fndecl
);
5520 call_abi
= ix86_function_type_abi (fndecl
);
5521 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5526 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5529 ix86_function_type_abi (const_tree fntype
)
5531 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5533 enum calling_abi abi
= ix86_abi
;
5534 if (abi
== SYSV_ABI
)
5536 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5539 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5547 ix86_function_ms_hook_prologue (const_tree fn
)
5549 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5551 if (decl_function_context (fn
) != NULL_TREE
)
5552 error_at (DECL_SOURCE_LOCATION (fn
),
5553 "ms_hook_prologue is not compatible with nested function");
5560 static enum calling_abi
5561 ix86_function_abi (const_tree fndecl
)
5565 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5568 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5571 ix86_cfun_abi (void)
5575 return cfun
->machine
->call_abi
;
5578 /* Write the extra assembler code needed to declare a function properly. */
5581 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5584 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5588 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5589 unsigned int filler_cc
= 0xcccccccc;
5591 for (i
= 0; i
< filler_count
; i
+= 4)
5592 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5595 #ifdef SUBTARGET_ASM_UNWIND_INIT
5596 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5599 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5601 /* Output magic byte marker, if hot-patch attribute is set. */
5606 /* leaq [%rsp + 0], %rsp */
5607 asm_fprintf (asm_out_file
, ASM_BYTE
5608 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5612 /* movl.s %edi, %edi
5614 movl.s %esp, %ebp */
5615 asm_fprintf (asm_out_file
, ASM_BYTE
5616 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5622 extern void init_regs (void);
5624 /* Implementation of call abi switching target hook. Specific to FNDECL
5625 the specific call register sets are set. See also
5626 ix86_conditional_register_usage for more details. */
5628 ix86_call_abi_override (const_tree fndecl
)
5630 if (fndecl
== NULL_TREE
)
5631 cfun
->machine
->call_abi
= ix86_abi
;
5633 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5636 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5637 expensive re-initialization of init_regs each time we switch function context
5638 since this is needed only during RTL expansion. */
5640 ix86_maybe_switch_abi (void)
5643 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5647 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5648 for a call to a function whose data type is FNTYPE.
5649 For a library call, FNTYPE is 0. */
5652 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5653 tree fntype
, /* tree ptr for function decl */
5654 rtx libname
, /* SYMBOL_REF of library name or 0 */
5658 struct cgraph_local_info
*i
;
5660 memset (cum
, 0, sizeof (*cum
));
5664 i
= cgraph_local_info (fndecl
);
5665 cum
->call_abi
= ix86_function_abi (fndecl
);
5670 cum
->call_abi
= ix86_function_type_abi (fntype
);
5673 cum
->caller
= caller
;
5675 /* Set up the number of registers to use for passing arguments. */
5677 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5678 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5679 "or subtarget optimization implying it");
5680 cum
->nregs
= ix86_regparm
;
5683 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5684 ? X86_64_REGPARM_MAX
5685 : X86_64_MS_REGPARM_MAX
);
5689 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5692 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5693 ? X86_64_SSE_REGPARM_MAX
5694 : X86_64_MS_SSE_REGPARM_MAX
);
5698 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5699 cum
->warn_avx
= true;
5700 cum
->warn_sse
= true;
5701 cum
->warn_mmx
= true;
5703 /* Because type might mismatch in between caller and callee, we need to
5704 use actual type of function for local calls.
5705 FIXME: cgraph_analyze can be told to actually record if function uses
5706 va_start so for local functions maybe_vaarg can be made aggressive
5708 FIXME: once typesytem is fixed, we won't need this code anymore. */
5709 if (i
&& i
->local
&& i
->can_change_signature
)
5710 fntype
= TREE_TYPE (fndecl
);
5711 cum
->maybe_vaarg
= (fntype
5712 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5717 /* If there are variable arguments, then we won't pass anything
5718 in registers in 32-bit mode. */
5719 if (stdarg_p (fntype
))
5730 /* Use ecx and edx registers if function has fastcall attribute,
5731 else look for regparm information. */
5734 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5735 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5738 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5740 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5746 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5749 /* Set up the number of SSE registers used for passing SFmode
5750 and DFmode arguments. Warn for mismatching ABI. */
5751 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5755 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5756 But in the case of vector types, it is some vector mode.
5758 When we have only some of our vector isa extensions enabled, then there
5759 are some modes for which vector_mode_supported_p is false. For these
5760 modes, the generic vector support in gcc will choose some non-vector mode
5761 in order to implement the type. By computing the natural mode, we'll
5762 select the proper ABI location for the operand and not depend on whatever
5763 the middle-end decides to do with these vector types.
5765 The midde-end can't deal with the vector types > 16 bytes. In this
5766 case, we return the original mode and warn ABI change if CUM isn't
5769 static enum machine_mode
5770 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5772 enum machine_mode mode
= TYPE_MODE (type
);
5774 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5776 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5777 if ((size
== 8 || size
== 16 || size
== 32)
5778 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5779 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5781 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5783 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5784 mode
= MIN_MODE_VECTOR_FLOAT
;
5786 mode
= MIN_MODE_VECTOR_INT
;
5788 /* Get the mode which has this inner mode and number of units. */
5789 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5790 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5791 && GET_MODE_INNER (mode
) == innermode
)
5793 if (size
== 32 && !TARGET_AVX
)
5795 static bool warnedavx
;
5802 warning (0, "AVX vector argument without AVX "
5803 "enabled changes the ABI");
5805 return TYPE_MODE (type
);
5807 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
5809 static bool warnedsse
;
5816 warning (0, "SSE vector argument without SSE "
5817 "enabled changes the ABI");
5832 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5833 this may not agree with the mode that the type system has chosen for the
5834 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5835 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5838 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5843 if (orig_mode
!= BLKmode
)
5844 tmp
= gen_rtx_REG (orig_mode
, regno
);
5847 tmp
= gen_rtx_REG (mode
, regno
);
5848 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5849 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5855 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5856 of this code is to classify each 8bytes of incoming argument by the register
5857 class and assign registers accordingly. */
5859 /* Return the union class of CLASS1 and CLASS2.
5860 See the x86-64 PS ABI for details. */
5862 static enum x86_64_reg_class
5863 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5865 /* Rule #1: If both classes are equal, this is the resulting class. */
5866 if (class1
== class2
)
5869 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5871 if (class1
== X86_64_NO_CLASS
)
5873 if (class2
== X86_64_NO_CLASS
)
5876 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5877 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
5878 return X86_64_MEMORY_CLASS
;
5880 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5881 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
5882 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
5883 return X86_64_INTEGERSI_CLASS
;
5884 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
5885 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
5886 return X86_64_INTEGER_CLASS
;
5888 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5890 if (class1
== X86_64_X87_CLASS
5891 || class1
== X86_64_X87UP_CLASS
5892 || class1
== X86_64_COMPLEX_X87_CLASS
5893 || class2
== X86_64_X87_CLASS
5894 || class2
== X86_64_X87UP_CLASS
5895 || class2
== X86_64_COMPLEX_X87_CLASS
)
5896 return X86_64_MEMORY_CLASS
;
5898 /* Rule #6: Otherwise class SSE is used. */
5899 return X86_64_SSE_CLASS
;
5902 /* Classify the argument of type TYPE and mode MODE.
5903 CLASSES will be filled by the register class used to pass each word
5904 of the operand. The number of words is returned. In case the parameter
5905 should be passed in memory, 0 is returned. As a special case for zero
5906 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5908 BIT_OFFSET is used internally for handling records and specifies offset
5909 of the offset in bits modulo 256 to avoid overflow cases.
5911 See the x86-64 PS ABI for details.
5915 classify_argument (enum machine_mode mode
, const_tree type
,
5916 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5918 HOST_WIDE_INT bytes
=
5919 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5921 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5923 /* Variable sized entities are always passed/returned in memory. */
5927 if (mode
!= VOIDmode
5928 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5931 /* Special case check for pointer to shared, on 64-bit target. */
5932 if (TARGET_64BIT
&& mode
== TImode
5933 && type
&& TREE_CODE (type
) == POINTER_TYPE
5934 && upc_shared_type_p (TREE_TYPE (type
)))
5936 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
5940 if (type
&& AGGREGATE_TYPE_P (type
))
5944 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5946 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5950 for (i
= 0; i
< words
; i
++)
5951 classes
[i
] = X86_64_NO_CLASS
;
5953 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5954 signalize memory class, so handle it as special case. */
5957 classes
[0] = X86_64_NO_CLASS
;
5961 /* Classify each field of record and merge classes. */
5962 switch (TREE_CODE (type
))
5965 /* And now merge the fields of structure. */
5966 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5968 if (TREE_CODE (field
) == FIELD_DECL
)
5972 if (TREE_TYPE (field
) == error_mark_node
)
5975 /* Bitfields are always classified as integer. Handle them
5976 early, since later code would consider them to be
5977 misaligned integers. */
5978 if (DECL_BIT_FIELD (field
))
5980 for (i
= (int_bit_position (field
)
5981 + (bit_offset
% 64)) / 8 / 8;
5982 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5983 + tree_low_cst (DECL_SIZE (field
), 0)
5986 merge_classes (X86_64_INTEGER_CLASS
,
5993 type
= TREE_TYPE (field
);
5995 /* Flexible array member is ignored. */
5996 if (TYPE_MODE (type
) == BLKmode
5997 && TREE_CODE (type
) == ARRAY_TYPE
5998 && TYPE_SIZE (type
) == NULL_TREE
5999 && TYPE_DOMAIN (type
) != NULL_TREE
6000 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6005 if (!warned
&& warn_psabi
)
6008 inform (input_location
,
6009 "the ABI of passing struct with"
6010 " a flexible array member has"
6011 " changed in GCC 4.4");
6015 num
= classify_argument (TYPE_MODE (type
), type
,
6017 (int_bit_position (field
)
6018 + bit_offset
) % 256);
6021 pos
= (int_bit_position (field
)
6022 + (bit_offset
% 64)) / 8 / 8;
6023 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6025 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6032 /* Arrays are handled as small records. */
6035 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6036 TREE_TYPE (type
), subclasses
, bit_offset
);
6040 /* The partial classes are now full classes. */
6041 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6042 subclasses
[0] = X86_64_SSE_CLASS
;
6043 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6044 && !((bit_offset
% 64) == 0 && bytes
== 4))
6045 subclasses
[0] = X86_64_INTEGER_CLASS
;
6047 for (i
= 0; i
< words
; i
++)
6048 classes
[i
] = subclasses
[i
% num
];
6053 case QUAL_UNION_TYPE
:
6054 /* Unions are similar to RECORD_TYPE but offset is always 0.
6056 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6058 if (TREE_CODE (field
) == FIELD_DECL
)
6062 if (TREE_TYPE (field
) == error_mark_node
)
6065 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6066 TREE_TYPE (field
), subclasses
,
6070 for (i
= 0; i
< num
; i
++)
6071 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6082 /* When size > 16 bytes, if the first one isn't
6083 X86_64_SSE_CLASS or any other ones aren't
6084 X86_64_SSEUP_CLASS, everything should be passed in
6086 if (classes
[0] != X86_64_SSE_CLASS
)
6089 for (i
= 1; i
< words
; i
++)
6090 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6094 /* Final merger cleanup. */
6095 for (i
= 0; i
< words
; i
++)
6097 /* If one class is MEMORY, everything should be passed in
6099 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6102 /* The X86_64_SSEUP_CLASS should be always preceded by
6103 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6104 if (classes
[i
] == X86_64_SSEUP_CLASS
6105 && classes
[i
- 1] != X86_64_SSE_CLASS
6106 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6108 /* The first one should never be X86_64_SSEUP_CLASS. */
6109 gcc_assert (i
!= 0);
6110 classes
[i
] = X86_64_SSE_CLASS
;
6113 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6114 everything should be passed in memory. */
6115 if (classes
[i
] == X86_64_X87UP_CLASS
6116 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6120 /* The first one should never be X86_64_X87UP_CLASS. */
6121 gcc_assert (i
!= 0);
6122 if (!warned
&& warn_psabi
)
6125 inform (input_location
,
6126 "the ABI of passing union with long double"
6127 " has changed in GCC 4.4");
6135 /* Compute alignment needed. We align all types to natural boundaries with
6136 exception of XFmode that is aligned to 64bits. */
6137 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6139 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6142 mode_alignment
= 128;
6143 else if (mode
== XCmode
)
6144 mode_alignment
= 256;
6145 if (COMPLEX_MODE_P (mode
))
6146 mode_alignment
/= 2;
6147 /* Misaligned fields are always returned in memory. */
6148 if (bit_offset
% mode_alignment
)
6152 /* for V1xx modes, just use the base mode */
6153 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6154 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6155 mode
= GET_MODE_INNER (mode
);
6157 /* Classification of atomic types. */
6162 classes
[0] = X86_64_SSE_CLASS
;
6165 classes
[0] = X86_64_SSE_CLASS
;
6166 classes
[1] = X86_64_SSEUP_CLASS
;
6176 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6180 classes
[0] = X86_64_INTEGERSI_CLASS
;
6183 else if (size
<= 64)
6185 classes
[0] = X86_64_INTEGER_CLASS
;
6188 else if (size
<= 64+32)
6190 classes
[0] = X86_64_INTEGER_CLASS
;
6191 classes
[1] = X86_64_INTEGERSI_CLASS
;
6194 else if (size
<= 64+64)
6196 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6204 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6208 /* OImode shouldn't be used directly. */
6213 if (!(bit_offset
% 64))
6214 classes
[0] = X86_64_SSESF_CLASS
;
6216 classes
[0] = X86_64_SSE_CLASS
;
6219 classes
[0] = X86_64_SSEDF_CLASS
;
6222 classes
[0] = X86_64_X87_CLASS
;
6223 classes
[1] = X86_64_X87UP_CLASS
;
6226 classes
[0] = X86_64_SSE_CLASS
;
6227 classes
[1] = X86_64_SSEUP_CLASS
;
6230 classes
[0] = X86_64_SSE_CLASS
;
6231 if (!(bit_offset
% 64))
6237 if (!warned
&& warn_psabi
)
6240 inform (input_location
,
6241 "the ABI of passing structure with complex float"
6242 " member has changed in GCC 4.4");
6244 classes
[1] = X86_64_SSESF_CLASS
;
6248 classes
[0] = X86_64_SSEDF_CLASS
;
6249 classes
[1] = X86_64_SSEDF_CLASS
;
6252 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6255 /* This modes is larger than 16 bytes. */
6263 classes
[0] = X86_64_SSE_CLASS
;
6264 classes
[1] = X86_64_SSEUP_CLASS
;
6265 classes
[2] = X86_64_SSEUP_CLASS
;
6266 classes
[3] = X86_64_SSEUP_CLASS
;
6274 classes
[0] = X86_64_SSE_CLASS
;
6275 classes
[1] = X86_64_SSEUP_CLASS
;
6283 classes
[0] = X86_64_SSE_CLASS
;
6289 gcc_assert (VECTOR_MODE_P (mode
));
6294 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6296 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6297 classes
[0] = X86_64_INTEGERSI_CLASS
;
6299 classes
[0] = X86_64_INTEGER_CLASS
;
6300 classes
[1] = X86_64_INTEGER_CLASS
;
6301 return 1 + (bytes
> 8);
6305 /* Examine the argument and return set number of register required in each
6306 class. Return 0 iff parameter should be passed in memory. */
6308 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6309 int *int_nregs
, int *sse_nregs
)
6311 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6312 int n
= classify_argument (mode
, type
, regclass
, 0);
6318 for (n
--; n
>= 0; n
--)
6319 switch (regclass
[n
])
6321 case X86_64_INTEGER_CLASS
:
6322 case X86_64_INTEGERSI_CLASS
:
6325 case X86_64_SSE_CLASS
:
6326 case X86_64_SSESF_CLASS
:
6327 case X86_64_SSEDF_CLASS
:
6330 case X86_64_NO_CLASS
:
6331 case X86_64_SSEUP_CLASS
:
6333 case X86_64_X87_CLASS
:
6334 case X86_64_X87UP_CLASS
:
6338 case X86_64_COMPLEX_X87_CLASS
:
6339 return in_return
? 2 : 0;
6340 case X86_64_MEMORY_CLASS
:
6346 /* Construct container for the argument used by GCC interface. See
6347 FUNCTION_ARG for the detailed description. */
6350 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6351 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6352 const int *intreg
, int sse_regno
)
6354 /* The following variables hold the static issued_error state. */
6355 static bool issued_sse_arg_error
;
6356 static bool issued_sse_ret_error
;
6357 static bool issued_x87_ret_error
;
6359 enum machine_mode tmpmode
;
6361 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6362 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6366 int needed_sseregs
, needed_intregs
;
6367 rtx exp
[MAX_CLASSES
];
6370 n
= classify_argument (mode
, type
, regclass
, 0);
6373 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6376 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6379 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6380 some less clueful developer tries to use floating-point anyway. */
6381 if (needed_sseregs
&& !TARGET_SSE
)
6385 if (!issued_sse_ret_error
)
6387 error ("SSE register return with SSE disabled");
6388 issued_sse_ret_error
= true;
6391 else if (!issued_sse_arg_error
)
6393 error ("SSE register argument with SSE disabled");
6394 issued_sse_arg_error
= true;
6399 /* Likewise, error if the ABI requires us to return values in the
6400 x87 registers and the user specified -mno-80387. */
6401 if (!TARGET_80387
&& in_return
)
6402 for (i
= 0; i
< n
; i
++)
6403 if (regclass
[i
] == X86_64_X87_CLASS
6404 || regclass
[i
] == X86_64_X87UP_CLASS
6405 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6407 if (!issued_x87_ret_error
)
6409 error ("x87 register return with x87 disabled");
6410 issued_x87_ret_error
= true;
6415 /* First construct simple cases. Avoid SCmode, since we want to use
6416 single register to pass this type. */
6417 if (n
== 1 && mode
!= SCmode
)
6418 switch (regclass
[0])
6420 case X86_64_INTEGER_CLASS
:
6421 case X86_64_INTEGERSI_CLASS
:
6422 return gen_rtx_REG (mode
, intreg
[0]);
6423 case X86_64_SSE_CLASS
:
6424 case X86_64_SSESF_CLASS
:
6425 case X86_64_SSEDF_CLASS
:
6426 if (mode
!= BLKmode
)
6427 return gen_reg_or_parallel (mode
, orig_mode
,
6428 SSE_REGNO (sse_regno
));
6430 case X86_64_X87_CLASS
:
6431 case X86_64_COMPLEX_X87_CLASS
:
6432 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6433 case X86_64_NO_CLASS
:
6434 /* Zero sized array, struct or class. */
6440 && regclass
[0] == X86_64_SSE_CLASS
6441 && regclass
[1] == X86_64_SSEUP_CLASS
6443 return gen_reg_or_parallel (mode
, orig_mode
,
6444 SSE_REGNO (sse_regno
));
6446 && regclass
[0] == X86_64_SSE_CLASS
6447 && regclass
[1] == X86_64_SSEUP_CLASS
6448 && regclass
[2] == X86_64_SSEUP_CLASS
6449 && regclass
[3] == X86_64_SSEUP_CLASS
6451 return gen_reg_or_parallel (mode
, orig_mode
,
6452 SSE_REGNO (sse_regno
));
6454 && regclass
[0] == X86_64_X87_CLASS
6455 && regclass
[1] == X86_64_X87UP_CLASS
)
6456 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6459 && regclass
[0] == X86_64_INTEGER_CLASS
6460 && regclass
[1] == X86_64_INTEGER_CLASS
6461 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6462 && intreg
[0] + 1 == intreg
[1])
6463 return gen_rtx_REG (mode
, intreg
[0]);
6465 /* Otherwise figure out the entries of the PARALLEL. */
6466 for (i
= 0; i
< n
; i
++)
6470 switch (regclass
[i
])
6472 case X86_64_NO_CLASS
:
6474 case X86_64_INTEGER_CLASS
:
6475 case X86_64_INTEGERSI_CLASS
:
6476 /* Merge TImodes on aligned occasions here too. */
6477 if (i
* 8 + 8 > bytes
)
6479 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6480 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6484 /* We've requested 24 bytes we
6485 don't have mode for. Use DImode. */
6486 if (tmpmode
== BLKmode
)
6489 = gen_rtx_EXPR_LIST (VOIDmode
,
6490 gen_rtx_REG (tmpmode
, *intreg
),
6494 case X86_64_SSESF_CLASS
:
6496 = gen_rtx_EXPR_LIST (VOIDmode
,
6497 gen_rtx_REG (SFmode
,
6498 SSE_REGNO (sse_regno
)),
6502 case X86_64_SSEDF_CLASS
:
6504 = gen_rtx_EXPR_LIST (VOIDmode
,
6505 gen_rtx_REG (DFmode
,
6506 SSE_REGNO (sse_regno
)),
6510 case X86_64_SSE_CLASS
:
6518 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6528 && regclass
[1] == X86_64_SSEUP_CLASS
6529 && regclass
[2] == X86_64_SSEUP_CLASS
6530 && regclass
[3] == X86_64_SSEUP_CLASS
);
6538 = gen_rtx_EXPR_LIST (VOIDmode
,
6539 gen_rtx_REG (tmpmode
,
6540 SSE_REGNO (sse_regno
)),
6549 /* Empty aligned struct, union or class. */
6553 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6554 for (i
= 0; i
< nexps
; i
++)
6555 XVECEXP (ret
, 0, i
) = exp
[i
];
6559 /* Update the data in CUM to advance over an argument of mode MODE
6560 and data type TYPE. (TYPE is null for libcalls where that information
6561 may not be available.) */
6564 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6565 const_tree type
, HOST_WIDE_INT bytes
,
6566 HOST_WIDE_INT words
)
6582 cum
->words
+= words
;
6583 cum
->nregs
-= words
;
6584 cum
->regno
+= words
;
6586 if (cum
->nregs
<= 0)
6594 /* OImode shouldn't be used directly. */
6598 if (cum
->float_in_sse
< 2)
6601 if (cum
->float_in_sse
< 1)
6618 if (!type
|| !AGGREGATE_TYPE_P (type
))
6620 cum
->sse_words
+= words
;
6621 cum
->sse_nregs
-= 1;
6622 cum
->sse_regno
+= 1;
6623 if (cum
->sse_nregs
<= 0)
6637 if (!type
|| !AGGREGATE_TYPE_P (type
))
6639 cum
->mmx_words
+= words
;
6640 cum
->mmx_nregs
-= 1;
6641 cum
->mmx_regno
+= 1;
6642 if (cum
->mmx_nregs
<= 0)
6653 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6654 const_tree type
, HOST_WIDE_INT words
, bool named
)
6656 int int_nregs
, sse_nregs
;
6658 /* Unnamed 256bit vector mode parameters are passed on stack. */
6659 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6662 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6663 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6665 cum
->nregs
-= int_nregs
;
6666 cum
->sse_nregs
-= sse_nregs
;
6667 cum
->regno
+= int_nregs
;
6668 cum
->sse_regno
+= sse_nregs
;
6672 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6673 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6674 cum
->words
+= words
;
6679 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6680 HOST_WIDE_INT words
)
6682 /* Otherwise, this should be passed indirect. */
6683 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6685 cum
->words
+= words
;
6693 /* Update the data in CUM to advance over an argument of mode MODE and
6694 data type TYPE. (TYPE is null for libcalls where that information
6695 may not be available.) */
6698 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6699 const_tree type
, bool named
)
6701 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6702 HOST_WIDE_INT bytes
, words
;
6704 if (mode
== BLKmode
)
6705 bytes
= int_size_in_bytes (type
);
6707 bytes
= GET_MODE_SIZE (mode
);
6708 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6711 mode
= type_natural_mode (type
, NULL
);
6713 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6714 function_arg_advance_ms_64 (cum
, bytes
, words
);
6715 else if (TARGET_64BIT
)
6716 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6718 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6721 /* Define where to put the arguments to a function.
6722 Value is zero to push the argument on the stack,
6723 or a hard register in which to store the argument.
6725 MODE is the argument's machine mode.
6726 TYPE is the data type of the argument (as a tree).
6727 This is null for libcalls where that information may
6729 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6730 the preceding args and about the function being called.
6731 NAMED is nonzero if this argument is a named parameter
6732 (otherwise it is an extra parameter matching an ellipsis). */
6735 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6736 enum machine_mode orig_mode
, const_tree type
,
6737 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6739 static bool warnedsse
, warnedmmx
;
6741 /* Avoid the AL settings for the Unix64 ABI. */
6742 if (mode
== VOIDmode
)
6758 if (words
<= cum
->nregs
)
6760 int regno
= cum
->regno
;
6762 /* Fastcall allocates the first two DWORD (SImode) or
6763 smaller arguments to ECX and EDX if it isn't an
6769 || (type
&& AGGREGATE_TYPE_P (type
)))
6772 /* ECX not EAX is the first allocated register. */
6773 if (regno
== AX_REG
)
6776 return gen_rtx_REG (mode
, regno
);
6781 if (cum
->float_in_sse
< 2)
6784 if (cum
->float_in_sse
< 1)
6788 /* In 32bit, we pass TImode in xmm registers. */
6795 if (!type
|| !AGGREGATE_TYPE_P (type
))
6797 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6800 warning (0, "SSE vector argument without SSE enabled "
6804 return gen_reg_or_parallel (mode
, orig_mode
,
6805 cum
->sse_regno
+ FIRST_SSE_REG
);
6810 /* OImode shouldn't be used directly. */
6819 if (!type
|| !AGGREGATE_TYPE_P (type
))
6822 return gen_reg_or_parallel (mode
, orig_mode
,
6823 cum
->sse_regno
+ FIRST_SSE_REG
);
6833 if (!type
|| !AGGREGATE_TYPE_P (type
))
6835 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6838 warning (0, "MMX vector argument without MMX enabled "
6842 return gen_reg_or_parallel (mode
, orig_mode
,
6843 cum
->mmx_regno
+ FIRST_MMX_REG
);
6852 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6853 enum machine_mode orig_mode
, const_tree type
, bool named
)
6855 /* Handle a hidden AL argument containing number of registers
6856 for varargs x86-64 functions. */
6857 if (mode
== VOIDmode
)
6858 return GEN_INT (cum
->maybe_vaarg
6859 ? (cum
->sse_nregs
< 0
6860 ? X86_64_SSE_REGPARM_MAX
6875 /* Unnamed 256bit vector mode parameters are passed on stack. */
6881 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6883 &x86_64_int_parameter_registers
[cum
->regno
],
6888 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6889 enum machine_mode orig_mode
, bool named
,
6890 HOST_WIDE_INT bytes
)
6894 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6895 We use value of -2 to specify that current function call is MSABI. */
6896 if (mode
== VOIDmode
)
6897 return GEN_INT (-2);
6899 /* If we've run out of registers, it goes on the stack. */
6900 if (cum
->nregs
== 0)
6903 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6905 /* Only floating point modes are passed in anything but integer regs. */
6906 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6909 regno
= cum
->regno
+ FIRST_SSE_REG
;
6914 /* Unnamed floating parameters are passed in both the
6915 SSE and integer registers. */
6916 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6917 t2
= gen_rtx_REG (mode
, regno
);
6918 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6919 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6920 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6923 /* Handle aggregated types passed in register. */
6924 if (orig_mode
== BLKmode
)
6926 if (bytes
> 0 && bytes
<= 8)
6927 mode
= (bytes
> 4 ? DImode
: SImode
);
6928 if (mode
== BLKmode
)
6932 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
6935 /* Return where to put the arguments to a function.
6936 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6938 MODE is the argument's machine mode. TYPE is the data type of the
6939 argument. It is null for libcalls where that information may not be
6940 available. CUM gives information about the preceding args and about
6941 the function being called. NAMED is nonzero if this argument is a
6942 named parameter (otherwise it is an extra parameter matching an
6946 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
6947 const_tree type
, bool named
)
6949 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6950 enum machine_mode mode
= omode
;
6951 HOST_WIDE_INT bytes
, words
;
6954 if (mode
== BLKmode
)
6955 bytes
= int_size_in_bytes (type
);
6957 bytes
= GET_MODE_SIZE (mode
);
6958 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6960 /* To simplify the code below, represent vector types with a vector mode
6961 even if MMX/SSE are not active. */
6962 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6963 mode
= type_natural_mode (type
, cum
);
6965 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6966 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
6967 else if (TARGET_64BIT
)
6968 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
6970 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
6975 /* A C expression that indicates when an argument must be passed by
6976 reference. If nonzero for an argument, a copy of that argument is
6977 made in memory and a pointer to the argument is passed instead of
6978 the argument itself. The pointer is passed in whatever way is
6979 appropriate for passing a pointer to that type. */
6982 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
6983 enum machine_mode mode ATTRIBUTE_UNUSED
,
6984 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6986 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6988 /* See Windows x64 Software Convention. */
6989 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6991 int msize
= (int) GET_MODE_SIZE (mode
);
6994 /* Arrays are passed by reference. */
6995 if (TREE_CODE (type
) == ARRAY_TYPE
)
6998 if (AGGREGATE_TYPE_P (type
))
7000 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7001 are passed by reference. */
7002 msize
= int_size_in_bytes (type
);
7006 /* __m128 is passed by reference. */
7008 case 1: case 2: case 4: case 8:
7014 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7020 /* Return true when TYPE should be 128bit aligned for 32bit argument
7021 passing ABI. XXX: This function is obsolete and is only used for
7022 checking psABI compatibility with previous versions of GCC. */
7025 ix86_compat_aligned_value_p (const_tree type
)
7027 enum machine_mode mode
= TYPE_MODE (type
);
7028 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7032 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7034 if (TYPE_ALIGN (type
) < 128)
7037 if (AGGREGATE_TYPE_P (type
))
7039 /* Walk the aggregates recursively. */
7040 switch (TREE_CODE (type
))
7044 case QUAL_UNION_TYPE
:
7048 /* Walk all the structure fields. */
7049 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7051 if (TREE_CODE (field
) == FIELD_DECL
7052 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7059 /* Just for use if some languages passes arrays by value. */
7060 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7071 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7072 XXX: This function is obsolete and is only used for checking psABI
7073 compatibility with previous versions of GCC. */
7076 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7077 const_tree type
, unsigned int align
)
7079 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7080 natural boundaries. */
7081 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7083 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7084 make an exception for SSE modes since these require 128bit
7087 The handling here differs from field_alignment. ICC aligns MMX
7088 arguments to 4 byte boundaries, while structure fields are aligned
7089 to 8 byte boundaries. */
7092 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7093 align
= PARM_BOUNDARY
;
7097 if (!ix86_compat_aligned_value_p (type
))
7098 align
= PARM_BOUNDARY
;
7101 if (align
> BIGGEST_ALIGNMENT
)
7102 align
= BIGGEST_ALIGNMENT
;
7106 /* Return true when TYPE should be 128bit aligned for 32bit argument
7110 ix86_contains_aligned_value_p (const_tree type
)
7112 enum machine_mode mode
= TYPE_MODE (type
);
7114 if (mode
== XFmode
|| mode
== XCmode
)
7117 if (TYPE_ALIGN (type
) < 128)
7120 if (AGGREGATE_TYPE_P (type
))
7122 /* Walk the aggregates recursively. */
7123 switch (TREE_CODE (type
))
7127 case QUAL_UNION_TYPE
:
7131 /* Walk all the structure fields. */
7132 for (field
= TYPE_FIELDS (type
);
7134 field
= DECL_CHAIN (field
))
7136 if (TREE_CODE (field
) == FIELD_DECL
7137 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7144 /* Just for use if some languages passes arrays by value. */
7145 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7154 return TYPE_ALIGN (type
) >= 128;
7159 /* Gives the alignment boundary, in bits, of an argument with the
7160 specified mode and type. */
7163 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7168 /* Since the main variant type is used for call, we convert it to
7169 the main variant type. */
7170 type
= TYPE_MAIN_VARIANT (type
);
7171 align
= TYPE_ALIGN (type
);
7174 align
= GET_MODE_ALIGNMENT (mode
);
7175 if (align
< PARM_BOUNDARY
)
7176 align
= PARM_BOUNDARY
;
7180 unsigned int saved_align
= align
;
7184 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7187 if (mode
== XFmode
|| mode
== XCmode
)
7188 align
= PARM_BOUNDARY
;
7190 else if (!ix86_contains_aligned_value_p (type
))
7191 align
= PARM_BOUNDARY
;
7194 align
= PARM_BOUNDARY
;
7199 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7203 inform (input_location
,
7204 "The ABI for passing parameters with %d-byte"
7205 " alignment has changed in GCC 4.6",
7206 align
/ BITS_PER_UNIT
);
7213 /* Return true if N is a possible register number of function value. */
7216 ix86_function_value_regno_p (const unsigned int regno
)
7223 case FIRST_FLOAT_REG
:
7224 /* TODO: The function should depend on current function ABI but
7225 builtins.c would need updating then. Therefore we use the
7227 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7229 return TARGET_FLOAT_RETURNS_IN_80387
;
7235 if (TARGET_MACHO
|| TARGET_64BIT
)
7243 /* Define how to find the value returned by a function.
7244 VALTYPE is the data type of the value (as a tree).
7245 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7246 otherwise, FUNC is 0. */
7249 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7250 const_tree fntype
, const_tree fn
)
7254 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7255 we normally prevent this case when mmx is not available. However
7256 some ABIs may require the result to be returned like DImode. */
7257 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7258 regno
= FIRST_MMX_REG
;
7260 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7261 we prevent this case when sse is not available. However some ABIs
7262 may require the result to be returned like integer TImode. */
7263 else if (mode
== TImode
7264 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7265 regno
= FIRST_SSE_REG
;
7267 /* 32-byte vector modes in %ymm0. */
7268 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7269 regno
= FIRST_SSE_REG
;
7271 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7272 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7273 regno
= FIRST_FLOAT_REG
;
7275 /* Most things go in %eax. */
7278 /* Override FP return register with %xmm0 for local functions when
7279 SSE math is enabled or for functions with sseregparm attribute. */
7280 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7282 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7283 if ((sse_level
>= 1 && mode
== SFmode
)
7284 || (sse_level
== 2 && mode
== DFmode
))
7285 regno
= FIRST_SSE_REG
;
7288 /* OImode shouldn't be used directly. */
7289 gcc_assert (mode
!= OImode
);
7291 return gen_rtx_REG (orig_mode
, regno
);
7295 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7300 /* Handle libcalls, which don't provide a type node. */
7301 if (valtype
== NULL
)
7315 regno
= FIRST_SSE_REG
;
7319 regno
= FIRST_FLOAT_REG
;
7327 return gen_rtx_REG (mode
, regno
);
7329 else if (POINTER_TYPE_P (valtype
)
7330 && !upc_shared_type_p (TREE_TYPE (valtype
)))
7332 /* Pointers are always returned in word_mode. */
7336 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7337 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7338 x86_64_int_return_registers
, 0);
7340 /* For zero sized structures, construct_container returns NULL, but we
7341 need to keep rest of compiler happy by returning meaningful value. */
7343 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7349 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7351 unsigned int regno
= AX_REG
;
7355 switch (GET_MODE_SIZE (mode
))
7358 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7359 && !COMPLEX_MODE_P (mode
))
7360 regno
= FIRST_SSE_REG
;
7364 if (mode
== SFmode
|| mode
== DFmode
)
7365 regno
= FIRST_SSE_REG
;
7371 return gen_rtx_REG (orig_mode
, regno
);
7375 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7376 enum machine_mode orig_mode
, enum machine_mode mode
)
7378 const_tree fn
, fntype
;
7381 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7382 fn
= fntype_or_decl
;
7383 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7385 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7386 return function_value_ms_64 (orig_mode
, mode
);
7387 else if (TARGET_64BIT
)
7388 return function_value_64 (orig_mode
, mode
, valtype
);
7390 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7394 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7395 bool outgoing ATTRIBUTE_UNUSED
)
7397 enum machine_mode mode
, orig_mode
;
7399 orig_mode
= TYPE_MODE (valtype
);
7400 mode
= type_natural_mode (valtype
, NULL
);
7401 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7404 /* Pointer function arguments and return values are promoted to
7407 static enum machine_mode
7408 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7409 int *punsignedp
, const_tree fntype
,
7412 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7414 if (upc_shared_type_p (TREE_TYPE (type
)))
7417 return TYPE_MODE (upc_pts_rep_type_node
);
7419 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7422 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7426 /* Return true if a structure, union or array with MODE containing FIELD
7427 should be accessed using BLKmode. */
7430 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7432 /* Union with XFmode must be in BLKmode. */
7433 return (mode
== XFmode
7434 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7435 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7439 ix86_libcall_value (enum machine_mode mode
)
7441 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7444 /* Return true iff type is returned in memory. */
7446 static bool ATTRIBUTE_UNUSED
7447 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7451 if (mode
== BLKmode
)
7454 size
= int_size_in_bytes (type
);
7456 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7459 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7461 /* User-created vectors small enough to fit in EAX. */
7465 /* MMX/3dNow values are returned in MM0,
7466 except when it doesn't exits or the ABI prescribes otherwise. */
7468 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7470 /* SSE values are returned in XMM0, except when it doesn't exist. */
7474 /* AVX values are returned in YMM0, except when it doesn't exist. */
7485 /* OImode shouldn't be used directly. */
7486 gcc_assert (mode
!= OImode
);
7491 static bool ATTRIBUTE_UNUSED
7492 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7494 int needed_intregs
, needed_sseregs
;
7495 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7498 static bool ATTRIBUTE_UNUSED
7499 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7501 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7503 /* __m128 is returned in xmm0. */
7504 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7505 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7508 /* Otherwise, the size must be exactly in [1248]. */
7509 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7513 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7515 #ifdef SUBTARGET_RETURN_IN_MEMORY
7516 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7518 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7522 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7523 return return_in_memory_ms_64 (type
, mode
);
7525 return return_in_memory_64 (type
, mode
);
7528 return return_in_memory_32 (type
, mode
);
7532 /* When returning SSE vector types, we have a choice of either
7533 (1) being abi incompatible with a -march switch, or
7534 (2) generating an error.
7535 Given no good solution, I think the safest thing is one warning.
7536 The user won't be able to use -Werror, but....
7538 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7539 called in response to actually generating a caller or callee that
7540 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7541 via aggregate_value_p for general type probing from tree-ssa. */
7544 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7546 static bool warnedsse
, warnedmmx
;
7548 if (!TARGET_64BIT
&& type
)
7550 /* Look at the return type of the function, not the function type. */
7551 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7553 if (!TARGET_SSE
&& !warnedsse
)
7556 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7559 warning (0, "SSE vector return without SSE enabled "
7564 if (!TARGET_MMX
&& !warnedmmx
)
7566 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7569 warning (0, "MMX vector return without MMX enabled "
7579 /* Create the va_list data type. */
7581 /* Returns the calling convention specific va_list date type.
7582 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7585 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7587 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7589 /* For i386 we use plain pointer to argument area. */
7590 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7591 return build_pointer_type (char_type_node
);
7593 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7594 type_decl
= build_decl (BUILTINS_LOCATION
,
7595 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7597 f_gpr
= build_decl (BUILTINS_LOCATION
,
7598 FIELD_DECL
, get_identifier ("gp_offset"),
7599 unsigned_type_node
);
7600 f_fpr
= build_decl (BUILTINS_LOCATION
,
7601 FIELD_DECL
, get_identifier ("fp_offset"),
7602 unsigned_type_node
);
7603 f_ovf
= build_decl (BUILTINS_LOCATION
,
7604 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7606 f_sav
= build_decl (BUILTINS_LOCATION
,
7607 FIELD_DECL
, get_identifier ("reg_save_area"),
7610 va_list_gpr_counter_field
= f_gpr
;
7611 va_list_fpr_counter_field
= f_fpr
;
7613 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7614 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7615 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7616 DECL_FIELD_CONTEXT (f_sav
) = record
;
7618 TYPE_STUB_DECL (record
) = type_decl
;
7619 TYPE_NAME (record
) = type_decl
;
7620 TYPE_FIELDS (record
) = f_gpr
;
7621 DECL_CHAIN (f_gpr
) = f_fpr
;
7622 DECL_CHAIN (f_fpr
) = f_ovf
;
7623 DECL_CHAIN (f_ovf
) = f_sav
;
7625 layout_type (record
);
7627 /* The correct type is an array type of one element. */
7628 return build_array_type (record
, build_index_type (size_zero_node
));
7631 /* Setup the builtin va_list data type and for 64-bit the additional
7632 calling convention specific va_list data types. */
7635 ix86_build_builtin_va_list (void)
7637 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7639 /* Initialize abi specific va_list builtin types. */
7643 if (ix86_abi
== MS_ABI
)
7645 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7646 if (TREE_CODE (t
) != RECORD_TYPE
)
7647 t
= build_variant_type_copy (t
);
7648 sysv_va_list_type_node
= t
;
7653 if (TREE_CODE (t
) != RECORD_TYPE
)
7654 t
= build_variant_type_copy (t
);
7655 sysv_va_list_type_node
= t
;
7657 if (ix86_abi
!= MS_ABI
)
7659 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7660 if (TREE_CODE (t
) != RECORD_TYPE
)
7661 t
= build_variant_type_copy (t
);
7662 ms_va_list_type_node
= t
;
7667 if (TREE_CODE (t
) != RECORD_TYPE
)
7668 t
= build_variant_type_copy (t
);
7669 ms_va_list_type_node
= t
;
7676 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7679 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7685 /* GPR size of varargs save area. */
7686 if (cfun
->va_list_gpr_size
)
7687 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7689 ix86_varargs_gpr_size
= 0;
7691 /* FPR size of varargs save area. We don't need it if we don't pass
7692 anything in SSE registers. */
7693 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7694 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7696 ix86_varargs_fpr_size
= 0;
7698 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7701 save_area
= frame_pointer_rtx
;
7702 set
= get_varargs_alias_set ();
7704 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7705 if (max
> X86_64_REGPARM_MAX
)
7706 max
= X86_64_REGPARM_MAX
;
7708 for (i
= cum
->regno
; i
< max
; i
++)
7710 mem
= gen_rtx_MEM (word_mode
,
7711 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7712 MEM_NOTRAP_P (mem
) = 1;
7713 set_mem_alias_set (mem
, set
);
7714 emit_move_insn (mem
,
7715 gen_rtx_REG (word_mode
,
7716 x86_64_int_parameter_registers
[i
]));
7719 if (ix86_varargs_fpr_size
)
7721 enum machine_mode smode
;
7724 /* Now emit code to save SSE registers. The AX parameter contains number
7725 of SSE parameter registers used to call this function, though all we
7726 actually check here is the zero/non-zero status. */
7728 label
= gen_label_rtx ();
7729 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7730 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7733 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7734 we used movdqa (i.e. TImode) instead? Perhaps even better would
7735 be if we could determine the real mode of the data, via a hook
7736 into pass_stdarg. Ignore all that for now. */
7738 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7739 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7741 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7742 if (max
> X86_64_SSE_REGPARM_MAX
)
7743 max
= X86_64_SSE_REGPARM_MAX
;
7745 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7747 mem
= plus_constant (Pmode
, save_area
,
7748 i
* 16 + ix86_varargs_gpr_size
);
7749 mem
= gen_rtx_MEM (smode
, mem
);
7750 MEM_NOTRAP_P (mem
) = 1;
7751 set_mem_alias_set (mem
, set
);
7752 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7754 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7762 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7764 alias_set_type set
= get_varargs_alias_set ();
7767 /* Reset to zero, as there might be a sysv vaarg used
7769 ix86_varargs_gpr_size
= 0;
7770 ix86_varargs_fpr_size
= 0;
7772 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7776 mem
= gen_rtx_MEM (Pmode
,
7777 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7778 i
* UNITS_PER_WORD
));
7779 MEM_NOTRAP_P (mem
) = 1;
7780 set_mem_alias_set (mem
, set
);
7782 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7783 emit_move_insn (mem
, reg
);
7788 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7789 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7792 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7793 CUMULATIVE_ARGS next_cum
;
7796 /* This argument doesn't appear to be used anymore. Which is good,
7797 because the old code here didn't suppress rtl generation. */
7798 gcc_assert (!no_rtl
);
7803 fntype
= TREE_TYPE (current_function_decl
);
7805 /* For varargs, we do not want to skip the dummy va_dcl argument.
7806 For stdargs, we do want to skip the last named argument. */
7808 if (stdarg_p (fntype
))
7809 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7812 if (cum
->call_abi
== MS_ABI
)
7813 setup_incoming_varargs_ms_64 (&next_cum
);
7815 setup_incoming_varargs_64 (&next_cum
);
7818 /* Checks if TYPE is of kind va_list char *. */
7821 is_va_list_char_pointer (tree type
)
7825 /* For 32-bit it is always true. */
7828 canonic
= ix86_canonical_va_list_type (type
);
7829 return (canonic
== ms_va_list_type_node
7830 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7833 /* Implement va_start. */
7836 ix86_va_start (tree valist
, rtx nextarg
)
7838 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7839 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7840 tree gpr
, fpr
, ovf
, sav
, t
;
7844 if (flag_split_stack
7845 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7847 unsigned int scratch_regno
;
7849 /* When we are splitting the stack, we can't refer to the stack
7850 arguments using internal_arg_pointer, because they may be on
7851 the old stack. The split stack prologue will arrange to
7852 leave a pointer to the old stack arguments in a scratch
7853 register, which we here copy to a pseudo-register. The split
7854 stack prologue can't set the pseudo-register directly because
7855 it (the prologue) runs before any registers have been saved. */
7857 scratch_regno
= split_stack_prologue_scratch_regno ();
7858 if (scratch_regno
!= INVALID_REGNUM
)
7862 reg
= gen_reg_rtx (Pmode
);
7863 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7866 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7870 push_topmost_sequence ();
7871 emit_insn_after (seq
, entry_of_function ());
7872 pop_topmost_sequence ();
7876 /* Only 64bit target needs something special. */
7877 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7879 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7880 std_expand_builtin_va_start (valist
, nextarg
);
7885 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7886 next
= expand_binop (ptr_mode
, add_optab
,
7887 cfun
->machine
->split_stack_varargs_pointer
,
7888 crtl
->args
.arg_offset_rtx
,
7889 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7890 convert_move (va_r
, next
, 0);
7895 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7896 f_fpr
= DECL_CHAIN (f_gpr
);
7897 f_ovf
= DECL_CHAIN (f_fpr
);
7898 f_sav
= DECL_CHAIN (f_ovf
);
7900 valist
= build_simple_mem_ref (valist
);
7901 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7902 /* The following should be folded into the MEM_REF offset. */
7903 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7905 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7907 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7909 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7912 /* Count number of gp and fp argument registers used. */
7913 words
= crtl
->args
.info
.words
;
7914 n_gpr
= crtl
->args
.info
.regno
;
7915 n_fpr
= crtl
->args
.info
.sse_regno
;
7917 if (cfun
->va_list_gpr_size
)
7919 type
= TREE_TYPE (gpr
);
7920 t
= build2 (MODIFY_EXPR
, type
,
7921 gpr
, build_int_cst (type
, n_gpr
* 8));
7922 TREE_SIDE_EFFECTS (t
) = 1;
7923 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7926 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7928 type
= TREE_TYPE (fpr
);
7929 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7930 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7931 TREE_SIDE_EFFECTS (t
) = 1;
7932 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7935 /* Find the overflow area. */
7936 type
= TREE_TYPE (ovf
);
7937 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7938 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7940 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7941 t
= make_tree (type
, ovf_rtx
);
7943 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
7944 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7945 TREE_SIDE_EFFECTS (t
) = 1;
7946 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7948 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
7950 /* Find the register save area.
7951 Prologue of the function save it right above stack frame. */
7952 type
= TREE_TYPE (sav
);
7953 t
= make_tree (type
, frame_pointer_rtx
);
7954 if (!ix86_varargs_gpr_size
)
7955 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
7956 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7957 TREE_SIDE_EFFECTS (t
) = 1;
7958 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7962 /* Implement va_arg. */
7965 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7968 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7969 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7970 tree gpr
, fpr
, ovf
, sav
, t
;
7972 tree lab_false
, lab_over
= NULL_TREE
;
7977 enum machine_mode nat_mode
;
7978 unsigned int arg_boundary
;
7980 /* Only 64bit target needs something special. */
7981 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7982 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7984 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7985 f_fpr
= DECL_CHAIN (f_gpr
);
7986 f_ovf
= DECL_CHAIN (f_fpr
);
7987 f_sav
= DECL_CHAIN (f_ovf
);
7989 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
7990 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
7991 valist
= build_va_arg_indirect_ref (valist
);
7992 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
7993 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
7994 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
7996 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7998 type
= build_pointer_type (type
);
7999 size
= int_size_in_bytes (type
);
8000 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8002 nat_mode
= type_natural_mode (type
, NULL
);
8011 /* Unnamed 256bit vector mode parameters are passed on stack. */
8012 if (!TARGET_64BIT_MS_ABI
)
8019 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8020 type
, 0, X86_64_REGPARM_MAX
,
8021 X86_64_SSE_REGPARM_MAX
, intreg
,
8026 /* Pull the value out of the saved registers. */
8028 addr
= create_tmp_var (ptr_type_node
, "addr");
8032 int needed_intregs
, needed_sseregs
;
8034 tree int_addr
, sse_addr
;
8036 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8037 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8039 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8041 need_temp
= (!REG_P (container
)
8042 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8043 || TYPE_ALIGN (type
) > 128));
8045 /* In case we are passing structure, verify that it is consecutive block
8046 on the register save area. If not we need to do moves. */
8047 if (!need_temp
&& !REG_P (container
))
8049 /* Verify that all registers are strictly consecutive */
8050 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8054 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8056 rtx slot
= XVECEXP (container
, 0, i
);
8057 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8058 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8066 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8068 rtx slot
= XVECEXP (container
, 0, i
);
8069 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8070 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8082 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8083 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8086 /* First ensure that we fit completely in registers. */
8089 t
= build_int_cst (TREE_TYPE (gpr
),
8090 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8091 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8092 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8093 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8094 gimplify_and_add (t
, pre_p
);
8098 t
= build_int_cst (TREE_TYPE (fpr
),
8099 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8100 + X86_64_REGPARM_MAX
* 8);
8101 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8102 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8103 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8104 gimplify_and_add (t
, pre_p
);
8107 /* Compute index to start of area used for integer regs. */
8110 /* int_addr = gpr + sav; */
8111 t
= fold_build_pointer_plus (sav
, gpr
);
8112 gimplify_assign (int_addr
, t
, pre_p
);
8116 /* sse_addr = fpr + sav; */
8117 t
= fold_build_pointer_plus (sav
, fpr
);
8118 gimplify_assign (sse_addr
, t
, pre_p
);
8122 int i
, prev_size
= 0;
8123 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8126 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8127 gimplify_assign (addr
, t
, pre_p
);
8129 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8131 rtx slot
= XVECEXP (container
, 0, i
);
8132 rtx reg
= XEXP (slot
, 0);
8133 enum machine_mode mode
= GET_MODE (reg
);
8139 tree dest_addr
, dest
;
8140 int cur_size
= GET_MODE_SIZE (mode
);
8142 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8143 prev_size
= INTVAL (XEXP (slot
, 1));
8144 if (prev_size
+ cur_size
> size
)
8146 cur_size
= size
- prev_size
;
8147 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8148 if (mode
== BLKmode
)
8151 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8152 if (mode
== GET_MODE (reg
))
8153 addr_type
= build_pointer_type (piece_type
);
8155 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8157 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8160 if (SSE_REGNO_P (REGNO (reg
)))
8162 src_addr
= sse_addr
;
8163 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8167 src_addr
= int_addr
;
8168 src_offset
= REGNO (reg
) * 8;
8170 src_addr
= fold_convert (addr_type
, src_addr
);
8171 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8173 dest_addr
= fold_convert (daddr_type
, addr
);
8174 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8175 if (cur_size
== GET_MODE_SIZE (mode
))
8177 src
= build_va_arg_indirect_ref (src_addr
);
8178 dest
= build_va_arg_indirect_ref (dest_addr
);
8180 gimplify_assign (dest
, src
, pre_p
);
8185 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8186 3, dest_addr
, src_addr
,
8187 size_int (cur_size
));
8188 gimplify_and_add (copy
, pre_p
);
8190 prev_size
+= cur_size
;
8196 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8197 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8198 gimplify_assign (gpr
, t
, pre_p
);
8203 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8204 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8205 gimplify_assign (fpr
, t
, pre_p
);
8208 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8210 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8213 /* ... otherwise out of the overflow area. */
8215 /* When we align parameter on stack for caller, if the parameter
8216 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8217 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8218 here with caller. */
8219 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8220 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8221 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8223 /* Care for on-stack alignment if needed. */
8224 if (arg_boundary
<= 64 || size
== 0)
8228 HOST_WIDE_INT align
= arg_boundary
/ 8;
8229 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8230 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8231 build_int_cst (TREE_TYPE (t
), -align
));
8234 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8235 gimplify_assign (addr
, t
, pre_p
);
8237 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8238 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8241 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8243 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8244 addr
= fold_convert (ptrtype
, addr
);
8247 addr
= build_va_arg_indirect_ref (addr
);
8248 return build_va_arg_indirect_ref (addr
);
8251 /* Return true if OPNUM's MEM should be matched
8252 in movabs* patterns. */
8255 ix86_check_movabs (rtx insn
, int opnum
)
8259 set
= PATTERN (insn
);
8260 if (GET_CODE (set
) == PARALLEL
)
8261 set
= XVECEXP (set
, 0, 0);
8262 gcc_assert (GET_CODE (set
) == SET
);
8263 mem
= XEXP (set
, opnum
);
8264 while (GET_CODE (mem
) == SUBREG
)
8265 mem
= SUBREG_REG (mem
);
8266 gcc_assert (MEM_P (mem
));
8267 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8270 /* Initialize the table of extra 80387 mathematical constants. */
8273 init_ext_80387_constants (void)
8275 static const char * cst
[5] =
8277 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8278 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8279 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8280 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8281 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8285 for (i
= 0; i
< 5; i
++)
8287 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8288 /* Ensure each constant is rounded to XFmode precision. */
8289 real_convert (&ext_80387_constants_table
[i
],
8290 XFmode
, &ext_80387_constants_table
[i
]);
8293 ext_80387_constants_init
= 1;
8296 /* Return non-zero if the constant is something that
8297 can be loaded with a special instruction. */
8300 standard_80387_constant_p (rtx x
)
8302 enum machine_mode mode
= GET_MODE (x
);
8306 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8309 if (x
== CONST0_RTX (mode
))
8311 if (x
== CONST1_RTX (mode
))
8314 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8316 /* For XFmode constants, try to find a special 80387 instruction when
8317 optimizing for size or on those CPUs that benefit from them. */
8319 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8323 if (! ext_80387_constants_init
)
8324 init_ext_80387_constants ();
8326 for (i
= 0; i
< 5; i
++)
8327 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8331 /* Load of the constant -0.0 or -1.0 will be split as
8332 fldz;fchs or fld1;fchs sequence. */
8333 if (real_isnegzero (&r
))
8335 if (real_identical (&r
, &dconstm1
))
8341 /* Return the opcode of the special instruction to be used to load
8345 standard_80387_constant_opcode (rtx x
)
8347 switch (standard_80387_constant_p (x
))
8371 /* Return the CONST_DOUBLE representing the 80387 constant that is
8372 loaded by the specified special instruction. The argument IDX
8373 matches the return value from standard_80387_constant_p. */
8376 standard_80387_constant_rtx (int idx
)
8380 if (! ext_80387_constants_init
)
8381 init_ext_80387_constants ();
8397 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8401 /* Return 1 if X is all 0s and 2 if x is all 1s
8402 in supported SSE/AVX vector mode. */
8405 standard_sse_constant_p (rtx x
)
8407 enum machine_mode mode
= GET_MODE (x
);
8409 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8411 if (vector_all_ones_operand (x
, mode
))
8433 /* Return the opcode of the special instruction to be used to load
8437 standard_sse_constant_opcode (rtx insn
, rtx x
)
8439 switch (standard_sse_constant_p (x
))
8442 switch (get_attr_mode (insn
))
8445 return "%vpxor\t%0, %d0";
8447 return "%vxorpd\t%0, %d0";
8449 return "%vxorps\t%0, %d0";
8452 return "vpxor\t%x0, %x0, %x0";
8454 return "vxorpd\t%x0, %x0, %x0";
8456 return "vxorps\t%x0, %x0, %x0";
8464 return "vpcmpeqd\t%0, %0, %0";
8466 return "pcmpeqd\t%0, %0";
8474 /* Returns true if OP contains a symbol reference */
8477 symbolic_reference_mentioned_p (rtx op
)
8482 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8485 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8486 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8492 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8493 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8497 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8504 /* Return true if it is appropriate to emit `ret' instructions in the
8505 body of a function. Do this only if the epilogue is simple, needing a
8506 couple of insns. Prior to reloading, we can't tell how many registers
8507 must be saved, so return false then. Return false if there is no frame
8508 marker to de-allocate. */
8511 ix86_can_use_return_insn_p (void)
8513 struct ix86_frame frame
;
8515 if (! reload_completed
|| frame_pointer_needed
)
8518 /* Don't allow more than 32k pop, since that's all we can do
8519 with one instruction. */
8520 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8523 ix86_compute_frame_layout (&frame
);
8524 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8525 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8528 /* Value should be nonzero if functions must have frame pointers.
8529 Zero means the frame pointer need not be set up (and parms may
8530 be accessed via the stack pointer) in functions that seem suitable. */
8533 ix86_frame_pointer_required (void)
8535 /* If we accessed previous frames, then the generated code expects
8536 to be able to access the saved ebp value in our frame. */
8537 if (cfun
->machine
->accesses_prev_frame
)
8540 /* Several x86 os'es need a frame pointer for other reasons,
8541 usually pertaining to setjmp. */
8542 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8545 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8546 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8549 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8550 allocation is 4GB. */
8551 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8554 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8555 turns off the frame pointer by default. Turn it back on now if
8556 we've not got a leaf function. */
8557 if (TARGET_OMIT_LEAF_FRAME_POINTER
8559 || ix86_current_function_calls_tls_descriptor
))
8562 if (crtl
->profile
&& !flag_fentry
)
8568 /* Record that the current function accesses previous call frames. */
8571 ix86_setup_frame_addresses (void)
8573 cfun
->machine
->accesses_prev_frame
= 1;
8576 #ifndef USE_HIDDEN_LINKONCE
8577 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8578 # define USE_HIDDEN_LINKONCE 1
8580 # define USE_HIDDEN_LINKONCE 0
8584 static int pic_labels_used
;
8586 /* Fills in the label name that should be used for a pc thunk for
8587 the given register. */
8590 get_pc_thunk_name (char name
[32], unsigned int regno
)
8592 gcc_assert (!TARGET_64BIT
);
8594 if (USE_HIDDEN_LINKONCE
)
8595 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8597 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8601 /* This function generates code for -fpic that loads %ebx with
8602 the return address of the caller and then returns. */
8605 ix86_code_end (void)
8610 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8615 if (!(pic_labels_used
& (1 << regno
)))
8618 get_pc_thunk_name (name
, regno
);
8620 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8621 get_identifier (name
),
8622 build_function_type_list (void_type_node
, NULL_TREE
));
8623 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8624 NULL_TREE
, void_type_node
);
8625 TREE_PUBLIC (decl
) = 1;
8626 TREE_STATIC (decl
) = 1;
8627 DECL_IGNORED_P (decl
) = 1;
8632 switch_to_section (darwin_sections
[text_coal_section
]);
8633 fputs ("\t.weak_definition\t", asm_out_file
);
8634 assemble_name (asm_out_file
, name
);
8635 fputs ("\n\t.private_extern\t", asm_out_file
);
8636 assemble_name (asm_out_file
, name
);
8637 putc ('\n', asm_out_file
);
8638 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8639 DECL_WEAK (decl
) = 1;
8643 if (USE_HIDDEN_LINKONCE
)
8645 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8647 targetm
.asm_out
.unique_section (decl
, 0);
8648 switch_to_section (get_named_section (decl
, NULL
, 0));
8650 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8651 fputs ("\t.hidden\t", asm_out_file
);
8652 assemble_name (asm_out_file
, name
);
8653 putc ('\n', asm_out_file
);
8654 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8658 switch_to_section (text_section
);
8659 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8662 DECL_INITIAL (decl
) = make_node (BLOCK
);
8663 current_function_decl
= decl
;
8664 init_function_start (decl
);
8665 first_function_block_is_cold
= false;
8666 /* Make sure unwind info is emitted for the thunk if needed. */
8667 final_start_function (emit_barrier (), asm_out_file
, 1);
8669 /* Pad stack IP move with 4 instructions (two NOPs count
8670 as one instruction). */
8671 if (TARGET_PAD_SHORT_FUNCTION
)
8676 fputs ("\tnop\n", asm_out_file
);
8679 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8680 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8681 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8682 fputs ("\tret\n", asm_out_file
);
8683 final_end_function ();
8684 init_insn_lengths ();
8685 free_after_compilation (cfun
);
8687 current_function_decl
= NULL
;
8690 if (flag_split_stack
)
8691 file_end_indicate_split_stack ();
8694 /* Emit code for the SET_GOT patterns. */
8697 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8703 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8705 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8706 xops
[2] = gen_rtx_MEM (Pmode
,
8707 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8708 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8710 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8711 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8712 an unadorned address. */
8713 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8714 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8715 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8719 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8723 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8725 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8728 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8729 is what will be referenced by the Mach-O PIC subsystem. */
8731 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8734 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8735 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8740 get_pc_thunk_name (name
, REGNO (dest
));
8741 pic_labels_used
|= 1 << REGNO (dest
);
8743 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8744 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8745 output_asm_insn ("call\t%X2", xops
);
8746 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8747 is what will be referenced by the Mach-O PIC subsystem. */
8750 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8752 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8753 CODE_LABEL_NUMBER (label
));
8758 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8763 /* Generate an "push" pattern for input ARG. */
8768 struct machine_function
*m
= cfun
->machine
;
8770 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8771 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8772 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8774 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8775 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8777 return gen_rtx_SET (VOIDmode
,
8778 gen_rtx_MEM (word_mode
,
8779 gen_rtx_PRE_DEC (Pmode
,
8780 stack_pointer_rtx
)),
8784 /* Generate an "pop" pattern for input ARG. */
8789 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8790 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8792 return gen_rtx_SET (VOIDmode
,
8794 gen_rtx_MEM (word_mode
,
8795 gen_rtx_POST_INC (Pmode
,
8796 stack_pointer_rtx
)));
8799 /* Return >= 0 if there is an unused call-clobbered register available
8800 for the entire function. */
8803 ix86_select_alt_pic_regnum (void)
8807 && !ix86_current_function_calls_tls_descriptor
)
8810 /* Can't use the same register for both PIC and DRAP. */
8812 drap
= REGNO (crtl
->drap_reg
);
8815 for (i
= 2; i
>= 0; --i
)
8816 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8820 return INVALID_REGNUM
;
8823 /* Return TRUE if we need to save REGNO. */
8826 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
8828 if (pic_offset_table_rtx
8829 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8830 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8832 || crtl
->calls_eh_return
8833 || crtl
->uses_const_pool
))
8834 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
8836 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8841 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8842 if (test
== INVALID_REGNUM
)
8849 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8852 return (df_regs_ever_live_p (regno
)
8853 && !call_used_regs
[regno
]
8854 && !fixed_regs
[regno
]
8855 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8858 /* Return number of saved general prupose registers. */
8861 ix86_nsaved_regs (void)
8866 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8867 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8872 /* Return number of saved SSE registrers. */
8875 ix86_nsaved_sseregs (void)
8880 if (!TARGET_64BIT_MS_ABI
)
8882 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8883 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8888 /* Given FROM and TO register numbers, say whether this elimination is
8889 allowed. If stack alignment is needed, we can only replace argument
8890 pointer with hard frame pointer, or replace frame pointer with stack
8891 pointer. Otherwise, frame pointer elimination is automatically
8892 handled and all other eliminations are valid. */
8895 ix86_can_eliminate (const int from
, const int to
)
8897 if (stack_realign_fp
)
8898 return ((from
== ARG_POINTER_REGNUM
8899 && to
== HARD_FRAME_POINTER_REGNUM
)
8900 || (from
== FRAME_POINTER_REGNUM
8901 && to
== STACK_POINTER_REGNUM
));
8903 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
8906 /* Return the offset between two registers, one to be eliminated, and the other
8907 its replacement, at the start of a routine. */
8910 ix86_initial_elimination_offset (int from
, int to
)
8912 struct ix86_frame frame
;
8913 ix86_compute_frame_layout (&frame
);
8915 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
8916 return frame
.hard_frame_pointer_offset
;
8917 else if (from
== FRAME_POINTER_REGNUM
8918 && to
== HARD_FRAME_POINTER_REGNUM
)
8919 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
8922 gcc_assert (to
== STACK_POINTER_REGNUM
);
8924 if (from
== ARG_POINTER_REGNUM
)
8925 return frame
.stack_pointer_offset
;
8927 gcc_assert (from
== FRAME_POINTER_REGNUM
);
8928 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
8932 /* In a dynamically-aligned function, we can't know the offset from
8933 stack pointer to frame pointer, so we must ensure that setjmp
8934 eliminates fp against the hard fp (%ebp) rather than trying to
8935 index from %esp up to the top of the frame across a gap that is
8936 of unknown (at compile-time) size. */
8938 ix86_builtin_setjmp_frame_value (void)
8940 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
8943 /* When using -fsplit-stack, the allocation routines set a field in
8944 the TCB to the bottom of the stack plus this much space, measured
8947 #define SPLIT_STACK_AVAILABLE 256
8949 /* Fill structure ix86_frame about frame of currently computed function. */
8952 ix86_compute_frame_layout (struct ix86_frame
*frame
)
8954 unsigned HOST_WIDE_INT stack_alignment_needed
;
8955 HOST_WIDE_INT offset
;
8956 unsigned HOST_WIDE_INT preferred_alignment
;
8957 HOST_WIDE_INT size
= get_frame_size ();
8958 HOST_WIDE_INT to_allocate
;
8960 frame
->nregs
= ix86_nsaved_regs ();
8961 frame
->nsseregs
= ix86_nsaved_sseregs ();
8963 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8964 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
8966 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
8967 function prologues and leaf. */
8968 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
8969 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
8970 || ix86_current_function_calls_tls_descriptor
))
8972 preferred_alignment
= 16;
8973 stack_alignment_needed
= 16;
8974 crtl
->preferred_stack_boundary
= 128;
8975 crtl
->stack_alignment_needed
= 128;
8978 gcc_assert (!size
|| stack_alignment_needed
);
8979 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
8980 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
8982 /* For SEH we have to limit the amount of code movement into the prologue.
8983 At present we do this via a BLOCKAGE, at which point there's very little
8984 scheduling that can be done, which means that there's very little point
8985 in doing anything except PUSHs. */
8987 cfun
->machine
->use_fast_prologue_epilogue
= false;
8989 /* During reload iteration the amount of registers saved can change.
8990 Recompute the value as needed. Do not recompute when amount of registers
8991 didn't change as reload does multiple calls to the function and does not
8992 expect the decision to change within single iteration. */
8993 else if (!optimize_function_for_size_p (cfun
)
8994 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
8996 int count
= frame
->nregs
;
8997 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
8999 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9001 /* The fast prologue uses move instead of push to save registers. This
9002 is significantly longer, but also executes faster as modern hardware
9003 can execute the moves in parallel, but can't do that for push/pop.
9005 Be careful about choosing what prologue to emit: When function takes
9006 many instructions to execute we may use slow version as well as in
9007 case function is known to be outside hot spot (this is known with
9008 feedback only). Weight the size of function by number of registers
9009 to save as it is cheap to use one or two push instructions but very
9010 slow to use many of them. */
9012 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9013 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9014 || (flag_branch_probabilities
9015 && node
->frequency
< NODE_FREQUENCY_HOT
))
9016 cfun
->machine
->use_fast_prologue_epilogue
= false;
9018 cfun
->machine
->use_fast_prologue_epilogue
9019 = !expensive_function_p (count
);
9022 frame
->save_regs_using_mov
9023 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9024 /* If static stack checking is enabled and done with probes,
9025 the registers need to be saved before allocating the frame. */
9026 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9028 /* Skip return address. */
9029 offset
= UNITS_PER_WORD
;
9031 /* Skip pushed static chain. */
9032 if (ix86_static_chain_on_stack
)
9033 offset
+= UNITS_PER_WORD
;
9035 /* Skip saved base pointer. */
9036 if (frame_pointer_needed
)
9037 offset
+= UNITS_PER_WORD
;
9038 frame
->hfp_save_offset
= offset
;
9040 /* The traditional frame pointer location is at the top of the frame. */
9041 frame
->hard_frame_pointer_offset
= offset
;
9043 /* Register save area */
9044 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9045 frame
->reg_save_offset
= offset
;
9047 /* On SEH target, registers are pushed just before the frame pointer
9050 frame
->hard_frame_pointer_offset
= offset
;
9052 /* Align and set SSE register save area. */
9053 if (frame
->nsseregs
)
9055 /* The only ABI that has saved SSE registers (Win64) also has a
9056 16-byte aligned default stack, and thus we don't need to be
9057 within the re-aligned local stack frame to save them. */
9058 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9059 offset
= (offset
+ 16 - 1) & -16;
9060 offset
+= frame
->nsseregs
* 16;
9062 frame
->sse_reg_save_offset
= offset
;
9064 /* The re-aligned stack starts here. Values before this point are not
9065 directly comparable with values below this point. In order to make
9066 sure that no value happens to be the same before and after, force
9067 the alignment computation below to add a non-zero value. */
9068 if (stack_realign_fp
)
9069 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9072 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9073 offset
+= frame
->va_arg_size
;
9075 /* Align start of frame for local function. */
9076 if (stack_realign_fp
9077 || offset
!= frame
->sse_reg_save_offset
9080 || cfun
->calls_alloca
9081 || ix86_current_function_calls_tls_descriptor
)
9082 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9084 /* Frame pointer points here. */
9085 frame
->frame_pointer_offset
= offset
;
9089 /* Add outgoing arguments area. Can be skipped if we eliminated
9090 all the function calls as dead code.
9091 Skipping is however impossible when function calls alloca. Alloca
9092 expander assumes that last crtl->outgoing_args_size
9093 of stack frame are unused. */
9094 if (ACCUMULATE_OUTGOING_ARGS
9095 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9096 || ix86_current_function_calls_tls_descriptor
))
9098 offset
+= crtl
->outgoing_args_size
;
9099 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9102 frame
->outgoing_arguments_size
= 0;
9104 /* Align stack boundary. Only needed if we're calling another function
9106 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9107 || ix86_current_function_calls_tls_descriptor
)
9108 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9110 /* We've reached end of stack frame. */
9111 frame
->stack_pointer_offset
= offset
;
9113 /* Size prologue needs to allocate. */
9114 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9116 if ((!to_allocate
&& frame
->nregs
<= 1)
9117 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9118 frame
->save_regs_using_mov
= false;
9120 if (ix86_using_red_zone ()
9121 && crtl
->sp_is_unchanging
9123 && !ix86_current_function_calls_tls_descriptor
)
9125 frame
->red_zone_size
= to_allocate
;
9126 if (frame
->save_regs_using_mov
)
9127 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9128 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9129 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9132 frame
->red_zone_size
= 0;
9133 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9135 /* The SEH frame pointer location is near the bottom of the frame.
9136 This is enforced by the fact that the difference between the
9137 stack pointer and the frame pointer is limited to 240 bytes in
9138 the unwind data structure. */
9143 /* If we can leave the frame pointer where it is, do so. Also, returns
9144 the establisher frame for __builtin_frame_address (0). */
9145 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9146 if (diff
<= SEH_MAX_FRAME_SIZE
9147 && (diff
> 240 || (diff
& 15) != 0)
9148 && !crtl
->accesses_prior_frames
)
9150 /* Ideally we'd determine what portion of the local stack frame
9151 (within the constraint of the lowest 240) is most heavily used.
9152 But without that complication, simply bias the frame pointer
9153 by 128 bytes so as to maximize the amount of the local stack
9154 frame that is addressable with 8-bit offsets. */
9155 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9160 /* This is semi-inlined memory_address_length, but simplified
9161 since we know that we're always dealing with reg+offset, and
9162 to avoid having to create and discard all that rtl. */
9165 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9171 /* EBP and R13 cannot be encoded without an offset. */
9172 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9174 else if (IN_RANGE (offset
, -128, 127))
9177 /* ESP and R12 must be encoded with a SIB byte. */
9178 if (regno
== SP_REG
|| regno
== R12_REG
)
9184 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9185 The valid base registers are taken from CFUN->MACHINE->FS. */
9188 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9190 const struct machine_function
*m
= cfun
->machine
;
9191 rtx base_reg
= NULL
;
9192 HOST_WIDE_INT base_offset
= 0;
9194 if (m
->use_fast_prologue_epilogue
)
9196 /* Choose the base register most likely to allow the most scheduling
9197 opportunities. Generally FP is valid throughout the function,
9198 while DRAP must be reloaded within the epilogue. But choose either
9199 over the SP due to increased encoding size. */
9203 base_reg
= hard_frame_pointer_rtx
;
9204 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9206 else if (m
->fs
.drap_valid
)
9208 base_reg
= crtl
->drap_reg
;
9209 base_offset
= 0 - cfa_offset
;
9211 else if (m
->fs
.sp_valid
)
9213 base_reg
= stack_pointer_rtx
;
9214 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9219 HOST_WIDE_INT toffset
;
9222 /* Choose the base register with the smallest address encoding.
9223 With a tie, choose FP > DRAP > SP. */
9226 base_reg
= stack_pointer_rtx
;
9227 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9228 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9230 if (m
->fs
.drap_valid
)
9232 toffset
= 0 - cfa_offset
;
9233 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9236 base_reg
= crtl
->drap_reg
;
9237 base_offset
= toffset
;
9243 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9244 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9247 base_reg
= hard_frame_pointer_rtx
;
9248 base_offset
= toffset
;
9253 gcc_assert (base_reg
!= NULL
);
9255 return plus_constant (Pmode
, base_reg
, base_offset
);
9258 /* Emit code to save registers in the prologue. */
9261 ix86_emit_save_regs (void)
9266 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9267 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9269 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9270 RTX_FRAME_RELATED_P (insn
) = 1;
9274 /* Emit a single register save at CFA - CFA_OFFSET. */
9277 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9278 HOST_WIDE_INT cfa_offset
)
9280 struct machine_function
*m
= cfun
->machine
;
9281 rtx reg
= gen_rtx_REG (mode
, regno
);
9282 rtx mem
, addr
, base
, insn
;
9284 addr
= choose_baseaddr (cfa_offset
);
9285 mem
= gen_frame_mem (mode
, addr
);
9287 /* For SSE saves, we need to indicate the 128-bit alignment. */
9288 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9290 insn
= emit_move_insn (mem
, reg
);
9291 RTX_FRAME_RELATED_P (insn
) = 1;
9294 if (GET_CODE (base
) == PLUS
)
9295 base
= XEXP (base
, 0);
9296 gcc_checking_assert (REG_P (base
));
9298 /* When saving registers into a re-aligned local stack frame, avoid
9299 any tricky guessing by dwarf2out. */
9300 if (m
->fs
.realigned
)
9302 gcc_checking_assert (stack_realign_drap
);
9304 if (regno
== REGNO (crtl
->drap_reg
))
9306 /* A bit of a hack. We force the DRAP register to be saved in
9307 the re-aligned stack frame, which provides us with a copy
9308 of the CFA that will last past the prologue. Install it. */
9309 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9310 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9311 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9312 mem
= gen_rtx_MEM (mode
, addr
);
9313 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9317 /* The frame pointer is a stable reference within the
9318 aligned frame. Use it. */
9319 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9320 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9321 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9322 mem
= gen_rtx_MEM (mode
, addr
);
9323 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9324 gen_rtx_SET (VOIDmode
, mem
, reg
));
9328 /* The memory may not be relative to the current CFA register,
9329 which means that we may need to generate a new pattern for
9330 use by the unwind info. */
9331 else if (base
!= m
->fs
.cfa_reg
)
9333 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9334 m
->fs
.cfa_offset
- cfa_offset
);
9335 mem
= gen_rtx_MEM (mode
, addr
);
9336 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9340 /* Emit code to save registers using MOV insns.
9341 First register is stored at CFA - CFA_OFFSET. */
9343 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9347 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9348 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9350 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9351 cfa_offset
-= UNITS_PER_WORD
;
9355 /* Emit code to save SSE registers using MOV insns.
9356 First register is stored at CFA - CFA_OFFSET. */
9358 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9362 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9363 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9365 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9370 static GTY(()) rtx queued_cfa_restores
;
9372 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9373 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9374 Don't add the note if the previously saved value will be left untouched
9375 within stack red-zone till return, as unwinders can find the same value
9376 in the register and on the stack. */
9379 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9381 if (!crtl
->shrink_wrapped
9382 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9387 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9388 RTX_FRAME_RELATED_P (insn
) = 1;
9392 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9395 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9398 ix86_add_queued_cfa_restore_notes (rtx insn
)
9401 if (!queued_cfa_restores
)
9403 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9405 XEXP (last
, 1) = REG_NOTES (insn
);
9406 REG_NOTES (insn
) = queued_cfa_restores
;
9407 queued_cfa_restores
= NULL_RTX
;
9408 RTX_FRAME_RELATED_P (insn
) = 1;
9411 /* Expand prologue or epilogue stack adjustment.
9412 The pattern exist to put a dependency on all ebp-based memory accesses.
9413 STYLE should be negative if instructions should be marked as frame related,
9414 zero if %r11 register is live and cannot be freely used and positive
9418 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9419 int style
, bool set_cfa
)
9421 struct machine_function
*m
= cfun
->machine
;
9423 bool add_frame_related_expr
= false;
9425 if (Pmode
== SImode
)
9426 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9427 else if (x86_64_immediate_operand (offset
, DImode
))
9428 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9432 /* r11 is used by indirect sibcall return as well, set before the
9433 epilogue and used after the epilogue. */
9435 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9438 gcc_assert (src
!= hard_frame_pointer_rtx
9439 && dest
!= hard_frame_pointer_rtx
);
9440 tmp
= hard_frame_pointer_rtx
;
9442 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9444 add_frame_related_expr
= true;
9446 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9449 insn
= emit_insn (insn
);
9451 ix86_add_queued_cfa_restore_notes (insn
);
9457 gcc_assert (m
->fs
.cfa_reg
== src
);
9458 m
->fs
.cfa_offset
+= INTVAL (offset
);
9459 m
->fs
.cfa_reg
= dest
;
9461 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9462 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9463 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9464 RTX_FRAME_RELATED_P (insn
) = 1;
9468 RTX_FRAME_RELATED_P (insn
) = 1;
9469 if (add_frame_related_expr
)
9471 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9472 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9473 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9477 if (dest
== stack_pointer_rtx
)
9479 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9480 bool valid
= m
->fs
.sp_valid
;
9482 if (src
== hard_frame_pointer_rtx
)
9484 valid
= m
->fs
.fp_valid
;
9485 ooffset
= m
->fs
.fp_offset
;
9487 else if (src
== crtl
->drap_reg
)
9489 valid
= m
->fs
.drap_valid
;
9494 /* Else there are two possibilities: SP itself, which we set
9495 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9496 taken care of this by hand along the eh_return path. */
9497 gcc_checking_assert (src
== stack_pointer_rtx
9498 || offset
== const0_rtx
);
9501 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9502 m
->fs
.sp_valid
= valid
;
9506 /* Find an available register to be used as dynamic realign argument
9507 pointer regsiter. Such a register will be written in prologue and
9508 used in begin of body, so it must not be
9509 1. parameter passing register.
9511 We reuse static-chain register if it is available. Otherwise, we
9512 use DI for i386 and R13 for x86-64. We chose R13 since it has
9515 Return: the regno of chosen register. */
9518 find_drap_reg (void)
9520 tree decl
= cfun
->decl
;
9524 /* Use R13 for nested function or function need static chain.
9525 Since function with tail call may use any caller-saved
9526 registers in epilogue, DRAP must not use caller-saved
9527 register in such case. */
9528 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9535 /* Use DI for nested function or function need static chain.
9536 Since function with tail call may use any caller-saved
9537 registers in epilogue, DRAP must not use caller-saved
9538 register in such case. */
9539 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9542 /* Reuse static chain register if it isn't used for parameter
9544 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9546 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9547 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9554 /* Return minimum incoming stack alignment. */
9557 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9559 unsigned int incoming_stack_boundary
;
9561 /* Prefer the one specified at command line. */
9562 if (ix86_user_incoming_stack_boundary
)
9563 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9564 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9565 if -mstackrealign is used, it isn't used for sibcall check and
9566 estimated stack alignment is 128bit. */
9569 && ix86_force_align_arg_pointer
9570 && crtl
->stack_alignment_estimated
== 128)
9571 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9573 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9575 /* Incoming stack alignment can be changed on individual functions
9576 via force_align_arg_pointer attribute. We use the smallest
9577 incoming stack boundary. */
9578 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9579 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9580 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9581 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9583 /* The incoming stack frame has to be aligned at least at
9584 parm_stack_boundary. */
9585 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9586 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9588 /* Stack at entrance of main is aligned by runtime. We use the
9589 smallest incoming stack boundary. */
9590 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9591 && DECL_NAME (current_function_decl
)
9592 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9593 && DECL_FILE_SCOPE_P (current_function_decl
))
9594 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9596 return incoming_stack_boundary
;
9599 /* Update incoming stack boundary and estimated stack alignment. */
9602 ix86_update_stack_boundary (void)
9604 ix86_incoming_stack_boundary
9605 = ix86_minimum_incoming_stack_boundary (false);
9607 /* x86_64 vararg needs 16byte stack alignment for register save
9611 && crtl
->stack_alignment_estimated
< 128)
9612 crtl
->stack_alignment_estimated
= 128;
9615 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9616 needed or an rtx for DRAP otherwise. */
9619 ix86_get_drap_rtx (void)
9621 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9622 crtl
->need_drap
= true;
9624 if (stack_realign_drap
)
9626 /* Assign DRAP to vDRAP and returns vDRAP */
9627 unsigned int regno
= find_drap_reg ();
9632 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9633 crtl
->drap_reg
= arg_ptr
;
9636 drap_vreg
= copy_to_reg (arg_ptr
);
9640 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9643 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9644 RTX_FRAME_RELATED_P (insn
) = 1;
9652 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9655 ix86_internal_arg_pointer (void)
9657 return virtual_incoming_args_rtx
;
9660 struct scratch_reg
{
9665 /* Return a short-lived scratch register for use on function entry.
9666 In 32-bit mode, it is valid only after the registers are saved
9667 in the prologue. This register must be released by means of
9668 release_scratch_register_on_entry once it is dead. */
9671 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9679 /* We always use R11 in 64-bit mode. */
9684 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9686 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9688 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9689 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9690 int regparm
= ix86_function_regparm (fntype
, decl
);
9692 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9694 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9695 for the static chain register. */
9696 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9697 && drap_regno
!= AX_REG
)
9699 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
9700 for the static chain register. */
9701 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
9703 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
9705 /* ecx is the static chain register. */
9706 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
9708 && drap_regno
!= CX_REG
)
9710 else if (ix86_save_reg (BX_REG
, true))
9712 /* esi is the static chain register. */
9713 else if (!(regparm
== 3 && static_chain_p
)
9714 && ix86_save_reg (SI_REG
, true))
9716 else if (ix86_save_reg (DI_REG
, true))
9720 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9725 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9728 rtx insn
= emit_insn (gen_push (sr
->reg
));
9729 RTX_FRAME_RELATED_P (insn
) = 1;
9733 /* Release a scratch register obtained from the preceding function. */
9736 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9740 struct machine_function
*m
= cfun
->machine
;
9741 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9743 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9744 RTX_FRAME_RELATED_P (insn
) = 1;
9745 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9746 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9747 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9748 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9752 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9754 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9757 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9759 /* We skip the probe for the first interval + a small dope of 4 words and
9760 probe that many bytes past the specified size to maintain a protection
9761 area at the botton of the stack. */
9762 const int dope
= 4 * UNITS_PER_WORD
;
9763 rtx size_rtx
= GEN_INT (size
), last
;
9765 /* See if we have a constant small number of probes to generate. If so,
9766 that's the easy case. The run-time loop is made up of 11 insns in the
9767 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9768 for n # of intervals. */
9769 if (size
<= 5 * PROBE_INTERVAL
)
9771 HOST_WIDE_INT i
, adjust
;
9772 bool first_probe
= true;
9774 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9775 values of N from 1 until it exceeds SIZE. If only one probe is
9776 needed, this will not generate any code. Then adjust and probe
9777 to PROBE_INTERVAL + SIZE. */
9778 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9782 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9783 first_probe
= false;
9786 adjust
= PROBE_INTERVAL
;
9788 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9789 plus_constant (Pmode
, stack_pointer_rtx
,
9791 emit_stack_probe (stack_pointer_rtx
);
9795 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9797 adjust
= size
+ PROBE_INTERVAL
- i
;
9799 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9800 plus_constant (Pmode
, stack_pointer_rtx
,
9802 emit_stack_probe (stack_pointer_rtx
);
9804 /* Adjust back to account for the additional first interval. */
9805 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9806 plus_constant (Pmode
, stack_pointer_rtx
,
9807 PROBE_INTERVAL
+ dope
)));
9810 /* Otherwise, do the same as above, but in a loop. Note that we must be
9811 extra careful with variables wrapping around because we might be at
9812 the very top (or the very bottom) of the address space and we have
9813 to be able to handle this case properly; in particular, we use an
9814 equality test for the loop condition. */
9817 HOST_WIDE_INT rounded_size
;
9818 struct scratch_reg sr
;
9820 get_scratch_register_on_entry (&sr
);
9823 /* Step 1: round SIZE to the previous multiple of the interval. */
9825 rounded_size
= size
& -PROBE_INTERVAL
;
9828 /* Step 2: compute initial and final value of the loop counter. */
9830 /* SP = SP_0 + PROBE_INTERVAL. */
9831 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9832 plus_constant (Pmode
, stack_pointer_rtx
,
9833 - (PROBE_INTERVAL
+ dope
))));
9835 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9836 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9837 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9838 gen_rtx_PLUS (Pmode
, sr
.reg
,
9839 stack_pointer_rtx
)));
9844 while (SP != LAST_ADDR)
9846 SP = SP + PROBE_INTERVAL
9850 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9851 values of N from 1 until it is equal to ROUNDED_SIZE. */
9853 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9856 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9857 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9859 if (size
!= rounded_size
)
9861 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9862 plus_constant (Pmode
, stack_pointer_rtx
,
9863 rounded_size
- size
)));
9864 emit_stack_probe (stack_pointer_rtx
);
9867 /* Adjust back to account for the additional first interval. */
9868 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9869 plus_constant (Pmode
, stack_pointer_rtx
,
9870 PROBE_INTERVAL
+ dope
)));
9872 release_scratch_register_on_entry (&sr
);
9875 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9877 /* Even if the stack pointer isn't the CFA register, we need to correctly
9878 describe the adjustments made to it, in particular differentiate the
9879 frame-related ones from the frame-unrelated ones. */
9882 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9883 XVECEXP (expr
, 0, 0)
9884 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9885 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
9886 XVECEXP (expr
, 0, 1)
9887 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9888 plus_constant (Pmode
, stack_pointer_rtx
,
9889 PROBE_INTERVAL
+ dope
+ size
));
9890 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
9891 RTX_FRAME_RELATED_P (last
) = 1;
9893 cfun
->machine
->fs
.sp_offset
+= size
;
9896 /* Make sure nothing is scheduled before we are done. */
9897 emit_insn (gen_blockage ());
9900 /* Adjust the stack pointer up to REG while probing it. */
9903 output_adjust_stack_and_probe (rtx reg
)
9905 static int labelno
= 0;
9906 char loop_lab
[32], end_lab
[32];
9909 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9910 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9912 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9914 /* Jump to END_LAB if SP == LAST_ADDR. */
9915 xops
[0] = stack_pointer_rtx
;
9917 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9918 fputs ("\tje\t", asm_out_file
);
9919 assemble_name_raw (asm_out_file
, end_lab
);
9920 fputc ('\n', asm_out_file
);
9922 /* SP = SP + PROBE_INTERVAL. */
9923 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9924 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
9927 xops
[1] = const0_rtx
;
9928 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
9930 fprintf (asm_out_file
, "\tjmp\t");
9931 assemble_name_raw (asm_out_file
, loop_lab
);
9932 fputc ('\n', asm_out_file
);
9934 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9939 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9940 inclusive. These are offsets from the current stack pointer. */
9943 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
9945 /* See if we have a constant small number of probes to generate. If so,
9946 that's the easy case. The run-time loop is made up of 7 insns in the
9947 generic case while the compile-time loop is made up of n insns for n #
9949 if (size
<= 7 * PROBE_INTERVAL
)
9953 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9954 it exceeds SIZE. If only one probe is needed, this will not
9955 generate any code. Then probe at FIRST + SIZE. */
9956 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9957 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9960 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9964 /* Otherwise, do the same as above, but in a loop. Note that we must be
9965 extra careful with variables wrapping around because we might be at
9966 the very top (or the very bottom) of the address space and we have
9967 to be able to handle this case properly; in particular, we use an
9968 equality test for the loop condition. */
9971 HOST_WIDE_INT rounded_size
, last
;
9972 struct scratch_reg sr
;
9974 get_scratch_register_on_entry (&sr
);
9977 /* Step 1: round SIZE to the previous multiple of the interval. */
9979 rounded_size
= size
& -PROBE_INTERVAL
;
9982 /* Step 2: compute initial and final value of the loop counter. */
9984 /* TEST_OFFSET = FIRST. */
9985 emit_move_insn (sr
.reg
, GEN_INT (-first
));
9987 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9988 last
= first
+ rounded_size
;
9993 while (TEST_ADDR != LAST_ADDR)
9995 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9999 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10000 until it is equal to ROUNDED_SIZE. */
10002 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10005 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10006 that SIZE is equal to ROUNDED_SIZE. */
10008 if (size
!= rounded_size
)
10009 emit_stack_probe (plus_constant (Pmode
,
10010 gen_rtx_PLUS (Pmode
,
10013 rounded_size
- size
));
10015 release_scratch_register_on_entry (&sr
);
10018 /* Make sure nothing is scheduled before we are done. */
10019 emit_insn (gen_blockage ());
10022 /* Probe a range of stack addresses from REG to END, inclusive. These are
10023 offsets from the current stack pointer. */
10026 output_probe_stack_range (rtx reg
, rtx end
)
10028 static int labelno
= 0;
10029 char loop_lab
[32], end_lab
[32];
10032 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10033 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10035 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10037 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10040 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10041 fputs ("\tje\t", asm_out_file
);
10042 assemble_name_raw (asm_out_file
, end_lab
);
10043 fputc ('\n', asm_out_file
);
10045 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10046 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10047 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10049 /* Probe at TEST_ADDR. */
10050 xops
[0] = stack_pointer_rtx
;
10052 xops
[2] = const0_rtx
;
10053 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10055 fprintf (asm_out_file
, "\tjmp\t");
10056 assemble_name_raw (asm_out_file
, loop_lab
);
10057 fputc ('\n', asm_out_file
);
10059 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10064 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10065 to be generated in correct form. */
10067 ix86_finalize_stack_realign_flags (void)
10069 /* Check if stack realign is really needed after reload, and
10070 stores result in cfun */
10071 unsigned int incoming_stack_boundary
10072 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10073 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10074 unsigned int stack_realign
= (incoming_stack_boundary
10076 ? crtl
->max_used_stack_slot_alignment
10077 : crtl
->stack_alignment_needed
));
10079 if (crtl
->stack_realign_finalized
)
10081 /* After stack_realign_needed is finalized, we can't no longer
10083 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10087 /* If the only reason for frame_pointer_needed is that we conservatively
10088 assumed stack realignment might be needed, but in the end nothing that
10089 needed the stack alignment had been spilled, clear frame_pointer_needed
10090 and say we don't need stack realignment. */
10092 && !crtl
->need_drap
10093 && frame_pointer_needed
10095 && flag_omit_frame_pointer
10096 && crtl
->sp_is_unchanging
10097 && !ix86_current_function_calls_tls_descriptor
10098 && !crtl
->accesses_prior_frames
10099 && !cfun
->calls_alloca
10100 && !crtl
->calls_eh_return
10101 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10102 && !ix86_frame_pointer_required ()
10103 && get_frame_size () == 0
10104 && ix86_nsaved_sseregs () == 0
10105 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10107 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10110 CLEAR_HARD_REG_SET (prologue_used
);
10111 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10112 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10113 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10114 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10115 HARD_FRAME_POINTER_REGNUM
);
10119 FOR_BB_INSNS (bb
, insn
)
10120 if (NONDEBUG_INSN_P (insn
)
10121 && requires_stack_frame_p (insn
, prologue_used
,
10122 set_up_by_prologue
))
10124 crtl
->stack_realign_needed
= stack_realign
;
10125 crtl
->stack_realign_finalized
= true;
10130 frame_pointer_needed
= false;
10131 stack_realign
= false;
10132 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10133 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10134 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10135 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10136 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10137 df_finish_pass (true);
10138 df_scan_alloc (NULL
);
10140 df_compute_regs_ever_live (true);
10144 crtl
->stack_realign_needed
= stack_realign
;
10145 crtl
->stack_realign_finalized
= true;
10148 /* Expand the prologue into a bunch of separate insns. */
10151 ix86_expand_prologue (void)
10153 struct machine_function
*m
= cfun
->machine
;
10156 struct ix86_frame frame
;
10157 HOST_WIDE_INT allocate
;
10158 bool int_registers_saved
;
10159 bool sse_registers_saved
;
10161 ix86_finalize_stack_realign_flags ();
10163 /* DRAP should not coexist with stack_realign_fp */
10164 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10166 memset (&m
->fs
, 0, sizeof (m
->fs
));
10168 /* Initialize CFA state for before the prologue. */
10169 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10170 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10172 /* Track SP offset to the CFA. We continue tracking this after we've
10173 swapped the CFA register away from SP. In the case of re-alignment
10174 this is fudged; we're interested to offsets within the local frame. */
10175 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10176 m
->fs
.sp_valid
= true;
10178 ix86_compute_frame_layout (&frame
);
10180 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10182 /* We should have already generated an error for any use of
10183 ms_hook on a nested function. */
10184 gcc_checking_assert (!ix86_static_chain_on_stack
);
10186 /* Check if profiling is active and we shall use profiling before
10187 prologue variant. If so sorry. */
10188 if (crtl
->profile
&& flag_fentry
!= 0)
10189 sorry ("ms_hook_prologue attribute isn%'t compatible "
10190 "with -mfentry for 32-bit");
10192 /* In ix86_asm_output_function_label we emitted:
10193 8b ff movl.s %edi,%edi
10195 8b ec movl.s %esp,%ebp
10197 This matches the hookable function prologue in Win32 API
10198 functions in Microsoft Windows XP Service Pack 2 and newer.
10199 Wine uses this to enable Windows apps to hook the Win32 API
10200 functions provided by Wine.
10202 What that means is that we've already set up the frame pointer. */
10204 if (frame_pointer_needed
10205 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10209 /* We've decided to use the frame pointer already set up.
10210 Describe this to the unwinder by pretending that both
10211 push and mov insns happen right here.
10213 Putting the unwind info here at the end of the ms_hook
10214 is done so that we can make absolutely certain we get
10215 the required byte sequence at the start of the function,
10216 rather than relying on an assembler that can produce
10217 the exact encoding required.
10219 However it does mean (in the unpatched case) that we have
10220 a 1 insn window where the asynchronous unwind info is
10221 incorrect. However, if we placed the unwind info at
10222 its correct location we would have incorrect unwind info
10223 in the patched case. Which is probably all moot since
10224 I don't expect Wine generates dwarf2 unwind info for the
10225 system libraries that use this feature. */
10227 insn
= emit_insn (gen_blockage ());
10229 push
= gen_push (hard_frame_pointer_rtx
);
10230 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10231 stack_pointer_rtx
);
10232 RTX_FRAME_RELATED_P (push
) = 1;
10233 RTX_FRAME_RELATED_P (mov
) = 1;
10235 RTX_FRAME_RELATED_P (insn
) = 1;
10236 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10237 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10239 /* Note that gen_push incremented m->fs.cfa_offset, even
10240 though we didn't emit the push insn here. */
10241 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10242 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10243 m
->fs
.fp_valid
= true;
10247 /* The frame pointer is not needed so pop %ebp again.
10248 This leaves us with a pristine state. */
10249 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10253 /* The first insn of a function that accepts its static chain on the
10254 stack is to push the register that would be filled in by a direct
10255 call. This insn will be skipped by the trampoline. */
10256 else if (ix86_static_chain_on_stack
)
10258 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10259 emit_insn (gen_blockage ());
10261 /* We don't want to interpret this push insn as a register save,
10262 only as a stack adjustment. The real copy of the register as
10263 a save will be done later, if needed. */
10264 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10265 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10266 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10267 RTX_FRAME_RELATED_P (insn
) = 1;
10270 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10271 of DRAP is needed and stack realignment is really needed after reload */
10272 if (stack_realign_drap
)
10274 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10276 /* Only need to push parameter pointer reg if it is caller saved. */
10277 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10279 /* Push arg pointer reg */
10280 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10281 RTX_FRAME_RELATED_P (insn
) = 1;
10284 /* Grab the argument pointer. */
10285 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10286 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10287 RTX_FRAME_RELATED_P (insn
) = 1;
10288 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10289 m
->fs
.cfa_offset
= 0;
10291 /* Align the stack. */
10292 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10294 GEN_INT (-align_bytes
)));
10295 RTX_FRAME_RELATED_P (insn
) = 1;
10297 /* Replicate the return address on the stack so that return
10298 address can be reached via (argp - 1) slot. This is needed
10299 to implement macro RETURN_ADDR_RTX and intrinsic function
10300 expand_builtin_return_addr etc. */
10301 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10302 t
= gen_frame_mem (word_mode
, t
);
10303 insn
= emit_insn (gen_push (t
));
10304 RTX_FRAME_RELATED_P (insn
) = 1;
10306 /* For the purposes of frame and register save area addressing,
10307 we've started over with a new frame. */
10308 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10309 m
->fs
.realigned
= true;
10312 int_registers_saved
= (frame
.nregs
== 0);
10313 sse_registers_saved
= (frame
.nsseregs
== 0);
10315 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10317 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10318 slower on all targets. Also sdb doesn't like it. */
10319 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10320 RTX_FRAME_RELATED_P (insn
) = 1;
10322 /* Push registers now, before setting the frame pointer
10324 if (!int_registers_saved
10326 && !frame
.save_regs_using_mov
)
10328 ix86_emit_save_regs ();
10329 int_registers_saved
= true;
10330 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10333 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10335 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10336 RTX_FRAME_RELATED_P (insn
) = 1;
10338 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10339 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10340 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10341 m
->fs
.fp_valid
= true;
10345 if (!int_registers_saved
)
10347 /* If saving registers via PUSH, do so now. */
10348 if (!frame
.save_regs_using_mov
)
10350 ix86_emit_save_regs ();
10351 int_registers_saved
= true;
10352 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10355 /* When using red zone we may start register saving before allocating
10356 the stack frame saving one cycle of the prologue. However, avoid
10357 doing this if we have to probe the stack; at least on x86_64 the
10358 stack probe can turn into a call that clobbers a red zone location. */
10359 else if (ix86_using_red_zone ()
10360 && (! TARGET_STACK_PROBE
10361 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10363 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10364 int_registers_saved
= true;
10368 if (stack_realign_fp
)
10370 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10371 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10373 /* The computation of the size of the re-aligned stack frame means
10374 that we must allocate the size of the register save area before
10375 performing the actual alignment. Otherwise we cannot guarantee
10376 that there's enough storage above the realignment point. */
10377 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10378 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10379 GEN_INT (m
->fs
.sp_offset
10380 - frame
.sse_reg_save_offset
),
10383 /* Align the stack. */
10384 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10386 GEN_INT (-align_bytes
)));
10388 /* For the purposes of register save area addressing, the stack
10389 pointer is no longer valid. As for the value of sp_offset,
10390 see ix86_compute_frame_layout, which we need to match in order
10391 to pass verification of stack_pointer_offset at the end. */
10392 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10393 m
->fs
.sp_valid
= false;
10396 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10398 if (flag_stack_usage_info
)
10400 /* We start to count from ARG_POINTER. */
10401 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10403 /* If it was realigned, take into account the fake frame. */
10404 if (stack_realign_drap
)
10406 if (ix86_static_chain_on_stack
)
10407 stack_size
+= UNITS_PER_WORD
;
10409 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10410 stack_size
+= UNITS_PER_WORD
;
10412 /* This over-estimates by 1 minimal-stack-alignment-unit but
10413 mitigates that by counting in the new return address slot. */
10414 current_function_dynamic_stack_size
10415 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10418 current_function_static_stack_size
= stack_size
;
10421 /* On SEH target with very large frame size, allocate an area to save
10422 SSE registers (as the very large allocation won't be described). */
10424 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10425 && !sse_registers_saved
)
10427 HOST_WIDE_INT sse_size
=
10428 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10430 gcc_assert (int_registers_saved
);
10432 /* No need to do stack checking as the area will be immediately
10434 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10435 GEN_INT (-sse_size
), -1,
10436 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10437 allocate
-= sse_size
;
10438 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10439 sse_registers_saved
= true;
10442 /* The stack has already been decremented by the instruction calling us
10443 so probe if the size is non-negative to preserve the protection area. */
10444 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10446 /* We expect the registers to be saved when probes are used. */
10447 gcc_assert (int_registers_saved
);
10449 if (STACK_CHECK_MOVING_SP
)
10451 ix86_adjust_stack_and_probe (allocate
);
10456 HOST_WIDE_INT size
= allocate
;
10458 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10459 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10461 if (TARGET_STACK_PROBE
)
10462 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10464 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10470 else if (!ix86_target_stack_probe ()
10471 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10473 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10474 GEN_INT (-allocate
), -1,
10475 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10479 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10481 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10482 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10483 bool eax_live
= false;
10484 bool r10_live
= false;
10487 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10488 if (!TARGET_64BIT_MS_ABI
)
10489 eax_live
= ix86_eax_live_at_start_p ();
10491 /* Note that SEH directives need to continue tracking the stack
10492 pointer even after the frame pointer has been set up. */
10495 insn
= emit_insn (gen_push (eax
));
10496 allocate
-= UNITS_PER_WORD
;
10497 if (sp_is_cfa_reg
|| TARGET_SEH
)
10500 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10501 RTX_FRAME_RELATED_P (insn
) = 1;
10507 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10508 insn
= emit_insn (gen_push (r10
));
10509 allocate
-= UNITS_PER_WORD
;
10510 if (sp_is_cfa_reg
|| TARGET_SEH
)
10513 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10514 RTX_FRAME_RELATED_P (insn
) = 1;
10518 emit_move_insn (eax
, GEN_INT (allocate
));
10519 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10521 /* Use the fact that AX still contains ALLOCATE. */
10522 adjust_stack_insn
= (Pmode
== DImode
10523 ? gen_pro_epilogue_adjust_stack_di_sub
10524 : gen_pro_epilogue_adjust_stack_si_sub
);
10526 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10527 stack_pointer_rtx
, eax
));
10529 if (sp_is_cfa_reg
|| TARGET_SEH
)
10532 m
->fs
.cfa_offset
+= allocate
;
10533 RTX_FRAME_RELATED_P (insn
) = 1;
10534 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10535 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10536 plus_constant (Pmode
, stack_pointer_rtx
,
10539 m
->fs
.sp_offset
+= allocate
;
10541 if (r10_live
&& eax_live
)
10543 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10544 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10545 gen_frame_mem (word_mode
, t
));
10546 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10547 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10548 gen_frame_mem (word_mode
, t
));
10550 else if (eax_live
|| r10_live
)
10552 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10553 emit_move_insn (gen_rtx_REG (word_mode
,
10554 (eax_live
? AX_REG
: R10_REG
)),
10555 gen_frame_mem (word_mode
, t
));
10558 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10560 /* If we havn't already set up the frame pointer, do so now. */
10561 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10563 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10564 GEN_INT (frame
.stack_pointer_offset
10565 - frame
.hard_frame_pointer_offset
));
10566 insn
= emit_insn (insn
);
10567 RTX_FRAME_RELATED_P (insn
) = 1;
10568 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10570 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10571 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10572 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10573 m
->fs
.fp_valid
= true;
10576 if (!int_registers_saved
)
10577 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10578 if (!sse_registers_saved
)
10579 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10581 pic_reg_used
= false;
10582 if (pic_offset_table_rtx
10583 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10586 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10588 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10589 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10591 pic_reg_used
= true;
10598 if (ix86_cmodel
== CM_LARGE_PIC
)
10600 rtx label
, tmp_reg
;
10602 gcc_assert (Pmode
== DImode
);
10603 label
= gen_label_rtx ();
10604 emit_label (label
);
10605 LABEL_PRESERVE_P (label
) = 1;
10606 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10607 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10608 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10610 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10611 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10612 pic_offset_table_rtx
, tmp_reg
));
10615 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10619 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10620 RTX_FRAME_RELATED_P (insn
) = 1;
10621 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10625 /* In the pic_reg_used case, make sure that the got load isn't deleted
10626 when mcount needs it. Blockage to avoid call movement across mcount
10627 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10629 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10630 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10632 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10634 /* vDRAP is setup but after reload it turns out stack realign
10635 isn't necessary, here we will emit prologue to setup DRAP
10636 without stack realign adjustment */
10637 t
= choose_baseaddr (0);
10638 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10641 /* Prevent instructions from being scheduled into register save push
10642 sequence when access to the redzone area is done through frame pointer.
10643 The offset between the frame pointer and the stack pointer is calculated
10644 relative to the value of the stack pointer at the end of the function
10645 prologue, and moving instructions that access redzone area via frame
10646 pointer inside push sequence violates this assumption. */
10647 if (frame_pointer_needed
&& frame
.red_zone_size
)
10648 emit_insn (gen_memory_blockage ());
10650 /* Emit cld instruction if stringops are used in the function. */
10651 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10652 emit_insn (gen_cld ());
10654 /* SEH requires that the prologue end within 256 bytes of the start of
10655 the function. Prevent instruction schedules that would extend that.
10656 Further, prevent alloca modifications to the stack pointer from being
10657 combined with prologue modifications. */
10659 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10662 /* Emit code to restore REG using a POP insn. */
10665 ix86_emit_restore_reg_using_pop (rtx reg
)
10667 struct machine_function
*m
= cfun
->machine
;
10668 rtx insn
= emit_insn (gen_pop (reg
));
10670 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10671 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10673 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10674 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10676 /* Previously we'd represented the CFA as an expression
10677 like *(%ebp - 8). We've just popped that value from
10678 the stack, which means we need to reset the CFA to
10679 the drap register. This will remain until we restore
10680 the stack pointer. */
10681 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10682 RTX_FRAME_RELATED_P (insn
) = 1;
10684 /* This means that the DRAP register is valid for addressing too. */
10685 m
->fs
.drap_valid
= true;
10689 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10691 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10692 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10693 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10694 RTX_FRAME_RELATED_P (insn
) = 1;
10696 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10699 /* When the frame pointer is the CFA, and we pop it, we are
10700 swapping back to the stack pointer as the CFA. This happens
10701 for stack frames that don't allocate other data, so we assume
10702 the stack pointer is now pointing at the return address, i.e.
10703 the function entry state, which makes the offset be 1 word. */
10704 if (reg
== hard_frame_pointer_rtx
)
10706 m
->fs
.fp_valid
= false;
10707 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10709 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10710 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10712 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10713 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10714 GEN_INT (m
->fs
.cfa_offset
)));
10715 RTX_FRAME_RELATED_P (insn
) = 1;
10720 /* Emit code to restore saved registers using POP insns. */
10723 ix86_emit_restore_regs_using_pop (void)
10725 unsigned int regno
;
10727 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10728 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10729 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10732 /* Emit code and notes for the LEAVE instruction. */
10735 ix86_emit_leave (void)
10737 struct machine_function
*m
= cfun
->machine
;
10738 rtx insn
= emit_insn (ix86_gen_leave ());
10740 ix86_add_queued_cfa_restore_notes (insn
);
10742 gcc_assert (m
->fs
.fp_valid
);
10743 m
->fs
.sp_valid
= true;
10744 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10745 m
->fs
.fp_valid
= false;
10747 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10749 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10750 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10752 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10753 plus_constant (Pmode
, stack_pointer_rtx
,
10755 RTX_FRAME_RELATED_P (insn
) = 1;
10757 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10761 /* Emit code to restore saved registers using MOV insns.
10762 First register is restored from CFA - CFA_OFFSET. */
10764 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10765 bool maybe_eh_return
)
10767 struct machine_function
*m
= cfun
->machine
;
10768 unsigned int regno
;
10770 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10771 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10773 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10776 mem
= choose_baseaddr (cfa_offset
);
10777 mem
= gen_frame_mem (word_mode
, mem
);
10778 insn
= emit_move_insn (reg
, mem
);
10780 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10782 /* Previously we'd represented the CFA as an expression
10783 like *(%ebp - 8). We've just popped that value from
10784 the stack, which means we need to reset the CFA to
10785 the drap register. This will remain until we restore
10786 the stack pointer. */
10787 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10788 RTX_FRAME_RELATED_P (insn
) = 1;
10790 /* This means that the DRAP register is valid for addressing. */
10791 m
->fs
.drap_valid
= true;
10794 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10796 cfa_offset
-= UNITS_PER_WORD
;
10800 /* Emit code to restore saved registers using MOV insns.
10801 First register is restored from CFA - CFA_OFFSET. */
10803 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10804 bool maybe_eh_return
)
10806 unsigned int regno
;
10808 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10809 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10811 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10814 mem
= choose_baseaddr (cfa_offset
);
10815 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10816 set_mem_align (mem
, 128);
10817 emit_move_insn (reg
, mem
);
10819 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10825 /* Restore function stack, frame, and registers. */
10828 ix86_expand_epilogue (int style
)
10830 struct machine_function
*m
= cfun
->machine
;
10831 struct machine_frame_state frame_state_save
= m
->fs
;
10832 struct ix86_frame frame
;
10833 bool restore_regs_via_mov
;
10836 ix86_finalize_stack_realign_flags ();
10837 ix86_compute_frame_layout (&frame
);
10839 m
->fs
.sp_valid
= (!frame_pointer_needed
10840 || (crtl
->sp_is_unchanging
10841 && !stack_realign_fp
));
10842 gcc_assert (!m
->fs
.sp_valid
10843 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10845 /* The FP must be valid if the frame pointer is present. */
10846 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10847 gcc_assert (!m
->fs
.fp_valid
10848 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10850 /* We must have *some* valid pointer to the stack frame. */
10851 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10853 /* The DRAP is never valid at this point. */
10854 gcc_assert (!m
->fs
.drap_valid
);
10856 /* See the comment about red zone and frame
10857 pointer usage in ix86_expand_prologue. */
10858 if (frame_pointer_needed
&& frame
.red_zone_size
)
10859 emit_insn (gen_memory_blockage ());
10861 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10862 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10864 /* Determine the CFA offset of the end of the red-zone. */
10865 m
->fs
.red_zone_offset
= 0;
10866 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10868 /* The red-zone begins below the return address. */
10869 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10871 /* When the register save area is in the aligned portion of
10872 the stack, determine the maximum runtime displacement that
10873 matches up with the aligned frame. */
10874 if (stack_realign_drap
)
10875 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10879 /* Special care must be taken for the normal return case of a function
10880 using eh_return: the eax and edx registers are marked as saved, but
10881 not restored along this path. Adjust the save location to match. */
10882 if (crtl
->calls_eh_return
&& style
!= 2)
10883 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10885 /* EH_RETURN requires the use of moves to function properly. */
10886 if (crtl
->calls_eh_return
)
10887 restore_regs_via_mov
= true;
10888 /* SEH requires the use of pops to identify the epilogue. */
10889 else if (TARGET_SEH
)
10890 restore_regs_via_mov
= false;
10891 /* If we're only restoring one register and sp is not valid then
10892 using a move instruction to restore the register since it's
10893 less work than reloading sp and popping the register. */
10894 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10895 restore_regs_via_mov
= true;
10896 else if (TARGET_EPILOGUE_USING_MOVE
10897 && cfun
->machine
->use_fast_prologue_epilogue
10898 && (frame
.nregs
> 1
10899 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10900 restore_regs_via_mov
= true;
10901 else if (frame_pointer_needed
10903 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10904 restore_regs_via_mov
= true;
10905 else if (frame_pointer_needed
10906 && TARGET_USE_LEAVE
10907 && cfun
->machine
->use_fast_prologue_epilogue
10908 && frame
.nregs
== 1)
10909 restore_regs_via_mov
= true;
10911 restore_regs_via_mov
= false;
10913 if (restore_regs_via_mov
|| frame
.nsseregs
)
10915 /* Ensure that the entire register save area is addressable via
10916 the stack pointer, if we will restore via sp. */
10918 && m
->fs
.sp_offset
> 0x7fffffff
10919 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
10920 && (frame
.nsseregs
+ frame
.nregs
) != 0)
10922 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10923 GEN_INT (m
->fs
.sp_offset
10924 - frame
.sse_reg_save_offset
),
10926 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10930 /* If there are any SSE registers to restore, then we have to do it
10931 via moves, since there's obviously no pop for SSE regs. */
10932 if (frame
.nsseregs
)
10933 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
10936 if (restore_regs_via_mov
)
10941 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
10943 /* eh_return epilogues need %ecx added to the stack pointer. */
10946 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
10948 /* Stack align doesn't work with eh_return. */
10949 gcc_assert (!stack_realign_drap
);
10950 /* Neither does regparm nested functions. */
10951 gcc_assert (!ix86_static_chain_on_stack
);
10953 if (frame_pointer_needed
)
10955 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
10956 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
10957 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
10959 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
10960 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
10962 /* Note that we use SA as a temporary CFA, as the return
10963 address is at the proper place relative to it. We
10964 pretend this happens at the FP restore insn because
10965 prior to this insn the FP would be stored at the wrong
10966 offset relative to SA, and after this insn we have no
10967 other reasonable register to use for the CFA. We don't
10968 bother resetting the CFA to the SP for the duration of
10969 the return insn. */
10970 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10971 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
10972 ix86_add_queued_cfa_restore_notes (insn
);
10973 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
10974 RTX_FRAME_RELATED_P (insn
) = 1;
10976 m
->fs
.cfa_reg
= sa
;
10977 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10978 m
->fs
.fp_valid
= false;
10980 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
10981 const0_rtx
, style
, false);
10985 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
10986 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
10987 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
10988 ix86_add_queued_cfa_restore_notes (insn
);
10990 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10991 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
10993 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10994 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10995 plus_constant (Pmode
, stack_pointer_rtx
,
10997 RTX_FRAME_RELATED_P (insn
) = 1;
11000 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11001 m
->fs
.sp_valid
= true;
11006 /* SEH requires that the function end with (1) a stack adjustment
11007 if necessary, (2) a sequence of pops, and (3) a return or
11008 jump instruction. Prevent insns from the function body from
11009 being scheduled into this sequence. */
11012 /* Prevent a catch region from being adjacent to the standard
11013 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
11014 several other flags that would be interesting to test are
11016 if (flag_non_call_exceptions
)
11017 emit_insn (gen_nops (const1_rtx
));
11019 emit_insn (gen_blockage ());
11022 /* First step is to deallocate the stack frame so that we can
11023 pop the registers. Also do it on SEH target for very large
11024 frame as the emitted instructions aren't allowed by the ABI in
11026 if (!m
->fs
.sp_valid
11028 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11029 >= SEH_MAX_FRAME_SIZE
)))
11031 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11032 GEN_INT (m
->fs
.fp_offset
11033 - frame
.reg_save_offset
),
11036 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11038 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11039 GEN_INT (m
->fs
.sp_offset
11040 - frame
.reg_save_offset
),
11042 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11045 ix86_emit_restore_regs_using_pop ();
11048 /* If we used a stack pointer and haven't already got rid of it,
11050 if (m
->fs
.fp_valid
)
11052 /* If the stack pointer is valid and pointing at the frame
11053 pointer store address, then we only need a pop. */
11054 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11055 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11056 /* Leave results in shorter dependency chains on CPUs that are
11057 able to grok it fast. */
11058 else if (TARGET_USE_LEAVE
11059 || optimize_function_for_size_p (cfun
)
11060 || !cfun
->machine
->use_fast_prologue_epilogue
)
11061 ix86_emit_leave ();
11064 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11065 hard_frame_pointer_rtx
,
11066 const0_rtx
, style
, !using_drap
);
11067 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11073 int param_ptr_offset
= UNITS_PER_WORD
;
11076 gcc_assert (stack_realign_drap
);
11078 if (ix86_static_chain_on_stack
)
11079 param_ptr_offset
+= UNITS_PER_WORD
;
11080 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11081 param_ptr_offset
+= UNITS_PER_WORD
;
11083 insn
= emit_insn (gen_rtx_SET
11084 (VOIDmode
, stack_pointer_rtx
,
11085 gen_rtx_PLUS (Pmode
,
11087 GEN_INT (-param_ptr_offset
))));
11088 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11089 m
->fs
.cfa_offset
= param_ptr_offset
;
11090 m
->fs
.sp_offset
= param_ptr_offset
;
11091 m
->fs
.realigned
= false;
11093 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11094 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11095 GEN_INT (param_ptr_offset
)));
11096 RTX_FRAME_RELATED_P (insn
) = 1;
11098 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11099 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11102 /* At this point the stack pointer must be valid, and we must have
11103 restored all of the registers. We may not have deallocated the
11104 entire stack frame. We've delayed this until now because it may
11105 be possible to merge the local stack deallocation with the
11106 deallocation forced by ix86_static_chain_on_stack. */
11107 gcc_assert (m
->fs
.sp_valid
);
11108 gcc_assert (!m
->fs
.fp_valid
);
11109 gcc_assert (!m
->fs
.realigned
);
11110 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11112 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11113 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11117 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11119 /* Sibcall epilogues don't want a return instruction. */
11122 m
->fs
= frame_state_save
;
11126 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11128 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11130 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11131 address, do explicit add, and jump indirectly to the caller. */
11133 if (crtl
->args
.pops_args
>= 65536)
11135 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11138 /* There is no "pascal" calling convention in any 64bit ABI. */
11139 gcc_assert (!TARGET_64BIT
);
11141 insn
= emit_insn (gen_pop (ecx
));
11142 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11143 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11145 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11146 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11147 add_reg_note (insn
, REG_CFA_REGISTER
,
11148 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11149 RTX_FRAME_RELATED_P (insn
) = 1;
11151 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11153 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11156 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11159 emit_jump_insn (gen_simple_return_internal ());
11161 /* Restore the state back to the state from the prologue,
11162 so that it's correct for the next epilogue. */
11163 m
->fs
= frame_state_save
;
11166 /* Reset from the function's potential modifications. */
11169 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11170 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11172 if (pic_offset_table_rtx
)
11173 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11175 /* Mach-O doesn't support labels at the end of objects, so if
11176 it looks like we might want one, insert a NOP. */
11178 rtx insn
= get_last_insn ();
11179 rtx deleted_debug_label
= NULL_RTX
;
11182 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11184 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11185 notes only, instead set their CODE_LABEL_NUMBER to -1,
11186 otherwise there would be code generation differences
11187 in between -g and -g0. */
11188 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11189 deleted_debug_label
= insn
;
11190 insn
= PREV_INSN (insn
);
11195 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11196 fputs ("\tnop\n", file
);
11197 else if (deleted_debug_label
)
11198 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11199 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11200 CODE_LABEL_NUMBER (insn
) = -1;
11206 /* Return a scratch register to use in the split stack prologue. The
11207 split stack prologue is used for -fsplit-stack. It is the first
11208 instructions in the function, even before the regular prologue.
11209 The scratch register can be any caller-saved register which is not
11210 used for parameters or for the static chain. */
/* Returns INVALID_REGNUM (after a sorry() diagnostic) when every
   caller-saved register is taken by parameters or the static chain.
   NOTE(review): several lines (returned register numbers, some braces)
   are elided in this extract.  */
11212 static unsigned int
11213 split_stack_prologue_scratch_regno (void)
11219 bool is_fastcall
, is_thiscall
;
/* Determine which registers the calling convention consumes.  */
11222 is_fastcall
= (lookup_attribute ("fastcall",
11223 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11225 is_thiscall
= (lookup_attribute ("thiscall",
11226 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11228 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
/* fastcall: both %ecx and %edx carry arguments, so a nested function
   (static chain) leaves nothing free.  */
11232 if (DECL_STATIC_CHAIN (cfun
->decl
))
11234 sorry ("-fsplit-stack does not support fastcall with "
11235 "nested function");
11236 return INVALID_REGNUM
;
11240 else if (is_thiscall
)
11242 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11246 else if (regparm
< 3)
11248 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11254 sorry ("-fsplit-stack does not support 2 register "
11255 " parameters for a nested function");
11256 return INVALID_REGNUM
;
/* regparm == 3: all of %eax/%edx/%ecx are argument registers.  */
11263 /* FIXME: We could make this work by pushing a register
11264 around the addition and comparison. */
11265 sorry ("-fsplit-stack does not support 3 register parameters");
11266 return INVALID_REGNUM
;
/* Lazily-created SYMBOL_REFs for the libgcc split-stack allocation
   helpers; initialized on first use in ix86_expand_split_stack_prologue
   and preserved across garbage collection by GTY.  */
11271 /* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack ("__morestack").  */
11274 static GTY(()) rtx split_stack_fn
;
11276 /* A SYMBOL_REF for the more stack function when using the large
   code model ("__morestack_large_model").  */
11279 static GTY(()) rtx split_stack_fn_large
;
11281 /* Handle -fsplit-stack. These are the first instructions in the
11282 function, even before the regular prologue. */
/* Emits a check of the stack pointer (minus the frame size) against
   the stack boundary in the TCB and a conditional call to __morestack
   (or __morestack_large_model under the large code model).
   NOTE(review): this extract has elided lines (braces, else arms, some
   declarations such as `fn', `offset', `reg10', `reg11', `x',
   `frame_reg', `words'); code kept byte-identical, comments only.  */
11285 ix86_expand_split_stack_prologue (void)
11287 struct ix86_frame frame
;
11288 HOST_WIDE_INT allocate
;
11289 unsigned HOST_WIDE_INT args_size
;
11290 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11291 rtx scratch_reg
= NULL_RTX
;
11292 rtx varargs_label
= NULL_RTX
;
/* This runs after reload, so the frame layout is final.  */
11295 gcc_assert (flag_split_stack
&& reload_completed
);
11297 ix86_finalize_stack_realign_flags ();
11298 ix86_compute_frame_layout (&frame
);
11299 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11301 /* This is the label we will branch to if we have enough stack
11302 space. We expect the basic block reordering pass to reverse this
11303 branch if optimizing, so that we branch in the unlikely case. */
11304 label
= gen_label_rtx ();
11306 /* We need to compare the stack pointer minus the frame size with
11307 the stack boundary in the TCB. The stack boundary always gives
11308 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11309 can compare directly. Otherwise we need to do an addition. */
11311 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11312 UNSPEC_STACK_CHECK
);
11313 limit
= gen_rtx_CONST (Pmode
, limit
);
11314 limit
= gen_rtx_MEM (Pmode
, limit
);
11315 if (allocate
< SPLIT_STACK_AVAILABLE
)
11316 current
= stack_pointer_rtx
;
11319 unsigned int scratch_regno
;
11322 /* We need a scratch register to hold the stack pointer minus
11323 the required frame size. Since this is the very start of the
11324 function, the scratch register can be any caller-saved
11325 register which is not used for parameters. */
11326 offset
= GEN_INT (- allocate
);
11327 scratch_regno
= split_stack_prologue_scratch_regno ();
11328 if (scratch_regno
== INVALID_REGNUM
)
11330 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11331 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11333 /* We don't use ix86_gen_add3 in this case because it will
11334 want to split to lea, but when not optimizing the insn
11335 will not be split after this point. */
11336 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11337 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* Else arm: the offset does not fit an immediate; materialize it
   first, then add the stack pointer.  */
11342 emit_move_insn (scratch_reg
, offset
);
11343 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11344 stack_pointer_rtx
));
11346 current
= scratch_reg
;
/* Branch to LABEL when there is already enough stack.  */
11349 ix86_expand_branch (GEU
, current
, limit
, label
);
11350 jump_insn
= get_last_insn ();
11351 JUMP_LABEL (jump_insn
) = label
;
11353 /* Mark the jump as very likely to be taken. */
11354 add_reg_note (jump_insn
, REG_BR_PROB
,
11355 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
/* Lazily create the __morestack SYMBOL_REF (see split_stack_fn).  */
11357 if (split_stack_fn
== NULL_RTX
)
11358 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11359 fn
= split_stack_fn
;
11361 /* Get more stack space. We pass in the desired stack space and the
11362 size of the arguments to copy to the new stack. In 32-bit mode
11363 we push the parameters; __morestack will return on a new stack
11364 anyhow. In 64-bit mode we pass the parameters in r10 and
r11.  */
11366 allocate_rtx
= GEN_INT (allocate
);
11367 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11368 call_fusage
= NULL_RTX
;
/* 64-bit path: parameters go in %r10/%r11.  */
11373 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11374 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11376 /* If this function uses a static chain, it will be in %r10.
11377 Preserve it across the call to __morestack. */
11378 if (DECL_STATIC_CHAIN (cfun
->decl
))
11382 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11383 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11384 use_reg (&call_fusage
, rax
);
11387 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11389 HOST_WIDE_INT argval
;
11391 gcc_assert (Pmode
== DImode
);
11392 /* When using the large model we need to load the address
11393 into a register, and we've run out of registers. So we
11394 switch to a different calling convention, and we call a
11395 different function: __morestack_large. We pass the
11396 argument size in the upper 32 bits of r10 and pass the
11397 frame size in the lower 32 bits. */
11398 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11399 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11401 if (split_stack_fn_large
== NULL_RTX
)
11402 split_stack_fn_large
=
11403 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
/* Large PIC model: compute the callee address via the GOT.  */
11405 if (ix86_cmodel
== CM_LARGE_PIC
)
11409 label
= gen_label_rtx ();
11410 emit_label (label
);
11411 LABEL_PRESERVE_P (label
) = 1;
11412 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11413 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11414 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11415 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11417 x
= gen_rtx_CONST (Pmode
, x
);
11418 emit_move_insn (reg11
, x
);
11419 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11420 x
= gen_const_mem (Pmode
, x
);
11421 emit_move_insn (reg11
, x
);
/* Non-PIC large model: the symbol address fits directly.  */
11424 emit_move_insn (reg11
, split_stack_fn_large
);
/* Pack args_size into the high 32 bits and allocate into the low 32
   bits of %r10 (the double shift avoids UB when HOST_WIDE_INT is
   32 bits wide).  */
11428 argval
= ((args_size
<< 16) << 16) + allocate
;
11429 emit_move_insn (reg10
, GEN_INT (argval
));
/* Ordinary 64-bit code models: frame size in %r10, args size in
   %r11.  */
11433 emit_move_insn (reg10
, allocate_rtx
);
11434 emit_move_insn (reg11
, GEN_INT (args_size
));
11435 use_reg (&call_fusage
, reg11
);
11438 use_reg (&call_fusage
, reg10
);
/* 32-bit path: push the two parameters; __morestack returns on a new
   stack anyway (see comment above).  */
11442 emit_insn (gen_push (GEN_INT (args_size
)));
11443 emit_insn (gen_push (allocate_rtx
));
11445 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11446 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11448 add_function_usage_to (call_insn
, call_fusage
);
11450 /* In order to make call/return prediction work right, we now need
11451 to execute a return instruction. See
11452 libgcc/config/i386/morestack.S for the details on how this works.
11454 For flow purposes gcc must not see this as a return
11455 instruction--we need control flow to continue at the subsequent
11456 label. Therefore, we use an unspec. */
11457 gcc_assert (crtl
->args
.pops_args
< 65536);
11458 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11460 /* If we are in 64-bit mode and this function uses a static chain,
11461 we saved %r10 in %rax before calling _morestack. */
11462 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11463 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11464 gen_rtx_REG (word_mode
, AX_REG
));
11466 /* If this function calls va_start, we need to store a pointer to
11467 the arguments on the old stack, because they may not have been
11468 all copied to the new stack. At this point the old stack can be
11469 found at the frame pointer value used by __morestack, because
11470 __morestack has set that up before calling back to us. Here we
11471 store that pointer in a scratch register, and in
11472 ix86_expand_prologue we store the scratch register in a stack
slot.  */
11474 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11476 unsigned int scratch_regno
;
11480 scratch_regno
= split_stack_prologue_scratch_regno ();
11481 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11482 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
/* (review) elided comment fragment describing the stack layout seen
   by the callee after __morestack returns:
11486 return address within this function
11487 return address of caller of this function
11489 So we add three words to get to the stack arguments.
11493 return address within this function
11494 first argument to __morestack
11495 second argument to __morestack
11496 return address of caller of this function
11498 So we add five words to get to the stack arguments.
*/
11500 words
= TARGET_64BIT
? 3 : 5;
11501 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11502 gen_rtx_PLUS (Pmode
, frame_reg
,
11503 GEN_INT (words
* UNITS_PER_WORD
))));
/* Skip the label's scratch-register setup below: after __morestack
   the pointer was computed from the frame pointer.  */
11505 varargs_label
= gen_label_rtx ();
11506 emit_jump_insn (gen_jump (varargs_label
));
11507 JUMP_LABEL (get_last_insn ()) = varargs_label
;
/* Fall-through target when there is already enough stack.  */
11512 emit_label (label
);
11513 LABEL_NUSES (label
) = 1;
11515 /* If this function calls va_start, we now have to set the scratch
11516 register for the case where we do not call __morestack. In this
11517 case we need to set it based on the stack pointer. */
11518 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11520 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11521 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11522 GEN_INT (UNITS_PER_WORD
))));
11524 emit_label (varargs_label
);
11525 LABEL_NUSES (varargs_label
) = 1;
11529 /* We may have to tell the dataflow pass that the split stack prologue
11530 is initializing a scratch register. */
/* Marks the split-stack scratch register live on entry when the
   function saves a varargs pointer (see
   ix86_expand_split_stack_prologue).  REGS is the bitmap of
   live-on-entry registers to update.  */
11533 ix86_live_on_entry (bitmap regs
)
11535 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
/* The scratch register is only meaningful under -fsplit-stack.  */
11537 gcc_assert (flag_split_stack
);
11538 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11542 /* Determine if op is suitable SUBREG RTX for address. */
/* NOTE(review): the early-return statements for the failing checks are
   elided in this extract; only the conditions remain visible.  */
11545 ix86_address_subreg_operand (rtx op
)
11547 enum machine_mode mode
;
11552 mode
= GET_MODE (op
);
/* Only integer modes can appear in addresses.  */
11554 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11557 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11558 failures when the register is one word out of a two word structure. */
11559 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11562 /* Allow only SUBREGs of non-eliminable hard registers. */
11563 return register_no_elim_operand (op
, mode
);
11566 /* Extract the parts of an RTL expression that is a valid memory address
11567 for an instruction. Return 0 if the structure of the address is
11568 grossly off. Return -1 if the address contains ASHIFT, so it is not
11569 strictly valid, but still used for computing length of lea instruction. */
/* On success the base/index/disp/scale/seg parts are stored in *OUT.
   NOTE(review): this extract has elided lines (early returns, some
   case labels, loop setup and the remaining *OUT assignments); code
   kept byte-identical, comments only.  */
11572 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11574 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11575 rtx base_reg
, index_reg
;
11576 HOST_WIDE_INT scale
= 1;
11577 rtx scale_rtx
= NULL_RTX
;
11580 enum ix86_address_seg seg
= SEG_DEFAULT
;
11582 /* Allow zero-extended SImode addresses,
11583 they will be emitted with addr32 prefix. */
11584 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11586 if (GET_CODE (addr
) == ZERO_EXTEND
11587 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11589 addr
= XEXP (addr
, 0);
11590 if (CONST_INT_P (addr
))
/* (addr & 0xffffffff) also denotes a zero-extended SImode address.  */
11593 else if (GET_CODE (addr
) == AND
11594 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11596 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11597 if (addr
== NULL_RTX
)
11600 if (CONST_INT_P (addr
))
11605 /* Allow SImode subregs of DImode addresses,
11606 they will be emitted with addr32 prefix. */
11607 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11609 if (GET_CODE (addr
) == SUBREG
11610 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11612 addr
= SUBREG_REG (addr
);
11613 if (CONST_INT_P (addr
))
/* Dispatch on the top-level code of the (possibly stripped) address.  */
11620 else if (GET_CODE (addr
) == SUBREG
)
11622 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11627 else if (GET_CODE (addr
) == PLUS
)
11629 rtx addends
[4], op
;
/* Flatten the PLUS tree into at most four addends, then classify
   each one.  */
11637 addends
[n
++] = XEXP (op
, 1);
11640 while (GET_CODE (op
) == PLUS
);
11645 for (i
= n
; i
>= 0; --i
)
11648 switch (GET_CODE (op
))
/* MULT addend: index*scale with the scale as an rtx.  */
11653 index
= XEXP (op
, 0);
11654 scale_rtx
= XEXP (op
, 1);
/* ASHIFT addend: index << log2(scale).  */
11660 index
= XEXP (op
, 0);
11661 tmp
= XEXP (op
, 1);
11662 if (!CONST_INT_P (tmp
))
11664 scale
= INTVAL (tmp
);
11665 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11667 scale
= 1 << scale
;
/* UNSPEC addend: only the TLS thread-pointer unspec is accepted,
   and it selects the %fs/%gs segment.  */
11672 if (GET_CODE (op
) != UNSPEC
)
11677 if (XINT (op
, 1) == UNSPEC_TP
11678 && TARGET_TLS_DIRECT_SEG_REFS
11679 && seg
== SEG_DEFAULT
)
11680 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11686 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11713 else if (GET_CODE (addr
) == MULT
)
11715 index
= XEXP (addr
, 0); /* index*scale */
11716 scale_rtx
= XEXP (addr
, 1);
11718 else if (GET_CODE (addr
) == ASHIFT
)
11720 /* We're called for lea too, which implements ashift on occasion. */
11721 index
= XEXP (addr
, 0);
11722 tmp
= XEXP (addr
, 1);
11723 if (!CONST_INT_P (tmp
))
11725 scale
= INTVAL (tmp
);
11726 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11728 scale
= 1 << scale
;
11731 else if (CONST_INT_P (addr
))
11733 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11736 /* Constant addresses are sign extended to 64bit, we have to
11737 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11739 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11745 disp
= addr
; /* displacement */
11751 else if (GET_CODE (index
) == SUBREG
11752 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11758 /* Address override works only on the (%reg) part of %fs:(%reg). */
11759 if (seg
!= SEG_DEFAULT
11760 && ((base
&& GET_MODE (base
) != word_mode
)
11761 || (index
&& GET_MODE (index
) != word_mode
)))
11764 /* Extract the integral value of scale. */
11767 if (!CONST_INT_P (scale_rtx
))
11769 scale
= INTVAL (scale_rtx
);
/* Look through SUBREGs so the register checks below see the hard
   register itself.  */
11772 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11773 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11775 /* Avoid useless 0 displacement. */
11776 if (disp
== const0_rtx
&& (base
|| index
))
11779 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11780 if (base_reg
&& index_reg
&& scale
== 1
11781 && (index_reg
== arg_pointer_rtx
11782 || index_reg
== frame_pointer_rtx
11783 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
/* Swap base and index so the special register ends up as the base.  */
11786 tmp
= base
, base
= index
, index
= tmp
;
11787 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11790 /* Special case: %ebp cannot be encoded as a base without a displacement.
*/
11794 && (base_reg
== hard_frame_pointer_rtx
11795 || base_reg
== frame_pointer_rtx
11796 || base_reg
== arg_pointer_rtx
11797 || (REG_P (base_reg
)
11798 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11799 || REGNO (base_reg
) == R13_REG
))))
11802 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11803 Avoid this by transforming to [%esi+0].
11804 Reload calls address legitimization without cfun defined, so we need
11805 to test cfun for being non-NULL. */
11806 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11807 && base_reg
&& !index_reg
&& !disp
11808 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11811 /* Special case: encode reg+reg instead of reg*2. */
11812 if (!base
&& index
&& scale
== 2)
11813 base
= index
, base_reg
= index_reg
, scale
= 1;
11815 /* Special case: scaling cannot be encoded without base or displacement. */
11816 if (!base
&& !disp
&& index
&& scale
!= 1)
/* Fill in the output structure.  */
11820 out
->index
= index
;
11822 out
->scale
= scale
;
11828 /* Return cost of the memory address x.
11829 For i386, it is better to use a complex address than let gcc copy
11830 the address into a reg and make a new pseudo. But not if the address
11831 requires two regs - that would mean more pseudos with longer
lifetimes.  */
/* Target hook body for address costing; MODE/AS/SPEED are unused on
   this target.  NOTE(review): the cost accumulator declaration and the
   return statements are elided in this extract; only the conditions
   that adjust the cost remain visible.  */
11834 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11835 addr_space_t as ATTRIBUTE_UNUSED
,
11836 bool speed ATTRIBUTE_UNUSED
)
11838 struct ix86_address parts
;
11840 int ok
= ix86_decompose_address (x
, &parts
);
/* Strip SUBREGs so the REGNO checks below apply to the registers
   themselves.  */
11844 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11845 parts
.base
= SUBREG_REG (parts
.base
);
11846 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11847 parts
.index
= SUBREG_REG (parts
.index
);
11849 /* Attempt to minimize number of registers in the address. */
11851 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11853 && (!REG_P (parts
.index
)
11854 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11858 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11860 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11861 && parts
.base
!= parts
.index
)
11864 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11865 since its predecode logic can't detect the length of instructions
11866 and it degenerates to vector decoded. Increase cost of such
11867 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11868 to split such addresses or even refuse such addresses at all.
11870 Following addressing modes are affected:
11875 The first and last case may be avoidable by explicitly coding the zero in
11876 memory address, but I don't have AMD-K6 machine handy to check this
*/
11880 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11881 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11882 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
11888 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11889 this is used to form addresses to local data when -fPIC is in
effect.  */
/* Predicate: true iff DISP is the Mach-O pic-base offset unspec.  */
11893 darwin_local_data_pic (rtx disp
)
11895 return (GET_CODE (disp
) == UNSPEC
11896 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
11899 /* Determine if a given RTX is a valid constant. We already know this
11900 satisfies CONSTANT_P. */
/* Target hook body for TARGET_LEGITIMATE_CONSTANT_P.
   NOTE(review): case labels, several early returns and the final
   default return are elided in this extract.  */
11903 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
11905 switch (GET_CODE (x
))
/* CONST: strip a trailing "+ const_int" before examining the body.  */
11910 if (GET_CODE (x
) == PLUS
)
11912 if (!CONST_INT_P (XEXP (x
, 1)))
11917 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
11920 /* Only some unspecs are valid as "constants". */
11921 if (GET_CODE (x
) == UNSPEC
)
11922 switch (XINT (x
, 1))
11925 case UNSPEC_GOTOFF
:
11926 case UNSPEC_PLTOFF
:
11927 return TARGET_64BIT
;
11929 case UNSPEC_NTPOFF
:
11930 x
= XVECEXP (x
, 0, 0);
11931 return (GET_CODE (x
) == SYMBOL_REF
11932 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11933 case UNSPEC_DTPOFF
:
11934 x
= XVECEXP (x
, 0, 0);
11935 return (GET_CODE (x
) == SYMBOL_REF
11936 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
11941 /* We must have drilled down to a symbol. */
11942 if (GET_CODE (x
) == LABEL_REF
)
11944 if (GET_CODE (x
) != SYMBOL_REF
)
11949 /* TLS symbols are never valid. */
11950 if (SYMBOL_REF_TLS_MODEL (x
))
11953 /* DLLIMPORT symbols are never valid. */
11954 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11955 && SYMBOL_REF_DLLIMPORT_P (x
))
11959 /* mdynamic-no-pic */
11960 if (MACHO_DYNAMIC_NO_PIC_P
)
11961 return machopic_symbol_defined_p (x
);
/* Wide integer / vector constants: restrict to what the move
   patterns can materialize.  */
11966 if (GET_MODE (x
) == TImode
11967 && x
!= CONST0_RTX (TImode
)
11973 if (!standard_sse_constant_p (x
))
11980 /* Otherwise we handle everything else in the move patterns. */
11984 /* Determine if it's legal to put X into the constant pool. This
11985 is not possible for the address of thread-local symbols, which
11986 is checked above. */
/* Target hook body for TARGET_CANNOT_FORCE_CONST_MEM: returns true
   when X may NOT go in the constant pool, i.e. when it is not a
   legitimate constant for MODE.  NOTE(review): the switch's case
   labels and early returns are elided in this extract.  */
11989 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
11991 /* We can always put integral constants and vectors in memory. */
11992 switch (GET_CODE (x
))
12002 return !ix86_legitimate_constant_p (mode
, x
);
12006 /* Nonzero if the constant value X is a legitimate general operand
12007 when generating PIC code. It is given that flag_pic is on and
12008 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): case labels and the default return are elided in this
   extract.  */
12011 legitimate_pic_operand_p (rtx x
)
12015 switch (GET_CODE (x
))
/* CONST: strip a trailing "+ const_int" before inspecting.  */
12018 inner
= XEXP (x
, 0);
12019 if (GET_CODE (inner
) == PLUS
12020 && CONST_INT_P (XEXP (inner
, 1)))
12021 inner
= XEXP (inner
, 0);
12023 /* Only some unspecs are valid as "constants". */
12024 if (GET_CODE (inner
) == UNSPEC
)
12025 switch (XINT (inner
, 1))
12028 case UNSPEC_GOTOFF
:
12029 case UNSPEC_PLTOFF
:
12030 return TARGET_64BIT
;
/* TLS local-exec offsets: only valid for LE-model symbols.  */
12032 x
= XVECEXP (inner
, 0, 0);
12033 return (GET_CODE (x
) == SYMBOL_REF
12034 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12035 case UNSPEC_MACHOPIC_OFFSET
:
12036 return legitimate_pic_address_disp_p (x
);
/* SYMBOL_REF / LABEL_REF fall back to the displacement check.  */
12044 return legitimate_pic_address_disp_p (x
);
12051 /* Determine if a given CONST RTX is a valid memory displacement
*/
/* ...in PIC mode (continuation of the comment above appears elided).
   NOTE(review): this extract has elided lines (case labels, early
   returns and several braces); code kept byte-identical.  */
12055 legitimate_pic_address_disp_p (rtx disp
)
12059 /* In 64bit mode we can allow direct addresses of symbols and labels
12060 when they are not dynamic symbols. */
12063 rtx op0
= disp
, op1
;
12065 switch (GET_CODE (disp
))
/* CONST: must be symbol/label plus a small (+-16MB) constant.  */
12071 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12073 op0
= XEXP (XEXP (disp
, 0), 0);
12074 op1
= XEXP (XEXP (disp
, 0), 1);
12075 if (!CONST_INT_P (op1
)
12076 || INTVAL (op1
) >= 16*1024*1024
12077 || INTVAL (op1
) < -16*1024*1024)
12079 if (GET_CODE (op0
) == LABEL_REF
)
12081 if (GET_CODE (op0
) == CONST
12082 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12083 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12085 if (GET_CODE (op0
) == UNSPEC
12086 && XINT (op0
, 1) == UNSPEC_PCREL
)
12088 if (GET_CODE (op0
) != SYMBOL_REF
)
12093 /* TLS references should always be enclosed in UNSPEC. */
12094 if (SYMBOL_REF_TLS_MODEL (op0
))
/* Local non-far symbols are directly addressable except under the
   large PIC model.  */
12096 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
12097 && ix86_cmodel
!= CM_LARGE_PIC
)
/* General (non-64-bit-direct) path: peel the CONST wrapper.  */
12105 if (GET_CODE (disp
) != CONST
)
12107 disp
= XEXP (disp
, 0);
12111 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12112 of GOT tables. We should not need these anyway. */
12113 if (GET_CODE (disp
) != UNSPEC
12114 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12115 && XINT (disp
, 1) != UNSPEC_GOTOFF
12116 && XINT (disp
, 1) != UNSPEC_PCREL
12117 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12120 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12121 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
/* 32-bit path: allow "unspec + const_int".  */
12127 if (GET_CODE (disp
) == PLUS
)
12129 if (!CONST_INT_P (XEXP (disp
, 1)))
12131 disp
= XEXP (disp
, 0);
12135 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12138 if (GET_CODE (disp
) != UNSPEC
)
12141 switch (XINT (disp
, 1))
12146 /* We need to check for both symbols and labels because VxWorks loads
12147 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
*/
12149 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12150 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12151 case UNSPEC_GOTOFF
:
12152 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12153 While ABI specify also 32bit relocation but we don't produce it in
12154 small PIC model at all. */
12155 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12156 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12158 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
/* TLS initial-exec relocations.  */
12160 case UNSPEC_GOTTPOFF
:
12161 case UNSPEC_GOTNTPOFF
:
12162 case UNSPEC_INDNTPOFF
:
12165 disp
= XVECEXP (disp
, 0, 0);
12166 return (GET_CODE (disp
) == SYMBOL_REF
12167 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12168 case UNSPEC_NTPOFF
:
12169 disp
= XVECEXP (disp
, 0, 0);
12170 return (GET_CODE (disp
) == SYMBOL_REF
12171 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12172 case UNSPEC_DTPOFF
:
12173 disp
= XVECEXP (disp
, 0, 0);
12174 return (GET_CODE (disp
) == SYMBOL_REF
12175 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12181 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12182 replace the input X, or the original X if no replacement is called for.
12183 The output parameter *WIN is 1 if the calling macro should goto WIN,
12184 0 if it should not. */
12187 ix86_legitimize_reload_address (rtx x
,
12188 enum machine_mode mode ATTRIBUTE_UNUSED
,
12189 int opnum
, int type
,
12190 int ind_levels ATTRIBUTE_UNUSED
)
12192 /* Reload can generate:
12194 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12198 This RTX is rejected from ix86_legitimate_address_p due to
12199 non-strictness of base register 97. Following this rejection,
12200 reload pushes all three components into separate registers,
12201 creating invalid memory address RTX.
12203 Following code reloads only the invalid part of the
12204 memory address RTX. */
12206 if (GET_CODE (x
) == PLUS
12207 && REG_P (XEXP (x
, 1))
12208 && GET_CODE (XEXP (x
, 0)) == PLUS
12209 && REG_P (XEXP (XEXP (x
, 0), 1)))
12212 bool something_reloaded
= false;
12214 base
= XEXP (XEXP (x
, 0), 1);
12215 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12217 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12218 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12219 opnum
, (enum reload_type
) type
);
12220 something_reloaded
= true;
12223 index
= XEXP (x
, 1);
12224 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12226 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12227 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12228 opnum
, (enum reload_type
) type
);
12229 something_reloaded
= true;
12232 gcc_assert (something_reloaded
);
12239 /* Recognizes RTL expressions that are valid memory addresses for an
12240 instruction. The MODE argument is the machine mode for the MEM
12241 expression that wants to use this address.
12243 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12244 convert common non-canonical forms to canonical form so that they will
12248 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12249 rtx addr
, bool strict
)
12251 struct ix86_address parts
;
12252 rtx base
, index
, disp
;
12253 HOST_WIDE_INT scale
;
12255 if (ix86_decompose_address (addr
, &parts
) <= 0)
12256 /* Decomposition failed. */
12260 index
= parts
.index
;
12262 scale
= parts
.scale
;
12264 /* Validate base register. */
12271 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12272 reg
= SUBREG_REG (base
);
12274 /* Base is not a register. */
12277 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12280 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12281 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12282 /* Base is not valid. */
12286 /* Validate index register. */
12293 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12294 reg
= SUBREG_REG (index
);
12296 /* Index is not a register. */
12299 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12302 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12303 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12304 /* Index is not valid. */
12308 /* Index and base should have the same mode. */
12310 && GET_MODE (base
) != GET_MODE (index
))
12313 /* Validate scale factor. */
12317 /* Scale without index. */
12320 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12321 /* Scale is not a valid multiplier. */
12325 /* Validate displacement. */
12328 if (GET_CODE (disp
) == CONST
12329 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12330 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12331 switch (XINT (XEXP (disp
, 0), 1))
12333 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12334 used. While ABI specify also 32bit relocations, we don't produce
12335 them at all and use IP relative instead. */
12337 case UNSPEC_GOTOFF
:
12338 gcc_assert (flag_pic
);
12340 goto is_legitimate_pic
;
12342 /* 64bit address unspec. */
12345 case UNSPEC_GOTPCREL
:
12347 gcc_assert (flag_pic
);
12348 goto is_legitimate_pic
;
12350 case UNSPEC_GOTTPOFF
:
12351 case UNSPEC_GOTNTPOFF
:
12352 case UNSPEC_INDNTPOFF
:
12353 case UNSPEC_NTPOFF
:
12354 case UNSPEC_DTPOFF
:
12357 case UNSPEC_STACK_CHECK
:
12358 gcc_assert (flag_split_stack
);
12362 /* Invalid address unspec. */
12366 else if (SYMBOLIC_CONST (disp
)
12370 && MACHOPIC_INDIRECT
12371 && !machopic_operand_p (disp
)
12377 if (TARGET_64BIT
&& (index
|| base
))
12379 /* foo@dtpoff(%rX) is ok. */
12380 if (GET_CODE (disp
) != CONST
12381 || GET_CODE (XEXP (disp
, 0)) != PLUS
12382 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12383 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12384 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12385 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12386 /* Non-constant pic memory reference. */
12389 else if ((!TARGET_MACHO
|| flag_pic
)
12390 && ! legitimate_pic_address_disp_p (disp
))
12391 /* Displacement is an invalid pic construct. */
12394 else if (MACHO_DYNAMIC_NO_PIC_P
12395 && !ix86_legitimate_constant_p (Pmode
, disp
))
12396 /* displacment must be referenced via non_lazy_pointer */
12400 /* This code used to verify that a symbolic pic displacement
12401 includes the pic_offset_table_rtx register.
12403 While this is good idea, unfortunately these constructs may
12404 be created by "adds using lea" optimization for incorrect
12413 This code is nonsensical, but results in addressing
12414 GOT table with pic_offset_table_rtx base. We can't
12415 just refuse it easily, since it gets matched by
12416 "addsi3" pattern, that later gets split to lea in the
12417 case output register differs from input. While this
12418 can be handled by separate addsi pattern for this case
12419 that never results in lea, this seems to be easier and
12420 correct fix for crash to disable this test. */
12422 else if (GET_CODE (disp
) != LABEL_REF
12423 && !CONST_INT_P (disp
)
12424 && (GET_CODE (disp
) != CONST
12425 || !ix86_legitimate_constant_p (Pmode
, disp
))
12426 && (GET_CODE (disp
) != SYMBOL_REF
12427 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12428 /* Displacement is not constant. */
12430 else if (TARGET_64BIT
12431 && !x86_64_immediate_operand (disp
, VOIDmode
))
12432 /* Displacement is out of range. */
12436 /* Everything looks valid. */
12440 /* Determine if a given RTX is a valid constant address. */
12443 constant_address_p (rtx x
)
12445 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12448 /* Return a unique alias set for the GOT. */
12450 static alias_set_type
12451 ix86_GOT_alias_set (void)
12453 static alias_set_type set
= -1;
12455 set
= new_alias_set ();
12459 /* Return a legitimate reference for ORIG (an address) using the
12460 register REG. If REG is 0, a new pseudo is generated.
12462 There are two types of references that must be handled:
12464 1. Global data references must load the address from the GOT, via
12465 the PIC reg. An insn is emitted to do this load, and the reg is
12468 2. Static data references, constant pool addresses, and code labels
12469 compute the address as an offset from the GOT, whose base is in
12470 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12471 differentiate them from global data objects. The returned
12472 address is the PIC reg + an unspec constant.
12474 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12475 reg also appears in the address. */
12478 legitimize_pic_address (rtx orig
, rtx reg
)
12481 rtx new_rtx
= orig
;
12484 if (TARGET_MACHO
&& !TARGET_64BIT
)
12487 reg
= gen_reg_rtx (Pmode
);
12488 /* Use the generic Mach-O PIC machinery. */
12489 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12493 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12495 else if (TARGET_64BIT
12496 && ix86_cmodel
!= CM_SMALL_PIC
12497 && gotoff_operand (addr
, Pmode
))
12500 /* This symbol may be referenced via a displacement from the PIC
12501 base address (@GOTOFF). */
12503 if (reload_in_progress
)
12504 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12505 if (GET_CODE (addr
) == CONST
)
12506 addr
= XEXP (addr
, 0);
12507 if (GET_CODE (addr
) == PLUS
)
12509 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12511 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12514 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12515 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12517 tmpreg
= gen_reg_rtx (Pmode
);
12520 emit_move_insn (tmpreg
, new_rtx
);
12524 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12525 tmpreg
, 1, OPTAB_DIRECT
);
12528 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12530 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12532 /* This symbol may be referenced via a displacement from the PIC
12533 base address (@GOTOFF). */
12535 if (reload_in_progress
)
12536 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12537 if (GET_CODE (addr
) == CONST
)
12538 addr
= XEXP (addr
, 0);
12539 if (GET_CODE (addr
) == PLUS
)
12541 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12543 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12546 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12547 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12548 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12552 emit_move_insn (reg
, new_rtx
);
12556 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12557 /* We can't use @GOTOFF for text labels on VxWorks;
12558 see gotoff_operand. */
12559 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12561 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12563 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12564 return legitimize_dllimport_symbol (addr
, true);
12565 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12566 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12567 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12569 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12570 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12574 /* For x64 PE-COFF there is no GOT table. So we use address
12576 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12578 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12579 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12582 reg
= gen_reg_rtx (Pmode
);
12583 emit_move_insn (reg
, new_rtx
);
12586 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12588 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12589 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12590 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12591 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12594 reg
= gen_reg_rtx (Pmode
);
12595 /* Use directly gen_movsi, otherwise the address is loaded
12596 into register for CSE. We don't want to CSE this addresses,
12597 instead we CSE addresses from the GOT table, so skip this. */
12598 emit_insn (gen_movsi (reg
, new_rtx
));
12603 /* This symbol must be referenced via a load from the
12604 Global Offset Table (@GOT). */
12606 if (reload_in_progress
)
12607 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12608 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12609 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12611 new_rtx
= force_reg (Pmode
, new_rtx
);
12612 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12613 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12614 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12617 reg
= gen_reg_rtx (Pmode
);
12618 emit_move_insn (reg
, new_rtx
);
12624 if (CONST_INT_P (addr
)
12625 && !x86_64_immediate_operand (addr
, VOIDmode
))
12629 emit_move_insn (reg
, addr
);
12633 new_rtx
= force_reg (Pmode
, addr
);
12635 else if (GET_CODE (addr
) == CONST
)
12637 addr
= XEXP (addr
, 0);
12639 /* We must match stuff we generate before. Assume the only
12640 unspecs that can get here are ours. Not that we could do
12641 anything with them anyway.... */
12642 if (GET_CODE (addr
) == UNSPEC
12643 || (GET_CODE (addr
) == PLUS
12644 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12646 gcc_assert (GET_CODE (addr
) == PLUS
);
12648 if (GET_CODE (addr
) == PLUS
)
12650 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12652 /* Check first to see if this is a constant offset from a @GOTOFF
12653 symbol reference. */
12654 if (gotoff_operand (op0
, Pmode
)
12655 && CONST_INT_P (op1
))
12659 if (reload_in_progress
)
12660 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12661 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12663 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12664 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12665 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12669 emit_move_insn (reg
, new_rtx
);
12675 if (INTVAL (op1
) < -16*1024*1024
12676 || INTVAL (op1
) >= 16*1024*1024)
12678 if (!x86_64_immediate_operand (op1
, Pmode
))
12679 op1
= force_reg (Pmode
, op1
);
12680 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12686 rtx base
= legitimize_pic_address (op0
, reg
);
12687 enum machine_mode mode
= GET_MODE (base
);
12689 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
12691 if (CONST_INT_P (new_rtx
))
12693 if (INTVAL (new_rtx
) < -16*1024*1024
12694 || INTVAL (new_rtx
) >= 16*1024*1024)
12696 if (!x86_64_immediate_operand (new_rtx
, mode
))
12697 new_rtx
= force_reg (mode
, new_rtx
);
12699 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
12702 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
12706 if (GET_CODE (new_rtx
) == PLUS
12707 && CONSTANT_P (XEXP (new_rtx
, 1)))
12709 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
12710 new_rtx
= XEXP (new_rtx
, 1);
12712 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
12720 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12723 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12725 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12727 if (GET_MODE (tp
) != tp_mode
)
12729 gcc_assert (GET_MODE (tp
) == SImode
);
12730 gcc_assert (tp_mode
== DImode
);
12732 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12736 tp
= copy_to_mode_reg (tp_mode
, tp
);
12741 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12743 static GTY(()) rtx ix86_tls_symbol
;
12746 ix86_tls_get_addr (void)
12748 if (!ix86_tls_symbol
)
12751 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12752 ? "___tls_get_addr" : "__tls_get_addr");
12754 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12757 return ix86_tls_symbol
;
12760 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12762 static GTY(()) rtx ix86_tls_module_base_symbol
;
12765 ix86_tls_module_base (void)
12767 if (!ix86_tls_module_base_symbol
)
12769 ix86_tls_module_base_symbol
12770 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12772 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12773 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12776 return ix86_tls_module_base_symbol
;
12779 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12780 false if we expect this to be used for a memory address and true if
12781 we expect to load the address into a register. */
12784 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12786 rtx dest
, base
, off
;
12787 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12788 enum machine_mode tp_mode
= Pmode
;
12793 case TLS_MODEL_GLOBAL_DYNAMIC
:
12794 dest
= gen_reg_rtx (Pmode
);
12799 pic
= pic_offset_table_rtx
;
12802 pic
= gen_reg_rtx (Pmode
);
12803 emit_insn (gen_set_got (pic
));
12807 if (TARGET_GNU2_TLS
)
12810 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12812 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12814 tp
= get_thread_pointer (Pmode
, true);
12815 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12817 if (GET_MODE (x
) != Pmode
)
12818 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12820 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12824 rtx caddr
= ix86_tls_get_addr ();
12828 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
12833 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
12834 insns
= get_insns ();
12837 if (GET_MODE (x
) != Pmode
)
12838 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12840 RTL_CONST_CALL_P (insns
) = 1;
12841 emit_libcall_block (insns
, dest
, rax
, x
);
12844 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
12848 case TLS_MODEL_LOCAL_DYNAMIC
:
12849 base
= gen_reg_rtx (Pmode
);
12854 pic
= pic_offset_table_rtx
;
12857 pic
= gen_reg_rtx (Pmode
);
12858 emit_insn (gen_set_got (pic
));
12862 if (TARGET_GNU2_TLS
)
12864 rtx tmp
= ix86_tls_module_base ();
12867 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12869 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12871 tp
= get_thread_pointer (Pmode
, true);
12872 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12873 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12877 rtx caddr
= ix86_tls_get_addr ();
12881 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
12886 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
12887 insns
= get_insns ();
12890 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12891 share the LD_BASE result with other LD model accesses. */
12892 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12893 UNSPEC_TLS_LD_BASE
);
12895 RTL_CONST_CALL_P (insns
) = 1;
12896 emit_libcall_block (insns
, base
, rax
, eqv
);
12899 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12902 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12903 off
= gen_rtx_CONST (Pmode
, off
);
12905 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12907 if (TARGET_GNU2_TLS
)
12909 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12911 if (GET_MODE (x
) != Pmode
)
12912 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12914 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12918 case TLS_MODEL_INITIAL_EXEC
:
12921 if (TARGET_SUN_TLS
&& !TARGET_X32
)
12923 /* The Sun linker took the AMD64 TLS spec literally
12924 and can only handle %rax as destination of the
12925 initial executable code sequence. */
12927 dest
= gen_reg_rtx (DImode
);
12928 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
12932 /* Generate DImode references to avoid %fs:(%reg32)
12933 problems and linker IE->LE relaxation bug. */
12936 type
= UNSPEC_GOTNTPOFF
;
12940 if (reload_in_progress
)
12941 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12942 pic
= pic_offset_table_rtx
;
12943 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
12945 else if (!TARGET_ANY_GNU_TLS
)
12947 pic
= gen_reg_rtx (Pmode
);
12948 emit_insn (gen_set_got (pic
));
12949 type
= UNSPEC_GOTTPOFF
;
12954 type
= UNSPEC_INDNTPOFF
;
12957 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
12958 off
= gen_rtx_CONST (tp_mode
, off
);
12960 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
12961 off
= gen_const_mem (tp_mode
, off
);
12962 set_mem_alias_set (off
, ix86_GOT_alias_set ());
12964 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12966 base
= get_thread_pointer (tp_mode
,
12967 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12968 off
= force_reg (tp_mode
, off
);
12969 return gen_rtx_PLUS (tp_mode
, base
, off
);
12973 base
= get_thread_pointer (Pmode
, true);
12974 dest
= gen_reg_rtx (Pmode
);
12975 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
12979 case TLS_MODEL_LOCAL_EXEC
:
12980 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
12981 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12982 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
12983 off
= gen_rtx_CONST (Pmode
, off
);
12985 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12987 base
= get_thread_pointer (Pmode
,
12988 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12989 return gen_rtx_PLUS (Pmode
, base
, off
);
12993 base
= get_thread_pointer (Pmode
, true);
12994 dest
= gen_reg_rtx (Pmode
);
12995 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13000 gcc_unreachable ();
13006 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13009 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13010 htab_t dllimport_map
;
13013 get_dllimport_decl (tree decl
)
13015 struct tree_map
*h
, in
;
13018 const char *prefix
;
13019 size_t namelen
, prefixlen
;
13024 if (!dllimport_map
)
13025 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13027 in
.hash
= htab_hash_pointer (decl
);
13028 in
.base
.from
= decl
;
13029 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13030 h
= (struct tree_map
*) *loc
;
13034 *loc
= h
= ggc_alloc_tree_map ();
13036 h
->base
.from
= decl
;
13037 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13038 VAR_DECL
, NULL
, ptr_type_node
);
13039 DECL_ARTIFICIAL (to
) = 1;
13040 DECL_IGNORED_P (to
) = 1;
13041 DECL_EXTERNAL (to
) = 1;
13042 TREE_READONLY (to
) = 1;
13044 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13045 name
= targetm
.strip_name_encoding (name
);
13046 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13047 ? "*__imp_" : "*__imp__";
13048 namelen
= strlen (name
);
13049 prefixlen
= strlen (prefix
);
13050 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13051 memcpy (imp_name
, prefix
, prefixlen
);
13052 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13054 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13055 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13056 SET_SYMBOL_REF_DECL (rtl
, to
);
13057 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
13059 rtl
= gen_const_mem (Pmode
, rtl
);
13060 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13062 SET_DECL_RTL (to
, rtl
);
13063 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13068 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13069 true if we require the result be a register. */
13072 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13077 gcc_assert (SYMBOL_REF_DECL (symbol
));
13078 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
13080 x
= DECL_RTL (imp_decl
);
13082 x
= force_reg (Pmode
, x
);
13086 /* Try machine-dependent ways of modifying an illegitimate address
13087 to be legitimate. If we find one, return the new, valid address.
13088 This macro is used in only one place: `memory_address' in explow.c.
13090 OLDX is the address as it was before break_out_memory_refs was called.
13091 In some cases it is useful to look at this to decide what needs to be done.
13093 It is always safe for this macro to do nothing. It exists to recognize
13094 opportunities to optimize the output.
13096 For the 80386, we handle X+REG by loading X into a register R and
13097 using R+REG. R will go in a general reg and indexing will be used.
13098 However, if REG is a broken-out memory address or multiplication,
13099 nothing needs to be done because REG can certainly go in a general reg.
13101 When -fpic is used, special handling is needed for symbolic references.
13102 See comments by legitimize_pic_address in i386.c for details. */
13105 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13106 enum machine_mode mode
)
13111 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13113 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13114 if (GET_CODE (x
) == CONST
13115 && GET_CODE (XEXP (x
, 0)) == PLUS
13116 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13117 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13119 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13120 (enum tls_model
) log
, false);
13121 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13124 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13126 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
13127 return legitimize_dllimport_symbol (x
, true);
13128 if (GET_CODE (x
) == CONST
13129 && GET_CODE (XEXP (x
, 0)) == PLUS
13130 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13131 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
13133 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
13134 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13138 if (flag_pic
&& SYMBOLIC_CONST (x
))
13139 return legitimize_pic_address (x
, 0);
13142 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13143 return machopic_indirect_data_reference (x
, 0);
13146 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13147 if (GET_CODE (x
) == ASHIFT
13148 && CONST_INT_P (XEXP (x
, 1))
13149 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13152 log
= INTVAL (XEXP (x
, 1));
13153 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13154 GEN_INT (1 << log
));
13157 if (GET_CODE (x
) == PLUS
)
13159 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13161 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13162 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13163 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13166 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13167 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13168 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13169 GEN_INT (1 << log
));
13172 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13173 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13174 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13177 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13178 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13179 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13180 GEN_INT (1 << log
));
13183 /* Put multiply first if it isn't already. */
13184 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13186 rtx tmp
= XEXP (x
, 0);
13187 XEXP (x
, 0) = XEXP (x
, 1);
13192 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13193 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13194 created by virtual register instantiation, register elimination, and
13195 similar optimizations. */
13196 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13199 x
= gen_rtx_PLUS (Pmode
,
13200 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13201 XEXP (XEXP (x
, 1), 0)),
13202 XEXP (XEXP (x
, 1), 1));
13206 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13207 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13208 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13209 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13210 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13211 && CONSTANT_P (XEXP (x
, 1)))
13214 rtx other
= NULL_RTX
;
13216 if (CONST_INT_P (XEXP (x
, 1)))
13218 constant
= XEXP (x
, 1);
13219 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13221 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13223 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13224 other
= XEXP (x
, 1);
13232 x
= gen_rtx_PLUS (Pmode
,
13233 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13234 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13235 plus_constant (Pmode
, other
,
13236 INTVAL (constant
)));
13240 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13243 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13246 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13249 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13252 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13256 && REG_P (XEXP (x
, 1))
13257 && REG_P (XEXP (x
, 0)))
13260 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13263 x
= legitimize_pic_address (x
, 0);
13266 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13269 if (REG_P (XEXP (x
, 0)))
13271 rtx temp
= gen_reg_rtx (Pmode
);
13272 rtx val
= force_operand (XEXP (x
, 1), temp
);
13275 val
= convert_to_mode (Pmode
, val
, 1);
13276 emit_move_insn (temp
, val
);
13279 XEXP (x
, 1) = temp
;
13283 else if (REG_P (XEXP (x
, 1)))
13285 rtx temp
= gen_reg_rtx (Pmode
);
13286 rtx val
= force_operand (XEXP (x
, 0), temp
);
13289 val
= convert_to_mode (Pmode
, val
, 1);
13290 emit_move_insn (temp
, val
);
13293 XEXP (x
, 0) = temp
;
13301 /* Print an integer constant expression in assembler syntax. Addition
13302 and subtraction are the only arithmetic that may appear in these
13303 expressions. FILE is the stdio stream to write to, X is the rtx, and
13304 CODE is the operand print code from the output string. */
13307 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13311 switch (GET_CODE (x
))
13314 gcc_assert (flag_pic
);
13319 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13320 output_addr_const (file
, x
);
13323 const char *name
= XSTR (x
, 0);
13325 /* Mark the decl as referenced so that cgraph will
13326 output the function. */
13327 if (SYMBOL_REF_DECL (x
))
13328 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13331 if (MACHOPIC_INDIRECT
13332 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13333 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13335 assemble_name (file
, name
);
13337 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13338 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13339 fputs ("@PLT", file
);
13346 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13347 assemble_name (asm_out_file
, buf
);
13351 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13355 /* This used to output parentheses around the expression,
13356 but that does not work on the 386 (either ATT or BSD assembler). */
13357 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13361 if (GET_MODE (x
) == VOIDmode
)
13363 /* We can use %d if the number is <32 bits and positive. */
13364 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13365 fprintf (file
, "0x%lx%08lx",
13366 (unsigned long) CONST_DOUBLE_HIGH (x
),
13367 (unsigned long) CONST_DOUBLE_LOW (x
));
13369 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13372 /* We can't handle floating point constants;
13373 TARGET_PRINT_OPERAND must handle them. */
13374 output_operand_lossage ("floating constant misused");
13378 /* Some assemblers need integer constants to appear first. */
13379 if (CONST_INT_P (XEXP (x
, 0)))
13381 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13383 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13387 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13388 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13390 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13396 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13397 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13399 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13401 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13405 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13407 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13412 gcc_assert (XVECLEN (x
, 0) == 1);
13413 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13414 switch (XINT (x
, 1))
13417 fputs ("@GOT", file
);
13419 case UNSPEC_GOTOFF
:
13420 fputs ("@GOTOFF", file
);
13422 case UNSPEC_PLTOFF
:
13423 fputs ("@PLTOFF", file
);
13426 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13427 "(%rip)" : "[rip]", file
);
13429 case UNSPEC_GOTPCREL
:
13430 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13431 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13433 case UNSPEC_GOTTPOFF
:
13434 /* FIXME: This might be @TPOFF in Sun ld too. */
13435 fputs ("@gottpoff", file
);
13438 fputs ("@tpoff", file
);
13440 case UNSPEC_NTPOFF
:
13442 fputs ("@tpoff", file
);
13444 fputs ("@ntpoff", file
);
13446 case UNSPEC_DTPOFF
:
13447 fputs ("@dtpoff", file
);
13449 case UNSPEC_GOTNTPOFF
:
13451 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13452 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13454 fputs ("@gotntpoff", file
);
13456 case UNSPEC_INDNTPOFF
:
13457 fputs ("@indntpoff", file
);
13460 case UNSPEC_MACHOPIC_OFFSET
:
13462 machopic_output_function_base_name (file
);
13466 output_operand_lossage ("invalid UNSPEC as operand");
13472 output_operand_lossage ("invalid expression as operand");
13476 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13477 We need to emit DTP-relative relocations. */
13479 static void ATTRIBUTE_UNUSED
13480 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13482 fputs (ASM_LONG
, file
);
13483 output_addr_const (file
, x
);
13484 fputs ("@dtpoff", file
);
13490 fputs (", 0", file
);
13493 gcc_unreachable ();
13497 /* Return true if X is a representation of the PIC register. This copes
13498 with calls from ix86_find_base_term, where the register might have
13499 been replaced by a cselib value. */
13502 ix86_pic_register_p (rtx x
)
13504 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13505 return (pic_offset_table_rtx
13506 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13508 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13511 /* Helper function for ix86_delegitimize_address.
13512 Attempt to delegitimize TLS local-exec accesses. */
13515 ix86_delegitimize_tls_address (rtx orig_x
)
13517 rtx x
= orig_x
, unspec
;
13518 struct ix86_address addr
;
13520 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13524 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13526 if (ix86_decompose_address (x
, &addr
) == 0
13527 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13528 || addr
.disp
== NULL_RTX
13529 || GET_CODE (addr
.disp
) != CONST
)
13531 unspec
= XEXP (addr
.disp
, 0);
13532 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13533 unspec
= XEXP (unspec
, 0);
13534 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13536 x
= XVECEXP (unspec
, 0, 0);
13537 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13538 if (unspec
!= XEXP (addr
.disp
, 0))
13539 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13542 rtx idx
= addr
.index
;
13543 if (addr
.scale
!= 1)
13544 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13545 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13548 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13549 if (MEM_P (orig_x
))
13550 x
= replace_equiv_address_nv (orig_x
, x
);
13554 /* In the name of slightly smaller debug output, and to cater to
13555 general assembler lossage, recognize PIC+GOTOFF and turn it back
13556 into a direct symbol reference.
13558 On Darwin, this is necessary to avoid a crash, because Darwin
13559 has a different PIC label for each routine but the DWARF debugging
13560 information is not associated with any particular routine, so it's
13561 necessary to remove references to the PIC label from RTL stored by
13562 the DWARF output code. */
13565 ix86_delegitimize_address (rtx x
)
13567 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13568 /* addend is NULL or some rtx if x is something+GOTOFF where
13569 something doesn't include the PIC register. */
13570 rtx addend
= NULL_RTX
;
13571 /* reg_addend is NULL or a multiple of some register. */
13572 rtx reg_addend
= NULL_RTX
;
13573 /* const_addend is NULL or a const_int. */
13574 rtx const_addend
= NULL_RTX
;
13575 /* This is the result, or NULL. */
13576 rtx result
= NULL_RTX
;
13585 if (GET_CODE (x
) == CONST
13586 && GET_CODE (XEXP (x
, 0)) == PLUS
13587 && GET_MODE (XEXP (x
, 0)) == Pmode
13588 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13589 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13590 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13592 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13593 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13594 if (MEM_P (orig_x
))
13595 x
= replace_equiv_address_nv (orig_x
, x
);
13598 if (GET_CODE (x
) != CONST
13599 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13600 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13601 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13602 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13603 return ix86_delegitimize_tls_address (orig_x
);
13604 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13605 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13607 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13615 if (GET_CODE (x
) != PLUS
13616 || GET_CODE (XEXP (x
, 1)) != CONST
)
13617 return ix86_delegitimize_tls_address (orig_x
);
13619 if (ix86_pic_register_p (XEXP (x
, 0)))
13620 /* %ebx + GOT/GOTOFF */
13622 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13624 /* %ebx + %reg * scale + GOT/GOTOFF */
13625 reg_addend
= XEXP (x
, 0);
13626 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13627 reg_addend
= XEXP (reg_addend
, 1);
13628 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13629 reg_addend
= XEXP (reg_addend
, 0);
13632 reg_addend
= NULL_RTX
;
13633 addend
= XEXP (x
, 0);
13637 addend
= XEXP (x
, 0);
13639 x
= XEXP (XEXP (x
, 1), 0);
13640 if (GET_CODE (x
) == PLUS
13641 && CONST_INT_P (XEXP (x
, 1)))
13643 const_addend
= XEXP (x
, 1);
13647 if (GET_CODE (x
) == UNSPEC
13648 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13649 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13650 result
= XVECEXP (x
, 0, 0);
13652 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13653 && !MEM_P (orig_x
))
13654 result
= XVECEXP (x
, 0, 0);
13657 return ix86_delegitimize_tls_address (orig_x
);
13660 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13662 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13665 /* If the rest of original X doesn't involve the PIC register, add
13666 addend and subtract pic_offset_table_rtx. This can happen e.g.
13668 leal (%ebx, %ecx, 4), %ecx
13670 movl foo@GOTOFF(%ecx), %edx
13671 in which case we return (%ecx - %ebx) + foo. */
13672 if (pic_offset_table_rtx
)
13673 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13674 pic_offset_table_rtx
),
13679 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13681 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13682 if (result
== NULL_RTX
)
13688 /* If X is a machine specific address (i.e. a symbol or label being
13689 referenced as a displacement from the GOT implemented using an
13690 UNSPEC), then return the base term. Otherwise return X. */
13693 ix86_find_base_term (rtx x
)
13699 if (GET_CODE (x
) != CONST
)
13701 term
= XEXP (x
, 0);
13702 if (GET_CODE (term
) == PLUS
13703 && (CONST_INT_P (XEXP (term
, 1))
13704 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13705 term
= XEXP (term
, 0);
13706 if (GET_CODE (term
) != UNSPEC
13707 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13708 && XINT (term
, 1) != UNSPEC_PCREL
))
13711 return XVECEXP (term
, 0, 0);
13714 return ix86_delegitimize_address (x
);
13718 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13719 bool fp
, FILE *file
)
13721 const char *suffix
;
13723 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13725 code
= ix86_fp_compare_code_to_integer (code
);
13729 code
= reverse_condition (code
);
13780 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13784 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13785 Those same assemblers have the same but opposite lossage on cmov. */
13786 if (mode
== CCmode
)
13787 suffix
= fp
? "nbe" : "a";
13788 else if (mode
== CCCmode
)
13791 gcc_unreachable ();
13807 gcc_unreachable ();
13811 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13828 gcc_unreachable ();
13832 /* ??? As above. */
13833 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13834 suffix
= fp
? "nb" : "ae";
13837 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13841 /* ??? As above. */
13842 if (mode
== CCmode
)
13844 else if (mode
== CCCmode
)
13845 suffix
= fp
? "nb" : "ae";
13847 gcc_unreachable ();
13850 suffix
= fp
? "u" : "p";
13853 suffix
= fp
? "nu" : "np";
13856 gcc_unreachable ();
13858 fputs (suffix
, file
);
13861 /* Print the name of register X to FILE based on its machine mode and number.
13862 If CODE is 'w', pretend the mode is HImode.
13863 If CODE is 'b', pretend the mode is QImode.
13864 If CODE is 'k', pretend the mode is SImode.
13865 If CODE is 'q', pretend the mode is DImode.
13866 If CODE is 'x', pretend the mode is V4SFmode.
13867 If CODE is 't', pretend the mode is V8SFmode.
13868 If CODE is 'h', pretend the reg is the 'high' byte register.
13869 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13870 If CODE is 'd', duplicate the operand for AVX instruction.
13874 print_reg (rtx x
, int code
, FILE *file
)
13877 unsigned int regno
;
13878 bool duplicated
= code
== 'd' && TARGET_AVX
;
13880 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13885 gcc_assert (TARGET_64BIT
);
13886 fputs ("rip", file
);
13890 regno
= true_regnum (x
);
13891 gcc_assert (regno
!= ARG_POINTER_REGNUM
13892 && regno
!= FRAME_POINTER_REGNUM
13893 && regno
!= FLAGS_REG
13894 && regno
!= FPSR_REG
13895 && regno
!= FPCR_REG
);
13897 if (code
== 'w' || MMX_REG_P (x
))
13899 else if (code
== 'b')
13901 else if (code
== 'k')
13903 else if (code
== 'q')
13905 else if (code
== 'y')
13907 else if (code
== 'h')
13909 else if (code
== 'x')
13911 else if (code
== 't')
13914 code
= GET_MODE_SIZE (GET_MODE (x
));
13916 /* Irritatingly, AMD extended registers use different naming convention
13917 from the normal registers: "r%d[bwd]" */
13918 if (REX_INT_REGNO_P (regno
))
13920 gcc_assert (TARGET_64BIT
);
13922 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
13926 error ("extended registers have no high halves");
13941 error ("unsupported operand size for extended register");
13951 if (STACK_TOP_P (x
))
13960 if (! ANY_FP_REG_P (x
))
13961 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
13966 reg
= hi_reg_name
[regno
];
13969 if (regno
>= ARRAY_SIZE (qi_reg_name
))
13971 reg
= qi_reg_name
[regno
];
13974 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
13976 reg
= qi_high_reg_name
[regno
];
13981 gcc_assert (!duplicated
);
13983 fputs (hi_reg_name
[regno
] + 1, file
);
13988 gcc_unreachable ();
13994 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13995 fprintf (file
, ", %%%s", reg
);
13997 fprintf (file
, ", %s", reg
);
14001 /* Locate some local-dynamic symbol still in use by this function
14002 so that we can print its name in some tls_local_dynamic_base
14006 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14010 if (GET_CODE (x
) == SYMBOL_REF
14011 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14013 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14020 static const char *
14021 get_some_local_dynamic_name (void)
14025 if (cfun
->machine
->some_ld_name
)
14026 return cfun
->machine
->some_ld_name
;
14028 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14029 if (NONDEBUG_INSN_P (insn
)
14030 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14031 return cfun
->machine
->some_ld_name
;
14036 /* Meaning of CODE:
14037 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14038 C -- print opcode suffix for set/cmov insn.
14039 c -- like C, but print reversed condition
14040 F,f -- likewise, but for floating-point.
14041 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14043 R -- print the prefix for register names.
14044 z -- print the opcode suffix for the size of the current operand.
14045 Z -- likewise, with special suffixes for x87 instructions.
14046 * -- print a star (in certain assembler syntax)
14047 A -- print an absolute memory reference.
14048 E -- print address with DImode register names if TARGET_64BIT.
14049 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14050 s -- print a shift double count, followed by the assemblers argument
14052 b -- print the QImode name of the register for the indicated operand.
14053 %b0 would print %al if operands[0] is reg 0.
14054 w -- likewise, print the HImode name of the register.
14055 k -- likewise, print the SImode name of the register.
14056 q -- likewise, print the DImode name of the register.
14057 x -- likewise, print the V4SFmode name of the register.
14058 t -- likewise, print the V8SFmode name of the register.
14059 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14060 y -- print "st(0)" instead of "st" as a register.
14061 d -- print duplicated register operand for AVX instruction.
14062 D -- print condition for SSE cmp instruction.
14063 P -- if PIC, print an @PLT suffix.
14064 p -- print raw symbol name.
14065 X -- don't print any sort of PIC '@' suffix for a symbol.
14066 & -- print some in-use local-dynamic symbol name.
14067 H -- print a memory address offset by 8; used for sse high-parts
14068 Y -- print condition for XOP pcom* instruction.
14069 + -- print a branch hint as 'cs' or 'ds' prefix
14070 ; -- print a semicolon (after prefixes due to bug in older gas).
14071 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14072 @ -- print a segment register of thread base pointer load
14073 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14077 ix86_print_operand (FILE *file
, rtx x
, int code
)
14084 switch (ASSEMBLER_DIALECT
)
14091 /* Intel syntax. For absolute addresses, registers should not
14092 be surrounded by braces. */
14096 ix86_print_operand (file
, x
, 0);
14103 gcc_unreachable ();
14106 ix86_print_operand (file
, x
, 0);
14110 /* Wrap address in an UNSPEC to declare special handling. */
14112 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14114 output_address (x
);
14118 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14123 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14128 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14133 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14138 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14143 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14148 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14149 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14152 switch (GET_MODE_SIZE (GET_MODE (x
)))
14167 output_operand_lossage
14168 ("invalid operand size for operand code 'O'");
14177 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14179 /* Opcodes don't get size suffixes if using Intel opcodes. */
14180 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14183 switch (GET_MODE_SIZE (GET_MODE (x
)))
14202 output_operand_lossage
14203 ("invalid operand size for operand code 'z'");
14208 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14210 (0, "non-integer operand used with operand code 'z'");
14214 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14215 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14218 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14220 switch (GET_MODE_SIZE (GET_MODE (x
)))
14223 #ifdef HAVE_AS_IX86_FILDS
14233 #ifdef HAVE_AS_IX86_FILDQ
14236 fputs ("ll", file
);
14244 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14246 /* 387 opcodes don't get size suffixes
14247 if the operands are registers. */
14248 if (STACK_REG_P (x
))
14251 switch (GET_MODE_SIZE (GET_MODE (x
)))
14272 output_operand_lossage
14273 ("invalid operand type used with operand code 'Z'");
14277 output_operand_lossage
14278 ("invalid operand size for operand code 'Z'");
14296 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14298 ix86_print_operand (file
, x
, 0);
14299 fputs (", ", file
);
14304 switch (GET_CODE (x
))
14307 fputs ("neq", file
);
14310 fputs ("eq", file
);
14314 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14318 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14322 fputs ("le", file
);
14326 fputs ("lt", file
);
14329 fputs ("unord", file
);
14332 fputs ("ord", file
);
14335 fputs ("ueq", file
);
14338 fputs ("nlt", file
);
14341 fputs ("nle", file
);
14344 fputs ("ule", file
);
14347 fputs ("ult", file
);
14350 fputs ("une", file
);
14353 output_operand_lossage ("operand is not a condition code, "
14354 "invalid operand code 'Y'");
14360 /* Little bit of braindamage here. The SSE compare instructions
14361 does use completely different names for the comparisons that the
14362 fp conditional moves. */
14363 switch (GET_CODE (x
))
14368 fputs ("eq_us", file
);
14372 fputs ("eq", file
);
14377 fputs ("nge", file
);
14381 fputs ("lt", file
);
14386 fputs ("ngt", file
);
14390 fputs ("le", file
);
14393 fputs ("unord", file
);
14398 fputs ("neq_oq", file
);
14402 fputs ("neq", file
);
14407 fputs ("ge", file
);
14411 fputs ("nlt", file
);
14416 fputs ("gt", file
);
14420 fputs ("nle", file
);
14423 fputs ("ord", file
);
14426 output_operand_lossage ("operand is not a condition code, "
14427 "invalid operand code 'D'");
14434 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14435 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14441 if (!COMPARISON_P (x
))
14443 output_operand_lossage ("operand is not a condition code, "
14444 "invalid operand code '%c'", code
);
14447 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14448 code
== 'c' || code
== 'f',
14449 code
== 'F' || code
== 'f',
14454 if (!offsettable_memref_p (x
))
14456 output_operand_lossage ("operand is not an offsettable memory "
14457 "reference, invalid operand code 'H'");
14460 /* It doesn't actually matter what mode we use here, as we're
14461 only going to use this for printing. */
14462 x
= adjust_address_nv (x
, DImode
, 8);
14466 gcc_assert (CONST_INT_P (x
));
14468 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14469 #ifdef HAVE_AS_IX86_HLE
14470 fputs ("xacquire ", file
);
14472 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14474 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14475 #ifdef HAVE_AS_IX86_HLE
14476 fputs ("xrelease ", file
);
14478 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14480 /* We do not want to print value of the operand. */
14484 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14490 const char *name
= get_some_local_dynamic_name ();
14492 output_operand_lossage ("'%%&' used without any "
14493 "local dynamic TLS references");
14495 assemble_name (file
, name
);
14504 || optimize_function_for_size_p (cfun
)
14505 || !TARGET_BRANCH_PREDICTION_HINTS
)
14508 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14511 int pred_val
= INTVAL (XEXP (x
, 0));
14513 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14514 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14516 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14518 = final_forward_branch_p (current_output_insn
) == 0;
14520 /* Emit hints only in the case default branch prediction
14521 heuristics would fail. */
14522 if (taken
!= cputaken
)
14524 /* We use 3e (DS) prefix for taken branches and
14525 2e (CS) prefix for not taken branches. */
14527 fputs ("ds ; ", file
);
14529 fputs ("cs ; ", file
);
14537 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14543 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14546 /* The kernel uses a different segment register for performance
14547 reasons; a system call would not have to trash the userspace
14548 segment register, which would be expensive. */
14549 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14550 fputs ("fs", file
);
14552 fputs ("gs", file
);
14556 putc (TARGET_AVX2
? 'i' : 'f', file
);
14560 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14561 fputs ("addr32 ", file
);
14565 output_operand_lossage ("invalid operand code '%c'", code
);
14570 print_reg (x
, code
, file
);
14572 else if (MEM_P (x
))
14574 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14575 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14576 && GET_MODE (x
) != BLKmode
)
14579 switch (GET_MODE_SIZE (GET_MODE (x
)))
14581 case 1: size
= "BYTE"; break;
14582 case 2: size
= "WORD"; break;
14583 case 4: size
= "DWORD"; break;
14584 case 8: size
= "QWORD"; break;
14585 case 12: size
= "TBYTE"; break;
14587 if (GET_MODE (x
) == XFmode
)
14592 case 32: size
= "YMMWORD"; break;
14594 gcc_unreachable ();
14597 /* Check for explicit size override (codes 'b', 'w', 'k',
14601 else if (code
== 'w')
14603 else if (code
== 'k')
14605 else if (code
== 'q')
14607 else if (code
== 'x')
14610 fputs (size
, file
);
14611 fputs (" PTR ", file
);
14615 /* Avoid (%rip) for call operands. */
14616 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14617 && !CONST_INT_P (x
))
14618 output_addr_const (file
, x
);
14619 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14620 output_operand_lossage ("invalid constraints for operand");
14622 output_address (x
);
14625 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14630 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14631 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14633 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14635 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14637 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14639 fprintf (file
, "0x%08x", (unsigned int) l
);
14642 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14647 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14648 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14650 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14652 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14655 /* These float cases don't actually occur as immediate operands. */
14656 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14660 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14661 fputs (dstr
, file
);
14666 /* We have patterns that allow zero sets of memory, for instance.
14667 In 64-bit mode, we should probably support all 8-byte vectors,
14668 since we can in fact encode that into an immediate. */
14669 if (GET_CODE (x
) == CONST_VECTOR
)
14671 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14675 if (code
!= 'P' && code
!= 'p')
14677 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14679 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14682 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14683 || GET_CODE (x
) == LABEL_REF
)
14685 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14688 fputs ("OFFSET FLAT:", file
);
14691 if (CONST_INT_P (x
))
14692 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14693 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14694 output_pic_addr_const (file
, x
, code
);
14696 output_addr_const (file
, x
);
14701 ix86_print_operand_punct_valid_p (unsigned char code
)
14703 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
14704 || code
== ';' || code
== '~' || code
== '^');
14707 /* Print a memory operand whose address is ADDR. */
14710 ix86_print_operand_address (FILE *file
, rtx addr
)
14712 struct ix86_address parts
;
14713 rtx base
, index
, disp
;
14719 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14721 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14722 gcc_assert (parts
.index
== NULL_RTX
);
14723 parts
.index
= XVECEXP (addr
, 0, 1);
14724 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14725 addr
= XVECEXP (addr
, 0, 0);
14728 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
14730 gcc_assert (TARGET_64BIT
);
14731 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14735 ok
= ix86_decompose_address (addr
, &parts
);
14740 index
= parts
.index
;
14742 scale
= parts
.scale
;
14750 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14752 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14755 gcc_unreachable ();
14758 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14759 if (TARGET_64BIT
&& !base
&& !index
)
14763 if (GET_CODE (disp
) == CONST
14764 && GET_CODE (XEXP (disp
, 0)) == PLUS
14765 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14766 symbol
= XEXP (XEXP (disp
, 0), 0);
14768 if (GET_CODE (symbol
) == LABEL_REF
14769 || (GET_CODE (symbol
) == SYMBOL_REF
14770 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14773 if (!base
&& !index
)
14775 /* Displacement only requires special attention. */
14777 if (CONST_INT_P (disp
))
14779 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14780 fputs ("ds:", file
);
14781 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14784 output_pic_addr_const (file
, disp
, 0);
14786 output_addr_const (file
, disp
);
14790 /* Print SImode register names to force addr32 prefix. */
14791 if (SImode_address_operand (addr
, VOIDmode
))
14793 #ifdef ENABLE_CHECKING
14794 gcc_assert (TARGET_64BIT
);
14795 switch (GET_CODE (addr
))
14798 gcc_assert (GET_MODE (addr
) == SImode
);
14799 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
14803 gcc_assert (GET_MODE (addr
) == DImode
);
14806 gcc_unreachable ();
14809 gcc_assert (!code
);
14815 && CONST_INT_P (disp
)
14816 && INTVAL (disp
) < -16*1024*1024)
14818 /* X32 runs in 64-bit mode, where displacement, DISP, in
14819 address DISP(%r64), is encoded as 32-bit immediate sign-
14820 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14821 address is %r64 + 0xffffffffbffffd00. When %r64 <
14822 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14823 which is invalid for x32. The correct address is %r64
14824 - 0x40000300 == 0xf7ffdd64. To properly encode
14825 -0x40000300(%r64) for x32, we zero-extend negative
14826 displacement by forcing addr32 prefix which truncates
14827 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14828 zero-extend all negative displacements, including -1(%rsp).
14829 However, for small negative displacements, sign-extension
14830 won't cause overflow. We only zero-extend negative
14831 displacements if they < -16*1024*1024, which is also used
14832 to check legitimate address displacements for PIC. */
14836 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14841 output_pic_addr_const (file
, disp
, 0);
14842 else if (GET_CODE (disp
) == LABEL_REF
)
14843 output_asm_label (disp
);
14845 output_addr_const (file
, disp
);
14850 print_reg (base
, code
, file
);
14854 print_reg (index
, vsib
? 0 : code
, file
);
14855 if (scale
!= 1 || vsib
)
14856 fprintf (file
, ",%d", scale
);
14862 rtx offset
= NULL_RTX
;
14866 /* Pull out the offset of a symbol; print any symbol itself. */
14867 if (GET_CODE (disp
) == CONST
14868 && GET_CODE (XEXP (disp
, 0)) == PLUS
14869 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14871 offset
= XEXP (XEXP (disp
, 0), 1);
14872 disp
= gen_rtx_CONST (VOIDmode
,
14873 XEXP (XEXP (disp
, 0), 0));
14877 output_pic_addr_const (file
, disp
, 0);
14878 else if (GET_CODE (disp
) == LABEL_REF
)
14879 output_asm_label (disp
);
14880 else if (CONST_INT_P (disp
))
14883 output_addr_const (file
, disp
);
14889 print_reg (base
, code
, file
);
14892 if (INTVAL (offset
) >= 0)
14894 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14898 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14905 print_reg (index
, vsib
? 0 : code
, file
);
14906 if (scale
!= 1 || vsib
)
14907 fprintf (file
, "*%d", scale
);
14914 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14917 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14921 if (GET_CODE (x
) != UNSPEC
)
14924 op
= XVECEXP (x
, 0, 0);
14925 switch (XINT (x
, 1))
14927 case UNSPEC_GOTTPOFF
:
14928 output_addr_const (file
, op
);
14929 /* FIXME: This might be @TPOFF in Sun ld. */
14930 fputs ("@gottpoff", file
);
14933 output_addr_const (file
, op
);
14934 fputs ("@tpoff", file
);
14936 case UNSPEC_NTPOFF
:
14937 output_addr_const (file
, op
);
14939 fputs ("@tpoff", file
);
14941 fputs ("@ntpoff", file
);
14943 case UNSPEC_DTPOFF
:
14944 output_addr_const (file
, op
);
14945 fputs ("@dtpoff", file
);
14947 case UNSPEC_GOTNTPOFF
:
14948 output_addr_const (file
, op
);
14950 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14951 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14953 fputs ("@gotntpoff", file
);
14955 case UNSPEC_INDNTPOFF
:
14956 output_addr_const (file
, op
);
14957 fputs ("@indntpoff", file
);
14960 case UNSPEC_MACHOPIC_OFFSET
:
14961 output_addr_const (file
, op
);
14963 machopic_output_function_base_name (file
);
14967 case UNSPEC_STACK_CHECK
:
14971 gcc_assert (flag_split_stack
);
14973 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14974 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
14976 gcc_unreachable ();
14979 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
14990 /* Split one or more double-mode RTL references into pairs of half-mode
14991 references. The RTL can be REG, offsettable MEM, integer constant, or
14992 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14993 split and "num" is its length. lo_half and hi_half are output arrays
14994 that parallel "operands". */
14997 split_double_mode (enum machine_mode mode
, rtx operands
[],
14998 int num
, rtx lo_half
[], rtx hi_half
[])
15000 enum machine_mode half_mode
;
15006 half_mode
= DImode
;
15009 half_mode
= SImode
;
15012 gcc_unreachable ();
15015 byte
= GET_MODE_SIZE (half_mode
);
15019 rtx op
= operands
[num
];
15021 /* simplify_subreg refuse to split volatile memory addresses,
15022 but we still have to handle it. */
15025 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15026 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15030 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15031 GET_MODE (op
) == VOIDmode
15032 ? mode
: GET_MODE (op
), 0);
15033 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15034 GET_MODE (op
) == VOIDmode
15035 ? mode
: GET_MODE (op
), byte
);
15040 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15041 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15042 is the expression of the binary operation. The output may either be
15043 emitted here, or returned to the caller, like all output_* functions.
15045 There is no guarantee that the operands are the same mode, as they
15046 might be within FLOAT or FLOAT_EXTEND expressions. */
15048 #ifndef SYSV386_COMPAT
15049 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15050 wants to fix the assemblers because that causes incompatibility
15051 with gcc. No-one wants to fix gcc because that causes
15052 incompatibility with assemblers... You can use the option of
15053 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15054 #define SYSV386_COMPAT 1
15058 output_387_binary_op (rtx insn
, rtx
*operands
)
15060 static char buf
[40];
15063 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15065 #ifdef ENABLE_CHECKING
15066 /* Even if we do not want to check the inputs, this documents input
15067 constraints. Which helps in understanding the following code. */
15068 if (STACK_REG_P (operands
[0])
15069 && ((REG_P (operands
[1])
15070 && REGNO (operands
[0]) == REGNO (operands
[1])
15071 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15072 || (REG_P (operands
[2])
15073 && REGNO (operands
[0]) == REGNO (operands
[2])
15074 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15075 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15078 gcc_assert (is_sse
);
15081 switch (GET_CODE (operands
[3]))
15084 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15085 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15093 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15094 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15102 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15103 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15111 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15112 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15120 gcc_unreachable ();
15127 strcpy (buf
, ssep
);
15128 if (GET_MODE (operands
[0]) == SFmode
)
15129 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15131 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15135 strcpy (buf
, ssep
+ 1);
15136 if (GET_MODE (operands
[0]) == SFmode
)
15137 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15139 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15145 switch (GET_CODE (operands
[3]))
15149 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15151 rtx temp
= operands
[2];
15152 operands
[2] = operands
[1];
15153 operands
[1] = temp
;
15156 /* know operands[0] == operands[1]. */
15158 if (MEM_P (operands
[2]))
15164 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15166 if (STACK_TOP_P (operands
[0]))
15167 /* How is it that we are storing to a dead operand[2]?
15168 Well, presumably operands[1] is dead too. We can't
15169 store the result to st(0) as st(0) gets popped on this
15170 instruction. Instead store to operands[2] (which I
15171 think has to be st(1)). st(1) will be popped later.
15172 gcc <= 2.8.1 didn't have this check and generated
15173 assembly code that the Unixware assembler rejected. */
15174 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15176 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15180 if (STACK_TOP_P (operands
[0]))
15181 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15183 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15188 if (MEM_P (operands
[1]))
15194 if (MEM_P (operands
[2]))
15200 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15203 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15204 derived assemblers, confusingly reverse the direction of
15205 the operation for fsub{r} and fdiv{r} when the
15206 destination register is not st(0). The Intel assembler
15207 doesn't have this brain damage. Read !SYSV386_COMPAT to
15208 figure out what the hardware really does. */
15209 if (STACK_TOP_P (operands
[0]))
15210 p
= "{p\t%0, %2|rp\t%2, %0}";
15212 p
= "{rp\t%2, %0|p\t%0, %2}";
15214 if (STACK_TOP_P (operands
[0]))
15215 /* As above for fmul/fadd, we can't store to st(0). */
15216 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15218 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15223 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15226 if (STACK_TOP_P (operands
[0]))
15227 p
= "{rp\t%0, %1|p\t%1, %0}";
15229 p
= "{p\t%1, %0|rp\t%0, %1}";
15231 if (STACK_TOP_P (operands
[0]))
15232 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15234 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15239 if (STACK_TOP_P (operands
[0]))
15241 if (STACK_TOP_P (operands
[1]))
15242 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15244 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15247 else if (STACK_TOP_P (operands
[1]))
15250 p
= "{\t%1, %0|r\t%0, %1}";
15252 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15258 p
= "{r\t%2, %0|\t%0, %2}";
15260 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15266 gcc_unreachable ();
15273 /* Check if a 256bit AVX register is referenced inside of EXP. */
15276 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15280 if (GET_CODE (exp
) == SUBREG
)
15281 exp
= SUBREG_REG (exp
);
15284 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15290 /* Return needed mode for entity in optimize_mode_switching pass. */
15293 ix86_avx_u128_mode_needed (rtx insn
)
15299 /* Needed mode is set to AVX_U128_CLEAN if there are
15300 no 256bit modes used in function arguments. */
15301 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15303 link
= XEXP (link
, 1))
15305 if (GET_CODE (XEXP (link
, 0)) == USE
)
15307 rtx arg
= XEXP (XEXP (link
, 0), 0);
15309 if (ix86_check_avx256_register (&arg
, NULL
))
15310 return AVX_U128_ANY
;
15314 return AVX_U128_CLEAN
;
15317 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15318 changes state only when a 256bit register is written to, but we need
15319 to prevent the compiler from moving optimal insertion point above
15320 eventual read from 256bit register. */
15321 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15322 return AVX_U128_DIRTY
;
15324 return AVX_U128_ANY
;
15327 /* Return mode that i387 must be switched into
15328 prior to the execution of insn. */
15331 ix86_i387_mode_needed (int entity
, rtx insn
)
15333 enum attr_i387_cw mode
;
15335 /* The mode UNINITIALIZED is used to store control word after a
15336 function call or ASM pattern. The mode ANY specify that function
15337 has no requirements on the control word and make no changes in the
15338 bits we are interested in. */
15341 || (NONJUMP_INSN_P (insn
)
15342 && (asm_noperands (PATTERN (insn
)) >= 0
15343 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15344 return I387_CW_UNINITIALIZED
;
15346 if (recog_memoized (insn
) < 0)
15347 return I387_CW_ANY
;
15349 mode
= get_attr_i387_cw (insn
);
15354 if (mode
== I387_CW_TRUNC
)
15359 if (mode
== I387_CW_FLOOR
)
15364 if (mode
== I387_CW_CEIL
)
15369 if (mode
== I387_CW_MASK_PM
)
15374 gcc_unreachable ();
15377 return I387_CW_ANY
;
15380 /* Return mode that entity must be switched into
15381 prior to the execution of insn. */
15384 ix86_mode_needed (int entity
, rtx insn
)
15389 return ix86_avx_u128_mode_needed (insn
);
15394 return ix86_i387_mode_needed (entity
, insn
);
15396 gcc_unreachable ();
15401 /* Check if a 256bit AVX register is referenced in stores. */
15404 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15406 if (ix86_check_avx256_register (&dest
, NULL
))
15408 bool *used
= (bool *) data
;
15413 /* Calculate mode of upper 128bit AVX registers after the insn. */
15416 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15418 rtx pat
= PATTERN (insn
);
15420 if (vzeroupper_operation (pat
, VOIDmode
)
15421 || vzeroall_operation (pat
, VOIDmode
))
15422 return AVX_U128_CLEAN
;
15424 /* We know that state is clean after CALL insn if there are no
15425 256bit registers used in the function return register. */
15428 bool avx_reg256_found
= false;
15429 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15430 if (!avx_reg256_found
)
15431 return AVX_U128_CLEAN
;
15434 /* Otherwise, return current mode. Remember that if insn
15435 references AVX 256bit registers, the mode was already changed
15436 to DIRTY from MODE_NEEDED. */
15440 /* Return the mode that an insn results in. */
15443 ix86_mode_after (int entity
, int mode
, rtx insn
)
15448 return ix86_avx_u128_mode_after (mode
, insn
);
15455 gcc_unreachable ();
15460 ix86_avx_u128_mode_entry (void)
15464 /* Entry mode is set to AVX_U128_DIRTY if there are
15465 256bit modes used in function arguments. */
15466 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15467 arg
= TREE_CHAIN (arg
))
15469 rtx incoming
= DECL_INCOMING_RTL (arg
);
15471 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15472 return AVX_U128_DIRTY
;
15475 return AVX_U128_CLEAN
;
15478 /* Return a mode that ENTITY is assumed to be
15479 switched to at function entry. */
15482 ix86_mode_entry (int entity
)
15487 return ix86_avx_u128_mode_entry ();
15492 return I387_CW_ANY
;
15494 gcc_unreachable ();
15499 ix86_avx_u128_mode_exit (void)
15501 rtx reg
= crtl
->return_rtx
;
15503 /* Exit mode is set to AVX_U128_DIRTY if there are
15504 256bit modes used in the function return register. */
15505 if (reg
&& ix86_check_avx256_register (®
, NULL
))
15506 return AVX_U128_DIRTY
;
15508 return AVX_U128_CLEAN
;
15511 /* Return a mode that ENTITY is assumed to be
15512 switched to at function exit. */
15515 ix86_mode_exit (int entity
)
15520 return ix86_avx_u128_mode_exit ();
15525 return I387_CW_ANY
;
15527 gcc_unreachable ();
15531 /* Output code to initialize control word copies used by trunc?f?i and
15532 rounding patterns. CURRENT_MODE is set to current control word,
15533 while NEW_MODE is set to new control word. */
15536 emit_i387_cw_initialization (int mode
)
15538 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15541 enum ix86_stack_slot slot
;
15543 rtx reg
= gen_reg_rtx (HImode
);
15545 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15546 emit_move_insn (reg
, copy_rtx (stored_mode
));
15548 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15549 || optimize_function_for_size_p (cfun
))
15553 case I387_CW_TRUNC
:
15554 /* round toward zero (truncate) */
15555 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15556 slot
= SLOT_CW_TRUNC
;
15559 case I387_CW_FLOOR
:
15560 /* round down toward -oo */
15561 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15562 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15563 slot
= SLOT_CW_FLOOR
;
15567 /* round up toward +oo */
15568 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15569 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15570 slot
= SLOT_CW_CEIL
;
15573 case I387_CW_MASK_PM
:
15574 /* mask precision exception for nearbyint() */
15575 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15576 slot
= SLOT_CW_MASK_PM
;
15580 gcc_unreachable ();
15587 case I387_CW_TRUNC
:
15588 /* round toward zero (truncate) */
15589 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15590 slot
= SLOT_CW_TRUNC
;
15593 case I387_CW_FLOOR
:
15594 /* round down toward -oo */
15595 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15596 slot
= SLOT_CW_FLOOR
;
15600 /* round up toward +oo */
15601 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15602 slot
= SLOT_CW_CEIL
;
15605 case I387_CW_MASK_PM
:
15606 /* mask precision exception for nearbyint() */
15607 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15608 slot
= SLOT_CW_MASK_PM
;
15612 gcc_unreachable ();
15616 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15618 new_mode
= assign_386_stack_local (HImode
, slot
);
15619 emit_move_insn (new_mode
, reg
);
15622 /* Emit vzeroupper. */
15625 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
15629 /* Cancel automatic vzeroupper insertion if there are
15630 live call-saved SSE registers at the insertion point. */
15632 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15633 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15637 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15638 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15641 emit_insn (gen_avx_vzeroupper ());
15644 /* Generate one or more insns to set ENTITY to MODE. */
15647 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
15652 if (mode
== AVX_U128_CLEAN
)
15653 ix86_avx_emit_vzeroupper (regs_live
);
15659 if (mode
!= I387_CW_ANY
15660 && mode
!= I387_CW_UNINITIALIZED
)
15661 emit_i387_cw_initialization (mode
);
15664 gcc_unreachable ();
15668 /* Output code for INSN to convert a float to a signed int. OPERANDS
15669 are the insn operands. The output may be [HSD]Imode and the input
15670 operand may be [SDX]Fmode. */
15673 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15675 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15676 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15677 int round_mode
= get_attr_i387_cw (insn
);
15679 /* Jump through a hoop or two for DImode, since the hardware has no
15680 non-popping instruction. We used to do this a different way, but
15681 that was somewhat fragile and broke with post-reload splitters. */
15682 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15683 output_asm_insn ("fld\t%y1", operands
);
15685 gcc_assert (STACK_TOP_P (operands
[1]));
15686 gcc_assert (MEM_P (operands
[0]));
15687 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15690 output_asm_insn ("fisttp%Z0\t%0", operands
);
15693 if (round_mode
!= I387_CW_ANY
)
15694 output_asm_insn ("fldcw\t%3", operands
);
15695 if (stack_top_dies
|| dimode_p
)
15696 output_asm_insn ("fistp%Z0\t%0", operands
);
15698 output_asm_insn ("fist%Z0\t%0", operands
);
15699 if (round_mode
!= I387_CW_ANY
)
15700 output_asm_insn ("fldcw\t%2", operands
);
15706 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15707 have the values zero or one, indicates the ffreep insn's operand
15708 from the OPERANDS array. */
15710 static const char *
15711 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15713 if (TARGET_USE_FFREEP
)
15714 #ifdef HAVE_AS_IX86_FFREEP
15715 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
15718 static char retval
[32];
15719 int regno
= REGNO (operands
[opno
]);
15721 gcc_assert (STACK_REGNO_P (regno
));
15723 regno
-= FIRST_STACK_REG
;
15725 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
15730 return opno
? "fstp\t%y1" : "fstp\t%y0";
15734 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15735 should be used. UNORDERED_P is true when fucom should be used. */
15738 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15740 int stack_top_dies
;
15741 rtx cmp_op0
, cmp_op1
;
15742 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15746 cmp_op0
= operands
[0];
15747 cmp_op1
= operands
[1];
15751 cmp_op0
= operands
[1];
15752 cmp_op1
= operands
[2];
15757 if (GET_MODE (operands
[0]) == SFmode
)
15759 return "%vucomiss\t{%1, %0|%0, %1}";
15761 return "%vcomiss\t{%1, %0|%0, %1}";
15764 return "%vucomisd\t{%1, %0|%0, %1}";
15766 return "%vcomisd\t{%1, %0|%0, %1}";
15769 gcc_assert (STACK_TOP_P (cmp_op0
));
15771 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15773 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15775 if (stack_top_dies
)
15777 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15778 return output_387_ffreep (operands
, 1);
15781 return "ftst\n\tfnstsw\t%0";
15784 if (STACK_REG_P (cmp_op1
)
15786 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15787 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15789 /* If both the top of the 387 stack dies, and the other operand
15790 is also a stack register that dies, then this must be a
15791 `fcompp' float compare */
15795 /* There is no double popping fcomi variant. Fortunately,
15796 eflags is immune from the fstp's cc clobbering. */
15798 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15800 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15801 return output_387_ffreep (operands
, 0);
15806 return "fucompp\n\tfnstsw\t%0";
15808 return "fcompp\n\tfnstsw\t%0";
15813 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15815 static const char * const alt
[16] =
15817 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15818 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15819 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15820 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15822 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15823 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15827 "fcomi\t{%y1, %0|%0, %y1}",
15828 "fcomip\t{%y1, %0|%0, %y1}",
15829 "fucomi\t{%y1, %0|%0, %y1}",
15830 "fucomip\t{%y1, %0|%0, %y1}",
15841 mask
= eflags_p
<< 3;
15842 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15843 mask
|= unordered_p
<< 1;
15844 mask
|= stack_top_dies
;
15846 gcc_assert (mask
< 16);
15855 ix86_output_addr_vec_elt (FILE *file
, int value
)
15857 const char *directive
= ASM_LONG
;
15861 directive
= ASM_QUAD
;
15863 gcc_assert (!TARGET_64BIT
);
15866 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15870 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15872 const char *directive
= ASM_LONG
;
15875 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15876 directive
= ASM_QUAD
;
15878 gcc_assert (!TARGET_64BIT
);
15880 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15881 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15882 fprintf (file
, "%s%s%d-%s%d\n",
15883 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15884 else if (HAVE_AS_GOTOFF_IN_DATA
)
15885 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15887 else if (TARGET_MACHO
)
15889 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15890 machopic_output_function_base_name (file
);
15895 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15896 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15899 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15903 ix86_expand_clear (rtx dest
)
15907 /* We play register width games, which are only valid after reload. */
15908 gcc_assert (reload_completed
);
15910 /* Avoid HImode and its attendant prefix byte. */
15911 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15912 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15913 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15915 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15916 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15918 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15919 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15925 /* X is an unchanging MEM. If it is a constant pool reference, return
15926 the constant pool rtx, else NULL. */
15929 maybe_get_pool_constant (rtx x
)
15931 x
= ix86_delegitimize_address (XEXP (x
, 0));
15933 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15934 return get_pool_constant (x
);
15940 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15943 enum tls_model model
;
15948 if (GET_CODE (op1
) == SYMBOL_REF
)
15950 model
= SYMBOL_REF_TLS_MODEL (op1
);
15953 op1
= legitimize_tls_address (op1
, model
, true);
15954 op1
= force_operand (op1
, op0
);
15957 op1
= convert_to_mode (mode
, op1
, 1);
15959 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15960 && SYMBOL_REF_DLLIMPORT_P (op1
))
15961 op1
= legitimize_dllimport_symbol (op1
, false);
15963 else if (GET_CODE (op1
) == CONST
15964 && GET_CODE (XEXP (op1
, 0)) == PLUS
15965 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15967 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15968 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15971 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15973 tmp
= legitimize_tls_address (symbol
, model
, true);
15974 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15975 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15976 tmp
= legitimize_dllimport_symbol (symbol
, true);
15980 tmp
= force_operand (tmp
, NULL
);
15981 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15982 op0
, 1, OPTAB_DIRECT
);
15985 op1
= convert_to_mode (mode
, tmp
, 1);
15989 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15990 && symbolic_operand (op1
, mode
))
15992 if (TARGET_MACHO
&& !TARGET_64BIT
)
15995 /* dynamic-no-pic */
15996 if (MACHOPIC_INDIRECT
)
15998 rtx temp
= ((reload_in_progress
15999 || ((op0
&& REG_P (op0
))
16001 ? op0
: gen_reg_rtx (Pmode
));
16002 op1
= machopic_indirect_data_reference (op1
, temp
);
16004 op1
= machopic_legitimize_pic_address (op1
, mode
,
16005 temp
== op1
? 0 : temp
);
16007 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16009 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16013 if (GET_CODE (op0
) == MEM
)
16014 op1
= force_reg (Pmode
, op1
);
16018 if (GET_CODE (temp
) != REG
)
16019 temp
= gen_reg_rtx (Pmode
);
16020 temp
= legitimize_pic_address (op1
, temp
);
16025 /* dynamic-no-pic */
16031 op1
= force_reg (mode
, op1
);
16032 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16034 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16035 op1
= legitimize_pic_address (op1
, reg
);
16038 op1
= convert_to_mode (mode
, op1
, 1);
16045 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16046 || !push_operand (op0
, mode
))
16048 op1
= force_reg (mode
, op1
);
16050 if (push_operand (op0
, mode
)
16051 && ! general_no_elim_operand (op1
, mode
))
16052 op1
= copy_to_mode_reg (mode
, op1
);
16054 /* Force large constants in 64bit compilation into register
16055 to get them CSEed. */
16056 if (can_create_pseudo_p ()
16057 && (mode
== DImode
) && TARGET_64BIT
16058 && immediate_operand (op1
, mode
)
16059 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16060 && !register_operand (op0
, mode
)
16062 op1
= copy_to_mode_reg (mode
, op1
);
16064 if (can_create_pseudo_p ()
16065 && FLOAT_MODE_P (mode
)
16066 && GET_CODE (op1
) == CONST_DOUBLE
)
16068 /* If we are loading a floating point constant to a register,
16069 force the value to memory now, since we'll get better code
16070 out the back end. */
16072 op1
= validize_mem (force_const_mem (mode
, op1
));
16073 if (!register_operand (op0
, mode
))
16075 rtx temp
= gen_reg_rtx (mode
);
16076 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16077 emit_move_insn (op0
, temp
);
16083 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16087 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16089 rtx op0
= operands
[0], op1
= operands
[1];
16090 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16092 /* Force constants other than zero into memory. We do not know how
16093 the instructions used to build constants modify the upper 64 bits
16094 of the register, once we have that information we may be able
16095 to handle some of them more efficiently. */
16096 if (can_create_pseudo_p ()
16097 && register_operand (op0
, mode
)
16098 && (CONSTANT_P (op1
)
16099 || (GET_CODE (op1
) == SUBREG
16100 && CONSTANT_P (SUBREG_REG (op1
))))
16101 && !standard_sse_constant_p (op1
))
16102 op1
= validize_mem (force_const_mem (mode
, op1
));
16104 /* We need to check memory alignment for SSE mode since attribute
16105 can make operands unaligned. */
16106 if (can_create_pseudo_p ()
16107 && SSE_REG_MODE_P (mode
)
16108 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16109 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16113 /* ix86_expand_vector_move_misalign() does not like constants ... */
16114 if (CONSTANT_P (op1
)
16115 || (GET_CODE (op1
) == SUBREG
16116 && CONSTANT_P (SUBREG_REG (op1
))))
16117 op1
= validize_mem (force_const_mem (mode
, op1
));
16119 /* ... nor both arguments in memory. */
16120 if (!register_operand (op0
, mode
)
16121 && !register_operand (op1
, mode
))
16122 op1
= force_reg (mode
, op1
);
16124 tmp
[0] = op0
; tmp
[1] = op1
;
16125 ix86_expand_vector_move_misalign (mode
, tmp
);
16129 /* Make operand1 a register if it isn't already. */
16130 if (can_create_pseudo_p ()
16131 && !register_operand (op0
, mode
)
16132 && !register_operand (op1
, mode
))
16134 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16138 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16141 /* Split 32-byte AVX unaligned load and store if needed. */
16144 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16147 rtx (*extract
) (rtx
, rtx
, rtx
);
16148 rtx (*load_unaligned
) (rtx
, rtx
);
16149 rtx (*store_unaligned
) (rtx
, rtx
);
16150 enum machine_mode mode
;
16152 switch (GET_MODE (op0
))
16155 gcc_unreachable ();
16157 extract
= gen_avx_vextractf128v32qi
;
16158 load_unaligned
= gen_avx_loaddqu256
;
16159 store_unaligned
= gen_avx_storedqu256
;
16163 extract
= gen_avx_vextractf128v8sf
;
16164 load_unaligned
= gen_avx_loadups256
;
16165 store_unaligned
= gen_avx_storeups256
;
16169 extract
= gen_avx_vextractf128v4df
;
16170 load_unaligned
= gen_avx_loadupd256
;
16171 store_unaligned
= gen_avx_storeupd256
;
16178 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16180 rtx r
= gen_reg_rtx (mode
);
16181 m
= adjust_address (op1
, mode
, 0);
16182 emit_move_insn (r
, m
);
16183 m
= adjust_address (op1
, mode
, 16);
16184 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16185 emit_move_insn (op0
, r
);
16188 emit_insn (load_unaligned (op0
, op1
));
16190 else if (MEM_P (op0
))
16192 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16194 m
= adjust_address (op0
, mode
, 0);
16195 emit_insn (extract (m
, op1
, const0_rtx
));
16196 m
= adjust_address (op0
, mode
, 16);
16197 emit_insn (extract (m
, op1
, const1_rtx
));
16200 emit_insn (store_unaligned (op0
, op1
));
16203 gcc_unreachable ();
16206 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16207 straight to ix86_expand_vector_move. */
16208 /* Code generation for scalar reg-reg moves of single and double precision data:
16209 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16213 if (x86_sse_partial_reg_dependency == true)
16218 Code generation for scalar loads of double precision data:
16219 if (x86_sse_split_regs == true)
16220 movlpd mem, reg (gas syntax)
16224 Code generation for unaligned packed loads of single precision data
16225 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16226 if (x86_sse_unaligned_move_optimal)
16229 if (x86_sse_partial_reg_dependency == true)
16241 Code generation for unaligned packed loads of double precision data
16242 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16243 if (x86_sse_unaligned_move_optimal)
16246 if (x86_sse_split_regs == true)
16259 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16267 && GET_MODE_SIZE (mode
) == 32)
16269 switch (GET_MODE_CLASS (mode
))
16271 case MODE_VECTOR_INT
:
16273 op0
= gen_lowpart (V32QImode
, op0
);
16274 op1
= gen_lowpart (V32QImode
, op1
);
16277 case MODE_VECTOR_FLOAT
:
16278 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16282 gcc_unreachable ();
16290 /* ??? If we have typed data, then it would appear that using
16291 movdqu is the only way to get unaligned data loaded with
16293 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16295 op0
= gen_lowpart (V16QImode
, op0
);
16296 op1
= gen_lowpart (V16QImode
, op1
);
16297 /* We will eventually emit movups based on insn attributes. */
16298 emit_insn (gen_sse2_loaddqu (op0
, op1
));
16300 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16305 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16306 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16307 || optimize_function_for_size_p (cfun
))
16309 /* We will eventually emit movups based on insn attributes. */
16310 emit_insn (gen_sse2_loadupd (op0
, op1
));
16314 /* When SSE registers are split into halves, we can avoid
16315 writing to the top half twice. */
16316 if (TARGET_SSE_SPLIT_REGS
)
16318 emit_clobber (op0
);
16323 /* ??? Not sure about the best option for the Intel chips.
16324 The following would seem to satisfy; the register is
16325 entirely cleared, breaking the dependency chain. We
16326 then store to the upper half, with a dependency depth
16327 of one. A rumor has it that Intel recommends two movsd
16328 followed by an unpacklpd, but this is unconfirmed. And
16329 given that the dependency depth of the unpacklpd would
16330 still be one, I'm not sure why this would be better. */
16331 zero
= CONST0_RTX (V2DFmode
);
16334 m
= adjust_address (op1
, DFmode
, 0);
16335 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16336 m
= adjust_address (op1
, DFmode
, 8);
16337 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16342 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16343 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16344 || optimize_function_for_size_p (cfun
))
16346 op0
= gen_lowpart (V4SFmode
, op0
);
16347 op1
= gen_lowpart (V4SFmode
, op1
);
16348 emit_insn (gen_sse_loadups (op0
, op1
));
16352 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16353 emit_move_insn (op0
, CONST0_RTX (mode
));
16355 emit_clobber (op0
);
16357 if (mode
!= V4SFmode
)
16358 op0
= gen_lowpart (V4SFmode
, op0
);
16360 m
= adjust_address (op1
, V2SFmode
, 0);
16361 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16362 m
= adjust_address (op1
, V2SFmode
, 8);
16363 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16366 else if (MEM_P (op0
))
16368 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16370 op0
= gen_lowpart (V16QImode
, op0
);
16371 op1
= gen_lowpart (V16QImode
, op1
);
16372 /* We will eventually emit movups based on insn attributes. */
16373 emit_insn (gen_sse2_storedqu (op0
, op1
));
16375 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16378 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16379 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16380 || optimize_function_for_size_p (cfun
))
16381 /* We will eventually emit movups based on insn attributes. */
16382 emit_insn (gen_sse2_storeupd (op0
, op1
));
16385 m
= adjust_address (op0
, DFmode
, 0);
16386 emit_insn (gen_sse2_storelpd (m
, op1
));
16387 m
= adjust_address (op0
, DFmode
, 8);
16388 emit_insn (gen_sse2_storehpd (m
, op1
));
16393 if (mode
!= V4SFmode
)
16394 op1
= gen_lowpart (V4SFmode
, op1
);
16397 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16398 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16399 || optimize_function_for_size_p (cfun
))
16401 op0
= gen_lowpart (V4SFmode
, op0
);
16402 emit_insn (gen_sse_storeups (op0
, op1
));
16406 m
= adjust_address (op0
, V2SFmode
, 0);
16407 emit_insn (gen_sse_storelps (m
, op1
));
16408 m
= adjust_address (op0
, V2SFmode
, 8);
16409 emit_insn (gen_sse_storehps (m
, op1
));
16414 gcc_unreachable ();
16417 /* Expand a push in MODE. This is some mode for which we do not support
16418 proper push instructions, at least from the registers that we expect
16419 the value to live in. */
16422 ix86_expand_push (enum machine_mode mode
, rtx x
)
16426 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16427 GEN_INT (-GET_MODE_SIZE (mode
)),
16428 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16429 if (tmp
!= stack_pointer_rtx
)
16430 emit_move_insn (stack_pointer_rtx
, tmp
);
16432 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16434 /* When we push an operand onto stack, it has to be aligned at least
16435 at the function argument boundary. However since we don't have
16436 the argument type, we can't determine the actual argument
16438 emit_move_insn (tmp
, x
);
16441 /* Helper function of ix86_fixup_binary_operands to canonicalize
16442 operand order. Returns true if the operands should be swapped. */
16445 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16448 rtx dst
= operands
[0];
16449 rtx src1
= operands
[1];
16450 rtx src2
= operands
[2];
16452 /* If the operation is not commutative, we can't do anything. */
16453 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16456 /* Highest priority is that src1 should match dst. */
16457 if (rtx_equal_p (dst
, src1
))
16459 if (rtx_equal_p (dst
, src2
))
16462 /* Next highest priority is that immediate constants come second. */
16463 if (immediate_operand (src2
, mode
))
16465 if (immediate_operand (src1
, mode
))
16468 /* Lowest priority is that memory references should come second. */
16478 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16479 destination to use for the operation. If different from the true
16480 destination in operands[0], a copy operation will be required. */
/* NOTE(review): lossy extraction -- braces, the return statement and some
   condition lines are missing from this text.  */
16483 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16486 rtx dst
= operands
[0];
16487 rtx src1
= operands
[1];
16488 rtx src2
= operands
[2];
16490 /* Canonicalize operand order. */
16491 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16495 /* It is invalid to swap operands of different modes. */
16496 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16503 /* Both source operands cannot be in memory. */
16504 if (MEM_P (src1
) && MEM_P (src2
))
16506 /* Optimization: Only read from memory once. */
16507 if (rtx_equal_p (src1
, src2
))
16509 src2
= force_reg (mode
, src2
);
16513 src2
= force_reg (mode
, src2
);
16516 /* If the destination is memory, and we do not have matching source
16517 operands, do things in registers. */
16518 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16519 dst
= gen_reg_rtx (mode
);
16521 /* Source 1 cannot be a constant. */
16522 if (CONSTANT_P (src1
))
16523 src1
= force_reg (mode
, src1
);
16525 /* Source 1 cannot be a non-matching memory. */
16526 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16527 src1
= force_reg (mode
, src1
);
16529 /* Improve address combine.  Force the second source into a register
      for integer modes (guard lines partially missing here).  */
16531 && GET_MODE_CLASS (mode
) == MODE_INT
16533 src2
= force_reg (mode
, src2
);
16535 operands
[1] = src1
;
16536 operands
[2] = src2
;
16540 /* Similarly, but assume that the destination has already been
16541 set up properly. */
/* NOTE(review): asserts that ix86_fixup_binary_operands did not need to
   substitute a new destination register for operands[0].  */
16544 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16545 enum machine_mode mode
, rtx operands
[])
16547 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16548 gcc_assert (dst
== operands
[0]);
16551 /* Attempt to expand a binary operator. Make the expansion closer to the
16552 actual machine, then just general_operand, which will allow 3 separate
16553 memory references (one output, two input) in a single insn. */
/* NOTE(review): lossy extraction -- braces and the emit path that does not
   clobber FLAGS_REG are missing from this text.  */
16556 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16559 rtx src1
, src2
, dst
, op
, clob
;
16561 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16562 src1
= operands
[1];
16563 src2
= operands
[2];
16565 /* Emit the instruction. */
16567 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16568 if (reload_in_progress
)
16570 /* Reload doesn't know about the flags register, and doesn't know that
16571 it doesn't want to clobber it. We can only do this with PLUS. */
16572 gcc_assert (code
== PLUS
);
16575 else if (reload_completed
16577 && !rtx_equal_p (dst
, src1
))
16579 /* This is going to be an LEA; avoid splitting it later. */
/* Otherwise emit the operation in PARALLEL with an explicit clobber of
   the flags register.  */
16584 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16585 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16588 /* Fix up the destination if needed. */
16589 if (dst
!= operands
[0])
16590 emit_move_insn (operands
[0], dst
);
16593 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16594 the given OPERANDS. */
/* NOTE(review): lossy extraction -- braces, the switch case labels, the
   early assignments of op1/op2 and the condition head at 16617 are missing
   from this text.  */
16597 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
16600 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
16601 if (GET_CODE (operands
[1]) == SUBREG
)
16606 else if (GET_CODE (operands
[2]) == SUBREG
)
16611 /* Optimize (__m128i) d | (__m128i) e and similar code
16612 when d and e are float vectors into float vector logical
16613 insn. In C/C++ without using intrinsics there is no other way
16614 to express vector logical operation on float vectors than
16615 to cast them temporarily to integer vectors. */
16617 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16618 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
16619 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
16620 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
16621 && SUBREG_BYTE (op1
) == 0
16622 && (GET_CODE (op2
) == CONST_VECTOR
16623 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
16624 && SUBREG_BYTE (op2
) == 0))
16625 && can_create_pseudo_p ())
16628 switch (GET_MODE (SUBREG_REG (op1
)))
/* Perform the operation in the float-vector mode of op1's inner reg,
   then copy the result back in the requested integer vector MODE.  */
16634 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
16635 if (GET_CODE (op2
) == CONST_VECTOR
)
16637 op2
= gen_lowpart (GET_MODE (dst
), op2
);
16638 op2
= force_reg (GET_MODE (dst
), op2
);
16643 op2
= SUBREG_REG (operands
[2]);
16644 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
16645 op2
= force_reg (GET_MODE (dst
), op2
);
16647 op1
= SUBREG_REG (op1
);
16648 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
16649 op1
= force_reg (GET_MODE (dst
), op1
);
16650 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
16651 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
16653 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
/* Fallback: expand the logical operation directly in MODE.  */
16659 if (!nonimmediate_operand (operands
[1], mode
))
16660 operands
[1] = force_reg (mode
, operands
[1]);
16661 if (!nonimmediate_operand (operands
[2], mode
))
16662 operands
[2] = force_reg (mode
, operands
[2]);
16663 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
16664 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
16665 gen_rtx_fmt_ee (code
, mode
, operands
[1],
16669 /* Return TRUE or FALSE depending on whether the binary operator meets the
16670 appropriate constraints. */
/* NOTE(review): lossy extraction -- braces and most "return false" /
   "return true" lines of this predicate are missing from this text.  */
16673 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16676 rtx dst
= operands
[0];
16677 rtx src1
= operands
[1];
16678 rtx src2
= operands
[2];
16680 /* Both source operands cannot be in memory. */
16681 if (MEM_P (src1
) && MEM_P (src2
))
16684 /* Canonicalize operand order for commutative operators. */
16685 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16692 /* If the destination is memory, we must have a matching source operand. */
16693 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16696 /* Source 1 cannot be a constant. */
16697 if (CONSTANT_P (src1
))
16700 /* Source 1 cannot be a non-matching memory. */
16701 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16702 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16703 return (code
== AND
16706 || (TARGET_64BIT
&& mode
== DImode
))
16707 && satisfies_constraint_L (src2
));
16712 /* Attempt to expand a unary operator. Make the expansion closer to the
16713 actual machine, then just general_operand, which will allow 2 separate
16714 memory references (one output, one input) in a single insn. */
/* NOTE(review): lossy extraction -- braces, the src/dst initialization from
   operands[] and the no-clobber emit path are missing from this text.  */
16717 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16720 int matching_memory
;
16721 rtx src
, dst
, op
, clob
;
16726 /* If the destination is memory, and we do not have matching source
16727 operands, do things in registers. */
16728 matching_memory
= 0;
16731 if (rtx_equal_p (dst
, src
))
16732 matching_memory
= 1;
16734 dst
= gen_reg_rtx (mode
);
16737 /* When source operand is memory, destination must match. */
16738 if (MEM_P (src
) && !matching_memory
)
16739 src
= force_reg (mode
, src
);
16741 /* Emit the instruction. */
16743 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16744 if (reload_in_progress
|| code
== NOT
)
16746 /* Reload doesn't know about the flags register, and doesn't know that
16747 it doesn't want to clobber it. */
16748 gcc_assert (code
== NOT
);
/* Otherwise emit in PARALLEL with an explicit clobber of the flags.  */
16753 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16754 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16757 /* Fix up the destination if needed. */
16758 if (dst
!= operands
[0])
16759 emit_move_insn (operands
[0], dst
);
16762 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16763 divisor are within the range [0-255]. */
/* NOTE(review): lossy extraction -- the signed_p parameter line, the
   switch-on-mode case labels, braces and several statements are missing
   from this text.  */
16766 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16769 rtx end_label
, qimode_label
;
16770 rtx insn
, div
, mod
;
16771 rtx scratch
, tmp0
, tmp1
, tmp2
;
16772 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16773 rtx (*gen_zero_extend
) (rtx
, rtx
);
16774 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
/* Select SImode or DImode insn generators depending on MODE.  */
16779 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16780 gen_test_ccno_1
= gen_testsi_ccno_1
;
16781 gen_zero_extend
= gen_zero_extendqisi2
;
16784 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16785 gen_test_ccno_1
= gen_testdi_ccno_1
;
16786 gen_zero_extend
= gen_zero_extendqidi2
;
16789 gcc_unreachable ();
16792 end_label
= gen_label_rtx ();
16793 qimode_label
= gen_label_rtx ();
16795 scratch
= gen_reg_rtx (mode
);
16797 /* Use 8bit unsigned divmod if dividend and divisor are within
16798 the range [0-255]: test (op2 | op3) against ~0xff and branch.  */
16799 emit_move_insn (scratch
, operands
[2]);
16800 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16801 scratch
, 1, OPTAB_DIRECT
);
16802 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16803 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16804 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16805 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16806 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16808 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16809 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16810 JUMP_LABEL (insn
) = qimode_label
;
16812 /* Generate original signed/unsigned divmod. */
16813 div
= gen_divmod4_1 (operands
[0], operands
[1],
16814 operands
[2], operands
[3]);
16817 /* Branch to the end. */
16818 emit_jump_insn (gen_jump (end_label
));
16821 /* Generate 8bit unsigned divide. */
16822 emit_label (qimode_label
);
16823 /* Don't use operands[0] for result of 8bit divide since not all
16824 registers support QImode ZERO_EXTRACT. */
16825 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16826 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16827 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16828 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
/* Build REG_EQUAL notes describing the quotient and remainder.  */
16832 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16833 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16837 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16838 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16841 /* Extract remainder from AH. */
16842 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16843 if (REG_P (operands
[1]))
16844 insn
= emit_move_insn (operands
[1], tmp1
);
16847 /* Need a new scratch register since the old one has result
      of the 8bit divide (continuation line missing in extraction).  */
16849 scratch
= gen_reg_rtx (mode
);
16850 emit_move_insn (scratch
, tmp1
);
16851 insn
= emit_move_insn (operands
[1], scratch
);
16853 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16855 /* Zero extend quotient from AL. */
16856 tmp1
= gen_lowpart (QImode
, tmp0
);
16857 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16858 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16860 emit_label (end_label
);
16863 #define LEA_MAX_STALL (3)
16864 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16866 /* Increase given DISTANCE in half-cycles according to
16867 dependencies between PREV and NEXT instructions.
16868 Add 1 half-cycle if there is no dependency and
16869 go to next cycle if there is some dependency. */
/* NOTE(review): use_rec/def_rec declarations and braces are missing from
   this extracted text.  */
16871 static unsigned int
16872 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16877 if (!prev
|| !next
)
16878 return distance
+ (distance
& 1) + 2;
16880 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16881 return distance
+ 1;
/* A def of PREV feeding a use of NEXT means a true dependency: round
   DISTANCE up to the next full cycle (+2 half-cycles).  */
16883 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16884 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16885 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16886 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16887 return distance
+ (distance
& 1) + 2;
16889 return distance
+ 1;
16892 /* Function checks if instruction INSN defines register number
16893 REGNO1 or REGNO2. */
/* NOTE(review): return type, braces and return statements are missing from
   this extracted text; scans INSN's DF def records for either regno.  */
16896 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16901 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16902 if (DF_REF_REG_DEF_P (*def_rec
)
16903 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16904 && (regno1
== DF_REF_REGNO (*def_rec
)
16905 || regno2
== DF_REF_REGNO (*def_rec
)))
16913 /* Function checks if instruction INSN uses register number
16914 REGNO as a part of address expression. */
/* NOTE(review): return type, braces and return statements are missing from
   this extracted text; scans INSN's DF use records for a memory use.  */
16917 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16921 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16922 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16928 /* Search backward for non-agu definition of register number REGNO1
16929 or register number REGNO2 in basic block starting from instruction
16930 START up to head of basic block or instruction INSN.
16932 Function puts true value into *FOUND var if definition was found
16933 and false otherwise.
16935 Distance in half-cycles between START and found instruction or head
16936 of BB is added to DISTANCE and returned. */
/* NOTE(review): the loop header, `next` bookkeeping, *found assignments and
   the return statement are missing from this extracted text.  */
16939 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16940 rtx insn
, int distance
,
16941 rtx start
, bool *found
)
16943 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16951 && distance
< LEA_SEARCH_THRESHOLD
)
16953 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16955 distance
= increase_distance (prev
, next
, distance
);
16956 if (insn_defines_reg (regno1
, regno2
, prev
))
/* A defining insn that is not an LEA counts as a non-AGU definition.  */
16958 if (recog_memoized (prev
) < 0
16959 || get_attr_type (prev
) != TYPE_LEA
)
16968 if (prev
== BB_HEAD (bb
))
16971 prev
= PREV_INSN (prev
);
16977 /* Search backward for non-agu definition of register number REGNO1
16978 or register number REGNO2 in INSN's basic block until
16979 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16980 2. Reach neighbour BBs boundary, or
16981 3. Reach agu definition.
16982 Returns the distance between the non-agu definition point and INSN.
16983 If no definition point, returns -1. */
/* NOTE(review): the insn parameter line, braces, edge/edge_iterator
   declarations and several statements are missing from this extracted
   text.  */
16986 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16989 basic_block bb
= BLOCK_FOR_INSN (insn
);
16991 bool found
= false;
16993 if (insn
!= BB_HEAD (bb
))
16994 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16995 distance
, PREV_INSN (insn
),
/* Not found within this BB: continue the search in predecessor blocks.  */
16998 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
17002 bool simple_loop
= false;
17004 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17007 simple_loop
= true;
/* Self-loop: wrap around and scan this same BB from its end.  */
17012 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
17014 BB_END (bb
), &found
);
/* Otherwise take the shortest positive distance over all predecessors.  */
17017 int shortest_dist
= -1;
17018 bool found_in_bb
= false;
17020 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17023 = distance_non_agu_define_in_bb (regno1
, regno2
,
17029 if (shortest_dist
< 0)
17030 shortest_dist
= bb_dist
;
17031 else if (bb_dist
> 0)
17032 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17038 distance
= shortest_dist
;
17042 /* get_attr_type may modify recog data. We want to make sure
17043 that recog data is valid for instruction INSN, on which
17044 distance_non_agu_define is called. INSN is unchanged here. */
17045 extract_insn_cached (insn
);
/* Convert half-cycles to cycles for the caller.  */
17050 return distance
>> 1;
17053 /* Return the distance in half-cycles between INSN and the next
17054 insn that uses register number REGNO in memory address added
17055 to DISTANCE. Return -1 if REGNO0 is set.
17057 Put true value into *FOUND if register usage was found and
      false otherwise (continuation line missing in extraction).
17059 Put true value into *REDEFINED if register redefinition was
17060 found and false otherwise. */
/* NOTE(review): the forward-scan loop header, `prev` bookkeeping, *found
   assignments and the return statement are missing from this extracted
   text.  */
17063 distance_agu_use_in_bb (unsigned int regno
,
17064 rtx insn
, int distance
, rtx start
,
17065 bool *found
, bool *redefined
)
17067 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17072 *redefined
= false;
17076 && distance
< LEA_SEARCH_THRESHOLD
)
17078 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17080 distance
= increase_distance(prev
, next
, distance
);
17081 if (insn_uses_reg_mem (regno
, next
))
17083 /* Return DISTANCE if OP0 is used in memory
17084 address in NEXT. */
17089 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17091 /* Return -1 if OP0 is set in NEXT. */
17099 if (next
== BB_END (bb
))
17102 next
= NEXT_INSN (next
);
17108 /* Return the distance between INSN and the next insn that uses
17109 register number REGNO0 in memory address. Return -1 if no such
17110 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
/* NOTE(review): braces, edge/edge_iterator declarations, the NEXT_INSN
   start argument at 17121 and the final return -1 path are missing from
   this extracted text.  */
17113 distance_agu_use (unsigned int regno0
, rtx insn
)
17115 basic_block bb
= BLOCK_FOR_INSN (insn
);
17117 bool found
= false;
17118 bool redefined
= false;
17120 if (insn
!= BB_END (bb
))
17121 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17123 &found
, &redefined
);
/* Not decided within this BB: continue the search in successor blocks.  */
17125 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17129 bool simple_loop
= false;
17131 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17134 simple_loop
= true;
/* Self-loop: wrap around and scan this same BB from its head.  */
17139 distance
= distance_agu_use_in_bb (regno0
, insn
,
17140 distance
, BB_HEAD (bb
),
17141 &found
, &redefined
);
/* Otherwise take the shortest positive distance over all successors.  */
17144 int shortest_dist
= -1;
17145 bool found_in_bb
= false;
17146 bool redefined_in_bb
= false;
17148 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17151 = distance_agu_use_in_bb (regno0
, insn
,
17152 distance
, BB_HEAD (e
->dest
),
17153 &found_in_bb
, &redefined_in_bb
);
17156 if (shortest_dist
< 0)
17157 shortest_dist
= bb_dist
;
17158 else if (bb_dist
> 0)
17159 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17165 distance
= shortest_dist
;
17169 if (!found
|| redefined
)
/* Convert half-cycles to cycles for the caller.  */
17172 return distance
>> 1;
17175 /* Define this macro to tune LEA priority vs ADD, it takes effect when
17176 there is a dilemma of choosing LEA or ADD
17177 Negative value: ADD is more preferred than LEA
17179 Positive value: LEA is more preferred than ADD */
17180 #define IX86_LEA_PRIORITY 0
17182 /* Return true if usage of lea INSN has performance advantage
17183 over a sequence of instructions. Instructions sequence has
17184 SPLIT_COST cycles higher latency than lea latency. */
/* NOTE(review): braces and a few return statements are missing from this
   extracted text.  */
17187 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17188 unsigned int regno2
, int split_cost
)
17190 int dist_define
, dist_use
;
17192 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17193 dist_use
= distance_agu_use (regno0
, insn
);
17195 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17197 /* If there is no non AGU operand definition, no AGU
17198 operand usage and split cost is 0 then both lea
17199 and non lea variants have same priority. Currently
17200 we prefer lea for 64 bit code and non lea on 32 bit
      code (continuation line missing in extraction).  */
17202 if (dist_use
< 0 && split_cost
== 0)
17203 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17208 /* With longer definitions distance lea is more preferable.
17209 Here we change it to take into account splitting cost and
      lea priority (continuation line missing in extraction).  */
17211 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17213 /* If there is no use in memory address then we just check
17214 that split cost exceeds AGU stall. */
17216 return dist_define
> LEA_MAX_STALL
;
17218 /* If this insn has both backward non-agu dependence and forward
17219 agu dependence, the one with short distance takes effect. */
17220 return dist_define
>= dist_use
;
17223 /* Return true if it is legal to clobber flags by INSN and
17224 false otherwise. */
/* NOTE(review): the scan loop header, `use`/`live` declarations, braces and
   some return statements are missing from this extracted text.  Walks
   forward from INSN to the BB end looking for a flags consumer; if none,
   checks FLAGS_REG liveness at BB exit.  */
17227 ix86_ok_to_clobber_flags (rtx insn
)
17229 basic_block bb
= BLOCK_FOR_INSN (insn
);
17235 if (NONDEBUG_INSN_P (insn
))
17237 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17238 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17241 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17245 if (insn
== BB_END (bb
))
17248 insn
= NEXT_INSN (insn
);
17251 live
= df_get_live_out(bb
);
17252 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17255 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17256 move and add to avoid AGU stalls. */
/* NOTE(review): braces and the early "return false" lines are missing from
   this extracted text.  */
17259 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17261 unsigned int regno0
, regno1
, regno2
;
17263 /* Check if we need to optimize. */
17264 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17267 /* Check it is correct to split here. */
17268 if (!ix86_ok_to_clobber_flags(insn
))
17271 regno0
= true_regnum (operands
[0]);
17272 regno1
= true_regnum (operands
[1]);
17273 regno2
= true_regnum (operands
[2]);
17275 /* We need to split only adds with non destructive
17276 destination operand. */
17277 if (regno0
== regno1
|| regno0
== regno2
)
/* Split cost of 1 accounts for the extra mov the split introduces.  */
17280 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
17283 /* Return true if we should emit lea instruction instead of mov
      instruction (continuation line missing in extraction).  */
/* NOTE(review): braces and "return false" lines are missing from this
   extracted text.  */
17287 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17289 unsigned int regno0
, regno1
;
17291 /* Check if we need to optimize. */
17292 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17295 /* Use lea for reg to reg moves only. */
17296 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17299 regno0
= true_regnum (operands
[0]);
17300 regno1
= true_regnum (operands
[1]);
17302 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0);
17305 /* Return true if we need to split lea into a sequence of
17306 instructions to avoid AGU stalls. */
/* NOTE(review): the split_cost/ok declarations, braces, several
   "return false" lines and split_cost increments are missing from this
   extracted text.  */
17309 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17311 unsigned int regno0
, regno1
, regno2
;
17313 struct ix86_address parts
;
17316 /* Check we need to optimize. */
17317 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17320 /* Check it is correct to split here. */
17321 if (!ix86_ok_to_clobber_flags(insn
))
17324 ok
= ix86_decompose_address (operands
[1], &parts
);
17327 /* There should be at least two components in the address. */
17328 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17329 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17332 /* We should not split into add if non legitimate pic
17333 operand is used as displacement. */
17334 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17337 regno0
= true_regnum (operands
[0]) ;
17338 regno1
= INVALID_REGNUM
;
17339 regno2
= INVALID_REGNUM
;
17342 regno1
= true_regnum (parts
.base
);
17344 regno2
= true_regnum (parts
.index
);
17348 /* Compute how many cycles we will add to execution time
17349 if split lea into a sequence of instructions. */
17350 if (parts
.base
|| parts
.index
)
17352 /* Have to use mov instruction if non destructive
17353 destination form is used. */
17354 if (regno1
!= regno0
&& regno2
!= regno0
)
17357 /* Have to add index to base if both exist. */
17358 if (parts
.base
&& parts
.index
)
17361 /* Have to use shift and adds if scale is 2 or greater. */
17362 if (parts
.scale
> 1)
17364 if (regno0
!= regno1
)
17366 else if (regno2
== regno0
)
17369 split_cost
+= parts
.scale
;
17372 /* Have to use add instruction with immediate if
17373 disp is non zero. */
17374 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17377 /* Subtract the price of lea. */
17381 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
17384 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17385 matches destination. RTX includes clobber of FLAGS_REG. */
/* NOTE(review): the dst/src parameter line, the op/clob declarations and
   braces are missing from this extracted text.  */
17388 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17393 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17394 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17396 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17399 /* Return true if regno1 def is nearest to the insn. */
/* NOTE(review): the `prev` initialization, braces and the return
   statements are missing from this extracted text.  Walks backward from
   INSN to the BB head and reports which of the two regnos is defined
   closest.  */
17402 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17405 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17409 while (prev
&& prev
!= start
)
17411 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17413 prev
= PREV_INSN (prev
);
17416 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17418 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17420 prev
= PREV_INSN (prev
);
17423 /* None of the regs is defined in the bb. */
17427 /* Split lea instructions into a sequence of instructions
17428 which are executed on ALU to avoid AGU stalls.
17429 It is assumed that it is allowed to clobber flags register
17430 at lea position. */
/* NOTE(review): the target/tmp/tmp1/adds declarations, braces and several
   guard conditions are missing from this extracted text.  */
17433 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17435 unsigned int regno0
, regno1
, regno2
;
17436 struct ix86_address parts
;
17440 ok
= ix86_decompose_address (operands
[1], &parts
);
17443 target
= gen_lowpart (mode
, operands
[0]);
17445 regno0
= true_regnum (target
);
17446 regno1
= INVALID_REGNUM
;
17447 regno2
= INVALID_REGNUM
;
/* Narrow the address components to MODE before emitting ALU ops.  */
17451 parts
.base
= gen_lowpart (mode
, parts
.base
);
17452 regno1
= true_regnum (parts
.base
);
17457 parts
.index
= gen_lowpart (mode
, parts
.index
);
17458 regno2
= true_regnum (parts
.index
);
17462 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
17464 if (parts
.scale
> 1)
17466 /* Case r1 = r1 + ... */
17467 if (regno1
== regno0
)
17469 /* If we have a case r1 = r1 + C * r1 then we
17470 should use multiplication which is very
17471 expensive. Assume cost model is wrong if we
17472 have such case here. */
17473 gcc_assert (regno2
!= regno0
);
/* Replace the scaled index with repeated additions.  */
17475 for (adds
= parts
.scale
; adds
> 0; adds
--)
17476 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17480 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17481 if (regno0
!= regno2
)
17482 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17484 /* Use shift for scaling. */
17485 ix86_emit_binop (ASHIFT
, mode
, target
,
17486 GEN_INT (exact_log2 (parts
.scale
)));
17489 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17491 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17492 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17495 else if (!parts
.base
&& !parts
.index
)
17497 gcc_assert(parts
.disp
);
17498 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
/* Unscaled cases: base only, index only, or base + index.  */
17504 if (regno0
!= regno2
)
17505 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17507 else if (!parts
.index
)
17509 if (regno0
!= regno1
)
17510 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17514 if (regno0
== regno1
)
17516 else if (regno0
== regno2
)
17522 /* Find better operand for SET instruction, depending
17523 on which definition is farther from the insn. */
17524 if (find_nearest_reg_def (insn
, regno1
, regno2
))
17525 tmp
= parts
.index
, tmp1
= parts
.base
;
17527 tmp
= parts
.base
, tmp1
= parts
.index
;
17529 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
17531 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17532 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17534 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
/* Fallthrough tail: add the remaining component and displacement.  */
17538 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17541 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17542 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17546 /* Return true if it is ok to optimize an ADD operation to LEA
17547 operation to avoid flag register consumption. For most processors,
17548 ADD is faster than LEA. For the processors like ATOM, if the
17549 destination register of LEA holds an actual address which will be
17550 used soon, LEA is better and otherwise ADD is better. */
/* NOTE(review): braces and intermediate return statements are missing from
   this extracted text.  */
17553 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17555 unsigned int regno0
= true_regnum (operands
[0]);
17556 unsigned int regno1
= true_regnum (operands
[1]);
17557 unsigned int regno2
= true_regnum (operands
[2]);
17559 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17560 if (regno0
!= regno1
&& regno0
!= regno2
)
17563 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17566 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
17569 /* Return true if destination reg of SET_BODY is shift count of
      USE_BODY (continuation line missing in extraction).  */
/* NOTE(review): the set_dest/shift_rtx/i declarations, case labels, braces
   and several return statements are missing from this extracted text.  */
17573 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17579 /* Retrieve destination of SET_BODY. */
17580 switch (GET_CODE (set_body
))
17583 set_dest
= SET_DEST (set_body
);
17584 if (!set_dest
|| !REG_P (set_dest
))
/* PARALLEL: recurse over each element of SET_BODY.  */
17588 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17589 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17597 /* Retrieve shift count of USE_BODY. */
17598 switch (GET_CODE (use_body
))
17601 shift_rtx
= XEXP (use_body
, 1);
/* PARALLEL: recurse over each element of USE_BODY.  */
17604 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17605 if (ix86_dep_by_shift_count_body (set_body
,
17606 XVECEXP (use_body
, 0, i
)))
/* Only shift/rotate codes carry a shift count in XEXP (.., 1).  */
17614 && (GET_CODE (shift_rtx
) == ASHIFT
17615 || GET_CODE (shift_rtx
) == LSHIFTRT
17616 || GET_CODE (shift_rtx
) == ASHIFTRT
17617 || GET_CODE (shift_rtx
) == ROTATE
17618 || GET_CODE (shift_rtx
) == ROTATERT
))
17620 rtx shift_count
= XEXP (shift_rtx
, 1);
17622 /* Return true if shift count is dest of SET_BODY. */
17623 if (REG_P (shift_count
))
17625 /* Add check since it can be invoked before register
17626 allocation in pre-reload schedule. */
17627 if (reload_completed
17628 && true_regnum (set_dest
) == true_regnum (shift_count
))
17630 else if (REGNO(set_dest
) == REGNO(shift_count
))
17638 /* Return true if destination reg of SET_INSN is shift count of
      USE_INSN (continuation line missing in extraction).  Thin wrapper
      over ix86_dep_by_shift_count_body on the insn patterns.  */
17642 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17644 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17645 PATTERN (use_insn
));
17648 /* Return TRUE or FALSE depending on whether the unary operator meets the
17649 appropriate constraints. */
/* NOTE(review): braces and the return statements are missing from this
   extracted text.  */
17652 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17653 enum machine_mode mode ATTRIBUTE_UNUSED
,
17654 rtx operands
[2] ATTRIBUTE_UNUSED
)
17656 /* If one of operands is memory, source and destination must match. */
17657 if ((MEM_P (operands
[0])
17658 || MEM_P (operands
[1]))
17659 && ! rtx_equal_p (operands
[0], operands
[1]))
17664 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17665 are ok, keeping in mind the possible movddup alternative. */
/* NOTE(review): braces and the final return statement are missing from
   this extracted text.  */
17668 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17670 if (MEM_P (operands
[0]))
17671 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17672 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17673 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17677 /* Post-reload splitter for converting an SF or DFmode value in an
17678 SSE register into an unsigned SImode. */
/* NOTE(review): braces, the SSE4.1/branch structure around 17697-17709 and
   an else keyword or two are missing from this extracted text.  */
17681 ix86_split_convert_uns_si_sse (rtx operands
[])
17683 enum machine_mode vecmode
;
17684 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17686 large
= operands
[1];
17687 zero_or_two31
= operands
[2];
17688 input
= operands
[3];
17689 two31
= operands
[4];
17690 vecmode
= GET_MODE (large
);
17691 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17693 /* Load up the value into the low element. We must ensure that the other
17694 elements are valid floats -- zero is the easiest such value. */
17697 if (vecmode
== V4SFmode
)
17698 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17700 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
/* Alternative path: zero VALUE, then merge INPUT's low element in.  */
17704 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17705 emit_move_insn (value
, CONST0_RTX (vecmode
));
17706 if (vecmode
== V4SFmode
)
17707 emit_insn (gen_sse_movss (value
, value
, input
));
17709 emit_insn (gen_sse2_movsd (value
, value
, input
));
17712 emit_move_insn (large
, two31
);
17713 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
/* large = (2**31 <= value); zero_or_two31 = large ? 2**31 : 0.  */
17715 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17716 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17718 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17719 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17721 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17722 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
/* Shift the mask into sign-bit position for the final XOR.  */
17724 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17725 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17727 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17728 if (vecmode
== V4SFmode
)
17729 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17731 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
/* Restore the sign bit that was subtracted out above.  */
17734 emit_insn (gen_xorv4si3 (value
, value
, large
));
17737 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17738 Expects the 64-bit DImode to be supplied in a pair of integral
17739 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17740 -mfpmath=sse, !optimize_size only. */
/* NOTE(review): the `x` declaration, braces and the SSE3/else structure
   around 17792-17799 are missing from this extracted text.  */
17743 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17745 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17746 rtx int_xmm
, fp_xmm
;
17747 rtx biases
, exponents
;
17750 int_xmm
= gen_reg_rtx (V4SImode
);
17751 if (TARGET_INTER_UNIT_MOVES
)
17752 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17753 else if (TARGET_SSE_SPLIT_REGS
)
17755 emit_clobber (int_xmm
);
17756 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
/* Fallback: build the vector via a V2DImode pseudo.  */
17760 x
= gen_reg_rtx (V2DImode
);
17761 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17762 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17765 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17766 gen_rtvec (4, GEN_INT (0x43300000UL
),
17767 GEN_INT (0x45300000UL
),
17768 const0_rtx
, const0_rtx
));
17769 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17771 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17772 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17774 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17775 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17776 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17777 (0x1.0p84 + double(fp_value_hi_xmm)).
17778 Note these exponents differ by 32. */
17780 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17782 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17783 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17784 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17785 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17786 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17787 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17788 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17789 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17790 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17792 /* Add the upper and lower DFmode values together. */
17794 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
/* Without SSE3 haddpd: interleave and add explicitly.  */
17797 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17798 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17799 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17802 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17805 /* Not used, but eases macroization of patterns. */
17807 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17808 rtx input ATTRIBUTE_UNUSED
)
17810 gcc_unreachable ();
17813 /* Convert an unsigned SImode value into a DFmode. Only currently used
17814 for SSE, but applicable anywhere. */
17817 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17819 REAL_VALUE_TYPE TWO31r
;
17822 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17823 NULL
, 1, OPTAB_DIRECT
);
17825 fp
= gen_reg_rtx (DFmode
);
17826 emit_insn (gen_floatsidf2 (fp
, x
));
17828 real_ldexp (&TWO31r
, &dconst1
, 31);
17829 x
= const_double_from_real_value (TWO31r
, DFmode
);
17831 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
17833 emit_move_insn (target
, x
);
17836 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17837 32-bit mode; otherwise we have a direct convert instruction. */
17840 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17842 REAL_VALUE_TYPE TWO32r
;
17843 rtx fp_lo
, fp_hi
, x
;
17845 fp_lo
= gen_reg_rtx (DFmode
);
17846 fp_hi
= gen_reg_rtx (DFmode
);
17848 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
17850 real_ldexp (&TWO32r
, &dconst1
, 32);
17851 x
= const_double_from_real_value (TWO32r
, DFmode
);
17852 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
17854 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
17856 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17859 emit_move_insn (target
, x
);
17862 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17863 For x86_32, -mfpmath=sse, !optimize_size only. */
17865 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17867 REAL_VALUE_TYPE ONE16r
;
17868 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
17870 real_ldexp (&ONE16r
, &dconst1
, 16);
17871 x
= const_double_from_real_value (ONE16r
, SFmode
);
17872 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17873 NULL
, 0, OPTAB_DIRECT
);
17874 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17875 NULL
, 0, OPTAB_DIRECT
);
17876 fp_hi
= gen_reg_rtx (SFmode
);
17877 fp_lo
= gen_reg_rtx (SFmode
);
17878 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17879 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17880 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17882 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17884 if (!rtx_equal_p (target
, fp_hi
))
17885 emit_move_insn (target
, fp_hi
);
17888 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17889 a vector of unsigned ints VAL to vector of floats TARGET. */
17892 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17895 REAL_VALUE_TYPE TWO16r
;
17896 enum machine_mode intmode
= GET_MODE (val
);
17897 enum machine_mode fltmode
= GET_MODE (target
);
17898 rtx (*cvt
) (rtx
, rtx
);
17900 if (intmode
== V4SImode
)
17901 cvt
= gen_floatv4siv4sf2
;
17903 cvt
= gen_floatv8siv8sf2
;
17904 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17905 tmp
[0] = force_reg (intmode
, tmp
[0]);
17906 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17908 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17909 NULL_RTX
, 1, OPTAB_DIRECT
);
17910 tmp
[3] = gen_reg_rtx (fltmode
);
17911 emit_insn (cvt (tmp
[3], tmp
[1]));
17912 tmp
[4] = gen_reg_rtx (fltmode
);
17913 emit_insn (cvt (tmp
[4], tmp
[2]));
17914 real_ldexp (&TWO16r
, &dconst1
, 16);
17915 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17916 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17917 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17919 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17921 if (tmp
[7] != target
)
17922 emit_move_insn (target
, tmp
[7]);
17925 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17926 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17927 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17928 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17931 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17933 REAL_VALUE_TYPE TWO31r
;
17934 rtx two31r
, tmp
[4];
17935 enum machine_mode mode
= GET_MODE (val
);
17936 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17937 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17938 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17941 for (i
= 0; i
< 3; i
++)
17942 tmp
[i
] = gen_reg_rtx (mode
);
17943 real_ldexp (&TWO31r
, &dconst1
, 31);
17944 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17945 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17946 two31r
= force_reg (mode
, two31r
);
17949 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17950 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17951 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17952 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17953 default: gcc_unreachable ();
17955 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17956 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17957 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17959 if (intmode
== V4SImode
|| TARGET_AVX2
)
17960 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17961 gen_lowpart (intmode
, tmp
[0]),
17962 GEN_INT (31), NULL_RTX
, 0,
17966 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17967 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17968 *xorp
= expand_simple_binop (intmode
, AND
,
17969 gen_lowpart (intmode
, tmp
[0]),
17970 two31
, NULL_RTX
, 0,
17973 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
17977 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17978 then replicate the value for all elements of the vector
17982 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17986 enum machine_mode scalar_mode
;
18003 n_elt
= GET_MODE_NUNITS (mode
);
18004 v
= rtvec_alloc (n_elt
);
18005 scalar_mode
= GET_MODE_INNER (mode
);
18007 RTVEC_ELT (v
, 0) = value
;
18009 for (i
= 1; i
< n_elt
; ++i
)
18010 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
18012 return gen_rtx_CONST_VECTOR (mode
, v
);
18015 gcc_unreachable ();
18019 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18020 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18021 for an SSE register. If VECT is true, then replicate the mask for
18022 all elements of the vector register. If INVERT is true, then create
18023 a mask excluding the sign bit. */
18026 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
18028 enum machine_mode vec_mode
, imode
;
18029 HOST_WIDE_INT hi
, lo
;
18034 /* Find the sign bit, sign extended to 2*HWI. */
18042 mode
= GET_MODE_INNER (mode
);
18044 lo
= 0x80000000, hi
= lo
< 0;
18052 mode
= GET_MODE_INNER (mode
);
18054 if (HOST_BITS_PER_WIDE_INT
>= 64)
18055 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18057 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18062 vec_mode
= VOIDmode
;
18063 if (HOST_BITS_PER_WIDE_INT
>= 64)
18066 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18073 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18077 lo
= ~lo
, hi
= ~hi
;
18083 mask
= immed_double_const (lo
, hi
, imode
);
18085 vec
= gen_rtvec (2, v
, mask
);
18086 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18087 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18094 gcc_unreachable ();
18098 lo
= ~lo
, hi
= ~hi
;
18100 /* Force this value into the low part of a fp vector constant. */
18101 mask
= immed_double_const (lo
, hi
, imode
);
18102 mask
= gen_lowpart (mode
, mask
);
18104 if (vec_mode
== VOIDmode
)
18105 return force_reg (mode
, mask
);
18107 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18108 return force_reg (vec_mode
, v
);
18111 /* Generate code for floating point ABS or NEG. */
18114 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18117 rtx mask
, set
, dst
, src
;
18118 bool use_sse
= false;
18119 bool vector_mode
= VECTOR_MODE_P (mode
);
18120 enum machine_mode vmode
= mode
;
18124 else if (mode
== TFmode
)
18126 else if (TARGET_SSE_MATH
)
18128 use_sse
= SSE_FLOAT_MODE_P (mode
);
18129 if (mode
== SFmode
)
18131 else if (mode
== DFmode
)
18135 /* NEG and ABS performed with SSE use bitwise mask operations.
18136 Create the appropriate mask now. */
18138 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18145 set
= gen_rtx_fmt_e (code
, mode
, src
);
18146 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18153 use
= gen_rtx_USE (VOIDmode
, mask
);
18155 par
= gen_rtvec (2, set
, use
);
18158 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18159 par
= gen_rtvec (3, set
, use
, clob
);
18161 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18167 /* Expand a copysign operation. Special case operand 0 being a constant. */
18170 ix86_expand_copysign (rtx operands
[])
18172 enum machine_mode mode
, vmode
;
18173 rtx dest
, op0
, op1
, mask
, nmask
;
18175 dest
= operands
[0];
18179 mode
= GET_MODE (dest
);
18181 if (mode
== SFmode
)
18183 else if (mode
== DFmode
)
18188 if (GET_CODE (op0
) == CONST_DOUBLE
)
18190 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18192 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18193 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18195 if (mode
== SFmode
|| mode
== DFmode
)
18197 if (op0
== CONST0_RTX (mode
))
18198 op0
= CONST0_RTX (vmode
);
18201 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18203 op0
= force_reg (vmode
, v
);
18206 else if (op0
!= CONST0_RTX (mode
))
18207 op0
= force_reg (mode
, op0
);
18209 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18211 if (mode
== SFmode
)
18212 copysign_insn
= gen_copysignsf3_const
;
18213 else if (mode
== DFmode
)
18214 copysign_insn
= gen_copysigndf3_const
;
18216 copysign_insn
= gen_copysigntf3_const
;
18218 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18222 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18224 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18225 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18227 if (mode
== SFmode
)
18228 copysign_insn
= gen_copysignsf3_var
;
18229 else if (mode
== DFmode
)
18230 copysign_insn
= gen_copysigndf3_var
;
18232 copysign_insn
= gen_copysigntf3_var
;
18234 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18238 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18239 be a constant, and so has already been expanded into a vector constant. */
18242 ix86_split_copysign_const (rtx operands
[])
18244 enum machine_mode mode
, vmode
;
18245 rtx dest
, op0
, mask
, x
;
18247 dest
= operands
[0];
18249 mask
= operands
[3];
18251 mode
= GET_MODE (dest
);
18252 vmode
= GET_MODE (mask
);
18254 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18255 x
= gen_rtx_AND (vmode
, dest
, mask
);
18256 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18258 if (op0
!= CONST0_RTX (vmode
))
18260 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18261 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18265 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18266 so we have to do two masks. */
18269 ix86_split_copysign_var (rtx operands
[])
18271 enum machine_mode mode
, vmode
;
18272 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18274 dest
= operands
[0];
18275 scratch
= operands
[1];
18278 nmask
= operands
[4];
18279 mask
= operands
[5];
18281 mode
= GET_MODE (dest
);
18282 vmode
= GET_MODE (mask
);
18284 if (rtx_equal_p (op0
, op1
))
18286 /* Shouldn't happen often (it's useless, obviously), but when it does
18287 we'd generate incorrect code if we continue below. */
18288 emit_move_insn (dest
, op0
);
18292 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18294 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18296 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18297 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18300 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18301 x
= gen_rtx_NOT (vmode
, dest
);
18302 x
= gen_rtx_AND (vmode
, x
, op0
);
18303 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18307 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18309 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18311 else /* alternative 2,4 */
18313 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18314 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18315 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18317 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18319 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18321 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18322 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18324 else /* alternative 3,4 */
18326 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18328 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18329 x
= gen_rtx_AND (vmode
, dest
, op0
);
18331 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18334 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18335 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18338 /* Return TRUE or FALSE depending on whether the first SET in INSN
18339 has source and destination with matching CC modes, and that the
18340 CC mode is at least as constrained as REQ_MODE. */
18343 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18346 enum machine_mode set_mode
;
18348 set
= PATTERN (insn
);
18349 if (GET_CODE (set
) == PARALLEL
)
18350 set
= XVECEXP (set
, 0, 0);
18351 gcc_assert (GET_CODE (set
) == SET
);
18352 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18354 set_mode
= GET_MODE (SET_DEST (set
));
18358 if (req_mode
!= CCNOmode
18359 && (req_mode
!= CCmode
18360 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18364 if (req_mode
== CCGCmode
)
18368 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18372 if (req_mode
== CCZmode
)
18382 if (set_mode
!= req_mode
)
18387 gcc_unreachable ();
18390 return GET_MODE (SET_SRC (set
)) == set_mode
;
18393 /* Generate insn patterns to do an integer compare of OPERANDS. */
18396 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18398 enum machine_mode cmpmode
;
18401 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18402 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18404 /* This is very simple, but making the interface the same as in the
18405 FP case makes the rest of the code easier. */
18406 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18407 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18409 /* Return the test that should be put into the flags user, i.e.
18410 the bcc, scc, or cmov instruction. */
18411 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18414 /* Figure out whether to use ordered or unordered fp comparisons.
18415 Return the appropriate mode to use. */
18418 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18420 /* ??? In order to make all comparisons reversible, we do all comparisons
18421 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18422 all forms trapping and nontrapping comparisons, we can make inequality
18423 comparisons trapping again, since it results in better code when using
18424 FCOM based compares. */
18425 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
18429 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18431 enum machine_mode mode
= GET_MODE (op0
);
18433 if (SCALAR_FLOAT_MODE_P (mode
))
18435 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18436 return ix86_fp_compare_mode (code
);
18441 /* Only zero flag is needed. */
18442 case EQ
: /* ZF=0 */
18443 case NE
: /* ZF!=0 */
18445 /* Codes needing carry flag. */
18446 case GEU
: /* CF=0 */
18447 case LTU
: /* CF=1 */
18448 /* Detect overflow checks. They need just the carry flag. */
18449 if (GET_CODE (op0
) == PLUS
18450 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18454 case GTU
: /* CF=0 & ZF=0 */
18455 case LEU
: /* CF=1 | ZF=1 */
18456 /* Detect overflow checks. They need just the carry flag. */
18457 if (GET_CODE (op0
) == MINUS
18458 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18462 /* Codes possibly doable only with sign flag when
18463 comparing against zero. */
18464 case GE
: /* SF=OF or SF=0 */
18465 case LT
: /* SF<>OF or SF=1 */
18466 if (op1
== const0_rtx
)
18469 /* For other cases Carry flag is not required. */
18471 /* Codes doable only with sign flag when comparing
18472 against zero, but we miss jump instruction for it
18473 so we need to use relational tests against overflow
18474 that thus needs to be zero. */
18475 case GT
: /* ZF=0 & SF=OF */
18476 case LE
: /* ZF=1 | SF<>OF */
18477 if (op1
== const0_rtx
)
18481 /* strcmp pattern do (use flags) and combine may ask us for proper
18486 gcc_unreachable ();
18490 /* Return the fixed registers used for condition codes. */
18493 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
18500 /* If two condition code modes are compatible, return a condition code
18501 mode which is compatible with both. Otherwise, return
18504 static enum machine_mode
18505 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
18510 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
18513 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18514 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
18517 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18519 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18525 gcc_unreachable ();
18555 /* These are only compatible with themselves, which we already
18562 /* Return a comparison we can do and that it is equivalent to
18563 swap_condition (code) apart possibly from orderedness.
18564 But, never change orderedness if TARGET_IEEE_FP, returning
18565 UNKNOWN in that case if necessary. */
18567 static enum rtx_code
18568 ix86_fp_swap_condition (enum rtx_code code
)
18572 case GT
: /* GTU - CF=0 & ZF=0 */
18573 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18574 case GE
: /* GEU - CF=0 */
18575 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18576 case UNLT
: /* LTU - CF=1 */
18577 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18578 case UNLE
: /* LEU - CF=1 | ZF=1 */
18579 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18581 return swap_condition (code
);
18585 /* Return cost of comparison CODE using the best strategy for performance.
18586 All following functions do use number of instructions as a cost metrics.
18587 In future this should be tweaked to compute bytes for optimize_size and
18588 take into account performance of various instructions on various CPUs. */
18591 ix86_fp_comparison_cost (enum rtx_code code
)
18595 /* The cost of code using bit-twiddling on %ah. */
18612 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18616 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18619 gcc_unreachable ();
18622 switch (ix86_fp_comparison_strategy (code
))
18624 case IX86_FPCMP_COMI
:
18625 return arith_cost
> 4 ? 3 : 2;
18626 case IX86_FPCMP_SAHF
:
18627 return arith_cost
> 4 ? 4 : 3;
18633 /* Return strategy to use for floating-point. We assume that fcomi is always
18634 preferrable where available, since that is also true when looking at size
18635 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18637 enum ix86_fpcmp_strategy
18638 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18640 /* Do fcomi/sahf based test when profitable. */
18643 return IX86_FPCMP_COMI
;
18645 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18646 return IX86_FPCMP_SAHF
;
18648 return IX86_FPCMP_ARITH
;
18651 /* Swap, force into registers, or otherwise massage the two operands
18652 to a fp comparison. The operands are updated in place; the new
18653 comparison code is returned. */
18655 static enum rtx_code
18656 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18658 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18659 rtx op0
= *pop0
, op1
= *pop1
;
18660 enum machine_mode op_mode
= GET_MODE (op0
);
18661 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18663 /* All of the unordered compare instructions only work on registers.
18664 The same is true of the fcomi compare instructions. The XFmode
18665 compare instructions require registers except when comparing
18666 against zero or when converting operand 1 from fixed point to
18670 && (fpcmp_mode
== CCFPUmode
18671 || (op_mode
== XFmode
18672 && ! (standard_80387_constant_p (op0
) == 1
18673 || standard_80387_constant_p (op1
) == 1)
18674 && GET_CODE (op1
) != FLOAT
)
18675 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18677 op0
= force_reg (op_mode
, op0
);
18678 op1
= force_reg (op_mode
, op1
);
18682 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18683 things around if they appear profitable, otherwise force op0
18684 into a register. */
18686 if (standard_80387_constant_p (op0
) == 0
18688 && ! (standard_80387_constant_p (op1
) == 0
18691 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18692 if (new_code
!= UNKNOWN
)
18695 tmp
= op0
, op0
= op1
, op1
= tmp
;
18701 op0
= force_reg (op_mode
, op0
);
18703 if (CONSTANT_P (op1
))
18705 int tmp
= standard_80387_constant_p (op1
);
18707 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18711 op1
= force_reg (op_mode
, op1
);
18714 op1
= force_reg (op_mode
, op1
);
18718 /* Try to rearrange the comparison to make it cheaper. */
18719 if (ix86_fp_comparison_cost (code
)
18720 > ix86_fp_comparison_cost (swap_condition (code
))
18721 && (REG_P (op1
) || can_create_pseudo_p ()))
18724 tmp
= op0
, op0
= op1
, op1
= tmp
;
18725 code
= swap_condition (code
);
18727 op0
= force_reg (op_mode
, op0
);
18735 /* Convert comparison codes we use to represent FP comparison to integer
18736 code that will result in proper branch. Return UNKNOWN if no such code
18740 ix86_fp_compare_code_to_integer (enum rtx_code code
)
18769 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18772 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18774 enum machine_mode fpcmp_mode
, intcmp_mode
;
18777 fpcmp_mode
= ix86_fp_compare_mode (code
);
18778 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18780 /* Do fcomi/sahf based test when profitable. */
18781 switch (ix86_fp_comparison_strategy (code
))
18783 case IX86_FPCMP_COMI
:
18784 intcmp_mode
= fpcmp_mode
;
18785 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18786 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18791 case IX86_FPCMP_SAHF
:
18792 intcmp_mode
= fpcmp_mode
;
18793 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18794 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18798 scratch
= gen_reg_rtx (HImode
);
18799 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18800 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18803 case IX86_FPCMP_ARITH
:
18804 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18805 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18806 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18808 scratch
= gen_reg_rtx (HImode
);
18809 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18811 /* In the unordered case, we have to check C2 for NaN's, which
18812 doesn't happen to work out to anything nice combination-wise.
18813 So do some bit twiddling on the value we've got in AH to come
18814 up with an appropriate set of condition codes. */
18816 intcmp_mode
= CCNOmode
;
18821 if (code
== GT
|| !TARGET_IEEE_FP
)
18823 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18828 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18829 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18830 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18831 intcmp_mode
= CCmode
;
18837 if (code
== LT
&& TARGET_IEEE_FP
)
18839 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18840 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18841 intcmp_mode
= CCmode
;
18846 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18852 if (code
== GE
|| !TARGET_IEEE_FP
)
18854 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18859 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18860 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18866 if (code
== LE
&& TARGET_IEEE_FP
)
18868 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18869 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18870 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18871 intcmp_mode
= CCmode
;
18876 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18882 if (code
== EQ
&& TARGET_IEEE_FP
)
18884 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18885 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18886 intcmp_mode
= CCmode
;
18891 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18897 if (code
== NE
&& TARGET_IEEE_FP
)
18899 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18900 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18906 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18912 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18916 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18921 gcc_unreachable ();
18929 /* Return the test that should be put into the flags user, i.e.
18930 the bcc, scc, or cmov instruction. */
18931 return gen_rtx_fmt_ee (code
, VOIDmode
,
18932 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
18937 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18941 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18942 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
18944 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18946 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18947 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18950 ret
= ix86_expand_int_compare (code
, op0
, op1
);
18956 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18958 enum machine_mode mode
= GET_MODE (op0
);
18970 tmp
= ix86_expand_compare (code
, op0
, op1
);
18971 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18972 gen_rtx_LABEL_REF (VOIDmode
, label
),
18974 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18981 /* Expand DImode branch into multiple compare+branch. */
18983 rtx lo
[2], hi
[2], label2
;
18984 enum rtx_code code1
, code2
, code3
;
18985 enum machine_mode submode
;
18987 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18989 tmp
= op0
, op0
= op1
, op1
= tmp
;
18990 code
= swap_condition (code
);
18993 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18994 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18996 submode
= mode
== DImode
? SImode
: DImode
;
18998 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18999 avoid two branches. This costs one extra insn, so disable when
19000 optimizing for size. */
19002 if ((code
== EQ
|| code
== NE
)
19003 && (!optimize_insn_for_size_p ()
19004 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
19009 if (hi
[1] != const0_rtx
)
19010 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
19011 NULL_RTX
, 0, OPTAB_WIDEN
);
19014 if (lo
[1] != const0_rtx
)
19015 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
19016 NULL_RTX
, 0, OPTAB_WIDEN
);
19018 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
19019 NULL_RTX
, 0, OPTAB_WIDEN
);
19021 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
19025 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19026 op1 is a constant and the low word is zero, then we can just
19027 examine the high word. Similarly for low word -1 and
19028 less-or-equal-than or greater-than. */
19030 if (CONST_INT_P (hi
[1]))
19033 case LT
: case LTU
: case GE
: case GEU
:
19034 if (lo
[1] == const0_rtx
)
19036 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19040 case LE
: case LEU
: case GT
: case GTU
:
19041 if (lo
[1] == constm1_rtx
)
19043 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19051 /* Otherwise, we need two or three jumps. */
19053 label2
= gen_label_rtx ();
19056 code2
= swap_condition (code
);
19057 code3
= unsigned_condition (code
);
19061 case LT
: case GT
: case LTU
: case GTU
:
19064 case LE
: code1
= LT
; code2
= GT
; break;
19065 case GE
: code1
= GT
; code2
= LT
; break;
19066 case LEU
: code1
= LTU
; code2
= GTU
; break;
19067 case GEU
: code1
= GTU
; code2
= LTU
; break;
19069 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19070 case NE
: code2
= UNKNOWN
; break;
19073 gcc_unreachable ();
19078 * if (hi(a) < hi(b)) goto true;
19079 * if (hi(a) > hi(b)) goto false;
19080 * if (lo(a) < lo(b)) goto true;
19084 if (code1
!= UNKNOWN
)
19085 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19086 if (code2
!= UNKNOWN
)
19087 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19089 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19091 if (code2
!= UNKNOWN
)
19092 emit_label (label2
);
19097 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19102 /* Split branch based on floating point condition. */
19104 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19105 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19110 if (target2
!= pc_rtx
)
19113 code
= reverse_condition_maybe_unordered (code
);
19118 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19121 /* Remove pushed operand from stack. */
19123 ix86_free_from_memory (GET_MODE (pushed
));
19125 i
= emit_jump_insn (gen_rtx_SET
19127 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19128 condition
, target1
, target2
)));
19129 if (split_branch_probability
>= 0)
19130 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
19134 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19138 gcc_assert (GET_MODE (dest
) == QImode
);
19140 ret
= ix86_expand_compare (code
, op0
, op1
);
19141 PUT_MODE (ret
, QImode
);
19142 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19145 /* Expand comparison setting or clearing carry flag. Return true when
19146 successful and set pop for the operation. */
19148 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19150 enum machine_mode mode
=
19151 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19153 /* Do not handle double-mode compares that go through special path. */
19154 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19157 if (SCALAR_FLOAT_MODE_P (mode
))
19159 rtx compare_op
, compare_seq
;
19161 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19163 /* Shortcut: following common codes never translate
19164 into carry flag compares. */
19165 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19166 || code
== ORDERED
|| code
== UNORDERED
)
19169 /* These comparisons require zero flag; swap operands so they won't. */
19170 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19171 && !TARGET_IEEE_FP
)
19176 code
= swap_condition (code
);
19179 /* Try to expand the comparison and verify that we end up with
19180 carry flag based comparison. This fails to be true only when
19181 we decide to expand comparison using arithmetic that is not
19182 too common scenario. */
19184 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19185 compare_seq
= get_insns ();
19188 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19189 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19190 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19192 code
= GET_CODE (compare_op
);
19194 if (code
!= LTU
&& code
!= GEU
)
19197 emit_insn (compare_seq
);
19202 if (!INTEGRAL_MODE_P (mode
))
19211 /* Convert a==0 into (unsigned)a<1. */
19214 if (op1
!= const0_rtx
)
19217 code
= (code
== EQ
? LTU
: GEU
);
19220 /* Convert a>b into b<a or a>=b-1. */
19223 if (CONST_INT_P (op1
))
19225 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19226 /* Bail out on overflow. We still can swap operands but that
19227 would force loading of the constant into register. */
19228 if (op1
== const0_rtx
19229 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19231 code
= (code
== GTU
? GEU
: LTU
);
19238 code
= (code
== GTU
? LTU
: GEU
);
19242 /* Convert a>=0 into (unsigned)a<0x80000000. */
19245 if (mode
== DImode
|| op1
!= const0_rtx
)
19247 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19248 code
= (code
== LT
? GEU
: LTU
);
19252 if (mode
== DImode
|| op1
!= constm1_rtx
)
19254 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19255 code
= (code
== LE
? GEU
: LTU
);
19261 /* Swapping operands may cause constant to appear as first operand. */
19262 if (!nonimmediate_operand (op0
, VOIDmode
))
19264 if (!can_create_pseudo_p ())
19266 op0
= force_reg (mode
, op0
);
19268 *pop
= ix86_expand_compare (code
, op0
, op1
);
19269 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19274 ix86_expand_int_movcc (rtx operands
[])
19276 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19277 rtx compare_seq
, compare_op
;
19278 enum machine_mode mode
= GET_MODE (operands
[0]);
19279 bool sign_bit_compare_p
= false;
19280 rtx op0
= XEXP (operands
[1], 0);
19281 rtx op1
= XEXP (operands
[1], 1);
19283 if (GET_MODE (op0
) == TImode
19284 || (GET_MODE (op0
) == DImode
19289 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19290 compare_seq
= get_insns ();
19293 compare_code
= GET_CODE (compare_op
);
19295 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19296 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19297 sign_bit_compare_p
= true;
19299 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19300 HImode insns, we'd be swallowed in word prefix ops. */
19302 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19303 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19304 && CONST_INT_P (operands
[2])
19305 && CONST_INT_P (operands
[3]))
19307 rtx out
= operands
[0];
19308 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19309 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19310 HOST_WIDE_INT diff
;
19313 /* Sign bit compares are better done using shifts than we do by using
19315 if (sign_bit_compare_p
19316 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19318 /* Detect overlap between destination and compare sources. */
19321 if (!sign_bit_compare_p
)
19324 bool fpcmp
= false;
19326 compare_code
= GET_CODE (compare_op
);
19328 flags
= XEXP (compare_op
, 0);
19330 if (GET_MODE (flags
) == CCFPmode
19331 || GET_MODE (flags
) == CCFPUmode
)
19335 = ix86_fp_compare_code_to_integer (compare_code
);
19338 /* To simplify rest of code, restrict to the GEU case. */
19339 if (compare_code
== LTU
)
19341 HOST_WIDE_INT tmp
= ct
;
19344 compare_code
= reverse_condition (compare_code
);
19345 code
= reverse_condition (code
);
19350 PUT_CODE (compare_op
,
19351 reverse_condition_maybe_unordered
19352 (GET_CODE (compare_op
)));
19354 PUT_CODE (compare_op
,
19355 reverse_condition (GET_CODE (compare_op
)));
19359 if (reg_overlap_mentioned_p (out
, op0
)
19360 || reg_overlap_mentioned_p (out
, op1
))
19361 tmp
= gen_reg_rtx (mode
);
19363 if (mode
== DImode
)
19364 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19366 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19367 flags
, compare_op
));
19371 if (code
== GT
|| code
== GE
)
19372 code
= reverse_condition (code
);
19375 HOST_WIDE_INT tmp
= ct
;
19380 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19393 tmp
= expand_simple_binop (mode
, PLUS
,
19395 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19406 tmp
= expand_simple_binop (mode
, IOR
,
19408 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19410 else if (diff
== -1 && ct
)
19420 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19422 tmp
= expand_simple_binop (mode
, PLUS
,
19423 copy_rtx (tmp
), GEN_INT (cf
),
19424 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19432 * andl cf - ct, dest
19442 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19445 tmp
= expand_simple_binop (mode
, AND
,
19447 gen_int_mode (cf
- ct
, mode
),
19448 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19450 tmp
= expand_simple_binop (mode
, PLUS
,
19451 copy_rtx (tmp
), GEN_INT (ct
),
19452 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19455 if (!rtx_equal_p (tmp
, out
))
19456 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19463 enum machine_mode cmp_mode
= GET_MODE (op0
);
19466 tmp
= ct
, ct
= cf
, cf
= tmp
;
19469 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19471 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19473 /* We may be reversing unordered compare to normal compare, that
19474 is not valid in general (we may convert non-trapping condition
19475 to trapping one), however on i386 we currently emit all
19476 comparisons unordered. */
19477 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19478 code
= reverse_condition_maybe_unordered (code
);
19482 compare_code
= reverse_condition (compare_code
);
19483 code
= reverse_condition (code
);
19487 compare_code
= UNKNOWN
;
19488 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19489 && CONST_INT_P (op1
))
19491 if (op1
== const0_rtx
19492 && (code
== LT
|| code
== GE
))
19493 compare_code
= code
;
19494 else if (op1
== constm1_rtx
)
19498 else if (code
== GT
)
19503 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19504 if (compare_code
!= UNKNOWN
19505 && GET_MODE (op0
) == GET_MODE (out
)
19506 && (cf
== -1 || ct
== -1))
19508 /* If lea code below could be used, only optimize
19509 if it results in a 2 insn sequence. */
19511 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19512 || diff
== 3 || diff
== 5 || diff
== 9)
19513 || (compare_code
== LT
&& ct
== -1)
19514 || (compare_code
== GE
&& cf
== -1))
19517 * notl op1 (if necessary)
19525 code
= reverse_condition (code
);
19528 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19530 out
= expand_simple_binop (mode
, IOR
,
19532 out
, 1, OPTAB_DIRECT
);
19533 if (out
!= operands
[0])
19534 emit_move_insn (operands
[0], out
);
19541 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19542 || diff
== 3 || diff
== 5 || diff
== 9)
19543 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19545 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19551 * lea cf(dest*(ct-cf)),dest
19555 * This also catches the degenerate setcc-only case.
19561 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19564 /* On x86_64 the lea instruction operates on Pmode, so we need
19565 to get arithmetics done in proper mode to match. */
19567 tmp
= copy_rtx (out
);
19571 out1
= copy_rtx (out
);
19572 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19576 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19582 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19585 if (!rtx_equal_p (tmp
, out
))
19588 out
= force_operand (tmp
, copy_rtx (out
));
19590 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19592 if (!rtx_equal_p (out
, operands
[0]))
19593 emit_move_insn (operands
[0], copy_rtx (out
));
19599 * General case: Jumpful:
19600 * xorl dest,dest cmpl op1, op2
19601 * cmpl op1, op2 movl ct, dest
19602 * setcc dest jcc 1f
19603 * decl dest movl cf, dest
19604 * andl (cf-ct),dest 1:
19607 * Size 20. Size 14.
19609 * This is reasonably steep, but branch mispredict costs are
19610 * high on modern cpus, so consider failing only if optimizing
19614 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19615 && BRANCH_COST (optimize_insn_for_speed_p (),
19620 enum machine_mode cmp_mode
= GET_MODE (op0
);
19625 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19627 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19629 /* We may be reversing unordered compare to normal compare,
19630 that is not valid in general (we may convert non-trapping
19631 condition to trapping one), however on i386 we currently
19632 emit all comparisons unordered. */
19633 code
= reverse_condition_maybe_unordered (code
);
19637 code
= reverse_condition (code
);
19638 if (compare_code
!= UNKNOWN
)
19639 compare_code
= reverse_condition (compare_code
);
19643 if (compare_code
!= UNKNOWN
)
19645 /* notl op1 (if needed)
19650 For x < 0 (resp. x <= -1) there will be no notl,
19651 so if possible swap the constants to get rid of the
19653 True/false will be -1/0 while code below (store flag
19654 followed by decrement) is 0/-1, so the constants need
19655 to be exchanged once more. */
19657 if (compare_code
== GE
|| !cf
)
19659 code
= reverse_condition (code
);
19664 HOST_WIDE_INT tmp
= cf
;
19669 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19673 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19675 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19677 copy_rtx (out
), 1, OPTAB_DIRECT
);
19680 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19681 gen_int_mode (cf
- ct
, mode
),
19682 copy_rtx (out
), 1, OPTAB_DIRECT
);
19684 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19685 copy_rtx (out
), 1, OPTAB_DIRECT
);
19686 if (!rtx_equal_p (out
, operands
[0]))
19687 emit_move_insn (operands
[0], copy_rtx (out
));
19693 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19695 /* Try a few things more with specific constants and a variable. */
19698 rtx var
, orig_out
, out
, tmp
;
19700 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19703 /* If one of the two operands is an interesting constant, load a
19704 constant with the above and mask it in with a logical operation. */
19706 if (CONST_INT_P (operands
[2]))
19709 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19710 operands
[3] = constm1_rtx
, op
= and_optab
;
19711 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19712 operands
[3] = const0_rtx
, op
= ior_optab
;
19716 else if (CONST_INT_P (operands
[3]))
19719 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19720 operands
[2] = constm1_rtx
, op
= and_optab
;
19721 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19722 operands
[2] = const0_rtx
, op
= ior_optab
;
19729 orig_out
= operands
[0];
19730 tmp
= gen_reg_rtx (mode
);
19733 /* Recurse to get the constant loaded. */
19734 if (ix86_expand_int_movcc (operands
) == 0)
19737 /* Mask in the interesting variable. */
19738 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19740 if (!rtx_equal_p (out
, orig_out
))
19741 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19747 * For comparison with above,
19757 if (! nonimmediate_operand (operands
[2], mode
))
19758 operands
[2] = force_reg (mode
, operands
[2]);
19759 if (! nonimmediate_operand (operands
[3], mode
))
19760 operands
[3] = force_reg (mode
, operands
[3]);
19762 if (! register_operand (operands
[2], VOIDmode
)
19764 || ! register_operand (operands
[3], VOIDmode
)))
19765 operands
[2] = force_reg (mode
, operands
[2]);
19768 && ! register_operand (operands
[3], VOIDmode
))
19769 operands
[3] = force_reg (mode
, operands
[3]);
19771 emit_insn (compare_seq
);
19772 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19773 gen_rtx_IF_THEN_ELSE (mode
,
19774 compare_op
, operands
[2],
19779 /* Swap, force into registers, or otherwise massage the two operands
19780 to an sse comparison with a mask result. Thus we differ a bit from
19781 ix86_prepare_fp_compare_args which expects to produce a flags result.
19783 The DEST operand exists to help determine whether to commute commutative
19784 operators. The POP0/POP1 operands are updated in place. The new
19785 comparison code is returned, or UNKNOWN if not implementable. */
19787 static enum rtx_code
19788 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
19789 rtx
*pop0
, rtx
*pop1
)
19797 /* AVX supports all the needed comparisons. */
19800 /* We have no LTGT as an operator. We could implement it with
19801 NE & ORDERED, but this requires an extra temporary. It's
19802 not clear that it's worth it. */
19809 /* These are supported directly. */
19816 /* AVX has 3 operand comparisons, no need to swap anything. */
19819 /* For commutative operators, try to canonicalize the destination
19820 operand to be first in the comparison - this helps reload to
19821 avoid extra moves. */
19822 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
19830 /* These are not supported directly before AVX, and furthermore
19831 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19832 comparison operands to transform into something that is
19837 code
= swap_condition (code
);
19841 gcc_unreachable ();
19847 /* Detect conditional moves that exactly match min/max operational
19848 semantics. Note that this is IEEE safe, as long as we don't
19849 interchange the operands.
19851 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19852 and TRUE if the operation is successful and instructions are emitted. */
19855 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
19856 rtx cmp_op1
, rtx if_true
, rtx if_false
)
19858 enum machine_mode mode
;
19864 else if (code
== UNGE
)
19867 if_true
= if_false
;
19873 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
19875 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
19880 mode
= GET_MODE (dest
);
19882 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19883 but MODE may be a vector mode and thus not appropriate. */
19884 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
19886 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
19889 if_true
= force_reg (mode
, if_true
);
19890 v
= gen_rtvec (2, if_true
, if_false
);
19891 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
19895 code
= is_min
? SMIN
: SMAX
;
19896 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
19899 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
19903 /* Expand an sse vector comparison. Return the register with the result. */
19906 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
19907 rtx op_true
, rtx op_false
)
19909 enum machine_mode mode
= GET_MODE (dest
);
19910 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
19913 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
19914 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
19915 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
19918 || reg_overlap_mentioned_p (dest
, op_true
)
19919 || reg_overlap_mentioned_p (dest
, op_false
))
19920 dest
= gen_reg_rtx (mode
);
19922 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
19923 if (cmp_mode
!= mode
)
19925 x
= force_reg (cmp_mode
, x
);
19926 convert_move (dest
, x
, false);
19929 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19934 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19935 operations. This is used for both scalar and vector conditional moves. */
19938 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
19940 enum machine_mode mode
= GET_MODE (dest
);
19943 if (vector_all_ones_operand (op_true
, mode
)
19944 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
19946 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
19948 else if (op_false
== CONST0_RTX (mode
))
19950 op_true
= force_reg (mode
, op_true
);
19951 x
= gen_rtx_AND (mode
, cmp
, op_true
);
19952 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19954 else if (op_true
== CONST0_RTX (mode
))
19956 op_false
= force_reg (mode
, op_false
);
19957 x
= gen_rtx_NOT (mode
, cmp
);
19958 x
= gen_rtx_AND (mode
, x
, op_false
);
19959 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19961 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
19963 op_false
= force_reg (mode
, op_false
);
19964 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
19965 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19967 else if (TARGET_XOP
)
19969 op_true
= force_reg (mode
, op_true
);
19971 if (!nonimmediate_operand (op_false
, mode
))
19972 op_false
= force_reg (mode
, op_false
);
19974 emit_insn (gen_rtx_SET (mode
, dest
,
19975 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
19981 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19983 if (!nonimmediate_operand (op_true
, mode
))
19984 op_true
= force_reg (mode
, op_true
);
19986 op_false
= force_reg (mode
, op_false
);
19992 gen
= gen_sse4_1_blendvps
;
19996 gen
= gen_sse4_1_blendvpd
;
20004 gen
= gen_sse4_1_pblendvb
;
20005 dest
= gen_lowpart (V16QImode
, dest
);
20006 op_false
= gen_lowpart (V16QImode
, op_false
);
20007 op_true
= gen_lowpart (V16QImode
, op_true
);
20008 cmp
= gen_lowpart (V16QImode
, cmp
);
20013 gen
= gen_avx_blendvps256
;
20017 gen
= gen_avx_blendvpd256
;
20025 gen
= gen_avx2_pblendvb
;
20026 dest
= gen_lowpart (V32QImode
, dest
);
20027 op_false
= gen_lowpart (V32QImode
, op_false
);
20028 op_true
= gen_lowpart (V32QImode
, op_true
);
20029 cmp
= gen_lowpart (V32QImode
, cmp
);
20037 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
20040 op_true
= force_reg (mode
, op_true
);
20042 t2
= gen_reg_rtx (mode
);
20044 t3
= gen_reg_rtx (mode
);
20048 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20049 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20051 x
= gen_rtx_NOT (mode
, cmp
);
20052 x
= gen_rtx_AND (mode
, x
, op_false
);
20053 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20055 x
= gen_rtx_IOR (mode
, t3
, t2
);
20056 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20061 /* Expand a floating-point conditional move. Return true if successful. */
20064 ix86_expand_fp_movcc (rtx operands
[])
20066 enum machine_mode mode
= GET_MODE (operands
[0]);
20067 enum rtx_code code
= GET_CODE (operands
[1]);
20068 rtx tmp
, compare_op
;
20069 rtx op0
= XEXP (operands
[1], 0);
20070 rtx op1
= XEXP (operands
[1], 1);
20072 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20074 enum machine_mode cmode
;
20076 /* Since we've no cmove for sse registers, don't force bad register
20077 allocation just to gain access to it. Deny movcc when the
20078 comparison mode doesn't match the move mode. */
20079 cmode
= GET_MODE (op0
);
20080 if (cmode
== VOIDmode
)
20081 cmode
= GET_MODE (op1
);
20085 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20086 if (code
== UNKNOWN
)
20089 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20090 operands
[2], operands
[3]))
20093 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20094 operands
[2], operands
[3]);
20095 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20099 if (GET_MODE (op0
) == TImode
20100 || (GET_MODE (op0
) == DImode
20104 /* The floating point conditional move instructions don't directly
20105 support conditions resulting from a signed integer comparison. */
20107 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20108 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20110 tmp
= gen_reg_rtx (QImode
);
20111 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20113 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20116 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20117 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20118 operands
[2], operands
[3])));
20123 /* Expand a floating-point vector conditional move; a vcond operation
20124 rather than a movcc operation. */
20127 ix86_expand_fp_vcond (rtx operands
[])
20129 enum rtx_code code
= GET_CODE (operands
[3]);
20132 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20133 &operands
[4], &operands
[5]);
20134 if (code
== UNKNOWN
)
20137 switch (GET_CODE (operands
[3]))
20140 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20141 operands
[5], operands
[0], operands
[0]);
20142 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20143 operands
[5], operands
[1], operands
[2]);
20147 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20148 operands
[5], operands
[0], operands
[0]);
20149 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20150 operands
[5], operands
[1], operands
[2]);
20154 gcc_unreachable ();
20156 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20158 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20162 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20163 operands
[5], operands
[1], operands
[2]))
20166 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20167 operands
[1], operands
[2]);
20168 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20172 /* Expand a signed/unsigned integral vector conditional move. */
20175 ix86_expand_int_vcond (rtx operands
[])
20177 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20178 enum machine_mode mode
= GET_MODE (operands
[4]);
20179 enum rtx_code code
= GET_CODE (operands
[3]);
20180 bool negate
= false;
20183 cop0
= operands
[4];
20184 cop1
= operands
[5];
20186 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20187 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20188 if ((code
== LT
|| code
== GE
)
20189 && data_mode
== mode
20190 && cop1
== CONST0_RTX (mode
)
20191 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20192 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20193 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20194 && (GET_MODE_SIZE (data_mode
) == 16
20195 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20197 rtx negop
= operands
[2 - (code
== LT
)];
20198 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20199 if (negop
== CONST1_RTX (data_mode
))
20201 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20202 operands
[0], 1, OPTAB_DIRECT
);
20203 if (res
!= operands
[0])
20204 emit_move_insn (operands
[0], res
);
20207 else if (GET_MODE_INNER (data_mode
) != DImode
20208 && vector_all_ones_operand (negop
, data_mode
))
20210 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20211 operands
[0], 0, OPTAB_DIRECT
);
20212 if (res
!= operands
[0])
20213 emit_move_insn (operands
[0], res
);
20218 if (!nonimmediate_operand (cop1
, mode
))
20219 cop1
= force_reg (mode
, cop1
);
20220 if (!general_operand (operands
[1], data_mode
))
20221 operands
[1] = force_reg (data_mode
, operands
[1]);
20222 if (!general_operand (operands
[2], data_mode
))
20223 operands
[2] = force_reg (data_mode
, operands
[2]);
20225 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20227 && (mode
== V16QImode
|| mode
== V8HImode
20228 || mode
== V4SImode
|| mode
== V2DImode
))
20232 /* Canonicalize the comparison to EQ, GT, GTU. */
20243 code
= reverse_condition (code
);
20249 code
= reverse_condition (code
);
20255 code
= swap_condition (code
);
20256 x
= cop0
, cop0
= cop1
, cop1
= x
;
20260 gcc_unreachable ();
20263 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20264 if (mode
== V2DImode
)
20269 /* SSE4.1 supports EQ. */
20270 if (!TARGET_SSE4_1
)
20276 /* SSE4.2 supports GT/GTU. */
20277 if (!TARGET_SSE4_2
)
20282 gcc_unreachable ();
20286 /* Unsigned parallel compare is not supported by the hardware.
20287 Play some tricks to turn this into a signed comparison
20291 cop0
= force_reg (mode
, cop0
);
20301 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20305 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20306 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20307 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20308 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20310 gcc_unreachable ();
20312 /* Subtract (-(INT MAX) - 1) from both operands to make
20314 mask
= ix86_build_signbit_mask (mode
, true, false);
20315 t1
= gen_reg_rtx (mode
);
20316 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20318 t2
= gen_reg_rtx (mode
);
20319 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20331 /* Perform a parallel unsigned saturating subtraction. */
20332 x
= gen_reg_rtx (mode
);
20333 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20334 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20337 cop1
= CONST0_RTX (mode
);
20343 gcc_unreachable ();
20348 /* Allow the comparison to be done in one mode, but the movcc to
20349 happen in another mode. */
20350 if (data_mode
== mode
)
20352 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20353 operands
[1+negate
], operands
[2-negate
]);
20357 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20358 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20360 operands
[1+negate
], operands
[2-negate
]);
20361 x
= gen_lowpart (data_mode
, x
);
20364 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20365 operands
[2-negate
]);
20369 /* Expand a variable vector permutation. */
20372 ix86_expand_vec_perm (rtx operands
[])
20374 rtx target
= operands
[0];
20375 rtx op0
= operands
[1];
20376 rtx op1
= operands
[2];
20377 rtx mask
= operands
[3];
20378 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20379 enum machine_mode mode
= GET_MODE (op0
);
20380 enum machine_mode maskmode
= GET_MODE (mask
);
20382 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20384 /* Number of elements in the vector. */
20385 w
= GET_MODE_NUNITS (mode
);
20386 e
= GET_MODE_UNIT_SIZE (mode
);
20387 gcc_assert (w
<= 32);
20391 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20393 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20394 an constant shuffle operand. With a tiny bit of effort we can
20395 use VPERMD instead. A re-interpretation stall for V4DFmode is
20396 unfortunate but there's no avoiding it.
20397 Similarly for V16HImode we don't have instructions for variable
20398 shuffling, while for V32QImode we can use after preparing suitable
20399 masks vpshufb; vpshufb; vpermq; vpor. */
20401 if (mode
== V16HImode
)
20403 maskmode
= mode
= V32QImode
;
20409 maskmode
= mode
= V8SImode
;
20413 t1
= gen_reg_rtx (maskmode
);
20415 /* Replicate the low bits of the V4DImode mask into V8SImode:
20417 t1 = { A A B B C C D D }. */
20418 for (i
= 0; i
< w
/ 2; ++i
)
20419 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20420 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20421 vt
= force_reg (maskmode
, vt
);
20422 mask
= gen_lowpart (maskmode
, mask
);
20423 if (maskmode
== V8SImode
)
20424 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20426 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20428 /* Multiply the shuffle indicies by two. */
20429 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20432 /* Add one to the odd shuffle indicies:
20433 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20434 for (i
= 0; i
< w
/ 2; ++i
)
20436 vec
[i
* 2] = const0_rtx
;
20437 vec
[i
* 2 + 1] = const1_rtx
;
20439 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20440 vt
= force_const_mem (maskmode
, vt
);
20441 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20444 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20445 operands
[3] = mask
= t1
;
20446 target
= gen_lowpart (mode
, target
);
20447 op0
= gen_lowpart (mode
, op0
);
20448 op1
= gen_lowpart (mode
, op1
);
20454 /* The VPERMD and VPERMPS instructions already properly ignore
20455 the high bits of the shuffle elements. No need for us to
20456 perform an AND ourselves. */
20457 if (one_operand_shuffle
)
20458 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
20461 t1
= gen_reg_rtx (V8SImode
);
20462 t2
= gen_reg_rtx (V8SImode
);
20463 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20464 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
20470 mask
= gen_lowpart (V8SFmode
, mask
);
20471 if (one_operand_shuffle
)
20472 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20475 t1
= gen_reg_rtx (V8SFmode
);
20476 t2
= gen_reg_rtx (V8SFmode
);
20477 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20478 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
20484 /* By combining the two 128-bit input vectors into one 256-bit
20485 input vector, we can use VPERMD and VPERMPS for the full
20486 two-operand shuffle. */
20487 t1
= gen_reg_rtx (V8SImode
);
20488 t2
= gen_reg_rtx (V8SImode
);
20489 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20490 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20491 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20492 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20496 t1
= gen_reg_rtx (V8SFmode
);
20497 t2
= gen_reg_rtx (V8SImode
);
20498 mask
= gen_lowpart (V4SImode
, mask
);
20499 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20500 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20501 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20502 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
20506 t1
= gen_reg_rtx (V32QImode
);
20507 t2
= gen_reg_rtx (V32QImode
);
20508 t3
= gen_reg_rtx (V32QImode
);
20509 vt2
= GEN_INT (128);
20510 for (i
= 0; i
< 32; i
++)
20512 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20513 vt
= force_reg (V32QImode
, vt
);
20514 for (i
= 0; i
< 32; i
++)
20515 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20516 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20517 vt2
= force_reg (V32QImode
, vt2
);
20518 /* From mask create two adjusted masks, which contain the same
20519 bits as mask in the low 7 bits of each vector element.
20520 The first mask will have the most significant bit clear
20521 if it requests element from the same 128-bit lane
20522 and MSB set if it requests element from the other 128-bit lane.
20523 The second mask will have the opposite values of the MSB,
20524 and additionally will have its 128-bit lanes swapped.
20525 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20526 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20527 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20528 stands for other 12 bytes. */
20529 /* The bit whether element is from the same lane or the other
20530 lane is bit 4, so shift it up by 3 to the MSB position. */
20531 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20532 gen_lowpart (V4DImode
, mask
),
20534 /* Clear MSB bits from the mask just in case it had them set. */
20535 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20536 /* After this t1 will have MSB set for elements from other lane. */
20537 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20538 /* Clear bits other than MSB. */
20539 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20540 /* Or in the lower bits from mask into t3. */
20541 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20542 /* And invert MSB bits in t1, so MSB is set for elements from the same
20544 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20545 /* Swap 128-bit lanes in t3. */
20546 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20547 gen_lowpart (V4DImode
, t3
),
20548 const2_rtx
, GEN_INT (3),
20549 const0_rtx
, const1_rtx
));
20550 /* And or in the lower bits from mask into t1. */
20551 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20552 if (one_operand_shuffle
)
20554 /* Each of these shuffles will put 0s in places where
20555 element from the other 128-bit lane is needed, otherwise
20556 will shuffle in the requested value. */
20557 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20558 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20559 /* For t3 the 128-bit lanes are swapped again. */
20560 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20561 gen_lowpart (V4DImode
, t3
),
20562 const2_rtx
, GEN_INT (3),
20563 const0_rtx
, const1_rtx
));
20564 /* And oring both together leads to the result. */
20565 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20569 t4
= gen_reg_rtx (V32QImode
);
20570 /* Similarly to the above one_operand_shuffle code,
20571 just for repeated twice for each operand. merge_two:
20572 code will merge the two results together. */
20573 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20574 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20575 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20576 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20577 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20578 gen_lowpart (V4DImode
, t4
),
20579 const2_rtx
, GEN_INT (3),
20580 const0_rtx
, const1_rtx
));
20581 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20582 gen_lowpart (V4DImode
, t3
),
20583 const2_rtx
, GEN_INT (3),
20584 const0_rtx
, const1_rtx
));
20585 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20586 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20592 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20599 /* The XOP VPPERM insn supports three inputs. By ignoring the
20600 one_operand_shuffle special case, we avoid creating another
20601 set of constant vectors in memory. */
20602 one_operand_shuffle
= false;
20604 /* mask = mask & {2*w-1, ...} */
20605 vt
= GEN_INT (2*w
- 1);
20609 /* mask = mask & {w-1, ...} */
20610 vt
= GEN_INT (w
- 1);
20613 for (i
= 0; i
< w
; i
++)
20615 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20616 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20617 NULL_RTX
, 0, OPTAB_DIRECT
);
20619 /* For non-QImode operations, convert the word permutation control
20620 into a byte permutation control. */
20621 if (mode
!= V16QImode
)
20623 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20624 GEN_INT (exact_log2 (e
)),
20625 NULL_RTX
, 0, OPTAB_DIRECT
);
20627 /* Convert mask to vector of chars. */
20628 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20630 /* Replicate each of the input bytes into byte positions:
20631 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20632 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20633 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20634 for (i
= 0; i
< 16; ++i
)
20635 vec
[i
] = GEN_INT (i
/e
* e
);
20636 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20637 vt
= force_const_mem (V16QImode
, vt
);
20639 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20641 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20643 /* Convert it into the byte positions by doing
20644 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20645 for (i
= 0; i
< 16; ++i
)
20646 vec
[i
] = GEN_INT (i
% e
);
20647 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20648 vt
= force_const_mem (V16QImode
, vt
);
20649 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20652 /* The actual shuffle operations all operate on V16QImode. */
20653 op0
= gen_lowpart (V16QImode
, op0
);
20654 op1
= gen_lowpart (V16QImode
, op1
);
20655 target
= gen_lowpart (V16QImode
, target
);
20659 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20661 else if (one_operand_shuffle
)
20663 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20670 /* Shuffle the two input vectors independently. */
20671 t1
= gen_reg_rtx (V16QImode
);
20672 t2
= gen_reg_rtx (V16QImode
);
20673 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20674 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20677 /* Then merge them together. The key is whether any given control
20678 element contained a bit set that indicates the second word. */
20679 mask
= operands
[3];
20681 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20683 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20684 more shuffle to convert the V2DI input mask into a V4SI
20685 input mask. At which point the masking that expand_int_vcond
20686 will work as desired. */
20687 rtx t3
= gen_reg_rtx (V4SImode
);
20688 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20689 const0_rtx
, const0_rtx
,
20690 const2_rtx
, const2_rtx
));
20692 maskmode
= V4SImode
;
20696 for (i
= 0; i
< w
; i
++)
20698 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20699 vt
= force_reg (maskmode
, vt
);
20700 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20701 NULL_RTX
, 0, OPTAB_DIRECT
);
20703 xops
[0] = gen_lowpart (mode
, operands
[0]);
20704 xops
[1] = gen_lowpart (mode
, t2
);
20705 xops
[2] = gen_lowpart (mode
, t1
);
20706 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20709 ok
= ix86_expand_int_vcond (xops
);
20714 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20715 true if we should do zero extension, else sign extension. HIGH_P is
20716 true if we want the N/2 high elements, else the low elements. */
20719 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
20721 enum machine_mode imode
= GET_MODE (src
);
20726 rtx (*unpack
)(rtx
, rtx
);
20727 rtx (*extract
)(rtx
, rtx
) = NULL
;
20728 enum machine_mode halfmode
= BLKmode
;
20734 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20736 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20737 halfmode
= V16QImode
;
20739 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20743 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20745 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20746 halfmode
= V8HImode
;
20748 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20752 unpack
= gen_avx2_zero_extendv4siv4di2
;
20754 unpack
= gen_avx2_sign_extendv4siv4di2
;
20755 halfmode
= V4SImode
;
20757 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
20761 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20763 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20767 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20769 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20773 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20775 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20778 gcc_unreachable ();
20781 if (GET_MODE_SIZE (imode
) == 32)
20783 tmp
= gen_reg_rtx (halfmode
);
20784 emit_insn (extract (tmp
, src
));
20788 /* Shift higher 8 bytes to lower 8 bytes. */
20789 tmp
= gen_reg_rtx (imode
);
20790 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20791 gen_lowpart (V1TImode
, src
),
20797 emit_insn (unpack (dest
, tmp
));
20801 rtx (*unpack
)(rtx
, rtx
, rtx
);
20807 unpack
= gen_vec_interleave_highv16qi
;
20809 unpack
= gen_vec_interleave_lowv16qi
;
20813 unpack
= gen_vec_interleave_highv8hi
;
20815 unpack
= gen_vec_interleave_lowv8hi
;
20819 unpack
= gen_vec_interleave_highv4si
;
20821 unpack
= gen_vec_interleave_lowv4si
;
20824 gcc_unreachable ();
20828 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20830 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20831 src
, pc_rtx
, pc_rtx
);
20833 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
20837 /* Expand conditional increment or decrement using adb/sbb instructions.
20838 The default case using setcc followed by the conditional move can be
20839 done by generic code. */
20841 ix86_expand_int_addcc (rtx operands
[])
20843 enum rtx_code code
= GET_CODE (operands
[1]);
20845 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
20847 rtx val
= const0_rtx
;
20848 bool fpcmp
= false;
20849 enum machine_mode mode
;
20850 rtx op0
= XEXP (operands
[1], 0);
20851 rtx op1
= XEXP (operands
[1], 1);
20853 if (operands
[3] != const1_rtx
20854 && operands
[3] != constm1_rtx
)
20856 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20858 code
= GET_CODE (compare_op
);
20860 flags
= XEXP (compare_op
, 0);
20862 if (GET_MODE (flags
) == CCFPmode
20863 || GET_MODE (flags
) == CCFPUmode
)
20866 code
= ix86_fp_compare_code_to_integer (code
);
20873 PUT_CODE (compare_op
,
20874 reverse_condition_maybe_unordered
20875 (GET_CODE (compare_op
)));
20877 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
20880 mode
= GET_MODE (operands
[0]);
20882 /* Construct either adc or sbb insn. */
20883 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
20888 insn
= gen_subqi3_carry
;
20891 insn
= gen_subhi3_carry
;
20894 insn
= gen_subsi3_carry
;
20897 insn
= gen_subdi3_carry
;
20900 gcc_unreachable ();
20908 insn
= gen_addqi3_carry
;
20911 insn
= gen_addhi3_carry
;
20914 insn
= gen_addsi3_carry
;
20917 insn
= gen_adddi3_carry
;
20920 gcc_unreachable ();
20923 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
20929 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20930 but works for floating pointer parameters and nonoffsetable memories.
20931 For pushes, it returns just stack offsets; the values will be saved
20932 in the right order. Maximally three parts are generated. */
20935 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
20940 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20942 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20944 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20945 gcc_assert (size
>= 2 && size
<= 4);
20947 /* Optimize constant pool reference to immediates. This is used by fp
20948 moves, that force all constants to memory to allow combining. */
20949 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20951 rtx tmp
= maybe_get_pool_constant (operand
);
20956 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20958 /* The only non-offsetable memories we handle are pushes. */
20959 int ok
= push_operand (operand
, VOIDmode
);
20963 operand
= copy_rtx (operand
);
20964 PUT_MODE (operand
, word_mode
);
20965 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20969 if (GET_CODE (operand
) == CONST_VECTOR
)
20971 enum machine_mode imode
= int_mode_for_mode (mode
);
20972 /* Caution: if we looked through a constant pool memory above,
20973 the operand may actually have a different mode now. That's
20974 ok, since we want to pun this all the way back to an integer. */
20975 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20976 gcc_assert (operand
!= NULL
);
20982 if (mode
== DImode
)
20983 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20988 if (REG_P (operand
))
20990 gcc_assert (reload_completed
);
20991 for (i
= 0; i
< size
; i
++)
20992 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
20994 else if (offsettable_memref_p (operand
))
20996 operand
= adjust_address (operand
, SImode
, 0);
20997 parts
[0] = operand
;
20998 for (i
= 1; i
< size
; i
++)
20999 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
21001 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21006 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21010 real_to_target (l
, &r
, mode
);
21011 parts
[3] = gen_int_mode (l
[3], SImode
);
21012 parts
[2] = gen_int_mode (l
[2], SImode
);
21015 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21016 long double may not be 80-bit. */
21017 real_to_target (l
, &r
, mode
);
21018 parts
[2] = gen_int_mode (l
[2], SImode
);
21021 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
21024 gcc_unreachable ();
21026 parts
[1] = gen_int_mode (l
[1], SImode
);
21027 parts
[0] = gen_int_mode (l
[0], SImode
);
21030 gcc_unreachable ();
21035 if (mode
== TImode
)
21036 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21037 if (mode
== XFmode
|| mode
== TFmode
)
21039 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21040 if (REG_P (operand
))
21042 gcc_assert (reload_completed
);
21043 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21044 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21046 else if (offsettable_memref_p (operand
))
21048 operand
= adjust_address (operand
, DImode
, 0);
21049 parts
[0] = operand
;
21050 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21052 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21057 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21058 real_to_target (l
, &r
, mode
);
21060 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21061 if (HOST_BITS_PER_WIDE_INT
>= 64)
21064 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21065 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21068 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21070 if (upper_mode
== SImode
)
21071 parts
[1] = gen_int_mode (l
[2], SImode
);
21072 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21075 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21076 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21079 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21082 gcc_unreachable ();
21089 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21090 Return false when normal moves are needed; true when all required
21091 insns have been emitted. Operands 2-4 contain the input values
21092 int the correct order; operands 5-7 contain the output values. */
21095 ix86_split_long_move (rtx operands
[])
21100 int collisions
= 0;
21101 enum machine_mode mode
= GET_MODE (operands
[0]);
21102 bool collisionparts
[4];
21104 /* The DFmode expanders may ask us to move double.
21105 For 64bit target this is single move. By hiding the fact
21106 here we simplify i386.md splitters. */
21107 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21109 /* Optimize constant pool reference to immediates. This is used by
21110 fp moves, that force all constants to memory to allow combining. */
21112 if (MEM_P (operands
[1])
21113 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21114 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21115 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21116 if (push_operand (operands
[0], VOIDmode
))
21118 operands
[0] = copy_rtx (operands
[0]);
21119 PUT_MODE (operands
[0], word_mode
);
21122 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21123 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21124 emit_move_insn (operands
[0], operands
[1]);
21128 /* The only non-offsettable memory we handle is push. */
21129 if (push_operand (operands
[0], VOIDmode
))
21132 gcc_assert (!MEM_P (operands
[0])
21133 || offsettable_memref_p (operands
[0]));
21135 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21136 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21138 /* When emitting push, take care for source operands on the stack. */
21139 if (push
&& MEM_P (operands
[1])
21140 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21142 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21144 /* Compensate for the stack decrement by 4. */
21145 if (!TARGET_64BIT
&& nparts
== 3
21146 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21147 src_base
= plus_constant (Pmode
, src_base
, 4);
21149 /* src_base refers to the stack pointer and is
21150 automatically decreased by emitted push. */
21151 for (i
= 0; i
< nparts
; i
++)
21152 part
[1][i
] = change_address (part
[1][i
],
21153 GET_MODE (part
[1][i
]), src_base
);
21156 /* We need to do copy in the right order in case an address register
21157 of the source overlaps the destination. */
21158 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21162 for (i
= 0; i
< nparts
; i
++)
21165 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21166 if (collisionparts
[i
])
21170 /* Collision in the middle part can be handled by reordering. */
21171 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21173 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21174 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21176 else if (collisions
== 1
21178 && (collisionparts
[1] || collisionparts
[2]))
21180 if (collisionparts
[1])
21182 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21183 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21187 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21188 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21192 /* If there are more collisions, we can't handle it by reordering.
21193 Do an lea to the last part and use only one colliding move. */
21194 else if (collisions
> 1)
21200 base
= part
[0][nparts
- 1];
21202 /* Handle the case when the last part isn't valid for lea.
21203 Happens in 64-bit mode storing the 12-byte XFmode. */
21204 if (GET_MODE (base
) != Pmode
)
21205 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21207 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21208 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21209 for (i
= 1; i
< nparts
; i
++)
21211 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21212 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21223 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21224 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21225 stack_pointer_rtx
, GEN_INT (-4)));
21226 emit_move_insn (part
[0][2], part
[1][2]);
21228 else if (nparts
== 4)
21230 emit_move_insn (part
[0][3], part
[1][3]);
21231 emit_move_insn (part
[0][2], part
[1][2]);
21236 /* In 64bit mode we don't have 32bit push available. In case this is
21237 register, it is OK - we will just use larger counterpart. We also
21238 retype memory - these comes from attempt to avoid REX prefix on
21239 moving of second half of TFmode value. */
21240 if (GET_MODE (part
[1][1]) == SImode
)
21242 switch (GET_CODE (part
[1][1]))
21245 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21249 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21253 gcc_unreachable ();
21256 if (GET_MODE (part
[1][0]) == SImode
)
21257 part
[1][0] = part
[1][1];
21260 emit_move_insn (part
[0][1], part
[1][1]);
21261 emit_move_insn (part
[0][0], part
[1][0]);
21265 /* Choose correct order to not overwrite the source before it is copied. */
21266 if ((REG_P (part
[0][0])
21267 && REG_P (part
[1][1])
21268 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21270 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21272 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21274 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21276 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21278 operands
[2 + i
] = part
[0][j
];
21279 operands
[6 + i
] = part
[1][j
];
21284 for (i
= 0; i
< nparts
; i
++)
21286 operands
[2 + i
] = part
[0][i
];
21287 operands
[6 + i
] = part
[1][i
];
21291 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21292 if (optimize_insn_for_size_p ())
21294 for (j
= 0; j
< nparts
- 1; j
++)
21295 if (CONST_INT_P (operands
[6 + j
])
21296 && operands
[6 + j
] != const0_rtx
21297 && REG_P (operands
[2 + j
]))
21298 for (i
= j
; i
< nparts
- 1; i
++)
21299 if (CONST_INT_P (operands
[7 + i
])
21300 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21301 operands
[7 + i
] = operands
[2 + j
];
21304 for (i
= 0; i
< nparts
; i
++)
21305 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21310 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21311 left shift by a constant, either using a single shift or
21312 a sequence of add instructions. */
21315 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21317 rtx (*insn
)(rtx
, rtx
, rtx
);
21320 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21321 && !optimize_insn_for_size_p ()))
21323 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21324 while (count
-- > 0)
21325 emit_insn (insn (operand
, operand
, operand
));
21329 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21330 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21335 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21337 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21338 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21339 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21341 rtx low
[2], high
[2];
21344 if (CONST_INT_P (operands
[2]))
21346 split_double_mode (mode
, operands
, 2, low
, high
);
21347 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21349 if (count
>= half_width
)
21351 emit_move_insn (high
[0], low
[1]);
21352 emit_move_insn (low
[0], const0_rtx
);
21354 if (count
> half_width
)
21355 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21359 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21361 if (!rtx_equal_p (operands
[0], operands
[1]))
21362 emit_move_insn (operands
[0], operands
[1]);
21364 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21365 ix86_expand_ashl_const (low
[0], count
, mode
);
21370 split_double_mode (mode
, operands
, 1, low
, high
);
21372 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21374 if (operands
[1] == const1_rtx
)
21376 /* Assuming we've chosen a QImode capable registers, then 1 << N
21377 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21378 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21380 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21382 ix86_expand_clear (low
[0]);
21383 ix86_expand_clear (high
[0]);
21384 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21386 d
= gen_lowpart (QImode
, low
[0]);
21387 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21388 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21389 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21391 d
= gen_lowpart (QImode
, high
[0]);
21392 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21393 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21394 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21397 /* Otherwise, we can get the same results by manually performing
21398 a bit extract operation on bit 5/6, and then performing the two
21399 shifts. The two methods of getting 0/1 into low/high are exactly
21400 the same size. Avoiding the shift in the bit extract case helps
21401 pentium4 a bit; no one else seems to care much either way. */
21404 enum machine_mode half_mode
;
21405 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21406 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21407 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21408 HOST_WIDE_INT bits
;
21411 if (mode
== DImode
)
21413 half_mode
= SImode
;
21414 gen_lshr3
= gen_lshrsi3
;
21415 gen_and3
= gen_andsi3
;
21416 gen_xor3
= gen_xorsi3
;
21421 half_mode
= DImode
;
21422 gen_lshr3
= gen_lshrdi3
;
21423 gen_and3
= gen_anddi3
;
21424 gen_xor3
= gen_xordi3
;
21428 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21429 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21431 x
= gen_lowpart (half_mode
, operands
[2]);
21432 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21434 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21435 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21436 emit_move_insn (low
[0], high
[0]);
21437 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21440 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21441 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21445 if (operands
[1] == constm1_rtx
)
21447 /* For -1 << N, we can avoid the shld instruction, because we
21448 know that we're shifting 0...31/63 ones into a -1. */
21449 emit_move_insn (low
[0], constm1_rtx
);
21450 if (optimize_insn_for_size_p ())
21451 emit_move_insn (high
[0], low
[0]);
21453 emit_move_insn (high
[0], constm1_rtx
);
21457 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21459 if (!rtx_equal_p (operands
[0], operands
[1]))
21460 emit_move_insn (operands
[0], operands
[1]);
21462 split_double_mode (mode
, operands
, 1, low
, high
);
21463 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21466 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21468 if (TARGET_CMOVE
&& scratch
)
21470 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21471 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21473 ix86_expand_clear (scratch
);
21474 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21478 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21479 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21481 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
21486 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21488 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21489 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21490 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21491 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21493 rtx low
[2], high
[2];
21496 if (CONST_INT_P (operands
[2]))
21498 split_double_mode (mode
, operands
, 2, low
, high
);
21499 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21501 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21503 emit_move_insn (high
[0], high
[1]);
21504 emit_insn (gen_ashr3 (high
[0], high
[0],
21505 GEN_INT (half_width
- 1)));
21506 emit_move_insn (low
[0], high
[0]);
21509 else if (count
>= half_width
)
21511 emit_move_insn (low
[0], high
[1]);
21512 emit_move_insn (high
[0], low
[0]);
21513 emit_insn (gen_ashr3 (high
[0], high
[0],
21514 GEN_INT (half_width
- 1)));
21516 if (count
> half_width
)
21517 emit_insn (gen_ashr3 (low
[0], low
[0],
21518 GEN_INT (count
- half_width
)));
21522 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21524 if (!rtx_equal_p (operands
[0], operands
[1]))
21525 emit_move_insn (operands
[0], operands
[1]);
21527 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21528 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
21533 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21535 if (!rtx_equal_p (operands
[0], operands
[1]))
21536 emit_move_insn (operands
[0], operands
[1]);
21538 split_double_mode (mode
, operands
, 1, low
, high
);
21540 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21541 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21543 if (TARGET_CMOVE
&& scratch
)
21545 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21546 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21548 emit_move_insn (scratch
, high
[0]);
21549 emit_insn (gen_ashr3 (scratch
, scratch
,
21550 GEN_INT (half_width
- 1)));
21551 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21556 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21557 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21559 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21565 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21567 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21568 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21569 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21570 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21572 rtx low
[2], high
[2];
21575 if (CONST_INT_P (operands
[2]))
21577 split_double_mode (mode
, operands
, 2, low
, high
);
21578 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21580 if (count
>= half_width
)
21582 emit_move_insn (low
[0], high
[1]);
21583 ix86_expand_clear (high
[0]);
21585 if (count
> half_width
)
21586 emit_insn (gen_lshr3 (low
[0], low
[0],
21587 GEN_INT (count
- half_width
)));
21591 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21593 if (!rtx_equal_p (operands
[0], operands
[1]))
21594 emit_move_insn (operands
[0], operands
[1]);
21596 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21597 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
21602 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21604 if (!rtx_equal_p (operands
[0], operands
[1]))
21605 emit_move_insn (operands
[0], operands
[1]);
21607 split_double_mode (mode
, operands
, 1, low
, high
);
21609 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21610 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21612 if (TARGET_CMOVE
&& scratch
)
21614 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21615 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21617 ix86_expand_clear (scratch
);
21618 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21623 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21624 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21626 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21631 /* Predict just emitted jump instruction to be taken with probability PROB. */
21633 predict_jump (int prob
)
21635 rtx insn
= get_last_insn ();
21636 gcc_assert (JUMP_P (insn
));
21637 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21640 /* Helper function for the string operations below. Dest VARIABLE whether
21641 it is aligned to VALUE bytes. If true, jump to the label. */
21643 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21645 rtx label
= gen_label_rtx ();
21646 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21647 if (GET_MODE (variable
) == DImode
)
21648 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21650 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21651 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21654 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21656 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21660 /* Adjust COUNTER by the VALUE. */
21662 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21664 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21665 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21667 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21670 /* Zero extend possibly SImode EXP to Pmode register. */
21672 ix86_zero_extend_to_Pmode (rtx exp
)
21674 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
21677 /* Divide COUNTREG by SCALE. */
21679 scale_counter (rtx countreg
, int scale
)
21685 if (CONST_INT_P (countreg
))
21686 return GEN_INT (INTVAL (countreg
) / scale
);
21687 gcc_assert (REG_P (countreg
));
21689 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
21690 GEN_INT (exact_log2 (scale
)),
21691 NULL
, 1, OPTAB_DIRECT
);
21695 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21696 DImode for constant loop counts. */
21698 static enum machine_mode
21699 counter_mode (rtx count_exp
)
21701 if (GET_MODE (count_exp
) != VOIDmode
)
21702 return GET_MODE (count_exp
);
21703 if (!CONST_INT_P (count_exp
))
21705 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
21710 /* When SRCPTR is non-NULL, output simple loop to move memory
21711 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21712 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21713 equivalent loop to set memory by VALUE (supposed to be in MODE).
21715 The size is rounded down to whole number of chunk size moved at once.
21716 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
21720 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
21721 rtx destptr
, rtx srcptr
, rtx value
,
21722 rtx count
, enum machine_mode mode
, int unroll
,
21725 rtx out_label
, top_label
, iter
, tmp
;
21726 enum machine_mode iter_mode
= counter_mode (count
);
21727 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
21728 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
21734 top_label
= gen_label_rtx ();
21735 out_label
= gen_label_rtx ();
21736 iter
= gen_reg_rtx (iter_mode
);
21738 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
21739 NULL
, 1, OPTAB_DIRECT
);
21740 /* Those two should combine. */
21741 if (piece_size
== const1_rtx
)
21743 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
21745 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21747 emit_move_insn (iter
, const0_rtx
);
21749 emit_label (top_label
);
21751 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
21752 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
21753 destmem
= change_address (destmem
, mode
, x_addr
);
21757 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
21758 srcmem
= change_address (srcmem
, mode
, y_addr
);
21760 /* When unrolling for chips that reorder memory reads and writes,
21761 we can save registers by using single temporary.
21762 Also using 4 temporaries is overkill in 32bit mode. */
21763 if (!TARGET_64BIT
&& 0)
21765 for (i
= 0; i
< unroll
; i
++)
21770 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21772 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21774 emit_move_insn (destmem
, srcmem
);
21780 gcc_assert (unroll
<= 4);
21781 for (i
= 0; i
< unroll
; i
++)
21783 tmpreg
[i
] = gen_reg_rtx (mode
);
21787 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21789 emit_move_insn (tmpreg
[i
], srcmem
);
21791 for (i
= 0; i
< unroll
; i
++)
21796 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21798 emit_move_insn (destmem
, tmpreg
[i
]);
21803 for (i
= 0; i
< unroll
; i
++)
21807 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21808 emit_move_insn (destmem
, value
);
21811 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
21812 true, OPTAB_LIB_WIDEN
);
21814 emit_move_insn (iter
, tmp
);
21816 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
21818 if (expected_size
!= -1)
21820 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
21821 if (expected_size
== 0)
21823 else if (expected_size
> REG_BR_PROB_BASE
)
21824 predict_jump (REG_BR_PROB_BASE
- 1);
21826 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
21829 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
21830 iter
= ix86_zero_extend_to_Pmode (iter
);
21831 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
21832 true, OPTAB_LIB_WIDEN
);
21833 if (tmp
!= destptr
)
21834 emit_move_insn (destptr
, tmp
);
21837 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
21838 true, OPTAB_LIB_WIDEN
);
21840 emit_move_insn (srcptr
, tmp
);
21842 emit_label (out_label
);
21845 /* Output "rep; mov" instruction.
21846 Arguments have same meaning as for previous function */
21848 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21849 rtx destptr
, rtx srcptr
,
21851 enum machine_mode mode
)
21856 HOST_WIDE_INT rounded_count
;
21858 /* If the size is known, it is shorter to use rep movs. */
21859 if (mode
== QImode
&& CONST_INT_P (count
)
21860 && !(INTVAL (count
) & 3))
21863 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21864 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21865 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21866 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21867 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21868 if (mode
!= QImode
)
21870 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21871 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21872 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21873 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21874 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21875 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21879 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21880 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21882 if (CONST_INT_P (count
))
21884 rounded_count
= (INTVAL (count
)
21885 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21886 destmem
= shallow_copy_rtx (destmem
);
21887 srcmem
= shallow_copy_rtx (srcmem
);
21888 set_mem_size (destmem
, rounded_count
);
21889 set_mem_size (srcmem
, rounded_count
);
21893 if (MEM_SIZE_KNOWN_P (destmem
))
21894 clear_mem_size (destmem
);
21895 if (MEM_SIZE_KNOWN_P (srcmem
))
21896 clear_mem_size (srcmem
);
21898 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
21902 /* Output "rep; stos" instruction.
21903 Arguments have same meaning as for previous function */
21905 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21906 rtx count
, enum machine_mode mode
,
21911 HOST_WIDE_INT rounded_count
;
21913 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21914 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21915 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21916 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21917 if (mode
!= QImode
)
21919 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21920 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21921 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21924 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21925 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21927 rounded_count
= (INTVAL (count
)
21928 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21929 destmem
= shallow_copy_rtx (destmem
);
21930 set_mem_size (destmem
, rounded_count
);
21932 else if (MEM_SIZE_KNOWN_P (destmem
))
21933 clear_mem_size (destmem
);
21934 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
21938 emit_strmov (rtx destmem
, rtx srcmem
,
21939 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
21941 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
21942 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21943 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21946 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21948 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
21949 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
21952 if (CONST_INT_P (count
))
21954 HOST_WIDE_INT countval
= INTVAL (count
);
21957 if ((countval
& 0x10) && max_size
> 16)
21961 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21962 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
21965 gcc_unreachable ();
21968 if ((countval
& 0x08) && max_size
> 8)
21971 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21974 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21975 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
21979 if ((countval
& 0x04) && max_size
> 4)
21981 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21984 if ((countval
& 0x02) && max_size
> 2)
21986 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
21989 if ((countval
& 0x01) && max_size
> 1)
21991 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
21998 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
21999 count
, 1, OPTAB_DIRECT
);
22000 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
22001 count
, QImode
, 1, 4);
22005 /* When there are stringops, we can cheaply increase dest and src pointers.
22006 Otherwise we save code size by maintaining offset (zero is readily
22007 available from preceding rep operation) and using x86 addressing modes.
22009 if (TARGET_SINGLE_STRINGOP
)
22013 rtx label
= ix86_expand_aligntest (count
, 4, true);
22014 src
= change_address (srcmem
, SImode
, srcptr
);
22015 dest
= change_address (destmem
, SImode
, destptr
);
22016 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22017 emit_label (label
);
22018 LABEL_NUSES (label
) = 1;
22022 rtx label
= ix86_expand_aligntest (count
, 2, true);
22023 src
= change_address (srcmem
, HImode
, srcptr
);
22024 dest
= change_address (destmem
, HImode
, destptr
);
22025 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22026 emit_label (label
);
22027 LABEL_NUSES (label
) = 1;
22031 rtx label
= ix86_expand_aligntest (count
, 1, true);
22032 src
= change_address (srcmem
, QImode
, srcptr
);
22033 dest
= change_address (destmem
, QImode
, destptr
);
22034 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22035 emit_label (label
);
22036 LABEL_NUSES (label
) = 1;
22041 rtx offset
= force_reg (Pmode
, const0_rtx
);
22046 rtx label
= ix86_expand_aligntest (count
, 4, true);
22047 src
= change_address (srcmem
, SImode
, srcptr
);
22048 dest
= change_address (destmem
, SImode
, destptr
);
22049 emit_move_insn (dest
, src
);
22050 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
22051 true, OPTAB_LIB_WIDEN
);
22053 emit_move_insn (offset
, tmp
);
22054 emit_label (label
);
22055 LABEL_NUSES (label
) = 1;
22059 rtx label
= ix86_expand_aligntest (count
, 2, true);
22060 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22061 src
= change_address (srcmem
, HImode
, tmp
);
22062 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22063 dest
= change_address (destmem
, HImode
, tmp
);
22064 emit_move_insn (dest
, src
);
22065 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22066 true, OPTAB_LIB_WIDEN
);
22068 emit_move_insn (offset
, tmp
);
22069 emit_label (label
);
22070 LABEL_NUSES (label
) = 1;
22074 rtx label
= ix86_expand_aligntest (count
, 1, true);
22075 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22076 src
= change_address (srcmem
, QImode
, tmp
);
22077 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22078 dest
= change_address (destmem
, QImode
, tmp
);
22079 emit_move_insn (dest
, src
);
22080 emit_label (label
);
22081 LABEL_NUSES (label
) = 1;
22086 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22088 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22089 rtx count
, int max_size
)
22092 expand_simple_binop (counter_mode (count
), AND
, count
,
22093 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22094 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22095 gen_lowpart (QImode
, value
), count
, QImode
,
22099 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22101 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
22105 if (CONST_INT_P (count
))
22107 HOST_WIDE_INT countval
= INTVAL (count
);
22110 if ((countval
& 0x10) && max_size
> 16)
22114 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22115 emit_insn (gen_strset (destptr
, dest
, value
));
22116 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
22117 emit_insn (gen_strset (destptr
, dest
, value
));
22120 gcc_unreachable ();
22123 if ((countval
& 0x08) && max_size
> 8)
22127 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22128 emit_insn (gen_strset (destptr
, dest
, value
));
22132 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22133 emit_insn (gen_strset (destptr
, dest
, value
));
22134 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
22135 emit_insn (gen_strset (destptr
, dest
, value
));
22139 if ((countval
& 0x04) && max_size
> 4)
22141 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22142 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22145 if ((countval
& 0x02) && max_size
> 2)
22147 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
22148 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22151 if ((countval
& 0x01) && max_size
> 1)
22153 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
22154 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22161 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22166 rtx label
= ix86_expand_aligntest (count
, 16, true);
22169 dest
= change_address (destmem
, DImode
, destptr
);
22170 emit_insn (gen_strset (destptr
, dest
, value
));
22171 emit_insn (gen_strset (destptr
, dest
, value
));
22175 dest
= change_address (destmem
, SImode
, destptr
);
22176 emit_insn (gen_strset (destptr
, dest
, value
));
22177 emit_insn (gen_strset (destptr
, dest
, value
));
22178 emit_insn (gen_strset (destptr
, dest
, value
));
22179 emit_insn (gen_strset (destptr
, dest
, value
));
22181 emit_label (label
);
22182 LABEL_NUSES (label
) = 1;
22186 rtx label
= ix86_expand_aligntest (count
, 8, true);
22189 dest
= change_address (destmem
, DImode
, destptr
);
22190 emit_insn (gen_strset (destptr
, dest
, value
));
22194 dest
= change_address (destmem
, SImode
, destptr
);
22195 emit_insn (gen_strset (destptr
, dest
, value
));
22196 emit_insn (gen_strset (destptr
, dest
, value
));
22198 emit_label (label
);
22199 LABEL_NUSES (label
) = 1;
22203 rtx label
= ix86_expand_aligntest (count
, 4, true);
22204 dest
= change_address (destmem
, SImode
, destptr
);
22205 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22206 emit_label (label
);
22207 LABEL_NUSES (label
) = 1;
22211 rtx label
= ix86_expand_aligntest (count
, 2, true);
22212 dest
= change_address (destmem
, HImode
, destptr
);
22213 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22214 emit_label (label
);
22215 LABEL_NUSES (label
) = 1;
22219 rtx label
= ix86_expand_aligntest (count
, 1, true);
22220 dest
= change_address (destmem
, QImode
, destptr
);
22221 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22222 emit_label (label
);
22223 LABEL_NUSES (label
) = 1;
22227 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
22228 DESIRED_ALIGNMENT. */
22230 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
22231 rtx destptr
, rtx srcptr
, rtx count
,
22232 int align
, int desired_alignment
)
22234 if (align
<= 1 && desired_alignment
> 1)
22236 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22237 srcmem
= change_address (srcmem
, QImode
, srcptr
);
22238 destmem
= change_address (destmem
, QImode
, destptr
);
22239 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22240 ix86_adjust_counter (count
, 1);
22241 emit_label (label
);
22242 LABEL_NUSES (label
) = 1;
22244 if (align
<= 2 && desired_alignment
> 2)
22246 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22247 srcmem
= change_address (srcmem
, HImode
, srcptr
);
22248 destmem
= change_address (destmem
, HImode
, destptr
);
22249 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22250 ix86_adjust_counter (count
, 2);
22251 emit_label (label
);
22252 LABEL_NUSES (label
) = 1;
22254 if (align
<= 4 && desired_alignment
> 4)
22256 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22257 srcmem
= change_address (srcmem
, SImode
, srcptr
);
22258 destmem
= change_address (destmem
, SImode
, destptr
);
22259 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22260 ix86_adjust_counter (count
, 4);
22261 emit_label (label
);
22262 LABEL_NUSES (label
) = 1;
22264 gcc_assert (desired_alignment
<= 8);
22267 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
22268 ALIGN_BYTES is how many bytes need to be copied. */
22270 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
22271 int desired_align
, int align_bytes
)
22274 rtx orig_dst
= dst
;
22275 rtx orig_src
= src
;
22277 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
22278 if (src_align_bytes
>= 0)
22279 src_align_bytes
= desired_align
- src_align_bytes
;
22280 if (align_bytes
& 1)
22282 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22283 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
22285 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22287 if (align_bytes
& 2)
22289 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22290 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
22291 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22292 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22293 if (src_align_bytes
>= 0
22294 && (src_align_bytes
& 1) == (align_bytes
& 1)
22295 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
22296 set_mem_align (src
, 2 * BITS_PER_UNIT
);
22298 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22300 if (align_bytes
& 4)
22302 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22303 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
22304 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22305 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22306 if (src_align_bytes
>= 0)
22308 unsigned int src_align
= 0;
22309 if ((src_align_bytes
& 3) == (align_bytes
& 3))
22311 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22313 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22314 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22317 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22319 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22320 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
22321 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22322 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22323 if (src_align_bytes
>= 0)
22325 unsigned int src_align
= 0;
22326 if ((src_align_bytes
& 7) == (align_bytes
& 7))
22328 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
22330 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22332 if (src_align
> (unsigned int) desired_align
)
22333 src_align
= desired_align
;
22334 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22335 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22337 if (MEM_SIZE_KNOWN_P (orig_dst
))
22338 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22339 if (MEM_SIZE_KNOWN_P (orig_src
))
22340 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
22345 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
22346 DESIRED_ALIGNMENT. */
22348 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
22349 int align
, int desired_alignment
)
22351 if (align
<= 1 && desired_alignment
> 1)
22353 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22354 destmem
= change_address (destmem
, QImode
, destptr
);
22355 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
22356 ix86_adjust_counter (count
, 1);
22357 emit_label (label
);
22358 LABEL_NUSES (label
) = 1;
22360 if (align
<= 2 && desired_alignment
> 2)
22362 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22363 destmem
= change_address (destmem
, HImode
, destptr
);
22364 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
22365 ix86_adjust_counter (count
, 2);
22366 emit_label (label
);
22367 LABEL_NUSES (label
) = 1;
22369 if (align
<= 4 && desired_alignment
> 4)
22371 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22372 destmem
= change_address (destmem
, SImode
, destptr
);
22373 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22374 ix86_adjust_counter (count
, 4);
22375 emit_label (label
);
22376 LABEL_NUSES (label
) = 1;
22378 gcc_assert (desired_alignment
<= 8);
22381 /* Set enough from DST to align DST known to by aligned by ALIGN to
22382 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22384 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22385 int desired_align
, int align_bytes
)
22388 rtx orig_dst
= dst
;
22389 if (align_bytes
& 1)
22391 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22393 emit_insn (gen_strset (destreg
, dst
,
22394 gen_lowpart (QImode
, value
)));
22396 if (align_bytes
& 2)
22398 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22399 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22400 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22402 emit_insn (gen_strset (destreg
, dst
,
22403 gen_lowpart (HImode
, value
)));
22405 if (align_bytes
& 4)
22407 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22408 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22409 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22411 emit_insn (gen_strset (destreg
, dst
,
22412 gen_lowpart (SImode
, value
)));
22414 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22415 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22416 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22417 if (MEM_SIZE_KNOWN_P (orig_dst
))
22418 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22422 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22423 static enum stringop_alg
22424 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22425 int *dynamic_check
, bool *noalign
)
22427 const struct stringop_algs
* algs
;
22428 bool optimize_for_speed
;
22429 /* Algorithms using the rep prefix want at least edi and ecx;
22430 additionally, memset wants eax and memcpy wants esi. Don't
22431 consider such algorithms if the user has appropriated those
22432 registers for their own purposes. */
22433 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22435 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22438 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22439 || (alg != rep_prefix_1_byte \
22440 && alg != rep_prefix_4_byte \
22441 && alg != rep_prefix_8_byte))
22442 const struct processor_costs
*cost
;
22444 /* Even if the string operation call is cold, we still might spend a lot
22445 of time processing large blocks. */
22446 if (optimize_function_for_size_p (cfun
)
22447 || (optimize_insn_for_size_p ()
22448 && expected_size
!= -1 && expected_size
< 256))
22449 optimize_for_speed
= false;
22451 optimize_for_speed
= true;
22453 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
22455 *dynamic_check
= -1;
22457 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
22459 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
22460 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
22461 return ix86_stringop_alg
;
22462 /* rep; movq or rep; movl is the smallest variant. */
22463 else if (!optimize_for_speed
)
22465 if (!count
|| (count
& 3))
22466 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
22468 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
22470 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22472 else if (expected_size
!= -1 && expected_size
< 4)
22473 return loop_1_byte
;
22474 else if (expected_size
!= -1)
22477 enum stringop_alg alg
= libcall
;
22478 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22480 /* We get here if the algorithms that were not libcall-based
22481 were rep-prefix based and we are unable to use rep prefixes
22482 based on global register usage. Break out of the loop and
22483 use the heuristic below. */
22484 if (algs
->size
[i
].max
== 0)
22486 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
22488 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22490 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
22492 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22493 last non-libcall inline algorithm. */
22494 if (TARGET_INLINE_ALL_STRINGOPS
)
22496 /* When the current size is best to be copied by a libcall,
22497 but we are still forced to inline, run the heuristic below
22498 that will pick code for medium sized blocks. */
22499 if (alg
!= libcall
)
22503 else if (ALG_USABLE_P (candidate
))
22505 *noalign
= algs
->size
[i
].noalign
;
22510 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
22512 /* When asked to inline the call anyway, try to pick meaningful choice.
22513 We look for maximal size of block that is faster to copy by hand and
22514 take blocks of at most of that size guessing that average size will
22515 be roughly half of the block.
22517 If this turns out to be bad, we might simply specify the preferred
22518 choice in ix86_costs. */
22519 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22520 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
22523 enum stringop_alg alg
;
22525 bool any_alg_usable_p
= true;
22527 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22529 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22530 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
22532 if (candidate
!= libcall
&& candidate
22533 && ALG_USABLE_P (candidate
))
22534 max
= algs
->size
[i
].max
;
22536 /* If there aren't any usable algorithms, then recursing on
22537 smaller sizes isn't going to find anything. Just return the
22538 simple byte-at-a-time copy loop. */
22539 if (!any_alg_usable_p
)
22541 /* Pick something reasonable. */
22542 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22543 *dynamic_check
= 128;
22544 return loop_1_byte
;
22548 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
, noalign
);
22549 gcc_assert (*dynamic_check
== -1);
22550 gcc_assert (alg
!= libcall
);
22551 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22552 *dynamic_check
= max
;
22555 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22556 #undef ALG_USABLE_P
22559 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22560 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22562 decide_alignment (int align
,
22563 enum stringop_alg alg
,
22566 int desired_align
= 0;
22570 gcc_unreachable ();
22572 case unrolled_loop
:
22573 desired_align
= GET_MODE_SIZE (Pmode
);
22575 case rep_prefix_8_byte
:
22578 case rep_prefix_4_byte
:
22579 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22580 copying whole cacheline at once. */
22581 if (TARGET_PENTIUMPRO
)
22586 case rep_prefix_1_byte
:
22587 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22588 copying whole cacheline at once. */
22589 if (TARGET_PENTIUMPRO
)
22603 if (desired_align
< align
)
22604 desired_align
= align
;
22605 if (expected_size
!= -1 && expected_size
< 4)
22606 desired_align
= align
;
22607 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
22620 /* Expand string move (memcpy) operation. Use i386 string operations
22621 when profitable. expand_setmem contains similar code. The code
22622 depends upon architecture, block size and alignment, but always has
22623 the same overall structure:
22625 1) Prologue guard: Conditional that jumps up to epilogues for small
22626 blocks that can be handled by epilogue alone. This is faster
22627 but also needed for correctness, since prologue assume the block
22628 is larger than the desired alignment.
22630 Optional dynamic check for size and libcall for large
22631 blocks is emitted here too, with -minline-stringops-dynamically.
22633 2) Prologue: copy first few bytes in order to get destination
22634 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22635 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22636 copied. We emit either a jump tree on power of two sized
22637 blocks, or a byte loop.
22639 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22640 with specified algorithm.
22642 4) Epilogue: code copying tail of the block that is too small to be
22643 handled by main body (or up to size guarded by prologue guard). */
22646 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22647 rtx expected_align_exp
, rtx expected_size_exp
)
22653 rtx jump_around_label
= NULL
;
22654 HOST_WIDE_INT align
= 1;
22655 unsigned HOST_WIDE_INT count
= 0;
22656 HOST_WIDE_INT expected_size
= -1;
22657 int size_needed
= 0, epilogue_size_needed
;
22658 int desired_align
= 0, align_bytes
= 0;
22659 enum stringop_alg alg
;
22661 bool need_zero_guard
= false;
22664 if (CONST_INT_P (align_exp
))
22665 align
= INTVAL (align_exp
);
22666 /* i386 can do misaligned access on reasonably increased cost. */
22667 if (CONST_INT_P (expected_align_exp
)
22668 && INTVAL (expected_align_exp
) > align
)
22669 align
= INTVAL (expected_align_exp
);
22670 /* ALIGN is the minimum of destination and source alignment, but we care here
22671 just about destination alignment. */
22672 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22673 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22675 if (CONST_INT_P (count_exp
))
22676 count
= expected_size
= INTVAL (count_exp
);
22677 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22678 expected_size
= INTVAL (expected_size_exp
);
22680 /* Make sure we don't need to care about overflow later on. */
22681 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22684 /* Step 0: Decide on preferred algorithm, desired alignment and
22685 size of chunks to be copied by main loop. */
22687 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
, &noalign
);
22688 desired_align
= decide_alignment (align
, alg
, expected_size
);
22690 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
22691 align
= desired_align
;
22693 if (alg
== libcall
)
22695 gcc_assert (alg
!= no_stringop
);
22697 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22698 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22699 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22704 gcc_unreachable ();
22706 need_zero_guard
= true;
22707 size_needed
= GET_MODE_SIZE (word_mode
);
22709 case unrolled_loop
:
22710 need_zero_guard
= true;
22711 size_needed
= GET_MODE_SIZE (word_mode
) * (TARGET_64BIT
? 4 : 2);
22713 case rep_prefix_8_byte
:
22716 case rep_prefix_4_byte
:
22719 case rep_prefix_1_byte
:
22723 need_zero_guard
= true;
22728 epilogue_size_needed
= size_needed
;
22730 /* Step 1: Prologue guard. */
22732 /* Alignment code needs count to be in register. */
22733 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22735 if (INTVAL (count_exp
) > desired_align
22736 && INTVAL (count_exp
) > size_needed
)
22739 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22740 if (align_bytes
<= 0)
22743 align_bytes
= desired_align
- align_bytes
;
22745 if (align_bytes
== 0)
22746 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22748 gcc_assert (desired_align
>= 1 && align
>= 1);
22750 /* Ensure that alignment prologue won't copy past end of block. */
22751 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22753 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22754 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22755 Make sure it is power of 2. */
22756 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22760 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22762 /* If main algorithm works on QImode, no epilogue is needed.
22763 For small sizes just don't align anything. */
22764 if (size_needed
== 1)
22765 desired_align
= align
;
22772 label
= gen_label_rtx ();
22773 emit_cmp_and_jump_insns (count_exp
,
22774 GEN_INT (epilogue_size_needed
),
22775 LTU
, 0, counter_mode (count_exp
), 1, label
);
22776 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22777 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22779 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22783 /* Emit code to decide on runtime whether library call or inline should be
22785 if (dynamic_check
!= -1)
22787 if (CONST_INT_P (count_exp
))
22789 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22791 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22792 count_exp
= const0_rtx
;
22798 rtx hot_label
= gen_label_rtx ();
22799 jump_around_label
= gen_label_rtx ();
22800 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22801 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22802 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22803 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22804 emit_jump (jump_around_label
);
22805 emit_label (hot_label
);
22809 /* Step 2: Alignment prologue. */
22811 if (desired_align
> align
)
22813 if (align_bytes
== 0)
22815 /* Except for the first move in epilogue, we no longer know
22816 constant offset in aliasing info. It don't seems to worth
22817 the pain to maintain it for the first move, so throw away
22819 src
= change_address (src
, BLKmode
, srcreg
);
22820 dst
= change_address (dst
, BLKmode
, destreg
);
22821 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22826 /* If we know how many bytes need to be stored before dst is
22827 sufficiently aligned, maintain aliasing info accurately. */
22828 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22829 desired_align
, align_bytes
);
22830 count_exp
= plus_constant (counter_mode (count_exp
),
22831 count_exp
, -align_bytes
);
22832 count
-= align_bytes
;
22834 if (need_zero_guard
22835 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22836 || (align_bytes
== 0
22837 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22838 + desired_align
- align
))))
22840 /* It is possible that we copied enough so the main loop will not
22842 gcc_assert (size_needed
> 1);
22843 if (label
== NULL_RTX
)
22844 label
= gen_label_rtx ();
22845 emit_cmp_and_jump_insns (count_exp
,
22846 GEN_INT (size_needed
),
22847 LTU
, 0, counter_mode (count_exp
), 1, label
);
22848 if (expected_size
== -1
22849 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22850 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22852 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22855 if (label
&& size_needed
== 1)
22857 emit_label (label
);
22858 LABEL_NUSES (label
) = 1;
22860 epilogue_size_needed
= 1;
22862 else if (label
== NULL_RTX
)
22863 epilogue_size_needed
= size_needed
;
22865 /* Step 3: Main loop. */
22871 gcc_unreachable ();
22873 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22874 count_exp
, QImode
, 1, expected_size
);
22877 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22878 count_exp
, word_mode
, 1, expected_size
);
22880 case unrolled_loop
:
22881 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
22882 registers for 4 temporaries anyway. */
22883 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22884 count_exp
, word_mode
, TARGET_64BIT
? 4 : 2,
22887 case rep_prefix_8_byte
:
22888 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22891 case rep_prefix_4_byte
:
22892 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22895 case rep_prefix_1_byte
:
22896 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22900 /* Adjust properly the offset of src and dest memory for aliasing. */
22901 if (CONST_INT_P (count_exp
))
22903 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22904 (count
/ size_needed
) * size_needed
);
22905 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22906 (count
/ size_needed
) * size_needed
);
22910 src
= change_address (src
, BLKmode
, srcreg
);
22911 dst
= change_address (dst
, BLKmode
, destreg
);
22914 /* Step 4: Epilogue to copy the remaining bytes. */
22918 /* When the main loop is done, COUNT_EXP might hold original count,
22919 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22920 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22921 bytes. Compensate if needed. */
22923 if (size_needed
< epilogue_size_needed
)
22926 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22927 GEN_INT (size_needed
- 1), count_exp
, 1,
22929 if (tmp
!= count_exp
)
22930 emit_move_insn (count_exp
, tmp
);
22932 emit_label (label
);
22933 LABEL_NUSES (label
) = 1;
22936 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22937 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22938 epilogue_size_needed
);
22939 if (jump_around_label
)
22940 emit_label (jump_around_label
);
22944 /* Helper function for memcpy. For QImode value 0xXY produce
22945 0xXYXYXYXY of wide specified by MODE. This is essentially
22946 a * 0x10101010, but we can do slightly better than
22947 synth_mult by unwinding the sequence by hand on CPUs with
22950 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
22952 enum machine_mode valmode
= GET_MODE (val
);
22954 int nops
= mode
== DImode
? 3 : 2;
22956 gcc_assert (mode
== SImode
|| mode
== DImode
);
22957 if (val
== const0_rtx
)
22958 return copy_to_mode_reg (mode
, const0_rtx
);
22959 if (CONST_INT_P (val
))
22961 HOST_WIDE_INT v
= INTVAL (val
) & 255;
22965 if (mode
== DImode
)
22966 v
|= (v
<< 16) << 16;
22967 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
22970 if (valmode
== VOIDmode
)
22972 if (valmode
!= QImode
)
22973 val
= gen_lowpart (QImode
, val
);
22974 if (mode
== QImode
)
22976 if (!TARGET_PARTIAL_REG_STALL
)
22978 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
22979 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
22980 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
22981 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
22983 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22984 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
22985 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
22990 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22992 if (!TARGET_PARTIAL_REG_STALL
)
22993 if (mode
== SImode
)
22994 emit_insn (gen_movsi_insv_1 (reg
, reg
));
22996 emit_insn (gen_movdi_insv_1 (reg
, reg
));
22999 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
23000 NULL
, 1, OPTAB_DIRECT
);
23002 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23004 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
23005 NULL
, 1, OPTAB_DIRECT
);
23006 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23007 if (mode
== SImode
)
23009 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
23010 NULL
, 1, OPTAB_DIRECT
);
23011 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23016 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23017 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23018 alignment from ALIGN to DESIRED_ALIGN. */
23020 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
23025 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
23026 promoted_val
= promote_duplicated_reg (DImode
, val
);
23027 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
23028 promoted_val
= promote_duplicated_reg (SImode
, val
);
23029 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
23030 promoted_val
= promote_duplicated_reg (HImode
, val
);
23032 promoted_val
= val
;
23034 return promoted_val
;
23037 /* Expand string clear operation (bzero). Use i386 string operations when
23038 profitable. See expand_movmem comment for explanation of individual
23039 steps performed. */
23041 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
23042 rtx expected_align_exp
, rtx expected_size_exp
)
23047 rtx jump_around_label
= NULL
;
23048 HOST_WIDE_INT align
= 1;
23049 unsigned HOST_WIDE_INT count
= 0;
23050 HOST_WIDE_INT expected_size
= -1;
23051 int size_needed
= 0, epilogue_size_needed
;
23052 int desired_align
= 0, align_bytes
= 0;
23053 enum stringop_alg alg
;
23054 rtx promoted_val
= NULL
;
23055 bool force_loopy_epilogue
= false;
23057 bool need_zero_guard
= false;
23060 if (CONST_INT_P (align_exp
))
23061 align
= INTVAL (align_exp
);
23062 /* i386 can do misaligned access on reasonably increased cost. */
23063 if (CONST_INT_P (expected_align_exp
)
23064 && INTVAL (expected_align_exp
) > align
)
23065 align
= INTVAL (expected_align_exp
);
23066 if (CONST_INT_P (count_exp
))
23067 count
= expected_size
= INTVAL (count_exp
);
23068 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23069 expected_size
= INTVAL (expected_size_exp
);
23071 /* Make sure we don't need to care about overflow later on. */
23072 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23075 /* Step 0: Decide on preferred algorithm, desired alignment and
23076 size of chunks to be copied by main loop. */
23078 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
, &noalign
);
23079 desired_align
= decide_alignment (align
, alg
, expected_size
);
23081 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23082 align
= desired_align
;
23084 if (alg
== libcall
)
23086 gcc_assert (alg
!= no_stringop
);
23088 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
23089 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
23094 gcc_unreachable ();
23096 need_zero_guard
= true;
23097 size_needed
= GET_MODE_SIZE (word_mode
);
23099 case unrolled_loop
:
23100 need_zero_guard
= true;
23101 size_needed
= GET_MODE_SIZE (word_mode
) * 4;
23103 case rep_prefix_8_byte
:
23106 case rep_prefix_4_byte
:
23109 case rep_prefix_1_byte
:
23113 need_zero_guard
= true;
23117 epilogue_size_needed
= size_needed
;
23119 /* Step 1: Prologue guard. */
23121 /* Alignment code needs count to be in register. */
23122 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23124 if (INTVAL (count_exp
) > desired_align
23125 && INTVAL (count_exp
) > size_needed
)
23128 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23129 if (align_bytes
<= 0)
23132 align_bytes
= desired_align
- align_bytes
;
23134 if (align_bytes
== 0)
23136 enum machine_mode mode
= SImode
;
23137 if (TARGET_64BIT
&& (count
& ~0xffffffff))
23139 count_exp
= force_reg (mode
, count_exp
);
23142 /* Do the cheap promotion to allow better CSE across the
23143 main loop and epilogue (ie one load of the big constant in the
23144 front of all code. */
23145 if (CONST_INT_P (val_exp
))
23146 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23147 desired_align
, align
);
23148 /* Ensure that alignment prologue won't copy past end of block. */
23149 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23151 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23152 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
23153 Make sure it is power of 2. */
23154 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
23156 /* To improve performance of small blocks, we jump around the VAL
23157 promoting mode. This mean that if the promoted VAL is not constant,
23158 we might not use it in the epilogue and have to use byte
23160 if (epilogue_size_needed
> 2 && !promoted_val
)
23161 force_loopy_epilogue
= true;
23164 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23166 /* If main algorithm works on QImode, no epilogue is needed.
23167 For small sizes just don't align anything. */
23168 if (size_needed
== 1)
23169 desired_align
= align
;
23176 label
= gen_label_rtx ();
23177 emit_cmp_and_jump_insns (count_exp
,
23178 GEN_INT (epilogue_size_needed
),
23179 LTU
, 0, counter_mode (count_exp
), 1, label
);
23180 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23181 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23183 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23186 if (dynamic_check
!= -1)
23188 rtx hot_label
= gen_label_rtx ();
23189 jump_around_label
= gen_label_rtx ();
23190 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23191 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23192 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23193 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23194 emit_jump (jump_around_label
);
23195 emit_label (hot_label
);
23198 /* Step 2: Alignment prologue. */
23200 /* Do the expensive promotion once we branched off the small blocks. */
23202 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23203 desired_align
, align
);
23204 gcc_assert (desired_align
>= 1 && align
>= 1);
23206 if (desired_align
> align
)
23208 if (align_bytes
== 0)
23210 /* Except for the first move in epilogue, we no longer know
23211 constant offset in aliasing info. It don't seems to worth
23212 the pain to maintain it for the first move, so throw away
23214 dst
= change_address (dst
, BLKmode
, destreg
);
23215 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
23220 /* If we know how many bytes need to be stored before dst is
23221 sufficiently aligned, maintain aliasing info accurately. */
23222 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
23223 desired_align
, align_bytes
);
23224 count_exp
= plus_constant (counter_mode (count_exp
),
23225 count_exp
, -align_bytes
);
23226 count
-= align_bytes
;
23228 if (need_zero_guard
23229 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23230 || (align_bytes
== 0
23231 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23232 + desired_align
- align
))))
23234 /* It is possible that we copied enough so the main loop will not
23236 gcc_assert (size_needed
> 1);
23237 if (label
== NULL_RTX
)
23238 label
= gen_label_rtx ();
23239 emit_cmp_and_jump_insns (count_exp
,
23240 GEN_INT (size_needed
),
23241 LTU
, 0, counter_mode (count_exp
), 1, label
);
23242 if (expected_size
== -1
23243 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23244 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23246 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23249 if (label
&& size_needed
== 1)
23251 emit_label (label
);
23252 LABEL_NUSES (label
) = 1;
23254 promoted_val
= val_exp
;
23255 epilogue_size_needed
= 1;
23257 else if (label
== NULL_RTX
)
23258 epilogue_size_needed
= size_needed
;
23260 /* Step 3: Main loop. */
23266 gcc_unreachable ();
23268 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23269 count_exp
, QImode
, 1, expected_size
);
23272 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23273 count_exp
, word_mode
, 1, expected_size
);
23275 case unrolled_loop
:
23276 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23277 count_exp
, word_mode
, 4, expected_size
);
23279 case rep_prefix_8_byte
:
23280 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23283 case rep_prefix_4_byte
:
23284 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23287 case rep_prefix_1_byte
:
23288 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23292 /* Adjust properly the offset of src and dest memory for aliasing. */
23293 if (CONST_INT_P (count_exp
))
23294 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23295 (count
/ size_needed
) * size_needed
);
23297 dst
= change_address (dst
, BLKmode
, destreg
);
23299 /* Step 4: Epilogue to copy the remaining bytes. */
23303 /* When the main loop is done, COUNT_EXP might hold original count,
23304 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23305 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23306 bytes. Compensate if needed. */
23308 if (size_needed
< epilogue_size_needed
)
23311 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23312 GEN_INT (size_needed
- 1), count_exp
, 1,
23314 if (tmp
!= count_exp
)
23315 emit_move_insn (count_exp
, tmp
);
23317 emit_label (label
);
23318 LABEL_NUSES (label
) = 1;
23321 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23323 if (force_loopy_epilogue
)
23324 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23325 epilogue_size_needed
);
23327 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23328 epilogue_size_needed
);
23330 if (jump_around_label
)
23331 emit_label (jump_around_label
);
23335 /* Expand the appropriate insns for doing strlen if not just doing
23338 out = result, initialized with the start address
23339 align_rtx = alignment of the address.
23340 scratch = scratch register, initialized with the startaddress when
23341 not aligned, otherwise undefined
23343 This is just the body. It needs the initializations mentioned above and
23344 some address computing at the end. These things are done in i386.md. */
23347 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23351 rtx align_2_label
= NULL_RTX
;
23352 rtx align_3_label
= NULL_RTX
;
23353 rtx align_4_label
= gen_label_rtx ();
23354 rtx end_0_label
= gen_label_rtx ();
23356 rtx tmpreg
= gen_reg_rtx (SImode
);
23357 rtx scratch
= gen_reg_rtx (SImode
);
23361 if (CONST_INT_P (align_rtx
))
23362 align
= INTVAL (align_rtx
);
23364 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23366 /* Is there a known alignment and is it less than 4? */
23369 rtx scratch1
= gen_reg_rtx (Pmode
);
23370 emit_move_insn (scratch1
, out
);
23371 /* Is there a known alignment and is it not 2? */
23374 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23375 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23377 /* Leave just the 3 lower bits. */
23378 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23379 NULL_RTX
, 0, OPTAB_WIDEN
);
23381 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23382 Pmode
, 1, align_4_label
);
23383 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23384 Pmode
, 1, align_2_label
);
23385 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23386 Pmode
, 1, align_3_label
);
23390 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23391 check if is aligned to 4 - byte. */
23393 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23394 NULL_RTX
, 0, OPTAB_WIDEN
);
23396 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23397 Pmode
, 1, align_4_label
);
23400 mem
= change_address (src
, QImode
, out
);
23402 /* Now compare the bytes. */
23404 /* Compare the first n unaligned byte on a byte per byte basis. */
23405 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23406 QImode
, 1, end_0_label
);
23408 /* Increment the address. */
23409 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23411 /* Not needed with an alignment of 2 */
23414 emit_label (align_2_label
);
23416 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23419 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23421 emit_label (align_3_label
);
23424 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23427 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23430 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23431 align this loop. It gives only huge programs, but does not help to
23433 emit_label (align_4_label
);
23435 mem
= change_address (src
, SImode
, out
);
23436 emit_move_insn (scratch
, mem
);
23437 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23439 /* This formula yields a nonzero result iff one of the bytes is zero.
23440 This saves three branches inside loop and many cycles. */
23442 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23443 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23444 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23445 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23446 gen_int_mode (0x80808080, SImode
)));
23447 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23452 rtx reg
= gen_reg_rtx (SImode
);
23453 rtx reg2
= gen_reg_rtx (Pmode
);
23454 emit_move_insn (reg
, tmpreg
);
23455 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23457 /* If zero is not in the first two bytes, move two bytes forward. */
23458 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23459 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23460 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23461 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23462 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23465 /* Emit lea manually to avoid clobbering of flags. */
23466 emit_insn (gen_rtx_SET (SImode
, reg2
,
23467 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23469 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23470 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23471 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23472 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23478 rtx end_2_label
= gen_label_rtx ();
23479 /* Is zero in the first two bytes? */
23481 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23482 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23483 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23484 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23485 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23487 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23488 JUMP_LABEL (tmp
) = end_2_label
;
23490 /* Not in the first two. Move two bytes forward. */
23491 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23492 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23494 emit_label (end_2_label
);
23498 /* Avoid branch in fixing the byte. */
23499 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23500 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23501 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23502 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23503 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23505 emit_label (end_0_label
);
23508 /* Expand strlen. */
23511 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23513 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23515 /* The generic case of strlen expander is long. Avoid it's
23516 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23518 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23519 && !TARGET_INLINE_ALL_STRINGOPS
23520 && !optimize_insn_for_size_p ()
23521 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23524 addr
= force_reg (Pmode
, XEXP (src
, 0));
23525 scratch1
= gen_reg_rtx (Pmode
);
23527 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23528 && !optimize_insn_for_size_p ())
23530 /* Well it seems that some optimizer does not combine a call like
23531 foo(strlen(bar), strlen(bar));
23532 when the move and the subtraction is done here. It does calculate
23533 the length just once when these instructions are done inside of
23534 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23535 often used and I use one fewer register for the lifetime of
23536 output_strlen_unroll() this is better. */
23538 emit_move_insn (out
, addr
);
23540 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23542 /* strlensi_unroll_1 returns the address of the zero at the end of
23543 the string, like memchr(), so compute the length by subtracting
23544 the start address. */
23545 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23551 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23552 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23555 scratch2
= gen_reg_rtx (Pmode
);
23556 scratch3
= gen_reg_rtx (Pmode
);
23557 scratch4
= force_reg (Pmode
, constm1_rtx
);
23559 emit_move_insn (scratch3
, addr
);
23560 eoschar
= force_reg (QImode
, eoschar
);
23562 src
= replace_equiv_address_nv (src
, scratch3
);
23564 /* If .md starts supporting :P, this can be done in .md. */
23565 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23566 scratch4
), UNSPEC_SCAS
);
23567 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23568 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23569 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23574 /* For given symbol (function) construct code to compute address of it's PLT
23575 entry in large x86-64 PIC model. */
23577 construct_plt_address (rtx symbol
)
23581 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23582 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
23583 gcc_assert (Pmode
== DImode
);
23585 tmp
= gen_reg_rtx (Pmode
);
23586 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23588 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23589 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23594 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23596 rtx pop
, bool sibcall
)
23598 /* We need to represent that SI and DI registers are clobbered
23600 static int clobbered_registers
[] = {
23601 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23602 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23603 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23604 XMM15_REG
, SI_REG
, DI_REG
23606 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23607 rtx use
= NULL
, call
;
23608 unsigned int vec_len
;
23610 if (pop
== const0_rtx
)
23612 gcc_assert (!TARGET_64BIT
|| !pop
);
23614 if (TARGET_MACHO
&& !TARGET_64BIT
)
23617 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23618 fnaddr
= machopic_indirect_call_target (fnaddr
);
23623 /* Static functions and indirect calls don't need the pic register. */
23624 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23625 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23626 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23627 use_reg (&use
, pic_offset_table_rtx
);
23630 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23632 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23633 emit_move_insn (al
, callarg2
);
23634 use_reg (&use
, al
);
23637 if (ix86_cmodel
== CM_LARGE_PIC
23639 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23640 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23641 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23643 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23644 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23646 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
23647 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23651 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23653 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23654 vec
[vec_len
++] = call
;
23658 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23659 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23660 vec
[vec_len
++] = pop
;
23663 if (TARGET_64BIT_MS_ABI
23664 && (!callarg2
|| INTVAL (callarg2
) != -2))
23668 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23669 UNSPEC_MS_TO_SYSV_CALL
);
23671 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23673 = gen_rtx_CLOBBER (VOIDmode
,
23674 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23676 clobbered_registers
[i
]));
23680 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23681 call
= emit_call_insn (call
);
23683 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23688 /* Output the assembly for a call instruction. */
23691 ix86_output_call_insn (rtx insn
, rtx call_op
)
23693 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
23694 bool seh_nop_p
= false;
23697 if (SIBLING_CALL_P (insn
))
23701 /* SEH epilogue detection requires the indirect branch case
23702 to include REX.W. */
23703 else if (TARGET_SEH
)
23704 xasm
= "rex.W jmp %A0";
23708 output_asm_insn (xasm
, &call_op
);
23712 /* SEH unwinding can require an extra nop to be emitted in several
23713 circumstances. Determine if we have one of those. */
23718 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23720 /* If we get to another real insn, we don't need the nop. */
23724 /* If we get to the epilogue note, prevent a catch region from
23725 being adjacent to the standard epilogue sequence. If non-
23726 call-exceptions, we'll have done this during epilogue emission. */
23727 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23728 && !flag_non_call_exceptions
23729 && !can_throw_internal (insn
))
23736 /* If we didn't find a real insn following the call, prevent the
23737 unwinder from looking into the next function. */
23743 xasm
= "call\t%P0";
23745 xasm
= "call\t%A0";
23747 output_asm_insn (xasm
, &call_op
);
23755 /* Clear stack slot assignments remembered from previous functions.
23756 This is called from INIT_EXPANDERS once before RTL is emitted for each
23759 static struct machine_function
*
23760 ix86_init_machine_status (void)
23762 struct machine_function
*f
;
23764 f
= ggc_alloc_cleared_machine_function ();
23765 f
->use_fast_prologue_epilogue_nregs
= -1;
23766 f
->call_abi
= ix86_abi
;
23771 /* Return a MEM corresponding to a stack slot with mode MODE.
23772 Allocate a new slot if necessary.
23774 The RTL for a function can have several slots available: N is
23775 which slot to use. */
23778 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
23780 struct stack_local_entry
*s
;
23782 gcc_assert (n
< MAX_386_STACK_LOCALS
);
23784 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23785 if (s
->mode
== mode
&& s
->n
== n
)
23786 return validize_mem (copy_rtx (s
->rtl
));
23788 s
= ggc_alloc_stack_local_entry ();
23791 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
23793 s
->next
= ix86_stack_locals
;
23794 ix86_stack_locals
= s
;
23795 return validize_mem (s
->rtl
);
23799 ix86_instantiate_decls (void)
23801 struct stack_local_entry
*s
;
23803 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23804 if (s
->rtl
!= NULL_RTX
)
23805 instantiate_decl_rtl (s
->rtl
);
23808 /* Calculate the length of the memory address in the instruction encoding.
23809 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23810 or other prefixes. We never generate addr32 prefix for LEA insn. */
23813 memory_address_length (rtx addr
, bool lea
)
23815 struct ix86_address parts
;
23816 rtx base
, index
, disp
;
23820 if (GET_CODE (addr
) == PRE_DEC
23821 || GET_CODE (addr
) == POST_INC
23822 || GET_CODE (addr
) == PRE_MODIFY
23823 || GET_CODE (addr
) == POST_MODIFY
)
23826 ok
= ix86_decompose_address (addr
, &parts
);
23829 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
23831 /* If this is not LEA instruction, add the length of addr32 prefix. */
23832 if (TARGET_64BIT
&& !lea
23833 && (SImode_address_operand (addr
, VOIDmode
)
23834 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
23835 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
23839 index
= parts
.index
;
23842 if (base
&& GET_CODE (base
) == SUBREG
)
23843 base
= SUBREG_REG (base
);
23844 if (index
&& GET_CODE (index
) == SUBREG
)
23845 index
= SUBREG_REG (index
);
23847 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
23848 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
23851 - esp as the base always wants an index,
23852 - ebp as the base always wants a displacement,
23853 - r12 as the base always wants an index,
23854 - r13 as the base always wants a displacement. */
23856 /* Register Indirect. */
23857 if (base
&& !index
&& !disp
)
23859 /* esp (for its index) and ebp (for its displacement) need
23860 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23862 if (base
== arg_pointer_rtx
23863 || base
== frame_pointer_rtx
23864 || REGNO (base
) == SP_REG
23865 || REGNO (base
) == BP_REG
23866 || REGNO (base
) == R12_REG
23867 || REGNO (base
) == R13_REG
)
23871 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23872 is not disp32, but disp32(%rip), so for disp32
23873 SIB byte is needed, unless print_operand_address
23874 optimizes it into disp32(%rip) or (%rip) is implied
23876 else if (disp
&& !base
&& !index
)
23883 if (GET_CODE (disp
) == CONST
)
23884 symbol
= XEXP (disp
, 0);
23885 if (GET_CODE (symbol
) == PLUS
23886 && CONST_INT_P (XEXP (symbol
, 1)))
23887 symbol
= XEXP (symbol
, 0);
23889 if (GET_CODE (symbol
) != LABEL_REF
23890 && (GET_CODE (symbol
) != SYMBOL_REF
23891 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
23892 && (GET_CODE (symbol
) != UNSPEC
23893 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
23894 && XINT (symbol
, 1) != UNSPEC_PCREL
23895 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
23901 /* Find the length of the displacement constant. */
23904 if (base
&& satisfies_constraint_K (disp
))
23909 /* ebp always wants a displacement. Similarly r13. */
23910 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
23913 /* An index requires the two-byte modrm form.... */
23915 /* ...like esp (or r12), which always wants an index. */
23916 || base
== arg_pointer_rtx
23917 || base
== frame_pointer_rtx
23918 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
23925 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23926 is set, expect that insn have 8bit immediate alternative. */
23928 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
23932 extract_insn_cached (insn
);
23933 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23934 if (CONSTANT_P (recog_data
.operand
[i
]))
23936 enum attr_mode mode
= get_attr_mode (insn
);
23939 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
23941 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
23948 ival
= trunc_int_for_mode (ival
, HImode
);
23951 ival
= trunc_int_for_mode (ival
, SImode
);
23956 if (IN_RANGE (ival
, -128, 127))
23973 /* Immediates for DImode instructions are encoded
23974 as 32bit sign extended values. */
23979 fatal_insn ("unknown insn mode", insn
);
23985 /* Compute default value for "length_address" attribute. */
23987 ix86_attr_length_address_default (rtx insn
)
23991 if (get_attr_type (insn
) == TYPE_LEA
)
23993 rtx set
= PATTERN (insn
), addr
;
23995 if (GET_CODE (set
) == PARALLEL
)
23996 set
= XVECEXP (set
, 0, 0);
23998 gcc_assert (GET_CODE (set
) == SET
);
24000 addr
= SET_SRC (set
);
24002 return memory_address_length (addr
, true);
24005 extract_insn_cached (insn
);
24006 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24007 if (MEM_P (recog_data
.operand
[i
]))
24009 constrain_operands_cached (reload_completed
);
24010 if (which_alternative
!= -1)
24012 const char *constraints
= recog_data
.constraints
[i
];
24013 int alt
= which_alternative
;
24015 while (*constraints
== '=' || *constraints
== '+')
24018 while (*constraints
++ != ',')
24020 /* Skip ignored operands. */
24021 if (*constraints
== 'X')
24024 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
24029 /* Compute default value for "length_vex" attribute. It includes
24030 2 or 3 byte VEX prefix and 1 opcode byte. */
24033 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24037 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24038 byte VEX prefix. */
24039 if (!has_0f_opcode
|| has_vex_w
)
24042 /* We can always use 2 byte VEX prefix in 32bit. */
24046 extract_insn_cached (insn
);
24048 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24049 if (REG_P (recog_data
.operand
[i
]))
24051 /* REX.W bit uses 3 byte VEX prefix. */
24052 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24053 && GENERAL_REG_P (recog_data
.operand
[i
]))
24058 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24059 if (MEM_P (recog_data
.operand
[i
])
24060 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24067 /* Return the maximum number of instructions a cpu can issue. */
24070 ix86_issue_rate (void)
24074 case PROCESSOR_PENTIUM
:
24075 case PROCESSOR_ATOM
:
24077 case PROCESSOR_BTVER2
:
24080 case PROCESSOR_PENTIUMPRO
:
24081 case PROCESSOR_PENTIUM4
:
24082 case PROCESSOR_CORE2
:
24083 case PROCESSOR_COREI7
:
24084 case PROCESSOR_HASWELL
:
24085 case PROCESSOR_ATHLON
:
24087 case PROCESSOR_AMDFAM10
:
24088 case PROCESSOR_NOCONA
:
24089 case PROCESSOR_GENERIC32
:
24090 case PROCESSOR_GENERIC64
:
24091 case PROCESSOR_BDVER1
:
24092 case PROCESSOR_BDVER2
:
24093 case PROCESSOR_BDVER3
:
24094 case PROCESSOR_BTVER1
:
24102 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24103 by DEP_INSN and nothing set by DEP_INSN. */
24106 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24110 /* Simplify the test for uninteresting insns. */
24111 if (insn_type
!= TYPE_SETCC
24112 && insn_type
!= TYPE_ICMOV
24113 && insn_type
!= TYPE_FCMOV
24114 && insn_type
!= TYPE_IBR
)
24117 if ((set
= single_set (dep_insn
)) != 0)
24119 set
= SET_DEST (set
);
24122 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24123 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24124 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24125 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24127 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24128 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24133 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24136 /* This test is true if the dependent insn reads the flags but
24137 not any other potentially set register. */
24138 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24141 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24147 /* Return true iff USE_INSN has a memory address with operands set by
24151 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24154 extract_insn_cached (use_insn
);
24155 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24156 if (MEM_P (recog_data
.operand
[i
]))
24158 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24159 return modified_in_p (addr
, set_insn
) != 0;
24165 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24167 enum attr_type insn_type
, dep_insn_type
;
24168 enum attr_memory memory
;
24170 int dep_insn_code_number
;
24172 /* Anti and output dependencies have zero cost on all CPUs. */
24173 if (REG_NOTE_KIND (link
) != 0)
24176 dep_insn_code_number
= recog_memoized (dep_insn
);
24178 /* If we can't recognize the insns, we can't really do anything. */
24179 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24182 insn_type
= get_attr_type (insn
);
24183 dep_insn_type
= get_attr_type (dep_insn
);
24187 case PROCESSOR_PENTIUM
:
24188 /* Address Generation Interlock adds a cycle of latency. */
24189 if (insn_type
== TYPE_LEA
)
24191 rtx addr
= PATTERN (insn
);
24193 if (GET_CODE (addr
) == PARALLEL
)
24194 addr
= XVECEXP (addr
, 0, 0);
24196 gcc_assert (GET_CODE (addr
) == SET
);
24198 addr
= SET_SRC (addr
);
24199 if (modified_in_p (addr
, dep_insn
))
24202 else if (ix86_agi_dependent (dep_insn
, insn
))
24205 /* ??? Compares pair with jump/setcc. */
24206 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24209 /* Floating point stores require value to be ready one cycle earlier. */
24210 if (insn_type
== TYPE_FMOV
24211 && get_attr_memory (insn
) == MEMORY_STORE
24212 && !ix86_agi_dependent (dep_insn
, insn
))
24216 case PROCESSOR_PENTIUMPRO
:
24217 memory
= get_attr_memory (insn
);
24219 /* INT->FP conversion is expensive. */
24220 if (get_attr_fp_int_src (dep_insn
))
24223 /* There is one cycle extra latency between an FP op and a store. */
24224 if (insn_type
== TYPE_FMOV
24225 && (set
= single_set (dep_insn
)) != NULL_RTX
24226 && (set2
= single_set (insn
)) != NULL_RTX
24227 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24228 && MEM_P (SET_DEST (set2
)))
24231 /* Show ability of reorder buffer to hide latency of load by executing
24232 in parallel with previous instruction in case
24233 previous instruction is not needed to compute the address. */
24234 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24235 && !ix86_agi_dependent (dep_insn
, insn
))
24237 /* Claim moves to take one cycle, as core can issue one load
24238 at time and the next load can start cycle later. */
24239 if (dep_insn_type
== TYPE_IMOV
24240 || dep_insn_type
== TYPE_FMOV
)
24248 memory
= get_attr_memory (insn
);
24250 /* The esp dependency is resolved before the instruction is really
24252 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24253 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24256 /* INT->FP conversion is expensive. */
24257 if (get_attr_fp_int_src (dep_insn
))
24260 /* Show ability of reorder buffer to hide latency of load by executing
24261 in parallel with previous instruction in case
24262 previous instruction is not needed to compute the address. */
24263 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24264 && !ix86_agi_dependent (dep_insn
, insn
))
24266 /* Claim moves to take one cycle, as core can issue one load
24267 at time and the next load can start cycle later. */
24268 if (dep_insn_type
== TYPE_IMOV
24269 || dep_insn_type
== TYPE_FMOV
)
24278 case PROCESSOR_ATHLON
:
24280 case PROCESSOR_AMDFAM10
:
24281 case PROCESSOR_BDVER1
:
24282 case PROCESSOR_BDVER2
:
24283 case PROCESSOR_BDVER3
:
24284 case PROCESSOR_BTVER1
:
24285 case PROCESSOR_BTVER2
:
24286 case PROCESSOR_ATOM
:
24287 case PROCESSOR_GENERIC32
:
24288 case PROCESSOR_GENERIC64
:
24289 memory
= get_attr_memory (insn
);
24291 /* Show ability of reorder buffer to hide latency of load by executing
24292 in parallel with previous instruction in case
24293 previous instruction is not needed to compute the address. */
24294 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24295 && !ix86_agi_dependent (dep_insn
, insn
))
24297 enum attr_unit unit
= get_attr_unit (insn
);
24300 /* Because of the difference between the length of integer and
24301 floating unit pipeline preparation stages, the memory operands
24302 for floating point are cheaper.
24304 ??? For Athlon it the difference is most probably 2. */
24305 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24308 loadcost
= TARGET_ATHLON
? 2 : 0;
24310 if (cost
>= loadcost
)
24323 /* How many alternative schedules to try. This should be as wide as the
24324 scheduling freedom in the DFA, but no wider. Making this value too
24325 large results extra work for the scheduler. */
24328 ia32_multipass_dfa_lookahead (void)
24332 case PROCESSOR_PENTIUM
:
24335 case PROCESSOR_PENTIUMPRO
:
24339 case PROCESSOR_CORE2
:
24340 case PROCESSOR_COREI7
:
24341 case PROCESSOR_HASWELL
:
24342 case PROCESSOR_ATOM
:
24343 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24344 as many instructions can be executed on a cycle, i.e.,
24345 issue_rate. I wonder why tuning for many CPUs does not do this. */
24346 if (reload_completed
)
24347 return ix86_issue_rate ();
24348 /* Don't use lookahead for pre-reload schedule to save compile time. */
24356 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24357 execution. It is applied if
24358 (1) IMUL instruction is on the top of list;
24359 (2) There exists the only producer of independent IMUL instruction in
24361 (3) Put found producer on the top of ready list.
24362 Returns issue rate. */
24365 ix86_sched_reorder(FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
24366 int clock_var ATTRIBUTE_UNUSED
)
24368 static int issue_rate
= -1;
24369 int n_ready
= *pn_ready
;
24370 rtx insn
, insn1
, insn2
;
24372 sd_iterator_def sd_it
;
24376 /* Set up issue rate. */
24377 issue_rate
= ix86_issue_rate();
24379 /* Do reodering for Atom only. */
24380 if (ix86_tune
!= PROCESSOR_ATOM
)
24382 /* Do not perform ready list reodering for pre-reload schedule pass. */
24383 if (!reload_completed
)
24385 /* Nothing to do if ready list contains only 1 instruction. */
24389 /* Check that IMUL instruction is on the top of ready list. */
24390 insn
= ready
[n_ready
- 1];
24391 if (!NONDEBUG_INSN_P (insn
))
24393 insn
= PATTERN (insn
);
24394 if (GET_CODE (insn
) == PARALLEL
)
24395 insn
= XVECEXP (insn
, 0, 0);
24396 if (GET_CODE (insn
) != SET
)
24398 if (!(GET_CODE (SET_SRC (insn
)) == MULT
24399 && GET_MODE (SET_SRC (insn
)) == SImode
))
24402 /* Search for producer of independent IMUL instruction. */
24403 for (i
= n_ready
- 2; i
>= 0; i
--)
24406 if (!NONDEBUG_INSN_P (insn
))
24408 /* Skip IMUL instruction. */
24409 insn2
= PATTERN (insn
);
24410 if (GET_CODE (insn2
) == PARALLEL
)
24411 insn2
= XVECEXP (insn2
, 0, 0);
24412 if (GET_CODE (insn2
) == SET
24413 && GET_CODE (SET_SRC (insn2
)) == MULT
24414 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24417 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24420 con
= DEP_CON (dep
);
24421 if (!NONDEBUG_INSN_P (con
))
24423 insn1
= PATTERN (con
);
24424 if (GET_CODE (insn1
) == PARALLEL
)
24425 insn1
= XVECEXP (insn1
, 0, 0);
24427 if (GET_CODE (insn1
) == SET
24428 && GET_CODE (SET_SRC (insn1
)) == MULT
24429 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24431 sd_iterator_def sd_it1
;
24433 /* Check if there is no other dependee for IMUL. */
24435 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24438 pro
= DEP_PRO (dep1
);
24439 if (!NONDEBUG_INSN_P (pro
))
24452 return issue_rate
; /* Didn't find IMUL producer. */
24454 if (sched_verbose
> 1)
24455 fprintf(dump
, ";;\tatom sched_reorder: swap %d and %d insns\n",
24456 INSN_UID (ready
[index
]), INSN_UID (ready
[n_ready
- 1]));
24458 /* Put IMUL producer (ready[index]) at the top of ready list. */
24459 insn1
= ready
[index
];
24460 for (i
= index
; i
< n_ready
- 1; i
++)
24461 ready
[i
] = ready
[i
+ 1];
24462 ready
[n_ready
- 1] = insn1
;
24468 ix86_class_likely_spilled_p (reg_class_t
);
24470 /* Returns true if lhs of insn is HW function argument register and set up
24471 is_spilled to true if it is likely spilled HW register. */
24473 insn_is_function_arg (rtx insn
, bool* is_spilled
)
24477 if (!NONDEBUG_INSN_P (insn
))
24479 /* Call instructions are not movable, ignore it. */
24482 insn
= PATTERN (insn
);
24483 if (GET_CODE (insn
) == PARALLEL
)
24484 insn
= XVECEXP (insn
, 0, 0);
24485 if (GET_CODE (insn
) != SET
)
24487 dst
= SET_DEST (insn
);
24488 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
24489 && ix86_function_arg_regno_p (REGNO (dst
)))
24491 /* Is it likely spilled HW register? */
24492 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
24493 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
24494 *is_spilled
= true;
24500 /* Add output dependencies for chain of function adjacent arguments if only
24501 there is a move to likely spilled HW register. Return first argument
24502 if at least one dependence was added or NULL otherwise. */
24504 add_parameter_dependencies (rtx call
, rtx head
)
24508 rtx first_arg
= NULL
;
24509 bool is_spilled
= false;
24511 head
= PREV_INSN (head
);
24513 /* Find nearest to call argument passing instruction. */
24516 last
= PREV_INSN (last
);
24519 if (!NONDEBUG_INSN_P (last
))
24521 if (insn_is_function_arg (last
, &is_spilled
))
24529 insn
= PREV_INSN (last
);
24530 if (!INSN_P (insn
))
24534 if (!NONDEBUG_INSN_P (insn
))
24539 if (insn_is_function_arg (insn
, &is_spilled
))
24541 /* Add output depdendence between two function arguments if chain
24542 of output arguments contains likely spilled HW registers. */
24544 add_dependence (last
, insn
, REG_DEP_OUTPUT
);
24545 first_arg
= last
= insn
;
24555 /* Add output or anti dependency from insn to first_arg to restrict its code
24558 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
24563 set
= single_set (insn
);
24566 tmp
= SET_DEST (set
);
24569 /* Add output dependency to the first function argument. */
24570 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
24573 /* Add anti dependency. */
24574 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
24577 /* Avoid cross block motion of function argument through adding dependency
24578 from the first non-jump instruction in bb. */
24580 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
24582 rtx insn
= BB_END (bb
);
24586 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
24588 rtx set
= single_set (insn
);
24591 avoid_func_arg_motion (arg
, insn
);
24595 if (insn
== BB_HEAD (bb
))
24597 insn
= PREV_INSN (insn
);
24601 /* Hook for pre-reload schedule - avoid motion of function arguments
24602 passed in likely spilled HW registers. */
24604 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
24607 rtx first_arg
= NULL
;
24608 if (reload_completed
)
24610 while (head
!= tail
&& DEBUG_INSN_P (head
))
24611 head
= NEXT_INSN (head
);
24612 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
24613 if (INSN_P (insn
) && CALL_P (insn
))
24615 first_arg
= add_parameter_dependencies (insn
, head
);
24618 /* Add dependee for first argument to predecessors if only
24619 region contains more than one block. */
24620 basic_block bb
= BLOCK_FOR_INSN (insn
);
24621 int rgn
= CONTAINING_RGN (bb
->index
);
24622 int nr_blks
= RGN_NR_BLOCKS (rgn
);
24623 /* Skip trivial regions and region head blocks that can have
24624 predecessors outside of region. */
24625 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
24629 /* Assume that region is SCC, i.e. all immediate predecessors
24630 of non-head block are in the same region. */
24631 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
24633 /* Avoid creating of loop-carried dependencies through
24634 using topological odering in region. */
24635 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
24636 add_dependee_for_func_arg (first_arg
, e
->src
);
24644 else if (first_arg
)
24645 avoid_func_arg_motion (first_arg
, insn
);
24648 /* Hook for pre-reload schedule - set priority of moves from likely spilled
24649 HW registers to maximum, to schedule them at soon as possible. These are
24650 moves from function argument registers at the top of the function entry
24651 and moves from function return value registers after call. */
24653 ix86_adjust_priority (rtx insn
, int priority
)
24657 if (reload_completed
)
24660 if (!NONDEBUG_INSN_P (insn
))
24663 set
= single_set (insn
);
24666 rtx tmp
= SET_SRC (set
);
24668 && HARD_REGISTER_P (tmp
)
24669 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
24670 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
24671 return current_sched_info
->sched_max_insns_priority
;
24677 /* Model decoder of Core 2/i7.
24678 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24679 track the instruction fetch block boundaries and make sure that long
24680 (9+ bytes) instructions are assigned to D0. */
24682 /* Maximum length of an insn that can be handled by
24683 a secondary decoder unit. '8' for Core 2/i7. */
24684 static int core2i7_secondary_decoder_max_insn_size
;
24686 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24687 '16' for Core 2/i7. */
24688 static int core2i7_ifetch_block_size
;
24690 /* Maximum number of instructions decoder can handle per cycle.
24691 '6' for Core 2/i7. */
24692 static int core2i7_ifetch_block_max_insns
;
24694 typedef struct ix86_first_cycle_multipass_data_
*
24695 ix86_first_cycle_multipass_data_t
;
24696 typedef const struct ix86_first_cycle_multipass_data_
*
24697 const_ix86_first_cycle_multipass_data_t
;
24699 /* A variable to store target state across calls to max_issue within
24701 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
24702 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
24704 /* Initialize DATA. */
24706 core2i7_first_cycle_multipass_init (void *_data
)
24708 ix86_first_cycle_multipass_data_t data
24709 = (ix86_first_cycle_multipass_data_t
) _data
;
24711 data
->ifetch_block_len
= 0;
24712 data
->ifetch_block_n_insns
= 0;
24713 data
->ready_try_change
= NULL
;
24714 data
->ready_try_change_size
= 0;
24717 /* Advancing the cycle; reset ifetch block counts. */
24719 core2i7_dfa_post_advance_cycle (void)
24721 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24723 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24725 data
->ifetch_block_len
= 0;
24726 data
->ifetch_block_n_insns
= 0;
24729 static int min_insn_size (rtx
);
24731 /* Filter out insns from ready_try that the core will not be able to issue
24732 on current cycle due to decoder. */
24734 core2i7_first_cycle_multipass_filter_ready_try
24735 (const_ix86_first_cycle_multipass_data_t data
,
24736 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24743 if (ready_try
[n_ready
])
24746 insn
= get_ready_element (n_ready
);
24747 insn_size
= min_insn_size (insn
);
24749 if (/* If this is a too long an insn for a secondary decoder ... */
24750 (!first_cycle_insn_p
24751 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24752 /* ... or it would not fit into the ifetch block ... */
24753 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24754 /* ... or the decoder is full already ... */
24755 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24756 /* ... mask the insn out. */
24758 ready_try
[n_ready
] = 1;
24760 if (data
->ready_try_change
)
24761 bitmap_set_bit (data
->ready_try_change
, n_ready
);
24766 /* Prepare for a new round of multipass lookahead scheduling. */
24768 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24769 bool first_cycle_insn_p
)
24771 ix86_first_cycle_multipass_data_t data
24772 = (ix86_first_cycle_multipass_data_t
) _data
;
24773 const_ix86_first_cycle_multipass_data_t prev_data
24774 = ix86_first_cycle_multipass_data
;
24776 /* Restore the state from the end of the previous round. */
24777 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24778 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24780 /* Filter instructions that cannot be issued on current cycle due to
24781 decoder restrictions. */
24782 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24783 first_cycle_insn_p
);
24786 /* INSN is being issued in current solution. Account for its impact on
24787 the decoder model. */
24789 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24790 rtx insn
, const void *_prev_data
)
24792 ix86_first_cycle_multipass_data_t data
24793 = (ix86_first_cycle_multipass_data_t
) _data
;
24794 const_ix86_first_cycle_multipass_data_t prev_data
24795 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24797 int insn_size
= min_insn_size (insn
);
24799 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24800 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24801 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24802 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24804 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24805 if (!data
->ready_try_change
)
24807 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24808 data
->ready_try_change_size
= n_ready
;
24810 else if (data
->ready_try_change_size
< n_ready
)
24812 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24814 data
->ready_try_change_size
= n_ready
;
24816 bitmap_clear (data
->ready_try_change
);
24818 /* Filter out insns from ready_try that the core will not be able to issue
24819 on current cycle due to decoder. */
24820 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24824 /* Revert the effect on ready_try. */
24826 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24828 int n_ready ATTRIBUTE_UNUSED
)
24830 const_ix86_first_cycle_multipass_data_t data
24831 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24832 unsigned int i
= 0;
24833 sbitmap_iterator sbi
;
24835 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24836 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
24842 /* Save the result of multipass lookahead scheduling for the next round. */
24844 core2i7_first_cycle_multipass_end (const void *_data
)
24846 const_ix86_first_cycle_multipass_data_t data
24847 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24848 ix86_first_cycle_multipass_data_t next_data
24849 = ix86_first_cycle_multipass_data
;
24853 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24854 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24858 /* Deallocate target data. */
24860 core2i7_first_cycle_multipass_fini (void *_data
)
24862 ix86_first_cycle_multipass_data_t data
24863 = (ix86_first_cycle_multipass_data_t
) _data
;
24865 if (data
->ready_try_change
)
24867 sbitmap_free (data
->ready_try_change
);
24868 data
->ready_try_change
= NULL
;
24869 data
->ready_try_change_size
= 0;
24873 /* Prepare for scheduling pass. */
24875 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24876 int verbose ATTRIBUTE_UNUSED
,
24877 int max_uid ATTRIBUTE_UNUSED
)
24879 /* Install scheduling hooks for current CPU. Some of these hooks are used
24880 in time-critical parts of the scheduler, so we only set them up when
24881 they are actually used. */
24884 case PROCESSOR_CORE2
:
24885 case PROCESSOR_COREI7
:
24886 case PROCESSOR_HASWELL
:
24887 /* Do not perform multipass scheduling for pre-reload schedule
24888 to save compile time. */
24889 if (reload_completed
)
24891 targetm
.sched
.dfa_post_advance_cycle
24892 = core2i7_dfa_post_advance_cycle
;
24893 targetm
.sched
.first_cycle_multipass_init
24894 = core2i7_first_cycle_multipass_init
;
24895 targetm
.sched
.first_cycle_multipass_begin
24896 = core2i7_first_cycle_multipass_begin
;
24897 targetm
.sched
.first_cycle_multipass_issue
24898 = core2i7_first_cycle_multipass_issue
;
24899 targetm
.sched
.first_cycle_multipass_backtrack
24900 = core2i7_first_cycle_multipass_backtrack
;
24901 targetm
.sched
.first_cycle_multipass_end
24902 = core2i7_first_cycle_multipass_end
;
24903 targetm
.sched
.first_cycle_multipass_fini
24904 = core2i7_first_cycle_multipass_fini
;
24906 /* Set decoder parameters. */
24907 core2i7_secondary_decoder_max_insn_size
= 8;
24908 core2i7_ifetch_block_size
= 16;
24909 core2i7_ifetch_block_max_insns
= 6;
24912 /* ... Fall through ... */
24914 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24915 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24916 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24917 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24918 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24919 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24920 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24926 /* Compute the alignment given to a constant that is being placed in memory.
24927 EXP is the constant and ALIGN is the alignment that the object would
24929 The value of this function is used instead of that alignment to align
24933 ix86_constant_alignment (tree exp
, int align
)
24935 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24936 || TREE_CODE (exp
) == INTEGER_CST
)
24938 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24940 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24943 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24944 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24945 return BITS_PER_WORD
;
24950 /* Compute the alignment for a static variable.
24951 TYPE is the data type, and ALIGN is the alignment that
24952 the object would ordinarily have. The value of this function is used
24953 instead of that alignment to align the object. */
24956 ix86_data_alignment (tree type
, int align
)
24958 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24960 if (AGGREGATE_TYPE_P (type
)
24961 && TYPE_SIZE (type
)
24962 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24963 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24964 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24965 && align
< max_align
)
24968 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24969 to 16byte boundary. */
24972 if (AGGREGATE_TYPE_P (type
)
24973 && TYPE_SIZE (type
)
24974 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24975 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24976 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24980 if (TREE_CODE (type
) == ARRAY_TYPE
)
24982 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24984 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24987 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24990 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24992 if ((TYPE_MODE (type
) == XCmode
24993 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24996 else if ((TREE_CODE (type
) == RECORD_TYPE
24997 || TREE_CODE (type
) == UNION_TYPE
24998 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24999 && TYPE_FIELDS (type
))
25001 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25003 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25006 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25007 || TREE_CODE (type
) == INTEGER_TYPE
)
25009 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25011 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25018 /* Compute the alignment for a local variable or a stack slot. EXP is
25019 the data type or decl itself, MODE is the widest mode available and
25020 ALIGN is the alignment that the object would ordinarily have. The
25021 value of this macro is used instead of that alignment to align the
25025 ix86_local_alignment (tree exp
, enum machine_mode mode
,
25026 unsigned int align
)
25030 if (exp
&& DECL_P (exp
))
25032 type
= TREE_TYPE (exp
);
25041 /* Don't do dynamic stack realignment for long long objects with
25042 -mpreferred-stack-boundary=2. */
25045 && ix86_preferred_stack_boundary
< 64
25046 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25047 && (!type
|| !TYPE_USER_ALIGN (type
))
25048 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25051 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25052 register in MODE. We will return the largest alignment of XF
25056 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
25057 align
= GET_MODE_ALIGNMENT (DFmode
);
25061 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25062 to 16byte boundary. Exact wording is:
25064 An array uses the same alignment as its elements, except that a local or
25065 global array variable of length at least 16 bytes or
25066 a C99 variable-length array variable always has alignment of at least 16 bytes.
25068 This was added to allow use of aligned SSE instructions at arrays. This
25069 rule is meant for static storage (where compiler can not do the analysis
25070 by itself). We follow it for automatic variables only when convenient.
25071 We fully control everything in the function compiled and functions from
25072 other unit can not rely on the alignment.
25074 Exclude va_list type. It is the common case of local array where
25075 we can not benefit from the alignment. */
25076 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
25079 if (AGGREGATE_TYPE_P (type
)
25080 && (va_list_type_node
== NULL_TREE
25081 || (TYPE_MAIN_VARIANT (type
)
25082 != TYPE_MAIN_VARIANT (va_list_type_node
)))
25083 && TYPE_SIZE (type
)
25084 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25085 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
25086 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25089 if (TREE_CODE (type
) == ARRAY_TYPE
)
25091 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25093 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25096 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25098 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25100 if ((TYPE_MODE (type
) == XCmode
25101 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25104 else if ((TREE_CODE (type
) == RECORD_TYPE
25105 || TREE_CODE (type
) == UNION_TYPE
25106 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25107 && TYPE_FIELDS (type
))
25109 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25111 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25114 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25115 || TREE_CODE (type
) == INTEGER_TYPE
)
25118 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25120 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25126 /* Compute the minimum required alignment for dynamic stack realignment
25127 purposes for a local variable, parameter or a stack slot. EXP is
25128 the data type or decl itself, MODE is its mode and ALIGN is the
25129 alignment that the object would ordinarily have. */
25132 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
25133 unsigned int align
)
25137 if (exp
&& DECL_P (exp
))
25139 type
= TREE_TYPE (exp
);
25148 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
25151 /* Don't do dynamic stack realignment for long long objects with
25152 -mpreferred-stack-boundary=2. */
25153 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25154 && (!type
|| !TYPE_USER_ALIGN (type
))
25155 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25161 /* Find a location for the static chain incoming to a nested function.
25162 This is a register, unless all free registers are used by arguments. */
25165 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
25169 if (!DECL_STATIC_CHAIN (fndecl
))
25174 /* We always use R10 in 64-bit mode. */
25182 /* By default in 32-bit mode we use ECX to pass the static chain. */
25185 fntype
= TREE_TYPE (fndecl
);
25186 ccvt
= ix86_get_callcvt (fntype
);
25187 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
25189 /* Fastcall functions use ecx/edx for arguments, which leaves
25190 us with EAX for the static chain.
25191 Thiscall functions use ecx for arguments, which also
25192 leaves us with EAX for the static chain. */
25195 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
25197 /* Thiscall functions use ecx for arguments, which leaves
25198 us with EAX and EDX for the static chain.
25199 We are using for abi-compatibility EAX. */
25202 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
25204 /* For regparm 3, we have no free call-clobbered registers in
25205 which to store the static chain. In order to implement this,
25206 we have the trampoline push the static chain to the stack.
25207 However, we can't push a value below the return address when
25208 we call the nested function directly, so we have to use an
25209 alternate entry point. For this we use ESI, and have the
25210 alternate entry point push ESI, so that things appear the
25211 same once we're executing the nested function. */
25214 if (fndecl
== current_function_decl
)
25215 ix86_static_chain_on_stack
= true;
25216 return gen_frame_mem (SImode
,
25217 plus_constant (Pmode
,
25218 arg_pointer_rtx
, -8));
25224 return gen_rtx_REG (Pmode
, regno
);
25227 /* Emit RTL insns to initialize the variable parts of a trampoline.
25228 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25229 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25230 to be passed to the target function. */
25233 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
25239 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
25245 /* Load the function address to r11. Try to load address using
25246 the shorter movl instead of movabs. We may want to support
25247 movq for kernel mode, but kernel does not use trampolines at
25248 the moment. FNADDR is a 32bit address and may not be in
25249 DImode when ptr_mode == SImode. Always use movl in this
25251 if (ptr_mode
== SImode
25252 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
25254 fnaddr
= copy_addr_to_reg (fnaddr
);
25256 mem
= adjust_address (m_tramp
, HImode
, offset
);
25257 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
25259 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
25260 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
25265 mem
= adjust_address (m_tramp
, HImode
, offset
);
25266 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
25268 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
25269 emit_move_insn (mem
, fnaddr
);
25273 /* Load static chain using movabs to r10. Use the shorter movl
25274 instead of movabs when ptr_mode == SImode. */
25275 if (ptr_mode
== SImode
)
25286 mem
= adjust_address (m_tramp
, HImode
, offset
);
25287 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
25289 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
25290 emit_move_insn (mem
, chain_value
);
25293 /* Jump to r11; the last (unused) byte is a nop, only there to
25294 pad the write out to a single 32-bit store. */
25295 mem
= adjust_address (m_tramp
, SImode
, offset
);
25296 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
25303 /* Depending on the static chain location, either load a register
25304 with a constant, or push the constant to the stack. All of the
25305 instructions are the same size. */
25306 chain
= ix86_static_chain (fndecl
, true);
25309 switch (REGNO (chain
))
25312 opcode
= 0xb8; break;
25314 opcode
= 0xb9; break;
25316 gcc_unreachable ();
25322 mem
= adjust_address (m_tramp
, QImode
, offset
);
25323 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
25325 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25326 emit_move_insn (mem
, chain_value
);
25329 mem
= adjust_address (m_tramp
, QImode
, offset
);
25330 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
25332 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25334 /* Compute offset from the end of the jmp to the target function.
25335 In the case in which the trampoline stores the static chain on
25336 the stack, we need to skip the first insn which pushes the
25337 (call-saved) register static chain; this push is 1 byte. */
25339 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
25340 plus_constant (Pmode
, XEXP (m_tramp
, 0),
25341 offset
- (MEM_P (chain
) ? 1 : 0)),
25342 NULL_RTX
, 1, OPTAB_DIRECT
);
25343 emit_move_insn (mem
, disp
);
25346 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
25348 #ifdef HAVE_ENABLE_EXECUTE_STACK
25349 #ifdef CHECK_EXECUTE_STACK_ENABLED
25350 if (CHECK_EXECUTE_STACK_ENABLED
)
25352 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
25353 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
25357 /* The following file contains several enumerations and data structures
25358 built from the definitions in i386-builtin-types.def. */
25360 #include "i386-builtin-types.inc"
25362 /* Table for the ix86 builtin non-function types. */
25363 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
25365 /* Retrieve an element from the above table, building some of
25366 the types lazily. */
25369 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
25371 unsigned int index
;
25374 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
25376 type
= ix86_builtin_type_tab
[(int) tcode
];
25380 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
25381 if (tcode
<= IX86_BT_LAST_VECT
)
25383 enum machine_mode mode
;
25385 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
25386 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
25387 mode
= ix86_builtin_type_vect_mode
[index
];
25389 type
= build_vector_type_for_mode (itype
, mode
);
25395 index
= tcode
- IX86_BT_LAST_VECT
- 1;
25396 if (tcode
<= IX86_BT_LAST_PTR
)
25397 quals
= TYPE_UNQUALIFIED
;
25399 quals
= TYPE_QUAL_CONST
;
25401 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
25402 if (quals
!= TYPE_UNQUALIFIED
)
25403 itype
= build_qualified_type (itype
, quals
);
25405 type
= build_pointer_type (itype
);
25408 ix86_builtin_type_tab
[(int) tcode
] = type
;
25412 /* Table for the ix86 builtin function types. */
25413 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
25415 /* Retrieve an element from the above table, building some of
25416 the types lazily. */
25419 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
25423 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
25425 type
= ix86_builtin_func_type_tab
[(int) tcode
];
25429 if (tcode
<= IX86_BT_LAST_FUNC
)
25431 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
25432 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
25433 tree rtype
, atype
, args
= void_list_node
;
25436 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
25437 for (i
= after
- 1; i
> start
; --i
)
25439 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
25440 args
= tree_cons (NULL
, atype
, args
);
25443 type
= build_function_type (rtype
, args
);
25447 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
25448 enum ix86_builtin_func_type icode
;
25450 icode
= ix86_builtin_func_alias_base
[index
];
25451 type
= ix86_get_builtin_func_type (icode
);
25454 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
25459 /* Codes for all the SSE/MMX builtins. */
25462 IX86_BUILTIN_ADDPS
,
25463 IX86_BUILTIN_ADDSS
,
25464 IX86_BUILTIN_DIVPS
,
25465 IX86_BUILTIN_DIVSS
,
25466 IX86_BUILTIN_MULPS
,
25467 IX86_BUILTIN_MULSS
,
25468 IX86_BUILTIN_SUBPS
,
25469 IX86_BUILTIN_SUBSS
,
25471 IX86_BUILTIN_CMPEQPS
,
25472 IX86_BUILTIN_CMPLTPS
,
25473 IX86_BUILTIN_CMPLEPS
,
25474 IX86_BUILTIN_CMPGTPS
,
25475 IX86_BUILTIN_CMPGEPS
,
25476 IX86_BUILTIN_CMPNEQPS
,
25477 IX86_BUILTIN_CMPNLTPS
,
25478 IX86_BUILTIN_CMPNLEPS
,
25479 IX86_BUILTIN_CMPNGTPS
,
25480 IX86_BUILTIN_CMPNGEPS
,
25481 IX86_BUILTIN_CMPORDPS
,
25482 IX86_BUILTIN_CMPUNORDPS
,
25483 IX86_BUILTIN_CMPEQSS
,
25484 IX86_BUILTIN_CMPLTSS
,
25485 IX86_BUILTIN_CMPLESS
,
25486 IX86_BUILTIN_CMPNEQSS
,
25487 IX86_BUILTIN_CMPNLTSS
,
25488 IX86_BUILTIN_CMPNLESS
,
25489 IX86_BUILTIN_CMPNGTSS
,
25490 IX86_BUILTIN_CMPNGESS
,
25491 IX86_BUILTIN_CMPORDSS
,
25492 IX86_BUILTIN_CMPUNORDSS
,
25494 IX86_BUILTIN_COMIEQSS
,
25495 IX86_BUILTIN_COMILTSS
,
25496 IX86_BUILTIN_COMILESS
,
25497 IX86_BUILTIN_COMIGTSS
,
25498 IX86_BUILTIN_COMIGESS
,
25499 IX86_BUILTIN_COMINEQSS
,
25500 IX86_BUILTIN_UCOMIEQSS
,
25501 IX86_BUILTIN_UCOMILTSS
,
25502 IX86_BUILTIN_UCOMILESS
,
25503 IX86_BUILTIN_UCOMIGTSS
,
25504 IX86_BUILTIN_UCOMIGESS
,
25505 IX86_BUILTIN_UCOMINEQSS
,
25507 IX86_BUILTIN_CVTPI2PS
,
25508 IX86_BUILTIN_CVTPS2PI
,
25509 IX86_BUILTIN_CVTSI2SS
,
25510 IX86_BUILTIN_CVTSI642SS
,
25511 IX86_BUILTIN_CVTSS2SI
,
25512 IX86_BUILTIN_CVTSS2SI64
,
25513 IX86_BUILTIN_CVTTPS2PI
,
25514 IX86_BUILTIN_CVTTSS2SI
,
25515 IX86_BUILTIN_CVTTSS2SI64
,
25517 IX86_BUILTIN_MAXPS
,
25518 IX86_BUILTIN_MAXSS
,
25519 IX86_BUILTIN_MINPS
,
25520 IX86_BUILTIN_MINSS
,
25522 IX86_BUILTIN_LOADUPS
,
25523 IX86_BUILTIN_STOREUPS
,
25524 IX86_BUILTIN_MOVSS
,
25526 IX86_BUILTIN_MOVHLPS
,
25527 IX86_BUILTIN_MOVLHPS
,
25528 IX86_BUILTIN_LOADHPS
,
25529 IX86_BUILTIN_LOADLPS
,
25530 IX86_BUILTIN_STOREHPS
,
25531 IX86_BUILTIN_STORELPS
,
25533 IX86_BUILTIN_MASKMOVQ
,
25534 IX86_BUILTIN_MOVMSKPS
,
25535 IX86_BUILTIN_PMOVMSKB
,
25537 IX86_BUILTIN_MOVNTPS
,
25538 IX86_BUILTIN_MOVNTQ
,
25540 IX86_BUILTIN_LOADDQU
,
25541 IX86_BUILTIN_STOREDQU
,
25543 IX86_BUILTIN_PACKSSWB
,
25544 IX86_BUILTIN_PACKSSDW
,
25545 IX86_BUILTIN_PACKUSWB
,
25547 IX86_BUILTIN_PADDB
,
25548 IX86_BUILTIN_PADDW
,
25549 IX86_BUILTIN_PADDD
,
25550 IX86_BUILTIN_PADDQ
,
25551 IX86_BUILTIN_PADDSB
,
25552 IX86_BUILTIN_PADDSW
,
25553 IX86_BUILTIN_PADDUSB
,
25554 IX86_BUILTIN_PADDUSW
,
25555 IX86_BUILTIN_PSUBB
,
25556 IX86_BUILTIN_PSUBW
,
25557 IX86_BUILTIN_PSUBD
,
25558 IX86_BUILTIN_PSUBQ
,
25559 IX86_BUILTIN_PSUBSB
,
25560 IX86_BUILTIN_PSUBSW
,
25561 IX86_BUILTIN_PSUBUSB
,
25562 IX86_BUILTIN_PSUBUSW
,
25565 IX86_BUILTIN_PANDN
,
25569 IX86_BUILTIN_PAVGB
,
25570 IX86_BUILTIN_PAVGW
,
25572 IX86_BUILTIN_PCMPEQB
,
25573 IX86_BUILTIN_PCMPEQW
,
25574 IX86_BUILTIN_PCMPEQD
,
25575 IX86_BUILTIN_PCMPGTB
,
25576 IX86_BUILTIN_PCMPGTW
,
25577 IX86_BUILTIN_PCMPGTD
,
25579 IX86_BUILTIN_PMADDWD
,
25581 IX86_BUILTIN_PMAXSW
,
25582 IX86_BUILTIN_PMAXUB
,
25583 IX86_BUILTIN_PMINSW
,
25584 IX86_BUILTIN_PMINUB
,
25586 IX86_BUILTIN_PMULHUW
,
25587 IX86_BUILTIN_PMULHW
,
25588 IX86_BUILTIN_PMULLW
,
25590 IX86_BUILTIN_PSADBW
,
25591 IX86_BUILTIN_PSHUFW
,
25593 IX86_BUILTIN_PSLLW
,
25594 IX86_BUILTIN_PSLLD
,
25595 IX86_BUILTIN_PSLLQ
,
25596 IX86_BUILTIN_PSRAW
,
25597 IX86_BUILTIN_PSRAD
,
25598 IX86_BUILTIN_PSRLW
,
25599 IX86_BUILTIN_PSRLD
,
25600 IX86_BUILTIN_PSRLQ
,
25601 IX86_BUILTIN_PSLLWI
,
25602 IX86_BUILTIN_PSLLDI
,
25603 IX86_BUILTIN_PSLLQI
,
25604 IX86_BUILTIN_PSRAWI
,
25605 IX86_BUILTIN_PSRADI
,
25606 IX86_BUILTIN_PSRLWI
,
25607 IX86_BUILTIN_PSRLDI
,
25608 IX86_BUILTIN_PSRLQI
,
25610 IX86_BUILTIN_PUNPCKHBW
,
25611 IX86_BUILTIN_PUNPCKHWD
,
25612 IX86_BUILTIN_PUNPCKHDQ
,
25613 IX86_BUILTIN_PUNPCKLBW
,
25614 IX86_BUILTIN_PUNPCKLWD
,
25615 IX86_BUILTIN_PUNPCKLDQ
,
25617 IX86_BUILTIN_SHUFPS
,
25619 IX86_BUILTIN_RCPPS
,
25620 IX86_BUILTIN_RCPSS
,
25621 IX86_BUILTIN_RSQRTPS
,
25622 IX86_BUILTIN_RSQRTPS_NR
,
25623 IX86_BUILTIN_RSQRTSS
,
25624 IX86_BUILTIN_RSQRTF
,
25625 IX86_BUILTIN_SQRTPS
,
25626 IX86_BUILTIN_SQRTPS_NR
,
25627 IX86_BUILTIN_SQRTSS
,
25629 IX86_BUILTIN_UNPCKHPS
,
25630 IX86_BUILTIN_UNPCKLPS
,
25632 IX86_BUILTIN_ANDPS
,
25633 IX86_BUILTIN_ANDNPS
,
25635 IX86_BUILTIN_XORPS
,
25638 IX86_BUILTIN_LDMXCSR
,
25639 IX86_BUILTIN_STMXCSR
,
25640 IX86_BUILTIN_SFENCE
,
25642 IX86_BUILTIN_FXSAVE
,
25643 IX86_BUILTIN_FXRSTOR
,
25644 IX86_BUILTIN_FXSAVE64
,
25645 IX86_BUILTIN_FXRSTOR64
,
25647 IX86_BUILTIN_XSAVE
,
25648 IX86_BUILTIN_XRSTOR
,
25649 IX86_BUILTIN_XSAVE64
,
25650 IX86_BUILTIN_XRSTOR64
,
25652 IX86_BUILTIN_XSAVEOPT
,
25653 IX86_BUILTIN_XSAVEOPT64
,
25655 /* 3DNow! Original */
25656 IX86_BUILTIN_FEMMS
,
25657 IX86_BUILTIN_PAVGUSB
,
25658 IX86_BUILTIN_PF2ID
,
25659 IX86_BUILTIN_PFACC
,
25660 IX86_BUILTIN_PFADD
,
25661 IX86_BUILTIN_PFCMPEQ
,
25662 IX86_BUILTIN_PFCMPGE
,
25663 IX86_BUILTIN_PFCMPGT
,
25664 IX86_BUILTIN_PFMAX
,
25665 IX86_BUILTIN_PFMIN
,
25666 IX86_BUILTIN_PFMUL
,
25667 IX86_BUILTIN_PFRCP
,
25668 IX86_BUILTIN_PFRCPIT1
,
25669 IX86_BUILTIN_PFRCPIT2
,
25670 IX86_BUILTIN_PFRSQIT1
,
25671 IX86_BUILTIN_PFRSQRT
,
25672 IX86_BUILTIN_PFSUB
,
25673 IX86_BUILTIN_PFSUBR
,
25674 IX86_BUILTIN_PI2FD
,
25675 IX86_BUILTIN_PMULHRW
,
25677 /* 3DNow! Athlon Extensions */
25678 IX86_BUILTIN_PF2IW
,
25679 IX86_BUILTIN_PFNACC
,
25680 IX86_BUILTIN_PFPNACC
,
25681 IX86_BUILTIN_PI2FW
,
25682 IX86_BUILTIN_PSWAPDSI
,
25683 IX86_BUILTIN_PSWAPDSF
,
25686 IX86_BUILTIN_ADDPD
,
25687 IX86_BUILTIN_ADDSD
,
25688 IX86_BUILTIN_DIVPD
,
25689 IX86_BUILTIN_DIVSD
,
25690 IX86_BUILTIN_MULPD
,
25691 IX86_BUILTIN_MULSD
,
25692 IX86_BUILTIN_SUBPD
,
25693 IX86_BUILTIN_SUBSD
,
25695 IX86_BUILTIN_CMPEQPD
,
25696 IX86_BUILTIN_CMPLTPD
,
25697 IX86_BUILTIN_CMPLEPD
,
25698 IX86_BUILTIN_CMPGTPD
,
25699 IX86_BUILTIN_CMPGEPD
,
25700 IX86_BUILTIN_CMPNEQPD
,
25701 IX86_BUILTIN_CMPNLTPD
,
25702 IX86_BUILTIN_CMPNLEPD
,
25703 IX86_BUILTIN_CMPNGTPD
,
25704 IX86_BUILTIN_CMPNGEPD
,
25705 IX86_BUILTIN_CMPORDPD
,
25706 IX86_BUILTIN_CMPUNORDPD
,
25707 IX86_BUILTIN_CMPEQSD
,
25708 IX86_BUILTIN_CMPLTSD
,
25709 IX86_BUILTIN_CMPLESD
,
25710 IX86_BUILTIN_CMPNEQSD
,
25711 IX86_BUILTIN_CMPNLTSD
,
25712 IX86_BUILTIN_CMPNLESD
,
25713 IX86_BUILTIN_CMPORDSD
,
25714 IX86_BUILTIN_CMPUNORDSD
,
25716 IX86_BUILTIN_COMIEQSD
,
25717 IX86_BUILTIN_COMILTSD
,
25718 IX86_BUILTIN_COMILESD
,
25719 IX86_BUILTIN_COMIGTSD
,
25720 IX86_BUILTIN_COMIGESD
,
25721 IX86_BUILTIN_COMINEQSD
,
25722 IX86_BUILTIN_UCOMIEQSD
,
25723 IX86_BUILTIN_UCOMILTSD
,
25724 IX86_BUILTIN_UCOMILESD
,
25725 IX86_BUILTIN_UCOMIGTSD
,
25726 IX86_BUILTIN_UCOMIGESD
,
25727 IX86_BUILTIN_UCOMINEQSD
,
25729 IX86_BUILTIN_MAXPD
,
25730 IX86_BUILTIN_MAXSD
,
25731 IX86_BUILTIN_MINPD
,
25732 IX86_BUILTIN_MINSD
,
25734 IX86_BUILTIN_ANDPD
,
25735 IX86_BUILTIN_ANDNPD
,
25737 IX86_BUILTIN_XORPD
,
25739 IX86_BUILTIN_SQRTPD
,
25740 IX86_BUILTIN_SQRTSD
,
25742 IX86_BUILTIN_UNPCKHPD
,
25743 IX86_BUILTIN_UNPCKLPD
,
25745 IX86_BUILTIN_SHUFPD
,
25747 IX86_BUILTIN_LOADUPD
,
25748 IX86_BUILTIN_STOREUPD
,
25749 IX86_BUILTIN_MOVSD
,
25751 IX86_BUILTIN_LOADHPD
,
25752 IX86_BUILTIN_LOADLPD
,
25754 IX86_BUILTIN_CVTDQ2PD
,
25755 IX86_BUILTIN_CVTDQ2PS
,
25757 IX86_BUILTIN_CVTPD2DQ
,
25758 IX86_BUILTIN_CVTPD2PI
,
25759 IX86_BUILTIN_CVTPD2PS
,
25760 IX86_BUILTIN_CVTTPD2DQ
,
25761 IX86_BUILTIN_CVTTPD2PI
,
25763 IX86_BUILTIN_CVTPI2PD
,
25764 IX86_BUILTIN_CVTSI2SD
,
25765 IX86_BUILTIN_CVTSI642SD
,
25767 IX86_BUILTIN_CVTSD2SI
,
25768 IX86_BUILTIN_CVTSD2SI64
,
25769 IX86_BUILTIN_CVTSD2SS
,
25770 IX86_BUILTIN_CVTSS2SD
,
25771 IX86_BUILTIN_CVTTSD2SI
,
25772 IX86_BUILTIN_CVTTSD2SI64
,
25774 IX86_BUILTIN_CVTPS2DQ
,
25775 IX86_BUILTIN_CVTPS2PD
,
25776 IX86_BUILTIN_CVTTPS2DQ
,
25778 IX86_BUILTIN_MOVNTI
,
25779 IX86_BUILTIN_MOVNTI64
,
25780 IX86_BUILTIN_MOVNTPD
,
25781 IX86_BUILTIN_MOVNTDQ
,
25783 IX86_BUILTIN_MOVQ128
,
25786 IX86_BUILTIN_MASKMOVDQU
,
25787 IX86_BUILTIN_MOVMSKPD
,
25788 IX86_BUILTIN_PMOVMSKB128
,
25790 IX86_BUILTIN_PACKSSWB128
,
25791 IX86_BUILTIN_PACKSSDW128
,
25792 IX86_BUILTIN_PACKUSWB128
,
25794 IX86_BUILTIN_PADDB128
,
25795 IX86_BUILTIN_PADDW128
,
25796 IX86_BUILTIN_PADDD128
,
25797 IX86_BUILTIN_PADDQ128
,
25798 IX86_BUILTIN_PADDSB128
,
25799 IX86_BUILTIN_PADDSW128
,
25800 IX86_BUILTIN_PADDUSB128
,
25801 IX86_BUILTIN_PADDUSW128
,
25802 IX86_BUILTIN_PSUBB128
,
25803 IX86_BUILTIN_PSUBW128
,
25804 IX86_BUILTIN_PSUBD128
,
25805 IX86_BUILTIN_PSUBQ128
,
25806 IX86_BUILTIN_PSUBSB128
,
25807 IX86_BUILTIN_PSUBSW128
,
25808 IX86_BUILTIN_PSUBUSB128
,
25809 IX86_BUILTIN_PSUBUSW128
,
25811 IX86_BUILTIN_PAND128
,
25812 IX86_BUILTIN_PANDN128
,
25813 IX86_BUILTIN_POR128
,
25814 IX86_BUILTIN_PXOR128
,
25816 IX86_BUILTIN_PAVGB128
,
25817 IX86_BUILTIN_PAVGW128
,
25819 IX86_BUILTIN_PCMPEQB128
,
25820 IX86_BUILTIN_PCMPEQW128
,
25821 IX86_BUILTIN_PCMPEQD128
,
25822 IX86_BUILTIN_PCMPGTB128
,
25823 IX86_BUILTIN_PCMPGTW128
,
25824 IX86_BUILTIN_PCMPGTD128
,
25826 IX86_BUILTIN_PMADDWD128
,
25828 IX86_BUILTIN_PMAXSW128
,
25829 IX86_BUILTIN_PMAXUB128
,
25830 IX86_BUILTIN_PMINSW128
,
25831 IX86_BUILTIN_PMINUB128
,
25833 IX86_BUILTIN_PMULUDQ
,
25834 IX86_BUILTIN_PMULUDQ128
,
25835 IX86_BUILTIN_PMULHUW128
,
25836 IX86_BUILTIN_PMULHW128
,
25837 IX86_BUILTIN_PMULLW128
,
25839 IX86_BUILTIN_PSADBW128
,
25840 IX86_BUILTIN_PSHUFHW
,
25841 IX86_BUILTIN_PSHUFLW
,
25842 IX86_BUILTIN_PSHUFD
,
25844 IX86_BUILTIN_PSLLDQI128
,
25845 IX86_BUILTIN_PSLLWI128
,
25846 IX86_BUILTIN_PSLLDI128
,
25847 IX86_BUILTIN_PSLLQI128
,
25848 IX86_BUILTIN_PSRAWI128
,
25849 IX86_BUILTIN_PSRADI128
,
25850 IX86_BUILTIN_PSRLDQI128
,
25851 IX86_BUILTIN_PSRLWI128
,
25852 IX86_BUILTIN_PSRLDI128
,
25853 IX86_BUILTIN_PSRLQI128
,
25855 IX86_BUILTIN_PSLLDQ128
,
25856 IX86_BUILTIN_PSLLW128
,
25857 IX86_BUILTIN_PSLLD128
,
25858 IX86_BUILTIN_PSLLQ128
,
25859 IX86_BUILTIN_PSRAW128
,
25860 IX86_BUILTIN_PSRAD128
,
25861 IX86_BUILTIN_PSRLW128
,
25862 IX86_BUILTIN_PSRLD128
,
25863 IX86_BUILTIN_PSRLQ128
,
25865 IX86_BUILTIN_PUNPCKHBW128
,
25866 IX86_BUILTIN_PUNPCKHWD128
,
25867 IX86_BUILTIN_PUNPCKHDQ128
,
25868 IX86_BUILTIN_PUNPCKHQDQ128
,
25869 IX86_BUILTIN_PUNPCKLBW128
,
25870 IX86_BUILTIN_PUNPCKLWD128
,
25871 IX86_BUILTIN_PUNPCKLDQ128
,
25872 IX86_BUILTIN_PUNPCKLQDQ128
,
25874 IX86_BUILTIN_CLFLUSH
,
25875 IX86_BUILTIN_MFENCE
,
25876 IX86_BUILTIN_LFENCE
,
25877 IX86_BUILTIN_PAUSE
,
25879 IX86_BUILTIN_BSRSI
,
25880 IX86_BUILTIN_BSRDI
,
25881 IX86_BUILTIN_RDPMC
,
25882 IX86_BUILTIN_RDTSC
,
25883 IX86_BUILTIN_RDTSCP
,
25884 IX86_BUILTIN_ROLQI
,
25885 IX86_BUILTIN_ROLHI
,
25886 IX86_BUILTIN_RORQI
,
25887 IX86_BUILTIN_RORHI
,
25890 IX86_BUILTIN_ADDSUBPS
,
25891 IX86_BUILTIN_HADDPS
,
25892 IX86_BUILTIN_HSUBPS
,
25893 IX86_BUILTIN_MOVSHDUP
,
25894 IX86_BUILTIN_MOVSLDUP
,
25895 IX86_BUILTIN_ADDSUBPD
,
25896 IX86_BUILTIN_HADDPD
,
25897 IX86_BUILTIN_HSUBPD
,
25898 IX86_BUILTIN_LDDQU
,
25900 IX86_BUILTIN_MONITOR
,
25901 IX86_BUILTIN_MWAIT
,
25904 IX86_BUILTIN_PHADDW
,
25905 IX86_BUILTIN_PHADDD
,
25906 IX86_BUILTIN_PHADDSW
,
25907 IX86_BUILTIN_PHSUBW
,
25908 IX86_BUILTIN_PHSUBD
,
25909 IX86_BUILTIN_PHSUBSW
,
25910 IX86_BUILTIN_PMADDUBSW
,
25911 IX86_BUILTIN_PMULHRSW
,
25912 IX86_BUILTIN_PSHUFB
,
25913 IX86_BUILTIN_PSIGNB
,
25914 IX86_BUILTIN_PSIGNW
,
25915 IX86_BUILTIN_PSIGND
,
25916 IX86_BUILTIN_PALIGNR
,
25917 IX86_BUILTIN_PABSB
,
25918 IX86_BUILTIN_PABSW
,
25919 IX86_BUILTIN_PABSD
,
25921 IX86_BUILTIN_PHADDW128
,
25922 IX86_BUILTIN_PHADDD128
,
25923 IX86_BUILTIN_PHADDSW128
,
25924 IX86_BUILTIN_PHSUBW128
,
25925 IX86_BUILTIN_PHSUBD128
,
25926 IX86_BUILTIN_PHSUBSW128
,
25927 IX86_BUILTIN_PMADDUBSW128
,
25928 IX86_BUILTIN_PMULHRSW128
,
25929 IX86_BUILTIN_PSHUFB128
,
25930 IX86_BUILTIN_PSIGNB128
,
25931 IX86_BUILTIN_PSIGNW128
,
25932 IX86_BUILTIN_PSIGND128
,
25933 IX86_BUILTIN_PALIGNR128
,
25934 IX86_BUILTIN_PABSB128
,
25935 IX86_BUILTIN_PABSW128
,
25936 IX86_BUILTIN_PABSD128
,
25938 /* AMDFAM10 - SSE4A New Instructions. */
25939 IX86_BUILTIN_MOVNTSD
,
25940 IX86_BUILTIN_MOVNTSS
,
25941 IX86_BUILTIN_EXTRQI
,
25942 IX86_BUILTIN_EXTRQ
,
25943 IX86_BUILTIN_INSERTQI
,
25944 IX86_BUILTIN_INSERTQ
,
25947 IX86_BUILTIN_BLENDPD
,
25948 IX86_BUILTIN_BLENDPS
,
25949 IX86_BUILTIN_BLENDVPD
,
25950 IX86_BUILTIN_BLENDVPS
,
25951 IX86_BUILTIN_PBLENDVB128
,
25952 IX86_BUILTIN_PBLENDW128
,
25957 IX86_BUILTIN_INSERTPS128
,
25959 IX86_BUILTIN_MOVNTDQA
,
25960 IX86_BUILTIN_MPSADBW128
,
25961 IX86_BUILTIN_PACKUSDW128
,
25962 IX86_BUILTIN_PCMPEQQ
,
25963 IX86_BUILTIN_PHMINPOSUW128
,
25965 IX86_BUILTIN_PMAXSB128
,
25966 IX86_BUILTIN_PMAXSD128
,
25967 IX86_BUILTIN_PMAXUD128
,
25968 IX86_BUILTIN_PMAXUW128
,
25970 IX86_BUILTIN_PMINSB128
,
25971 IX86_BUILTIN_PMINSD128
,
25972 IX86_BUILTIN_PMINUD128
,
25973 IX86_BUILTIN_PMINUW128
,
25975 IX86_BUILTIN_PMOVSXBW128
,
25976 IX86_BUILTIN_PMOVSXBD128
,
25977 IX86_BUILTIN_PMOVSXBQ128
,
25978 IX86_BUILTIN_PMOVSXWD128
,
25979 IX86_BUILTIN_PMOVSXWQ128
,
25980 IX86_BUILTIN_PMOVSXDQ128
,
25982 IX86_BUILTIN_PMOVZXBW128
,
25983 IX86_BUILTIN_PMOVZXBD128
,
25984 IX86_BUILTIN_PMOVZXBQ128
,
25985 IX86_BUILTIN_PMOVZXWD128
,
25986 IX86_BUILTIN_PMOVZXWQ128
,
25987 IX86_BUILTIN_PMOVZXDQ128
,
25989 IX86_BUILTIN_PMULDQ128
,
25990 IX86_BUILTIN_PMULLD128
,
25992 IX86_BUILTIN_ROUNDSD
,
25993 IX86_BUILTIN_ROUNDSS
,
25995 IX86_BUILTIN_ROUNDPD
,
25996 IX86_BUILTIN_ROUNDPS
,
25998 IX86_BUILTIN_FLOORPD
,
25999 IX86_BUILTIN_CEILPD
,
26000 IX86_BUILTIN_TRUNCPD
,
26001 IX86_BUILTIN_RINTPD
,
26002 IX86_BUILTIN_ROUNDPD_AZ
,
26004 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
26005 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
26006 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
26008 IX86_BUILTIN_FLOORPS
,
26009 IX86_BUILTIN_CEILPS
,
26010 IX86_BUILTIN_TRUNCPS
,
26011 IX86_BUILTIN_RINTPS
,
26012 IX86_BUILTIN_ROUNDPS_AZ
,
26014 IX86_BUILTIN_FLOORPS_SFIX
,
26015 IX86_BUILTIN_CEILPS_SFIX
,
26016 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
26018 IX86_BUILTIN_PTESTZ
,
26019 IX86_BUILTIN_PTESTC
,
26020 IX86_BUILTIN_PTESTNZC
,
26022 IX86_BUILTIN_VEC_INIT_V2SI
,
26023 IX86_BUILTIN_VEC_INIT_V4HI
,
26024 IX86_BUILTIN_VEC_INIT_V8QI
,
26025 IX86_BUILTIN_VEC_EXT_V2DF
,
26026 IX86_BUILTIN_VEC_EXT_V2DI
,
26027 IX86_BUILTIN_VEC_EXT_V4SF
,
26028 IX86_BUILTIN_VEC_EXT_V4SI
,
26029 IX86_BUILTIN_VEC_EXT_V8HI
,
26030 IX86_BUILTIN_VEC_EXT_V2SI
,
26031 IX86_BUILTIN_VEC_EXT_V4HI
,
26032 IX86_BUILTIN_VEC_EXT_V16QI
,
26033 IX86_BUILTIN_VEC_SET_V2DI
,
26034 IX86_BUILTIN_VEC_SET_V4SF
,
26035 IX86_BUILTIN_VEC_SET_V4SI
,
26036 IX86_BUILTIN_VEC_SET_V8HI
,
26037 IX86_BUILTIN_VEC_SET_V4HI
,
26038 IX86_BUILTIN_VEC_SET_V16QI
,
26040 IX86_BUILTIN_VEC_PACK_SFIX
,
26041 IX86_BUILTIN_VEC_PACK_SFIX256
,
26044 IX86_BUILTIN_CRC32QI
,
26045 IX86_BUILTIN_CRC32HI
,
26046 IX86_BUILTIN_CRC32SI
,
26047 IX86_BUILTIN_CRC32DI
,
26049 IX86_BUILTIN_PCMPESTRI128
,
26050 IX86_BUILTIN_PCMPESTRM128
,
26051 IX86_BUILTIN_PCMPESTRA128
,
26052 IX86_BUILTIN_PCMPESTRC128
,
26053 IX86_BUILTIN_PCMPESTRO128
,
26054 IX86_BUILTIN_PCMPESTRS128
,
26055 IX86_BUILTIN_PCMPESTRZ128
,
26056 IX86_BUILTIN_PCMPISTRI128
,
26057 IX86_BUILTIN_PCMPISTRM128
,
26058 IX86_BUILTIN_PCMPISTRA128
,
26059 IX86_BUILTIN_PCMPISTRC128
,
26060 IX86_BUILTIN_PCMPISTRO128
,
26061 IX86_BUILTIN_PCMPISTRS128
,
26062 IX86_BUILTIN_PCMPISTRZ128
,
26064 IX86_BUILTIN_PCMPGTQ
,
26066 /* AES instructions */
26067 IX86_BUILTIN_AESENC128
,
26068 IX86_BUILTIN_AESENCLAST128
,
26069 IX86_BUILTIN_AESDEC128
,
26070 IX86_BUILTIN_AESDECLAST128
,
26071 IX86_BUILTIN_AESIMC128
,
26072 IX86_BUILTIN_AESKEYGENASSIST128
,
26074 /* PCLMUL instruction */
26075 IX86_BUILTIN_PCLMULQDQ128
,
26078 IX86_BUILTIN_ADDPD256
,
26079 IX86_BUILTIN_ADDPS256
,
26080 IX86_BUILTIN_ADDSUBPD256
,
26081 IX86_BUILTIN_ADDSUBPS256
,
26082 IX86_BUILTIN_ANDPD256
,
26083 IX86_BUILTIN_ANDPS256
,
26084 IX86_BUILTIN_ANDNPD256
,
26085 IX86_BUILTIN_ANDNPS256
,
26086 IX86_BUILTIN_BLENDPD256
,
26087 IX86_BUILTIN_BLENDPS256
,
26088 IX86_BUILTIN_BLENDVPD256
,
26089 IX86_BUILTIN_BLENDVPS256
,
26090 IX86_BUILTIN_DIVPD256
,
26091 IX86_BUILTIN_DIVPS256
,
26092 IX86_BUILTIN_DPPS256
,
26093 IX86_BUILTIN_HADDPD256
,
26094 IX86_BUILTIN_HADDPS256
,
26095 IX86_BUILTIN_HSUBPD256
,
26096 IX86_BUILTIN_HSUBPS256
,
26097 IX86_BUILTIN_MAXPD256
,
26098 IX86_BUILTIN_MAXPS256
,
26099 IX86_BUILTIN_MINPD256
,
26100 IX86_BUILTIN_MINPS256
,
26101 IX86_BUILTIN_MULPD256
,
26102 IX86_BUILTIN_MULPS256
,
26103 IX86_BUILTIN_ORPD256
,
26104 IX86_BUILTIN_ORPS256
,
26105 IX86_BUILTIN_SHUFPD256
,
26106 IX86_BUILTIN_SHUFPS256
,
26107 IX86_BUILTIN_SUBPD256
,
26108 IX86_BUILTIN_SUBPS256
,
26109 IX86_BUILTIN_XORPD256
,
26110 IX86_BUILTIN_XORPS256
,
26111 IX86_BUILTIN_CMPSD
,
26112 IX86_BUILTIN_CMPSS
,
26113 IX86_BUILTIN_CMPPD
,
26114 IX86_BUILTIN_CMPPS
,
26115 IX86_BUILTIN_CMPPD256
,
26116 IX86_BUILTIN_CMPPS256
,
26117 IX86_BUILTIN_CVTDQ2PD256
,
26118 IX86_BUILTIN_CVTDQ2PS256
,
26119 IX86_BUILTIN_CVTPD2PS256
,
26120 IX86_BUILTIN_CVTPS2DQ256
,
26121 IX86_BUILTIN_CVTPS2PD256
,
26122 IX86_BUILTIN_CVTTPD2DQ256
,
26123 IX86_BUILTIN_CVTPD2DQ256
,
26124 IX86_BUILTIN_CVTTPS2DQ256
,
26125 IX86_BUILTIN_EXTRACTF128PD256
,
26126 IX86_BUILTIN_EXTRACTF128PS256
,
26127 IX86_BUILTIN_EXTRACTF128SI256
,
26128 IX86_BUILTIN_VZEROALL
,
26129 IX86_BUILTIN_VZEROUPPER
,
26130 IX86_BUILTIN_VPERMILVARPD
,
26131 IX86_BUILTIN_VPERMILVARPS
,
26132 IX86_BUILTIN_VPERMILVARPD256
,
26133 IX86_BUILTIN_VPERMILVARPS256
,
26134 IX86_BUILTIN_VPERMILPD
,
26135 IX86_BUILTIN_VPERMILPS
,
26136 IX86_BUILTIN_VPERMILPD256
,
26137 IX86_BUILTIN_VPERMILPS256
,
26138 IX86_BUILTIN_VPERMIL2PD
,
26139 IX86_BUILTIN_VPERMIL2PS
,
26140 IX86_BUILTIN_VPERMIL2PD256
,
26141 IX86_BUILTIN_VPERMIL2PS256
,
26142 IX86_BUILTIN_VPERM2F128PD256
,
26143 IX86_BUILTIN_VPERM2F128PS256
,
26144 IX86_BUILTIN_VPERM2F128SI256
,
26145 IX86_BUILTIN_VBROADCASTSS
,
26146 IX86_BUILTIN_VBROADCASTSD256
,
26147 IX86_BUILTIN_VBROADCASTSS256
,
26148 IX86_BUILTIN_VBROADCASTPD256
,
26149 IX86_BUILTIN_VBROADCASTPS256
,
26150 IX86_BUILTIN_VINSERTF128PD256
,
26151 IX86_BUILTIN_VINSERTF128PS256
,
26152 IX86_BUILTIN_VINSERTF128SI256
,
26153 IX86_BUILTIN_LOADUPD256
,
26154 IX86_BUILTIN_LOADUPS256
,
26155 IX86_BUILTIN_STOREUPD256
,
26156 IX86_BUILTIN_STOREUPS256
,
26157 IX86_BUILTIN_LDDQU256
,
26158 IX86_BUILTIN_MOVNTDQ256
,
26159 IX86_BUILTIN_MOVNTPD256
,
26160 IX86_BUILTIN_MOVNTPS256
,
26161 IX86_BUILTIN_LOADDQU256
,
26162 IX86_BUILTIN_STOREDQU256
,
26163 IX86_BUILTIN_MASKLOADPD
,
26164 IX86_BUILTIN_MASKLOADPS
,
26165 IX86_BUILTIN_MASKSTOREPD
,
26166 IX86_BUILTIN_MASKSTOREPS
,
26167 IX86_BUILTIN_MASKLOADPD256
,
26168 IX86_BUILTIN_MASKLOADPS256
,
26169 IX86_BUILTIN_MASKSTOREPD256
,
26170 IX86_BUILTIN_MASKSTOREPS256
,
26171 IX86_BUILTIN_MOVSHDUP256
,
26172 IX86_BUILTIN_MOVSLDUP256
,
26173 IX86_BUILTIN_MOVDDUP256
,
26175 IX86_BUILTIN_SQRTPD256
,
26176 IX86_BUILTIN_SQRTPS256
,
26177 IX86_BUILTIN_SQRTPS_NR256
,
26178 IX86_BUILTIN_RSQRTPS256
,
26179 IX86_BUILTIN_RSQRTPS_NR256
,
26181 IX86_BUILTIN_RCPPS256
,
26183 IX86_BUILTIN_ROUNDPD256
,
26184 IX86_BUILTIN_ROUNDPS256
,
26186 IX86_BUILTIN_FLOORPD256
,
26187 IX86_BUILTIN_CEILPD256
,
26188 IX86_BUILTIN_TRUNCPD256
,
26189 IX86_BUILTIN_RINTPD256
,
26190 IX86_BUILTIN_ROUNDPD_AZ256
,
26192 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
26193 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
26194 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
26196 IX86_BUILTIN_FLOORPS256
,
26197 IX86_BUILTIN_CEILPS256
,
26198 IX86_BUILTIN_TRUNCPS256
,
26199 IX86_BUILTIN_RINTPS256
,
26200 IX86_BUILTIN_ROUNDPS_AZ256
,
26202 IX86_BUILTIN_FLOORPS_SFIX256
,
26203 IX86_BUILTIN_CEILPS_SFIX256
,
26204 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
26206 IX86_BUILTIN_UNPCKHPD256
,
26207 IX86_BUILTIN_UNPCKLPD256
,
26208 IX86_BUILTIN_UNPCKHPS256
,
26209 IX86_BUILTIN_UNPCKLPS256
,
26211 IX86_BUILTIN_SI256_SI
,
26212 IX86_BUILTIN_PS256_PS
,
26213 IX86_BUILTIN_PD256_PD
,
26214 IX86_BUILTIN_SI_SI256
,
26215 IX86_BUILTIN_PS_PS256
,
26216 IX86_BUILTIN_PD_PD256
,
26218 IX86_BUILTIN_VTESTZPD
,
26219 IX86_BUILTIN_VTESTCPD
,
26220 IX86_BUILTIN_VTESTNZCPD
,
26221 IX86_BUILTIN_VTESTZPS
,
26222 IX86_BUILTIN_VTESTCPS
,
26223 IX86_BUILTIN_VTESTNZCPS
,
26224 IX86_BUILTIN_VTESTZPD256
,
26225 IX86_BUILTIN_VTESTCPD256
,
26226 IX86_BUILTIN_VTESTNZCPD256
,
26227 IX86_BUILTIN_VTESTZPS256
,
26228 IX86_BUILTIN_VTESTCPS256
,
26229 IX86_BUILTIN_VTESTNZCPS256
,
26230 IX86_BUILTIN_PTESTZ256
,
26231 IX86_BUILTIN_PTESTC256
,
26232 IX86_BUILTIN_PTESTNZC256
,
26234 IX86_BUILTIN_MOVMSKPD256
,
26235 IX86_BUILTIN_MOVMSKPS256
,
26238 IX86_BUILTIN_MPSADBW256
,
26239 IX86_BUILTIN_PABSB256
,
26240 IX86_BUILTIN_PABSW256
,
26241 IX86_BUILTIN_PABSD256
,
26242 IX86_BUILTIN_PACKSSDW256
,
26243 IX86_BUILTIN_PACKSSWB256
,
26244 IX86_BUILTIN_PACKUSDW256
,
26245 IX86_BUILTIN_PACKUSWB256
,
26246 IX86_BUILTIN_PADDB256
,
26247 IX86_BUILTIN_PADDW256
,
26248 IX86_BUILTIN_PADDD256
,
26249 IX86_BUILTIN_PADDQ256
,
26250 IX86_BUILTIN_PADDSB256
,
26251 IX86_BUILTIN_PADDSW256
,
26252 IX86_BUILTIN_PADDUSB256
,
26253 IX86_BUILTIN_PADDUSW256
,
26254 IX86_BUILTIN_PALIGNR256
,
26255 IX86_BUILTIN_AND256I
,
26256 IX86_BUILTIN_ANDNOT256I
,
26257 IX86_BUILTIN_PAVGB256
,
26258 IX86_BUILTIN_PAVGW256
,
26259 IX86_BUILTIN_PBLENDVB256
,
26260 IX86_BUILTIN_PBLENDVW256
,
26261 IX86_BUILTIN_PCMPEQB256
,
26262 IX86_BUILTIN_PCMPEQW256
,
26263 IX86_BUILTIN_PCMPEQD256
,
26264 IX86_BUILTIN_PCMPEQQ256
,
26265 IX86_BUILTIN_PCMPGTB256
,
26266 IX86_BUILTIN_PCMPGTW256
,
26267 IX86_BUILTIN_PCMPGTD256
,
26268 IX86_BUILTIN_PCMPGTQ256
,
26269 IX86_BUILTIN_PHADDW256
,
26270 IX86_BUILTIN_PHADDD256
,
26271 IX86_BUILTIN_PHADDSW256
,
26272 IX86_BUILTIN_PHSUBW256
,
26273 IX86_BUILTIN_PHSUBD256
,
26274 IX86_BUILTIN_PHSUBSW256
,
26275 IX86_BUILTIN_PMADDUBSW256
,
26276 IX86_BUILTIN_PMADDWD256
,
26277 IX86_BUILTIN_PMAXSB256
,
26278 IX86_BUILTIN_PMAXSW256
,
26279 IX86_BUILTIN_PMAXSD256
,
26280 IX86_BUILTIN_PMAXUB256
,
26281 IX86_BUILTIN_PMAXUW256
,
26282 IX86_BUILTIN_PMAXUD256
,
26283 IX86_BUILTIN_PMINSB256
,
26284 IX86_BUILTIN_PMINSW256
,
26285 IX86_BUILTIN_PMINSD256
,
26286 IX86_BUILTIN_PMINUB256
,
26287 IX86_BUILTIN_PMINUW256
,
26288 IX86_BUILTIN_PMINUD256
,
26289 IX86_BUILTIN_PMOVMSKB256
,
26290 IX86_BUILTIN_PMOVSXBW256
,
26291 IX86_BUILTIN_PMOVSXBD256
,
26292 IX86_BUILTIN_PMOVSXBQ256
,
26293 IX86_BUILTIN_PMOVSXWD256
,
26294 IX86_BUILTIN_PMOVSXWQ256
,
26295 IX86_BUILTIN_PMOVSXDQ256
,
26296 IX86_BUILTIN_PMOVZXBW256
,
26297 IX86_BUILTIN_PMOVZXBD256
,
26298 IX86_BUILTIN_PMOVZXBQ256
,
26299 IX86_BUILTIN_PMOVZXWD256
,
26300 IX86_BUILTIN_PMOVZXWQ256
,
26301 IX86_BUILTIN_PMOVZXDQ256
,
26302 IX86_BUILTIN_PMULDQ256
,
26303 IX86_BUILTIN_PMULHRSW256
,
26304 IX86_BUILTIN_PMULHUW256
,
26305 IX86_BUILTIN_PMULHW256
,
26306 IX86_BUILTIN_PMULLW256
,
26307 IX86_BUILTIN_PMULLD256
,
26308 IX86_BUILTIN_PMULUDQ256
,
26309 IX86_BUILTIN_POR256
,
26310 IX86_BUILTIN_PSADBW256
,
26311 IX86_BUILTIN_PSHUFB256
,
26312 IX86_BUILTIN_PSHUFD256
,
26313 IX86_BUILTIN_PSHUFHW256
,
26314 IX86_BUILTIN_PSHUFLW256
,
26315 IX86_BUILTIN_PSIGNB256
,
26316 IX86_BUILTIN_PSIGNW256
,
26317 IX86_BUILTIN_PSIGND256
,
26318 IX86_BUILTIN_PSLLDQI256
,
26319 IX86_BUILTIN_PSLLWI256
,
26320 IX86_BUILTIN_PSLLW256
,
26321 IX86_BUILTIN_PSLLDI256
,
26322 IX86_BUILTIN_PSLLD256
,
26323 IX86_BUILTIN_PSLLQI256
,
26324 IX86_BUILTIN_PSLLQ256
,
26325 IX86_BUILTIN_PSRAWI256
,
26326 IX86_BUILTIN_PSRAW256
,
26327 IX86_BUILTIN_PSRADI256
,
26328 IX86_BUILTIN_PSRAD256
,
26329 IX86_BUILTIN_PSRLDQI256
,
26330 IX86_BUILTIN_PSRLWI256
,
26331 IX86_BUILTIN_PSRLW256
,
26332 IX86_BUILTIN_PSRLDI256
,
26333 IX86_BUILTIN_PSRLD256
,
26334 IX86_BUILTIN_PSRLQI256
,
26335 IX86_BUILTIN_PSRLQ256
,
26336 IX86_BUILTIN_PSUBB256
,
26337 IX86_BUILTIN_PSUBW256
,
26338 IX86_BUILTIN_PSUBD256
,
26339 IX86_BUILTIN_PSUBQ256
,
26340 IX86_BUILTIN_PSUBSB256
,
26341 IX86_BUILTIN_PSUBSW256
,
26342 IX86_BUILTIN_PSUBUSB256
,
26343 IX86_BUILTIN_PSUBUSW256
,
26344 IX86_BUILTIN_PUNPCKHBW256
,
26345 IX86_BUILTIN_PUNPCKHWD256
,
26346 IX86_BUILTIN_PUNPCKHDQ256
,
26347 IX86_BUILTIN_PUNPCKHQDQ256
,
26348 IX86_BUILTIN_PUNPCKLBW256
,
26349 IX86_BUILTIN_PUNPCKLWD256
,
26350 IX86_BUILTIN_PUNPCKLDQ256
,
26351 IX86_BUILTIN_PUNPCKLQDQ256
,
26352 IX86_BUILTIN_PXOR256
,
26353 IX86_BUILTIN_MOVNTDQA256
,
26354 IX86_BUILTIN_VBROADCASTSS_PS
,
26355 IX86_BUILTIN_VBROADCASTSS_PS256
,
26356 IX86_BUILTIN_VBROADCASTSD_PD256
,
26357 IX86_BUILTIN_VBROADCASTSI256
,
26358 IX86_BUILTIN_PBLENDD256
,
26359 IX86_BUILTIN_PBLENDD128
,
26360 IX86_BUILTIN_PBROADCASTB256
,
26361 IX86_BUILTIN_PBROADCASTW256
,
26362 IX86_BUILTIN_PBROADCASTD256
,
26363 IX86_BUILTIN_PBROADCASTQ256
,
26364 IX86_BUILTIN_PBROADCASTB128
,
26365 IX86_BUILTIN_PBROADCASTW128
,
26366 IX86_BUILTIN_PBROADCASTD128
,
26367 IX86_BUILTIN_PBROADCASTQ128
,
26368 IX86_BUILTIN_VPERMVARSI256
,
26369 IX86_BUILTIN_VPERMDF256
,
26370 IX86_BUILTIN_VPERMVARSF256
,
26371 IX86_BUILTIN_VPERMDI256
,
26372 IX86_BUILTIN_VPERMTI256
,
26373 IX86_BUILTIN_VEXTRACT128I256
,
26374 IX86_BUILTIN_VINSERT128I256
,
26375 IX86_BUILTIN_MASKLOADD
,
26376 IX86_BUILTIN_MASKLOADQ
,
26377 IX86_BUILTIN_MASKLOADD256
,
26378 IX86_BUILTIN_MASKLOADQ256
,
26379 IX86_BUILTIN_MASKSTORED
,
26380 IX86_BUILTIN_MASKSTOREQ
,
26381 IX86_BUILTIN_MASKSTORED256
,
26382 IX86_BUILTIN_MASKSTOREQ256
,
26383 IX86_BUILTIN_PSLLVV4DI
,
26384 IX86_BUILTIN_PSLLVV2DI
,
26385 IX86_BUILTIN_PSLLVV8SI
,
26386 IX86_BUILTIN_PSLLVV4SI
,
26387 IX86_BUILTIN_PSRAVV8SI
,
26388 IX86_BUILTIN_PSRAVV4SI
,
26389 IX86_BUILTIN_PSRLVV4DI
,
26390 IX86_BUILTIN_PSRLVV2DI
,
26391 IX86_BUILTIN_PSRLVV8SI
,
26392 IX86_BUILTIN_PSRLVV4SI
,
26394 IX86_BUILTIN_GATHERSIV2DF
,
26395 IX86_BUILTIN_GATHERSIV4DF
,
26396 IX86_BUILTIN_GATHERDIV2DF
,
26397 IX86_BUILTIN_GATHERDIV4DF
,
26398 IX86_BUILTIN_GATHERSIV4SF
,
26399 IX86_BUILTIN_GATHERSIV8SF
,
26400 IX86_BUILTIN_GATHERDIV4SF
,
26401 IX86_BUILTIN_GATHERDIV8SF
,
26402 IX86_BUILTIN_GATHERSIV2DI
,
26403 IX86_BUILTIN_GATHERSIV4DI
,
26404 IX86_BUILTIN_GATHERDIV2DI
,
26405 IX86_BUILTIN_GATHERDIV4DI
,
26406 IX86_BUILTIN_GATHERSIV4SI
,
26407 IX86_BUILTIN_GATHERSIV8SI
,
26408 IX86_BUILTIN_GATHERDIV4SI
,
26409 IX86_BUILTIN_GATHERDIV8SI
,
26411 /* Alternate 4 element gather for the vectorizer where
26412 all operands are 32-byte wide. */
26413 IX86_BUILTIN_GATHERALTSIV4DF
,
26414 IX86_BUILTIN_GATHERALTDIV8SF
,
26415 IX86_BUILTIN_GATHERALTSIV4DI
,
26416 IX86_BUILTIN_GATHERALTDIV8SI
,
26418 /* TFmode support builtins. */
26420 IX86_BUILTIN_HUGE_VALQ
,
26421 IX86_BUILTIN_FABSQ
,
26422 IX86_BUILTIN_COPYSIGNQ
,
26424 /* Vectorizer support builtins. */
26425 IX86_BUILTIN_CPYSGNPS
,
26426 IX86_BUILTIN_CPYSGNPD
,
26427 IX86_BUILTIN_CPYSGNPS256
,
26428 IX86_BUILTIN_CPYSGNPD256
,
26430 /* FMA4 instructions. */
26431 IX86_BUILTIN_VFMADDSS
,
26432 IX86_BUILTIN_VFMADDSD
,
26433 IX86_BUILTIN_VFMADDPS
,
26434 IX86_BUILTIN_VFMADDPD
,
26435 IX86_BUILTIN_VFMADDPS256
,
26436 IX86_BUILTIN_VFMADDPD256
,
26437 IX86_BUILTIN_VFMADDSUBPS
,
26438 IX86_BUILTIN_VFMADDSUBPD
,
26439 IX86_BUILTIN_VFMADDSUBPS256
,
26440 IX86_BUILTIN_VFMADDSUBPD256
,
26442 /* FMA3 instructions. */
26443 IX86_BUILTIN_VFMADDSS3
,
26444 IX86_BUILTIN_VFMADDSD3
,
26446 /* XOP instructions. */
26447 IX86_BUILTIN_VPCMOV
,
26448 IX86_BUILTIN_VPCMOV_V2DI
,
26449 IX86_BUILTIN_VPCMOV_V4SI
,
26450 IX86_BUILTIN_VPCMOV_V8HI
,
26451 IX86_BUILTIN_VPCMOV_V16QI
,
26452 IX86_BUILTIN_VPCMOV_V4SF
,
26453 IX86_BUILTIN_VPCMOV_V2DF
,
26454 IX86_BUILTIN_VPCMOV256
,
26455 IX86_BUILTIN_VPCMOV_V4DI256
,
26456 IX86_BUILTIN_VPCMOV_V8SI256
,
26457 IX86_BUILTIN_VPCMOV_V16HI256
,
26458 IX86_BUILTIN_VPCMOV_V32QI256
,
26459 IX86_BUILTIN_VPCMOV_V8SF256
,
26460 IX86_BUILTIN_VPCMOV_V4DF256
,
26462 IX86_BUILTIN_VPPERM
,
26464 IX86_BUILTIN_VPMACSSWW
,
26465 IX86_BUILTIN_VPMACSWW
,
26466 IX86_BUILTIN_VPMACSSWD
,
26467 IX86_BUILTIN_VPMACSWD
,
26468 IX86_BUILTIN_VPMACSSDD
,
26469 IX86_BUILTIN_VPMACSDD
,
26470 IX86_BUILTIN_VPMACSSDQL
,
26471 IX86_BUILTIN_VPMACSSDQH
,
26472 IX86_BUILTIN_VPMACSDQL
,
26473 IX86_BUILTIN_VPMACSDQH
,
26474 IX86_BUILTIN_VPMADCSSWD
,
26475 IX86_BUILTIN_VPMADCSWD
,
26477 IX86_BUILTIN_VPHADDBW
,
26478 IX86_BUILTIN_VPHADDBD
,
26479 IX86_BUILTIN_VPHADDBQ
,
26480 IX86_BUILTIN_VPHADDWD
,
26481 IX86_BUILTIN_VPHADDWQ
,
26482 IX86_BUILTIN_VPHADDDQ
,
26483 IX86_BUILTIN_VPHADDUBW
,
26484 IX86_BUILTIN_VPHADDUBD
,
26485 IX86_BUILTIN_VPHADDUBQ
,
26486 IX86_BUILTIN_VPHADDUWD
,
26487 IX86_BUILTIN_VPHADDUWQ
,
26488 IX86_BUILTIN_VPHADDUDQ
,
26489 IX86_BUILTIN_VPHSUBBW
,
26490 IX86_BUILTIN_VPHSUBWD
,
26491 IX86_BUILTIN_VPHSUBDQ
,
26493 IX86_BUILTIN_VPROTB
,
26494 IX86_BUILTIN_VPROTW
,
26495 IX86_BUILTIN_VPROTD
,
26496 IX86_BUILTIN_VPROTQ
,
26497 IX86_BUILTIN_VPROTB_IMM
,
26498 IX86_BUILTIN_VPROTW_IMM
,
26499 IX86_BUILTIN_VPROTD_IMM
,
26500 IX86_BUILTIN_VPROTQ_IMM
,
26502 IX86_BUILTIN_VPSHLB
,
26503 IX86_BUILTIN_VPSHLW
,
26504 IX86_BUILTIN_VPSHLD
,
26505 IX86_BUILTIN_VPSHLQ
,
26506 IX86_BUILTIN_VPSHAB
,
26507 IX86_BUILTIN_VPSHAW
,
26508 IX86_BUILTIN_VPSHAD
,
26509 IX86_BUILTIN_VPSHAQ
,
26511 IX86_BUILTIN_VFRCZSS
,
26512 IX86_BUILTIN_VFRCZSD
,
26513 IX86_BUILTIN_VFRCZPS
,
26514 IX86_BUILTIN_VFRCZPD
,
26515 IX86_BUILTIN_VFRCZPS256
,
26516 IX86_BUILTIN_VFRCZPD256
,
26518 IX86_BUILTIN_VPCOMEQUB
,
26519 IX86_BUILTIN_VPCOMNEUB
,
26520 IX86_BUILTIN_VPCOMLTUB
,
26521 IX86_BUILTIN_VPCOMLEUB
,
26522 IX86_BUILTIN_VPCOMGTUB
,
26523 IX86_BUILTIN_VPCOMGEUB
,
26524 IX86_BUILTIN_VPCOMFALSEUB
,
26525 IX86_BUILTIN_VPCOMTRUEUB
,
26527 IX86_BUILTIN_VPCOMEQUW
,
26528 IX86_BUILTIN_VPCOMNEUW
,
26529 IX86_BUILTIN_VPCOMLTUW
,
26530 IX86_BUILTIN_VPCOMLEUW
,
26531 IX86_BUILTIN_VPCOMGTUW
,
26532 IX86_BUILTIN_VPCOMGEUW
,
26533 IX86_BUILTIN_VPCOMFALSEUW
,
26534 IX86_BUILTIN_VPCOMTRUEUW
,
26536 IX86_BUILTIN_VPCOMEQUD
,
26537 IX86_BUILTIN_VPCOMNEUD
,
26538 IX86_BUILTIN_VPCOMLTUD
,
26539 IX86_BUILTIN_VPCOMLEUD
,
26540 IX86_BUILTIN_VPCOMGTUD
,
26541 IX86_BUILTIN_VPCOMGEUD
,
26542 IX86_BUILTIN_VPCOMFALSEUD
,
26543 IX86_BUILTIN_VPCOMTRUEUD
,
26545 IX86_BUILTIN_VPCOMEQUQ
,
26546 IX86_BUILTIN_VPCOMNEUQ
,
26547 IX86_BUILTIN_VPCOMLTUQ
,
26548 IX86_BUILTIN_VPCOMLEUQ
,
26549 IX86_BUILTIN_VPCOMGTUQ
,
26550 IX86_BUILTIN_VPCOMGEUQ
,
26551 IX86_BUILTIN_VPCOMFALSEUQ
,
26552 IX86_BUILTIN_VPCOMTRUEUQ
,
26554 IX86_BUILTIN_VPCOMEQB
,
26555 IX86_BUILTIN_VPCOMNEB
,
26556 IX86_BUILTIN_VPCOMLTB
,
26557 IX86_BUILTIN_VPCOMLEB
,
26558 IX86_BUILTIN_VPCOMGTB
,
26559 IX86_BUILTIN_VPCOMGEB
,
26560 IX86_BUILTIN_VPCOMFALSEB
,
26561 IX86_BUILTIN_VPCOMTRUEB
,
26563 IX86_BUILTIN_VPCOMEQW
,
26564 IX86_BUILTIN_VPCOMNEW
,
26565 IX86_BUILTIN_VPCOMLTW
,
26566 IX86_BUILTIN_VPCOMLEW
,
26567 IX86_BUILTIN_VPCOMGTW
,
26568 IX86_BUILTIN_VPCOMGEW
,
26569 IX86_BUILTIN_VPCOMFALSEW
,
26570 IX86_BUILTIN_VPCOMTRUEW
,
26572 IX86_BUILTIN_VPCOMEQD
,
26573 IX86_BUILTIN_VPCOMNED
,
26574 IX86_BUILTIN_VPCOMLTD
,
26575 IX86_BUILTIN_VPCOMLED
,
26576 IX86_BUILTIN_VPCOMGTD
,
26577 IX86_BUILTIN_VPCOMGED
,
26578 IX86_BUILTIN_VPCOMFALSED
,
26579 IX86_BUILTIN_VPCOMTRUED
,
26581 IX86_BUILTIN_VPCOMEQQ
,
26582 IX86_BUILTIN_VPCOMNEQ
,
26583 IX86_BUILTIN_VPCOMLTQ
,
26584 IX86_BUILTIN_VPCOMLEQ
,
26585 IX86_BUILTIN_VPCOMGTQ
,
26586 IX86_BUILTIN_VPCOMGEQ
,
26587 IX86_BUILTIN_VPCOMFALSEQ
,
26588 IX86_BUILTIN_VPCOMTRUEQ
,
26590 /* LWP instructions. */
26591 IX86_BUILTIN_LLWPCB
,
26592 IX86_BUILTIN_SLWPCB
,
26593 IX86_BUILTIN_LWPVAL32
,
26594 IX86_BUILTIN_LWPVAL64
,
26595 IX86_BUILTIN_LWPINS32
,
26596 IX86_BUILTIN_LWPINS64
,
26601 IX86_BUILTIN_XBEGIN
,
26603 IX86_BUILTIN_XABORT
,
26604 IX86_BUILTIN_XTEST
,
26606 /* BMI instructions. */
26607 IX86_BUILTIN_BEXTR32
,
26608 IX86_BUILTIN_BEXTR64
,
26611 /* TBM instructions. */
26612 IX86_BUILTIN_BEXTRI32
,
26613 IX86_BUILTIN_BEXTRI64
,
26615 /* BMI2 instructions. */
26616 IX86_BUILTIN_BZHI32
,
26617 IX86_BUILTIN_BZHI64
,
26618 IX86_BUILTIN_PDEP32
,
26619 IX86_BUILTIN_PDEP64
,
26620 IX86_BUILTIN_PEXT32
,
26621 IX86_BUILTIN_PEXT64
,
26623 /* ADX instructions. */
26624 IX86_BUILTIN_ADDCARRYX32
,
26625 IX86_BUILTIN_ADDCARRYX64
,
26627 /* FSGSBASE instructions. */
26628 IX86_BUILTIN_RDFSBASE32
,
26629 IX86_BUILTIN_RDFSBASE64
,
26630 IX86_BUILTIN_RDGSBASE32
,
26631 IX86_BUILTIN_RDGSBASE64
,
26632 IX86_BUILTIN_WRFSBASE32
,
26633 IX86_BUILTIN_WRFSBASE64
,
26634 IX86_BUILTIN_WRGSBASE32
,
26635 IX86_BUILTIN_WRGSBASE64
,
26637 /* RDRND instructions. */
26638 IX86_BUILTIN_RDRAND16_STEP
,
26639 IX86_BUILTIN_RDRAND32_STEP
,
26640 IX86_BUILTIN_RDRAND64_STEP
,
26642 /* RDSEED instructions. */
26643 IX86_BUILTIN_RDSEED16_STEP
,
26644 IX86_BUILTIN_RDSEED32_STEP
,
26645 IX86_BUILTIN_RDSEED64_STEP
,
26647 /* F16C instructions. */
26648 IX86_BUILTIN_CVTPH2PS
,
26649 IX86_BUILTIN_CVTPH2PS256
,
26650 IX86_BUILTIN_CVTPS2PH
,
26651 IX86_BUILTIN_CVTPS2PH256
,
26653 /* CFString built-in for darwin */
26654 IX86_BUILTIN_CFSTRING
,
26656 /* Builtins to get CPU type and supported features. */
26657 IX86_BUILTIN_CPU_INIT
,
26658 IX86_BUILTIN_CPU_IS
,
26659 IX86_BUILTIN_CPU_SUPPORTS
,
26664 /* Table for the ix86 builtin decls. */
26665 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
26667 /* Table of all of the builtin functions that are possible with different ISA's
26668 but are waiting to be built until a function is declared to use that
26670 struct builtin_isa
{
26671 const char *name
; /* function name */
26672 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
26673 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
26674 bool const_p
; /* true if the declaration is constant */
26675 bool set_and_not_built_p
;
26678 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
26681 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26682 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26683 function decl in the ix86_builtins array. Returns the function decl or
26684 NULL_TREE, if the builtin was not added.
26686 If the front end has a special hook for builtin functions, delay adding
26687 builtin functions that aren't in the current ISA until the ISA is changed
26688 with function specific optimization. Doing so, can save about 300K for the
26689 default compiler. When the builtin is expanded, check at that time whether
26692 If the front end doesn't have a special hook, record all builtins, even if
26693 it isn't an instruction set in the current ISA in case the user uses
26694 function specific options for a different ISA, so that we don't get scope
26695 errors if a builtin is added in the middle of a function scope. */
26698 def_builtin (HOST_WIDE_INT mask
, const char *name
,
26699 enum ix86_builtin_func_type tcode
,
26700 enum ix86_builtins code
)
26702 tree decl
= NULL_TREE
;
26704 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
26706 ix86_builtins_isa
[(int) code
].isa
= mask
;
26708 mask
&= ~OPTION_MASK_ISA_64BIT
;
26710 || (mask
& ix86_isa_flags
) != 0
26711 || (lang_hooks
.builtin_function
26712 == lang_hooks
.builtin_function_ext_scope
))
26715 tree type
= ix86_get_builtin_func_type (tcode
);
26716 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
26718 ix86_builtins
[(int) code
] = decl
;
26719 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
26723 ix86_builtins
[(int) code
] = NULL_TREE
;
26724 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
26725 ix86_builtins_isa
[(int) code
].name
= name
;
26726 ix86_builtins_isa
[(int) code
].const_p
= false;
26727 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
26734 /* Like def_builtin, but also marks the function decl "const". */
26737 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
26738 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
26740 tree decl
= def_builtin (mask
, name
, tcode
, code
);
26742 TREE_READONLY (decl
) = 1;
26744 ix86_builtins_isa
[(int) code
].const_p
= true;
26749 /* Add any new builtin functions for a given ISA that may not have been
26750 declared. This saves a bit of space compared to adding all of the
26751 declarations to the tree, even if we didn't use them. */
26754 ix86_add_new_builtins (HOST_WIDE_INT isa
)
26758 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
26760 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
26761 && ix86_builtins_isa
[i
].set_and_not_built_p
)
26765 /* Don't define the builtin again. */
26766 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
26768 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
26769 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
26770 type
, i
, BUILT_IN_MD
, NULL
,
26773 ix86_builtins
[i
] = decl
;
26774 if (ix86_builtins_isa
[i
].const_p
)
26775 TREE_READONLY (decl
) = 1;
26780 /* Bits for builtin_description.flag. */
26782 /* Set when we don't support the comparison natively, and should
26783 swap_comparison in order to support it. */
26784 #define BUILTIN_DESC_SWAP_OPERANDS 1
26786 struct builtin_description
26788 const HOST_WIDE_INT mask
;
26789 const enum insn_code icode
;
26790 const char *const name
;
26791 const enum ix86_builtins code
;
26792 const enum rtx_code comparison
;
26796 static const struct builtin_description bdesc_comi
[] =
26798 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
26799 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
26800 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
26801 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
26802 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
26803 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
26804 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
26805 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
26806 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
26807 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
26808 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
26809 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
26810 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
26811 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
26812 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
26813 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
26814 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
26815 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
26816 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
26817 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
26818 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
26819 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
26820 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
26821 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
26824 static const struct builtin_description bdesc_pcmpestr
[] =
26827 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
26828 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
26829 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
26830 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
26831 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
26832 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
26833 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
26836 static const struct builtin_description bdesc_pcmpistr
[] =
26839 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
26840 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
26841 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
26842 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
26843 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
26844 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
26845 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
26848 /* Special builtins with variable number of arguments. */
26849 static const struct builtin_description bdesc_special_args
[] =
26851 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26852 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
26853 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26856 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26859 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26861 /* FXSR, XSAVE and XSAVEOPT */
26862 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26863 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26864 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26865 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26866 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26868 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26869 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26870 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26871 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26872 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26875 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26876 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26877 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26879 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26880 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26881 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26882 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26884 /* SSE or 3DNow!A */
26885 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26886 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
26889 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26890 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26891 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26892 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
26893 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26894 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
26895 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
26896 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
26897 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
26898 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26900 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26901 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26904 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26907 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
26910 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26911 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26914 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26915 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26917 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26918 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26919 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26920 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
26921 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
26923 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26924 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26925 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26926 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26927 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26928 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
26929 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26931 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
26932 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26933 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26935 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
26936 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
26937 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
26938 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
26939 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
26940 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
26941 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
26942 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
26945 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
26946 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
26947 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
26948 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
26949 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
26950 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
26951 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
26952 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
26953 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
26955 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26956 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
26957 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
26958 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
26959 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
26960 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
26963 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26964 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26965 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26966 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26967 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26968 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26969 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26970 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26973 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26974 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26975 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
26978 /* Builtins with variable number of arguments. */
26979 static const struct builtin_description bdesc_args
[] =
26981 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26982 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26983 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26984 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26985 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26986 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26987 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26990 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26991 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26992 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26993 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26994 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26995 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26997 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26998 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26999 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27000 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27001 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27002 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27003 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27004 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27006 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27007 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27009 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27010 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27011 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27012 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27014 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27015 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27016 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27017 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27018 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27019 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27021 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27022 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27023 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27024 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27025 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27026 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27028 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27029 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
27030 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27032 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
27034 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27035 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27036 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27037 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27038 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27039 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27041 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27042 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27043 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27044 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27045 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27046 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27048 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27049 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27050 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27051 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27054 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27055 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27056 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27057 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27059 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27060 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27061 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27062 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27063 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27064 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27065 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27066 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27067 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27068 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27069 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27070 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27071 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27072 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27073 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27076 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27077 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27078 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27079 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27080 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27081 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27084 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27085 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27086 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27087 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27088 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27089 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27090 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27091 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27092 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27093 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27094 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27095 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27097 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27099 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27100 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27101 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27102 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27103 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27104 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27105 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27106 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27108 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27109 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27110 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27111 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27112 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27113 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27114 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27115 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27116 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27117 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27118 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27119 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27120 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27121 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27122 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27123 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27124 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27125 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27126 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27127 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27128 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27129 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27131 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27132 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27133 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27134 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27136 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27137 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27138 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27139 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27141 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27143 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27144 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27145 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27146 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27147 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27149 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
27150 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
27151 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
27153 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
27155 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27156 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27157 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27159 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
27160 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
27162 /* SSE MMX or 3Dnow!A */
27163 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27164 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27165 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27167 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27168 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27169 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27170 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27172 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
27173 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
27175 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
27178 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27180 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27181 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
27182 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27183 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
27184 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
27186 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27187 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27188 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
27189 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27190 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27192 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
27194 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27195 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27196 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27197 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27199 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27200 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
27201 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27203 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27204 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27205 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27206 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27207 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27208 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27209 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27210 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27212 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27213 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27214 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27215 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27216 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27217 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27218 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27219 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27220 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27221 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27222 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27223 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27224 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27225 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27226 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27227 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27228 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27229 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27230 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27231 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27233 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27234 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27235 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27236 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27238 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27239 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27240 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27241 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27243 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27245 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27246 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27247 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27249 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27251 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27252 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27253 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27254 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27255 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27256 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27257 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27258 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27260 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27261 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27262 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27263 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27264 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27265 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27266 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27267 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27269 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27270 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
27272 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27273 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27274 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27275 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27277 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27278 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27280 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27281 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27282 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27283 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27284 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27285 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27287 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27288 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27289 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27290 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27292 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27293 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27294 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27295 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27296 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27297 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27298 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27299 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27301 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27302 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27303 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27305 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27306 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
27308 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
27309 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27311 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
27313 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
27314 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
27315 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
27316 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
27318 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27319 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27320 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27321 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27322 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27323 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27324 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27326 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27327 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27328 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27329 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27330 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27331 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27332 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27334 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27335 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27336 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27337 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27339 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
27340 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27341 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27343 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
27345 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27348 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27349 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27352 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27353 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27355 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27356 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27357 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27358 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27359 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27360 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27363 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27364 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
27365 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27366 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
27367 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27368 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27370 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27371 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27372 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27373 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27374 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27375 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27376 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27377 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27378 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27379 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27380 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27381 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27382 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
27383 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
27384 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27385 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27386 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27387 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27388 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27389 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27390 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27391 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27392 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27393 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27396 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
27397 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
27400 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27401 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27402 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
27403 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
27404 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27405 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27406 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27407 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
27408 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
27409 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
27411 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27412 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27413 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27414 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27415 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27416 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27417 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27418 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27419 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27420 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27421 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27422 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27423 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27425 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27426 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27427 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27428 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27429 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27430 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27431 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27432 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27433 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27434 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27435 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27436 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27439 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27440 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27441 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27442 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27444 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27445 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
27446 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
27447 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27449 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27450 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27452 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27453 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27455 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27456 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
27457 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
27458 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27460 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
27461 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
27463 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27464 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27466 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27467 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27468 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27471 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27472 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
27473 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
27474 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27475 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27478 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
27479 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
27480 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
27481 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27484 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
27485 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27487 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27488 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27489 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27490 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27493 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
27496 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27497 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27498 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27499 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27500 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27501 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27502 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27503 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27504 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27505 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27506 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27507 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27508 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27509 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27510 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27511 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27512 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27513 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27514 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27515 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27516 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27517 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27518 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27519 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27520 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27521 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27523 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
27524 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
27525 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
27526 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27528 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27529 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27530 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
27531 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
27532 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27533 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27534 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27535 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27536 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27537 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27538 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27539 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27540 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27541 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
27542 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
27543 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
27544 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
27545 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
27546 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
27547 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27548 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
27549 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27550 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27551 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27552 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27553 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27554 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27555 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27556 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27557 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27558 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27559 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
27560 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
27561 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
27563 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27564 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27565 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27567 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27568 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27569 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27570 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27571 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27573 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27575 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27576 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27578 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27579 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
27580 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
27581 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27583 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27584 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27586 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27587 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27589 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27590 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
27591 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
27592 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27594 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
27595 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
27597 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27598 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27600 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27601 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27602 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27603 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27605 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27606 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27607 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27608 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
27609 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
27610 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
27612 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27613 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27614 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27615 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27616 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27617 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27618 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27619 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27620 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27621 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27622 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27623 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27624 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27625 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27626 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27628 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
27629 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
27631 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27632 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27634 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27637 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
27638 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
27639 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
27640 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
27641 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27642 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27643 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27644 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27645 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27646 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27647 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27648 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27649 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27650 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27651 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27652 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27653 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
27654 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27655 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27656 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27657 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27658 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
27659 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
27660 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27661 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27662 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27663 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27664 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27665 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27666 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27667 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27668 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27669 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27670 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27671 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27672 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27673 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27674 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27675 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
27676 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27677 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27678 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27679 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27680 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27681 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27682 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27683 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27684 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27685 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27686 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27687 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27688 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
27689 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27690 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27691 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27692 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27693 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27694 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27695 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27696 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27697 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27698 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27699 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27700 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27701 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27702 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27703 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27704 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27705 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27706 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27707 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27708 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27709 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27710 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27711 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
27712 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27713 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27714 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27715 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27716 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27717 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27718 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27719 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27720 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27721 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27722 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27723 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27724 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27725 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27726 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27727 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27728 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27729 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27730 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27731 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27732 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27733 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27734 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27735 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27736 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27737 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27738 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27739 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27740 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27741 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27742 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27743 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27744 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27745 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27746 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27747 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27748 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27749 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27750 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27751 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27752 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27753 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27754 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27755 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27756 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27757 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27758 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27759 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27760 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27761 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27762 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27763 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27764 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27765 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27766 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27767 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27768 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27769 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27770 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27771 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27772 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27773 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27774 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27775 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27776 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27777 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27778 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27779 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27780 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27781 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27782 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27784 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27787 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27788 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27789 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27792 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27793 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27796 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27797 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27798 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27799 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
27802 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27803 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27804 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27805 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27806 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27807 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */
/* Function-type shorthands for the multi-argument (FMA4/XOP) builtin
   table (bdesc_multi_arg) below.  The name encodes the operand shape:
     MULTI_ARG_<n>_...  n is the number of vector arguments,
     SF/DF              V4SF/V2DF 128-bit float vectors (SF2/DF2 = 256-bit),
     QI/HI/SI/DI        V16QI/V8HI/V4SI/V2DI integer vectors (QI2/... = 256-bit),
     X_Y (e.g. SI_DI)   result element width X, source element width Y,
     _IMM               last operand is an immediate (SI) count,
     _CMP               integer compare taking an rtx comparison code,
     _TF                pcom true/false forms (PCOM_TRUE/PCOM_FALSE),
     _I / _I1           trailing immediate selector (128- / 256-bit forms).  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
27864 static const struct builtin_description bdesc_multi_arg
[] =
27866 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
27867 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
27868 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27869 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
27870 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
27871 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27873 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
27874 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
27875 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27876 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
27877 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
27878 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27880 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
27881 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
27882 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27883 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
27884 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
27885 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27886 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
27887 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
27888 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27889 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
27890 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
27891 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27893 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
27894 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
27895 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27896 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
27897 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
27898 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27899 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
27900 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
27901 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27902 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
27903 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
27904 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27906 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27907 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27908 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27909 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27910 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
27911 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
27912 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
27914 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27915 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27916 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
27917 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
27918 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
27919 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27920 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27922 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
27924 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27925 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27926 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27927 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27928 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27929 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27930 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27931 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27932 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27933 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27934 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27935 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27937 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27938 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27939 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27940 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27941 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
27942 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
27943 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
27944 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
27945 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27946 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27947 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27948 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27949 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27950 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27951 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27952 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27954 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
27955 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
27956 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
27957 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
27958 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
27959 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
27961 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27962 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27963 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27964 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27965 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27966 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27967 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27968 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27969 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27970 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27971 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27972 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27973 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27974 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27975 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27977 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27978 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27979 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27980 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
27981 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
27982 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
27983 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
27985 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27986 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27987 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27988 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
27989 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
27990 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
27991 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
27993 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27994 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27995 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27996 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
27997 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
27998 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
27999 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
28001 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28002 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28003 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28004 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
28005 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
28006 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
28007 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
28009 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
28010 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28011 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28012 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
28013 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
28014 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
28015 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
28017 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
28018 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28019 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28020 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
28021 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
28022 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
28023 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
28025 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28026 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28027 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28028 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
28029 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
28030 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
28031 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
28033 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28034 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28035 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28036 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
28037 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
28038 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
28039 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
28041 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28042 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28043 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28044 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28045 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28046 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28047 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28048 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28050 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28051 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28052 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28053 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28054 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28055 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28056 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28057 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28059 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
28060 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
28061 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
28062 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
28066 /* TM vector builtins. */
28068 /* Reuse the existing x86-specific `struct builtin_description' cause
28069 we're lazy. Add casts to make them fit. */
28070 static const struct builtin_description bdesc_tm
[] =
28072 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28073 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28074 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28075 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28076 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28077 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28078 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28080 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28081 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28082 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28083 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28084 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28085 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28086 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28088 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28089 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28090 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28091 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28092 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28093 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28094 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28096 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28097 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28098 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28101 /* TM callbacks. */
28103 /* Return the builtin decl needed to load a vector of TYPE. */
28106 ix86_builtin_tm_load (tree type
)
28108 if (TREE_CODE (type
) == VECTOR_TYPE
)
28110 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28113 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
28115 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
28117 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
28123 /* Return the builtin decl needed to store a vector of TYPE. */
28126 ix86_builtin_tm_store (tree type
)
28128 if (TREE_CODE (type
) == VECTOR_TYPE
)
28130 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28133 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
28135 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
28137 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
28143 /* Initialize the transactional memory vector load/store builtins. */
28146 ix86_init_tm_builtins (void)
28148 enum ix86_builtin_func_type ftype
;
28149 const struct builtin_description
*d
;
28152 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
28153 tree attrs_log
, attrs_type_log
;
28158 /* If there are no builtins defined, we must be compiling in a
28159 language without trans-mem support. */
28160 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
28163 /* Use whatever attributes a normal TM load has. */
28164 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
28165 attrs_load
= DECL_ATTRIBUTES (decl
);
28166 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28167 /* Use whatever attributes a normal TM store has. */
28168 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
28169 attrs_store
= DECL_ATTRIBUTES (decl
);
28170 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28171 /* Use whatever attributes a normal TM log has. */
28172 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
28173 attrs_log
= DECL_ATTRIBUTES (decl
);
28174 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28176 for (i
= 0, d
= bdesc_tm
;
28177 i
< ARRAY_SIZE (bdesc_tm
);
28180 if ((d
->mask
& ix86_isa_flags
) != 0
28181 || (lang_hooks
.builtin_function
28182 == lang_hooks
.builtin_function_ext_scope
))
28184 tree type
, attrs
, attrs_type
;
28185 enum built_in_function code
= (enum built_in_function
) d
->code
;
28187 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28188 type
= ix86_get_builtin_func_type (ftype
);
28190 if (BUILTIN_TM_LOAD_P (code
))
28192 attrs
= attrs_load
;
28193 attrs_type
= attrs_type_load
;
28195 else if (BUILTIN_TM_STORE_P (code
))
28197 attrs
= attrs_store
;
28198 attrs_type
= attrs_type_store
;
28203 attrs_type
= attrs_type_log
;
28205 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
28206 /* The builtin without the prefix for
28207 calling it directly. */
28208 d
->name
+ strlen ("__builtin_"),
28210 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28211 set the TYPE_ATTRIBUTES. */
28212 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
28214 set_builtin_decl (code
, decl
, false);
28219 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28220 in the current target ISA to allow the user to compile particular modules
28221 with different target specific options that differ from the command line
28224 ix86_init_mmx_sse_builtins (void)
28226 const struct builtin_description
* d
;
28227 enum ix86_builtin_func_type ftype
;
28230 /* Add all special builtins with variable number of operands. */
28231 for (i
= 0, d
= bdesc_special_args
;
28232 i
< ARRAY_SIZE (bdesc_special_args
);
28238 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28239 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
28242 /* Add all builtins with variable number of operands. */
28243 for (i
= 0, d
= bdesc_args
;
28244 i
< ARRAY_SIZE (bdesc_args
);
28250 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28251 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28254 /* pcmpestr[im] insns. */
28255 for (i
= 0, d
= bdesc_pcmpestr
;
28256 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28259 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28260 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
28262 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
28263 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28266 /* pcmpistr[im] insns. */
28267 for (i
= 0, d
= bdesc_pcmpistr
;
28268 i
< ARRAY_SIZE (bdesc_pcmpistr
);
28271 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28272 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
28274 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
28275 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28278 /* comi/ucomi insns. */
28279 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
28281 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
28282 ftype
= INT_FTYPE_V2DF_V2DF
;
28284 ftype
= INT_FTYPE_V4SF_V4SF
;
28285 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28289 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
28290 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
28291 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
28292 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
28294 /* SSE or 3DNow!A */
28295 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28296 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
28297 IX86_BUILTIN_MASKMOVQ
);
28300 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
28301 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
28303 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
28304 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
28305 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
28306 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
28309 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
28310 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
28311 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
28312 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
28315 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
28316 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
28317 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
28318 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
28319 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
28320 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
28321 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
28322 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
28323 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
28324 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
28325 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
28326 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
28329 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
28330 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
28333 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
28334 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
28335 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
28336 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
28337 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
28338 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
28339 IX86_BUILTIN_RDRAND64_STEP
);
28342 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
28343 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
28344 IX86_BUILTIN_GATHERSIV2DF
);
28346 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
28347 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
28348 IX86_BUILTIN_GATHERSIV4DF
);
28350 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
28351 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
28352 IX86_BUILTIN_GATHERDIV2DF
);
28354 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
28355 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
28356 IX86_BUILTIN_GATHERDIV4DF
);
28358 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
28359 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
28360 IX86_BUILTIN_GATHERSIV4SF
);
28362 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
28363 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
28364 IX86_BUILTIN_GATHERSIV8SF
);
28366 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
28367 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
28368 IX86_BUILTIN_GATHERDIV4SF
);
28370 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
28371 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
28372 IX86_BUILTIN_GATHERDIV8SF
);
28374 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
28375 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
28376 IX86_BUILTIN_GATHERSIV2DI
);
28378 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
28379 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
28380 IX86_BUILTIN_GATHERSIV4DI
);
28382 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
28383 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
28384 IX86_BUILTIN_GATHERDIV2DI
);
28386 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
28387 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
28388 IX86_BUILTIN_GATHERDIV4DI
);
28390 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
28391 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
28392 IX86_BUILTIN_GATHERSIV4SI
);
28394 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
28395 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
28396 IX86_BUILTIN_GATHERSIV8SI
);
28398 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
28399 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
28400 IX86_BUILTIN_GATHERDIV4SI
);
28402 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
28403 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
28404 IX86_BUILTIN_GATHERDIV8SI
);
28406 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
28407 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
28408 IX86_BUILTIN_GATHERALTSIV4DF
);
28410 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
28411 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
28412 IX86_BUILTIN_GATHERALTDIV8SF
);
28414 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
28415 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
28416 IX86_BUILTIN_GATHERALTSIV4DI
);
28418 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
28419 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
28420 IX86_BUILTIN_GATHERALTDIV8SI
);
28423 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
28424 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
28426 /* MMX access to the vec_init patterns. */
28427 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
28428 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
28430 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
28431 V4HI_FTYPE_HI_HI_HI_HI
,
28432 IX86_BUILTIN_VEC_INIT_V4HI
);
28434 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
28435 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
28436 IX86_BUILTIN_VEC_INIT_V8QI
);
28438 /* Access to the vec_extract patterns. */
28439 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
28440 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
28441 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
28442 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
28443 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
28444 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
28445 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
28446 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
28447 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
28448 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
28450 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28451 "__builtin_ia32_vec_ext_v4hi",
28452 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
28454 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
28455 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
28457 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
28458 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
28460 /* Access to the vec_set patterns. */
28461 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
28462 "__builtin_ia32_vec_set_v2di",
28463 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
28465 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
28466 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
28468 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
28469 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
28471 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
28472 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
28474 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28475 "__builtin_ia32_vec_set_v4hi",
28476 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
28478 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
28479 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
28482 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
28483 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
28484 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
28485 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
28486 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
28487 "__builtin_ia32_rdseed_di_step",
28488 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
28491 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28492 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
28493 def_builtin (OPTION_MASK_ISA_64BIT
,
28494 "__builtin_ia32_addcarryx_u64",
28495 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
28496 IX86_BUILTIN_ADDCARRYX64
);
28498 /* Add FMA4 multi-arg argument instructions */
28499 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
28504 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28505 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28509 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
28510 to return a pointer to VERSION_DECL if the outcome of the expression
28511 formed by PREDICATE_CHAIN is true. This function will be called during
28512 version dispatch to decide which function version to execute. It returns
28513 the basic block at the end, to which more conditions can be added. */
28516 add_condition_to_bb (tree function_decl
, tree version_decl
,
28517 tree predicate_chain
, basic_block new_bb
)
28519 gimple return_stmt
;
28520 tree convert_expr
, result_var
;
28521 gimple convert_stmt
;
28522 gimple call_cond_stmt
;
28523 gimple if_else_stmt
;
28525 basic_block bb1
, bb2
, bb3
;
28528 tree cond_var
, and_expr_var
= NULL_TREE
;
28531 tree predicate_decl
, predicate_arg
;
28533 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
28535 gcc_assert (new_bb
!= NULL
);
28536 gseq
= bb_seq (new_bb
);
28539 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
28540 build_fold_addr_expr (version_decl
));
28541 result_var
= create_tmp_var (ptr_type_node
, NULL
);
28542 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
28543 return_stmt
= gimple_build_return (result_var
);
28545 if (predicate_chain
== NULL_TREE
)
28547 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28548 gimple_seq_add_stmt (&gseq
, return_stmt
);
28549 set_bb_seq (new_bb
, gseq
);
28550 gimple_set_bb (convert_stmt
, new_bb
);
28551 gimple_set_bb (return_stmt
, new_bb
);
28556 while (predicate_chain
!= NULL
)
28558 cond_var
= create_tmp_var (integer_type_node
, NULL
);
28559 predicate_decl
= TREE_PURPOSE (predicate_chain
);
28560 predicate_arg
= TREE_VALUE (predicate_chain
);
28561 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
28562 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
28564 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
28565 gimple_set_bb (call_cond_stmt
, new_bb
);
28566 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
28568 predicate_chain
= TREE_CHAIN (predicate_chain
);
28570 if (and_expr_var
== NULL
)
28571 and_expr_var
= cond_var
;
28574 gimple assign_stmt
;
28575 /* Use MIN_EXPR to check if any integer is zero?.
28576 and_expr_var = min_expr <cond_var, and_expr_var> */
28577 assign_stmt
= gimple_build_assign (and_expr_var
,
28578 build2 (MIN_EXPR
, integer_type_node
,
28579 cond_var
, and_expr_var
));
28581 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
28582 gimple_set_bb (assign_stmt
, new_bb
);
28583 gimple_seq_add_stmt (&gseq
, assign_stmt
);
28587 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
28589 NULL_TREE
, NULL_TREE
);
28590 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
28591 gimple_set_bb (if_else_stmt
, new_bb
);
28592 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
28594 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28595 gimple_seq_add_stmt (&gseq
, return_stmt
);
28596 set_bb_seq (new_bb
, gseq
);
28599 e12
= split_block (bb1
, if_else_stmt
);
28601 e12
->flags
&= ~EDGE_FALLTHRU
;
28602 e12
->flags
|= EDGE_TRUE_VALUE
;
28604 e23
= split_block (bb2
, return_stmt
);
28606 gimple_set_bb (convert_stmt
, bb2
);
28607 gimple_set_bb (return_stmt
, bb2
);
28610 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
28613 make_edge (bb2
, EXIT_BLOCK_PTR
, 0);
28620 /* This parses the attribute arguments to target in DECL and determines
28621 the right builtin to use to match the platform specification.
28622 It returns the priority value for this version decl. If PREDICATE_LIST
28623 is not NULL, it stores the list of cpu features that need to be checked
28624 before dispatching this function. */
28626 static unsigned int
28627 get_builtin_code_for_version (tree decl
, tree
*predicate_list
)
28630 struct cl_target_option cur_target
;
28632 struct cl_target_option
*new_target
;
28633 const char *arg_str
= NULL
;
28634 const char *attrs_str
= NULL
;
28635 char *tok_str
= NULL
;
28638 /* Priority of i386 features, greater value is higher priority. This is
28639 used to decide the order in which function dispatch must happen. For
28640 instance, a version specialized for SSE4.2 should be checked for dispatch
28641 before a version for SSE3, as SSE4.2 implies SSE3. */
28642 enum feature_priority
28663 enum feature_priority priority
= P_ZERO
;
28665 /* These are the target attribute strings for which a dispatcher is
28666 available, from fold_builtin_cpu. */
28668 static struct _feature_list
28670 const char *const name
;
28671 const enum feature_priority priority
;
28673 const feature_list
[] =
28679 {"ssse3", P_SSSE3
},
28680 {"sse4.1", P_SSE4_1
},
28681 {"sse4.2", P_SSE4_2
},
28682 {"popcnt", P_POPCNT
},
28688 static unsigned int NUM_FEATURES
28689 = sizeof (feature_list
) / sizeof (struct _feature_list
);
28693 tree predicate_chain
= NULL_TREE
;
28694 tree predicate_decl
, predicate_arg
;
28696 attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
28697 gcc_assert (attrs
!= NULL
);
28699 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
28701 gcc_assert (TREE_CODE (attrs
) == STRING_CST
);
28702 attrs_str
= TREE_STRING_POINTER (attrs
);
28705 /* Handle arch= if specified. For priority, set it to be 1 more than
28706 the best instruction set the processor can handle. For instance, if
28707 there is a version for atom and a version for ssse3 (the highest ISA
28708 priority for atom), the atom version must be checked for dispatch
28709 before the ssse3 version. */
28710 if (strstr (attrs_str
, "arch=") != NULL
)
28712 cl_target_option_save (&cur_target
, &global_options
);
28713 target_node
= ix86_valid_target_attribute_tree (attrs
);
28715 gcc_assert (target_node
);
28716 new_target
= TREE_TARGET_OPTION (target_node
);
28717 gcc_assert (new_target
);
28719 if (new_target
->arch_specified
&& new_target
->arch
> 0)
28721 switch (new_target
->arch
)
28723 case PROCESSOR_CORE2
:
28725 priority
= P_PROC_SSSE3
;
28727 case PROCESSOR_COREI7
:
28728 arg_str
= "corei7";
28729 priority
= P_PROC_SSE4_2
;
28731 case PROCESSOR_ATOM
:
28733 priority
= P_PROC_SSSE3
;
28735 case PROCESSOR_AMDFAM10
:
28736 arg_str
= "amdfam10h";
28737 priority
= P_PROC_SSE4_a
;
28739 case PROCESSOR_BDVER1
:
28740 arg_str
= "bdver1";
28741 priority
= P_PROC_FMA
;
28743 case PROCESSOR_BDVER2
:
28744 arg_str
= "bdver2";
28745 priority
= P_PROC_FMA
;
28750 cl_target_option_restore (&global_options
, &cur_target
);
28752 if (predicate_list
&& arg_str
== NULL
)
28754 error_at (DECL_SOURCE_LOCATION (decl
),
28755 "No dispatcher found for the versioning attributes");
28759 if (predicate_list
)
28761 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_IS
];
28762 /* For a C string literal the length includes the trailing NULL. */
28763 predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
28764 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28769 /* Process feature name. */
28770 tok_str
= (char *) xmalloc (strlen (attrs_str
) + 1);
28771 strcpy (tok_str
, attrs_str
);
28772 token
= strtok (tok_str
, ",");
28773 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_SUPPORTS
];
28775 while (token
!= NULL
)
28777 /* Do not process "arch=" */
28778 if (strncmp (token
, "arch=", 5) == 0)
28780 token
= strtok (NULL
, ",");
28783 for (i
= 0; i
< NUM_FEATURES
; ++i
)
28785 if (strcmp (token
, feature_list
[i
].name
) == 0)
28787 if (predicate_list
)
28789 predicate_arg
= build_string_literal (
28790 strlen (feature_list
[i
].name
) + 1,
28791 feature_list
[i
].name
);
28792 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28795 /* Find the maximum priority feature. */
28796 if (feature_list
[i
].priority
> priority
)
28797 priority
= feature_list
[i
].priority
;
28802 if (predicate_list
&& i
== NUM_FEATURES
)
28804 error_at (DECL_SOURCE_LOCATION (decl
),
28805 "No dispatcher found for %s", token
);
28808 token
= strtok (NULL
, ",");
28812 if (predicate_list
&& predicate_chain
== NULL_TREE
)
28814 error_at (DECL_SOURCE_LOCATION (decl
),
28815 "No dispatcher found for the versioning attributes : %s",
28819 else if (predicate_list
)
28821 predicate_chain
= nreverse (predicate_chain
);
28822 *predicate_list
= predicate_chain
;
28828 /* This compares the priority of target features in function DECL1
28829 and DECL2. It returns positive value if DECL1 is higher priority,
28830 negative value if DECL2 is higher priority and 0 if they are the
28834 ix86_compare_version_priority (tree decl1
, tree decl2
)
28836 unsigned int priority1
= 0;
28837 unsigned int priority2
= 0;
28839 if (lookup_attribute ("target", DECL_ATTRIBUTES (decl1
)) != NULL
)
28840 priority1
= get_builtin_code_for_version (decl1
, NULL
);
28842 if (lookup_attribute ("target", DECL_ATTRIBUTES (decl2
)) != NULL
)
28843 priority2
= get_builtin_code_for_version (decl2
, NULL
);
28845 return (int)priority1
- (int)priority2
;
28848 /* V1 and V2 point to function versions with different priorities
28849 based on the target ISA. This function compares their priorities. */
28852 feature_compare (const void *v1
, const void *v2
)
28854 typedef struct _function_version_info
28857 tree predicate_chain
;
28858 unsigned int dispatch_priority
;
28859 } function_version_info
;
28861 const function_version_info c1
= *(const function_version_info
*)v1
;
28862 const function_version_info c2
= *(const function_version_info
*)v2
;
28863 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
28866 /* This function generates the dispatch function for
28867 multi-versioned functions. DISPATCH_DECL is the function which will
28868 contain the dispatch logic. FNDECLS are the function choices for
28869 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
28870 in DISPATCH_DECL in which the dispatch code is generated. */
28873 dispatch_function_versions (tree dispatch_decl
,
28875 basic_block
*empty_bb
)
28878 gimple ifunc_cpu_init_stmt
;
28882 vec
<tree
> *fndecls
;
28883 unsigned int num_versions
= 0;
28884 unsigned int actual_versions
= 0;
28887 struct _function_version_info
28890 tree predicate_chain
;
28891 unsigned int dispatch_priority
;
28892 }*function_version_info
;
28894 gcc_assert (dispatch_decl
!= NULL
28895 && fndecls_p
!= NULL
28896 && empty_bb
!= NULL
);
28898 /*fndecls_p is actually a vector. */
28899 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
28901 /* At least one more version other than the default. */
28902 num_versions
= fndecls
->length ();
28903 gcc_assert (num_versions
>= 2);
28905 function_version_info
= (struct _function_version_info
*)
28906 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
28908 /* The first version in the vector is the default decl. */
28909 default_decl
= (*fndecls
)[0];
28911 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
28913 gseq
= bb_seq (*empty_bb
);
28914 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
28915 constructors, so explicity call __builtin_cpu_init here. */
28916 ifunc_cpu_init_stmt
= gimple_build_call_vec (
28917 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vNULL
);
28918 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
28919 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
28920 set_bb_seq (*empty_bb
, gseq
);
28925 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
28927 tree version_decl
= ele
;
28928 tree predicate_chain
= NULL_TREE
;
28929 unsigned int priority
;
28930 /* Get attribute string, parse it and find the right predicate decl.
28931 The predicate function could be a lengthy combination of many
28932 features, like arch-type and various isa-variants. */
28933 priority
= get_builtin_code_for_version (version_decl
,
28936 if (predicate_chain
== NULL_TREE
)
28940 function_version_info
[ix
- 1].version_decl
= version_decl
;
28941 function_version_info
[ix
- 1].predicate_chain
= predicate_chain
;
28942 function_version_info
[ix
- 1].dispatch_priority
= priority
;
28945 /* Sort the versions according to descending order of dispatch priority. The
28946 priority is based on the ISA. This is not a perfect solution. There
28947 could still be ambiguity. If more than one function version is suitable
28948 to execute, which one should be dispatched? In future, allow the user
28949 to specify a dispatch priority next to the version. */
28950 qsort (function_version_info
, actual_versions
,
28951 sizeof (struct _function_version_info
), feature_compare
);
28953 for (i
= 0; i
< actual_versions
; ++i
)
28954 *empty_bb
= add_condition_to_bb (dispatch_decl
,
28955 function_version_info
[i
].version_decl
,
28956 function_version_info
[i
].predicate_chain
,
28959 /* dispatch default version at the end. */
28960 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
28963 free (function_version_info
);
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  Arguments are pointers into an
   array of (char *), as handed out by qsort.  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  /* Each element of the sorted array is a char *; dereference once to
     get at the actual strings.  */
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
28978 /* ARGLIST is the argument to target attribute. This function tokenizes
28979 the comma separated arguments, sorts them and returns a string which
28980 is a unique identifier for the comma separated arguments. It also
28981 replaces non-identifier characters "=,-" with "_". */
28984 sorted_attr_string (tree arglist
)
28987 size_t str_len_sum
= 0;
28988 char **args
= NULL
;
28989 char *attr_str
, *ret_str
;
28991 unsigned int argnum
= 1;
28994 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
28996 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
28997 size_t len
= strlen (str
);
28998 str_len_sum
+= len
+ 1;
28999 if (arg
!= arglist
)
29001 for (i
= 0; i
< strlen (str
); i
++)
29006 attr_str
= XNEWVEC (char, str_len_sum
);
29008 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
29010 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
29011 size_t len
= strlen (str
);
29012 memcpy (attr_str
+ str_len_sum
, str
, len
);
29013 attr_str
[str_len_sum
+ len
] = TREE_CHAIN (arg
) ? ',' : '\0';
29014 str_len_sum
+= len
+ 1;
29017 /* Replace "=,-" with "_". */
29018 for (i
= 0; i
< strlen (attr_str
); i
++)
29019 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
29025 args
= XNEWVEC (char *, argnum
);
29028 attr
= strtok (attr_str
, ",");
29029 while (attr
!= NULL
)
29033 attr
= strtok (NULL
, ",");
29036 qsort (args
, argnum
, sizeof (char *), attr_strcmp
);
29038 ret_str
= XNEWVEC (char, str_len_sum
);
29040 for (i
= 0; i
< argnum
; i
++)
29042 size_t len
= strlen (args
[i
]);
29043 memcpy (ret_str
+ str_len_sum
, args
[i
], len
);
29044 ret_str
[str_len_sum
+ len
] = i
< argnum
- 1 ? '_' : '\0';
29045 str_len_sum
+= len
+ 1;
29049 XDELETEVEC (attr_str
);
29053 /* This function changes the assembler name for functions that are
29054 versions. If DECL is a function version and has a "target"
29055 attribute, it appends the attribute string to its assembler name. */
29058 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
29061 const char *orig_name
, *version_string
;
29062 char *attr_str
, *assembler_name
;
29064 if (DECL_DECLARED_INLINE_P (decl
)
29065 && lookup_attribute ("gnu_inline",
29066 DECL_ATTRIBUTES (decl
)))
29067 error_at (DECL_SOURCE_LOCATION (decl
),
29068 "Function versions cannot be marked as gnu_inline,"
29069 " bodies have to be generated");
29071 if (DECL_VIRTUAL_P (decl
)
29072 || DECL_VINDEX (decl
))
29073 error_at (DECL_SOURCE_LOCATION (decl
),
29074 "Virtual function versioning not supported\n");
29076 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29078 /* target attribute string is NULL for default functions. */
29079 if (version_attr
== NULL_TREE
)
29082 orig_name
= IDENTIFIER_POINTER (id
);
29084 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
29086 if (strcmp (version_string
, "default") == 0)
29089 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
29090 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
29092 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
29094 /* Allow assembler name to be modified if already set. */
29095 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
29096 SET_DECL_RTL (decl
, NULL
);
29098 tree ret
= get_identifier (assembler_name
);
29099 XDELETEVEC (attr_str
);
29100 XDELETEVEC (assembler_name
);
29104 /* This function returns true if FN1 and FN2 are versions of the same function,
29105 that is, the target strings of the function decls are different. This assumes
29106 that FN1 and FN2 have the same signature. */
29109 ix86_function_versions (tree fn1
, tree fn2
)
29112 char *target1
, *target2
;
29115 if (TREE_CODE (fn1
) != FUNCTION_DECL
29116 || TREE_CODE (fn2
) != FUNCTION_DECL
)
29119 attr1
= lookup_attribute ("target", DECL_ATTRIBUTES (fn1
));
29120 attr2
= lookup_attribute ("target", DECL_ATTRIBUTES (fn2
));
29122 /* At least one function decl should have the target attribute specified. */
29123 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
29126 /* Diagnose missing target attribute if one of the decls is already
29127 multi-versioned. */
29128 if (attr1
== NULL_TREE
|| attr2
== NULL_TREE
)
29130 if (DECL_FUNCTION_VERSIONED (fn1
) || DECL_FUNCTION_VERSIONED (fn2
))
29132 if (attr2
!= NULL_TREE
)
29139 error_at (DECL_SOURCE_LOCATION (fn2
),
29140 "missing %<target%> attribute for multi-versioned %D",
29142 error_at (DECL_SOURCE_LOCATION (fn1
),
29143 "previous declaration of %D", fn1
);
29144 /* Prevent diagnosing of the same error multiple times. */
29145 DECL_ATTRIBUTES (fn2
)
29146 = tree_cons (get_identifier ("target"),
29147 copy_node (TREE_VALUE (attr1
)),
29148 DECL_ATTRIBUTES (fn2
));
29153 target1
= sorted_attr_string (TREE_VALUE (attr1
));
29154 target2
= sorted_attr_string (TREE_VALUE (attr2
));
29156 /* The sorted target strings must be different for fn1 and fn2
29158 if (strcmp (target1
, target2
) == 0)
29163 XDELETEVEC (target1
);
29164 XDELETEVEC (target2
);
29170 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
29172 /* For function version, add the target suffix to the assembler name. */
29173 if (TREE_CODE (decl
) == FUNCTION_DECL
29174 && DECL_FUNCTION_VERSIONED (decl
))
29175 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
29176 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
29177 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
29183 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
29184 is true, append the full path name of the source file. */
29187 make_name (tree decl
, const char *suffix
, bool make_unique
)
29189 char *global_var_name
;
29192 const char *unique_name
= NULL
;
29194 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
29196 /* Get a unique name that can be used globally without any chances
29197 of collision at link time. */
29199 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
29201 name_len
= strlen (name
) + strlen (suffix
) + 2;
29204 name_len
+= strlen (unique_name
) + 1;
29205 global_var_name
= XNEWVEC (char, name_len
);
29207 /* Use '.' to concatenate names as it is demangler friendly. */
29209 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
, unique_name
,
29212 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
29214 return global_var_name
;
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  /* Non-public versions need a unique (file-qualified) dispatcher name.  */
  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
				   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  XDELETEVEC (func_name);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* This will be of type IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif
29257 /* Returns true if decl is multi-versioned and DECL is the default function,
29258 that is it is not tagged with target specific optimization. */
29261 is_function_default_version (const tree decl
)
29263 if (TREE_CODE (decl
) != FUNCTION_DECL
29264 || !DECL_FUNCTION_VERSIONED (decl
))
29266 tree attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29268 attr
= TREE_VALUE (TREE_VALUE (attr
));
29269 return (TREE_CODE (attr
) == STRING_CST
29270 && strcmp (TREE_STRING_POINTER (attr
), "default") == 0);
29273 /* Make a dispatcher declaration for the multi-versioned function DECL.
29274 Calls to DECL function will be replaced with calls to the dispatcher
29275 by the front-end. Returns the decl of the dispatcher function. */
29278 ix86_get_function_versions_dispatcher (void *decl
)
29280 tree fn
= (tree
) decl
;
29281 struct cgraph_node
*node
= NULL
;
29282 struct cgraph_node
*default_node
= NULL
;
29283 struct cgraph_function_version_info
*node_v
= NULL
;
29284 struct cgraph_function_version_info
*first_v
= NULL
;
29286 tree dispatch_decl
= NULL
;
29288 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29289 struct cgraph_function_version_info
*it_v
= NULL
;
29290 struct cgraph_node
*dispatcher_node
= NULL
;
29291 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
29294 struct cgraph_function_version_info
*default_version_info
= NULL
;
29296 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
29298 node
= cgraph_get_node (fn
);
29299 gcc_assert (node
!= NULL
);
29301 node_v
= get_cgraph_node_version (node
);
29302 gcc_assert (node_v
!= NULL
);
29304 if (node_v
->dispatcher_resolver
!= NULL
)
29305 return node_v
->dispatcher_resolver
;
29307 /* Find the default version and make it the first node. */
29309 /* Go to the beginnig of the chain. */
29310 while (first_v
->prev
!= NULL
)
29311 first_v
= first_v
->prev
;
29312 default_version_info
= first_v
;
29313 while (default_version_info
!= NULL
)
29315 if (is_function_default_version
29316 (default_version_info
->this_node
->symbol
.decl
))
29318 default_version_info
= default_version_info
->next
;
29321 /* If there is no default node, just return NULL. */
29322 if (default_version_info
== NULL
)
29325 /* Make default info the first node. */
29326 if (first_v
!= default_version_info
)
29328 default_version_info
->prev
->next
= default_version_info
->next
;
29329 if (default_version_info
->next
)
29330 default_version_info
->next
->prev
= default_version_info
->prev
;
29331 first_v
->prev
= default_version_info
;
29332 default_version_info
->next
= first_v
;
29333 default_version_info
->prev
= NULL
;
29336 default_node
= default_version_info
->this_node
;
29338 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29339 /* Right now, the dispatching is done via ifunc. */
29340 dispatch_decl
= make_dispatcher_decl (default_node
->symbol
.decl
);
29342 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
29343 gcc_assert (dispatcher_node
!= NULL
);
29344 dispatcher_node
->dispatcher_function
= 1;
29345 dispatcher_version_info
29346 = insert_new_cgraph_node_version (dispatcher_node
);
29347 dispatcher_version_info
->next
= default_version_info
;
29348 dispatcher_node
->local
.finalized
= 1;
29350 /* Set the dispatcher for all the versions. */
29351 it_v
= default_version_info
;
29352 while (it_v
!= NULL
)
29354 it_v
->dispatcher_resolver
= dispatch_decl
;
29358 error_at (DECL_SOURCE_LOCATION (default_node
->symbol
.decl
),
29359 "multiversioning needs ifunc which is not supported "
29360 "in this configuration");
29362 return dispatch_decl
;
29365 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
29369 make_attribute (const char *name
, const char *arg_name
, tree chain
)
29372 tree attr_arg_name
;
29376 attr_name
= get_identifier (name
);
29377 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
29378 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
29379 attr
= tree_cons (attr_name
, attr_args
, chain
);
29383 /* Make the resolver function decl to dispatch the versions of
29384 a multi-versioned function, DEFAULT_DECL. Create an
29385 empty basic block in the resolver and store the pointer in
29386 EMPTY_BB. Return the decl of the resolver function. */
29389 make_resolver_func (const tree default_decl
,
29390 const tree dispatch_decl
,
29391 basic_block
*empty_bb
)
29393 char *resolver_name
;
29394 tree decl
, type
, decl_name
, t
;
29395 bool is_uniq
= false;
29397 /* IFUNC's have to be globally visible. So, if the default_decl is
29398 not, then the name of the IFUNC should be made unique. */
29399 if (TREE_PUBLIC (default_decl
) == 0)
29402 /* Append the filename to the resolver function if the versions are
29403 not externally visible. This is because the resolver function has
29404 to be externally visible for the loader to find it. So, appending
29405 the filename will prevent conflicts with a resolver function from
29406 another module which is based on the same version name. */
29407 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
29409 /* The resolver function should return a (void *). */
29410 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
29412 decl
= build_fn_decl (resolver_name
, type
);
29413 decl_name
= get_identifier (resolver_name
);
29414 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
29416 DECL_NAME (decl
) = decl_name
;
29417 TREE_USED (decl
) = 1;
29418 DECL_ARTIFICIAL (decl
) = 1;
29419 DECL_IGNORED_P (decl
) = 0;
29420 /* IFUNC resolvers have to be externally visible. */
29421 TREE_PUBLIC (decl
) = 1;
29422 DECL_UNINLINABLE (decl
) = 0;
29424 /* Resolver is not external, body is generated. */
29425 DECL_EXTERNAL (decl
) = 0;
29426 DECL_EXTERNAL (dispatch_decl
) = 0;
29428 DECL_CONTEXT (decl
) = NULL_TREE
;
29429 DECL_INITIAL (decl
) = make_node (BLOCK
);
29430 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
29432 if (DECL_COMDAT_GROUP (default_decl
)
29433 || TREE_PUBLIC (default_decl
))
29435 /* In this case, each translation unit with a call to this
29436 versioned function will put out a resolver. Ensure it
29437 is comdat to keep just one copy. */
29438 DECL_COMDAT (decl
) = 1;
29439 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
29441 /* Build result decl and add to function_decl. */
29442 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
29443 DECL_ARTIFICIAL (t
) = 1;
29444 DECL_IGNORED_P (t
) = 1;
29445 DECL_RESULT (decl
) = t
;
29447 gimplify_function_tree (decl
);
29448 push_cfun (DECL_STRUCT_FUNCTION (decl
));
29449 *empty_bb
= init_lowered_empty_function (decl
, false);
29451 cgraph_add_new_function (decl
, true);
29452 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
29456 gcc_assert (dispatch_decl
!= NULL
);
29457 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
29458 DECL_ATTRIBUTES (dispatch_decl
)
29459 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
29461 /* Create the alias for dispatch to resolver here. */
29462 /*cgraph_create_function_alias (dispatch_decl, decl);*/
29463 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
29464 XDELETEVEC (resolver_name
);
29468 /* Generate the dispatching code body to dispatch multi-versioned function
29469 DECL. The target hook is called to process the "target" attributes and
29470 provide the code to dispatch the right function at run-time. NODE points
29471 to the dispatcher decl whose body will be created. */
29474 ix86_generate_version_dispatcher_body (void *node_p
)
29476 tree resolver_decl
;
29477 basic_block empty_bb
;
29478 vec
<tree
> fn_ver_vec
= vNULL
;
29479 tree default_ver_decl
;
29480 struct cgraph_node
*versn
;
29481 struct cgraph_node
*node
;
29483 struct cgraph_function_version_info
*node_version_info
= NULL
;
29484 struct cgraph_function_version_info
*versn_info
= NULL
;
29486 node
= (cgraph_node
*)node_p
;
29488 node_version_info
= get_cgraph_node_version (node
);
29489 gcc_assert (node
->dispatcher_function
29490 && node_version_info
!= NULL
);
29492 if (node_version_info
->dispatcher_resolver
)
29493 return node_version_info
->dispatcher_resolver
;
29495 /* The first version in the chain corresponds to the default version. */
29496 default_ver_decl
= node_version_info
->next
->this_node
->symbol
.decl
;
29498 /* node is going to be an alias, so remove the finalized bit. */
29499 node
->local
.finalized
= false;
29501 resolver_decl
= make_resolver_func (default_ver_decl
,
29502 node
->symbol
.decl
, &empty_bb
);
29504 node_version_info
->dispatcher_resolver
= resolver_decl
;
29506 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
29508 fn_ver_vec
.create (2);
29510 for (versn_info
= node_version_info
->next
; versn_info
;
29511 versn_info
= versn_info
->next
)
29513 versn
= versn_info
->this_node
;
29514 /* Check for virtual functions here again, as by this time it should
29515 have been determined if this function needs a vtable index or
29516 not. This happens for methods in derived classes that override
29517 virtual methods in base classes but are not explicitly marked as
29519 if (DECL_VINDEX (versn
->symbol
.decl
))
29520 error_at (DECL_SOURCE_LOCATION (versn
->symbol
.decl
),
29521 "Virtual function multiversioning not supported");
29522 fn_ver_vec
.safe_push (versn
->symbol
.decl
);
29525 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
29526 fn_ver_vec
.release ();
29527 rebuild_cgraph_edges ();
29529 return resolver_decl
;
29531 /* This builds the processor_model struct type defined in
29532 libgcc/config/i386/cpuinfo.c */
29535 build_processor_model_struct (void)
29537 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
29539 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
29541 tree type
= make_node (RECORD_TYPE
);
29543 /* The first 3 fields are unsigned int. */
29544 for (i
= 0; i
< 3; ++i
)
29546 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29547 get_identifier (field_name
[i
]), unsigned_type_node
);
29548 if (field_chain
!= NULL_TREE
)
29549 DECL_CHAIN (field
) = field_chain
;
29550 field_chain
= field
;
29553 /* The last field is an array of unsigned integers of size one. */
29554 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29555 get_identifier (field_name
[3]),
29556 build_array_type (unsigned_type_node
,
29557 build_index_type (size_one_node
)));
29558 if (field_chain
!= NULL_TREE
)
29559 DECL_CHAIN (field
) = field_chain
;
29560 field_chain
= field
;
29562 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
29566 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
29569 make_var_decl (tree type
, const char *name
)
29573 new_decl
= build_decl (UNKNOWN_LOCATION
,
29575 get_identifier(name
),
29578 DECL_EXTERNAL (new_decl
) = 1;
29579 TREE_STATIC (new_decl
) = 1;
29580 TREE_PUBLIC (new_decl
) = 1;
29581 DECL_INITIAL (new_decl
) = 0;
29582 DECL_ARTIFICIAL (new_decl
) = 0;
29583 DECL_PRESERVE_P (new_decl
) = 1;
29585 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
29586 assemble_variable (new_decl
, 0, 0, 0);
29591 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
29592 into an integer defined in libgcc/config/i386/cpuinfo.c */
29595 fold_builtin_cpu (tree fndecl
, tree
*args
)
29598 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29599 DECL_FUNCTION_CODE (fndecl
);
29600 tree param_string_cst
= NULL
;
29602 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
29603 enum processor_features
29619 /* These are the values for vendor types and cpu types and subtypes
29620 in cpuinfo.c. Cpu types and subtypes should be subtracted by
29621 the corresponding start value. */
29622 enum processor_model
29632 M_CPU_SUBTYPE_START
,
29633 M_INTEL_COREI7_NEHALEM
,
29634 M_INTEL_COREI7_WESTMERE
,
29635 M_INTEL_COREI7_SANDYBRIDGE
,
29636 M_AMDFAM10H_BARCELONA
,
29637 M_AMDFAM10H_SHANGHAI
,
29638 M_AMDFAM10H_ISTANBUL
,
29639 M_AMDFAM15H_BDVER1
,
29640 M_AMDFAM15H_BDVER2
,
29644 static struct _arch_names_table
29646 const char *const name
;
29647 const enum processor_model model
;
29649 const arch_names_table
[] =
29652 {"intel", M_INTEL
},
29653 {"atom", M_INTEL_ATOM
},
29654 {"core2", M_INTEL_CORE2
},
29655 {"corei7", M_INTEL_COREI7
},
29656 {"nehalem", M_INTEL_COREI7_NEHALEM
},
29657 {"westmere", M_INTEL_COREI7_WESTMERE
},
29658 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
29659 {"amdfam10h", M_AMDFAM10H
},
29660 {"barcelona", M_AMDFAM10H_BARCELONA
},
29661 {"shanghai", M_AMDFAM10H_SHANGHAI
},
29662 {"istanbul", M_AMDFAM10H_ISTANBUL
},
29663 {"amdfam15h", M_AMDFAM15H
},
29664 {"bdver1", M_AMDFAM15H_BDVER1
},
29665 {"bdver2", M_AMDFAM15H_BDVER2
},
29666 {"bdver3", M_AMDFAM15H_BDVER3
},
29669 static struct _isa_names_table
29671 const char *const name
;
29672 const enum processor_features feature
;
29674 const isa_names_table
[] =
29678 {"popcnt", F_POPCNT
},
29682 {"ssse3", F_SSSE3
},
29683 {"sse4.1", F_SSE4_1
},
29684 {"sse4.2", F_SSE4_2
},
29689 tree __processor_model_type
= build_processor_model_struct ();
29690 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
29693 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
29695 param_string_cst
= *args
;
29696 while (param_string_cst
29697 && TREE_CODE (param_string_cst
) != STRING_CST
)
29699 /* *args must be a expr that can contain other EXPRS leading to a
29701 if (!EXPR_P (param_string_cst
))
29703 error ("Parameter to builtin must be a string constant or literal");
29704 return integer_zero_node
;
29706 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
29709 gcc_assert (param_string_cst
);
29711 if (fn_code
== IX86_BUILTIN_CPU_IS
)
29717 unsigned int field_val
= 0;
29718 unsigned int NUM_ARCH_NAMES
29719 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
29721 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
29722 if (strcmp (arch_names_table
[i
].name
,
29723 TREE_STRING_POINTER (param_string_cst
)) == 0)
29726 if (i
== NUM_ARCH_NAMES
)
29728 error ("Parameter to builtin not valid: %s",
29729 TREE_STRING_POINTER (param_string_cst
));
29730 return integer_zero_node
;
29733 field
= TYPE_FIELDS (__processor_model_type
);
29734 field_val
= arch_names_table
[i
].model
;
29736 /* CPU types are stored in the next field. */
29737 if (field_val
> M_CPU_TYPE_START
29738 && field_val
< M_CPU_SUBTYPE_START
)
29740 field
= DECL_CHAIN (field
);
29741 field_val
-= M_CPU_TYPE_START
;
29744 /* CPU subtypes are stored in the next field. */
29745 if (field_val
> M_CPU_SUBTYPE_START
)
29747 field
= DECL_CHAIN ( DECL_CHAIN (field
));
29748 field_val
-= M_CPU_SUBTYPE_START
;
29751 /* Get the appropriate field in __cpu_model. */
29752 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29755 /* Check the value. */
29756 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
29757 build_int_cstu (unsigned_type_node
, field_val
));
29758 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29760 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29767 unsigned int field_val
= 0;
29768 unsigned int NUM_ISA_NAMES
29769 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
29771 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
29772 if (strcmp (isa_names_table
[i
].name
,
29773 TREE_STRING_POINTER (param_string_cst
)) == 0)
29776 if (i
== NUM_ISA_NAMES
)
29778 error ("Parameter to builtin not valid: %s",
29779 TREE_STRING_POINTER (param_string_cst
));
29780 return integer_zero_node
;
29783 field
= TYPE_FIELDS (__processor_model_type
);
29784 /* Get the last field, which is __cpu_features. */
29785 while (DECL_CHAIN (field
))
29786 field
= DECL_CHAIN (field
);
29788 /* Get the appropriate field: __cpu_model.__cpu_features */
29789 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29792 /* Access the 0th element of __cpu_features array. */
29793 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
29794 integer_zero_node
, NULL_TREE
, NULL_TREE
);
29796 field_val
= (1 << isa_names_table
[i
].feature
);
29797 /* Return __cpu_model.__cpu_features[0] & field_val */
29798 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
29799 build_int_cstu (unsigned_type_node
, field_val
));
29800 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29802 gcc_unreachable ();
29806 ix86_fold_builtin (tree fndecl
, int n_args
,
29807 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
29809 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
29811 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29812 DECL_FUNCTION_CODE (fndecl
);
29813 if (fn_code
== IX86_BUILTIN_CPU_IS
29814 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29816 gcc_assert (n_args
== 1);
29817 return fold_builtin_cpu (fndecl
, args
);
29821 #ifdef SUBTARGET_FOLD_BUILTIN
29822 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
29828 /* Make builtins to detect cpu type and features supported. NAME is
29829 the builtin name, CODE is the builtin code, and FTYPE is the function
29830 type of the builtin. */
29833 make_cpu_type_builtin (const char* name
, int code
,
29834 enum ix86_builtin_func_type ftype
, bool is_const
)
29839 type
= ix86_get_builtin_func_type (ftype
);
29840 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
29842 gcc_assert (decl
!= NULL_TREE
);
29843 ix86_builtins
[(int) code
] = decl
;
29844 TREE_READONLY (decl
) = is_const
;
29847 /* Make builtins to get CPU type and features supported. The created
29850 __builtin_cpu_init (), to detect cpu type and features,
29851 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
29852 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
29856 ix86_init_platform_type_builtins (void)
29858 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
29859 INT_FTYPE_VOID
, false);
29860 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
29861 INT_FTYPE_PCCHAR
, true);
29862 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
29863 INT_FTYPE_PCCHAR
, true);
29866 /* Internal method for ix86_init_builtins. */
29869 ix86_init_builtins_va_builtins_abi (void)
29871 tree ms_va_ref
, sysv_va_ref
;
29872 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
29873 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
29874 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
29875 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
29879 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
29880 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
29881 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
29883 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
29886 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29887 fnvoid_va_start_ms
=
29888 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29889 fnvoid_va_end_sysv
=
29890 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
29891 fnvoid_va_start_sysv
=
29892 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
29894 fnvoid_va_copy_ms
=
29895 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
29897 fnvoid_va_copy_sysv
=
29898 build_function_type_list (void_type_node
, sysv_va_ref
,
29899 sysv_va_ref
, NULL_TREE
);
29901 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
29902 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29903 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
29904 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29905 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
29906 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29907 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
29908 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29909 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
29910 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29911 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
29912 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29916 ix86_init_builtin_types (void)
29918 tree float128_type_node
, float80_type_node
;
29920 /* The __float80 type. */
29921 float80_type_node
= long_double_type_node
;
29922 if (TYPE_MODE (float80_type_node
) != XFmode
)
29924 /* The __float80 type. */
29925 float80_type_node
= make_node (REAL_TYPE
);
29927 TYPE_PRECISION (float80_type_node
) = 80;
29928 layout_type (float80_type_node
);
29930 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
29932 /* The __float128 type. */
29933 float128_type_node
= make_node (REAL_TYPE
);
29934 TYPE_PRECISION (float128_type_node
) = 128;
29935 layout_type (float128_type_node
);
29936 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
29938 /* This macro is built by i386-builtin-types.awk. */
29939 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
29943 ix86_init_builtins (void)
29947 ix86_init_builtin_types ();
29949 /* Builtins to get CPU type and features. */
29950 ix86_init_platform_type_builtins ();
29952 /* TFmode support builtins. */
29953 def_builtin_const (0, "__builtin_infq",
29954 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
29955 def_builtin_const (0, "__builtin_huge_valq",
29956 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
29958 /* We will expand them to normal call if SSE isn't available since
29959 they are used by libgcc. */
29960 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
29961 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
29962 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
29963 TREE_READONLY (t
) = 1;
29964 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
29966 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
29967 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
29968 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
29969 TREE_READONLY (t
) = 1;
29970 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
29972 ix86_init_tm_builtins ();
29973 ix86_init_mmx_sse_builtins ();
29976 ix86_init_builtins_va_builtins_abi ();
29978 #ifdef SUBTARGET_INIT_BUILTINS
29979 SUBTARGET_INIT_BUILTINS
;
29983 /* Return the ix86 builtin for CODE. */
29986 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
29988 if (code
>= IX86_BUILTIN_MAX
)
29989 return error_mark_node
;
29991 return ix86_builtins
[code
];
29994 /* Errors in the source file can cause expand_expr to return const0_rtx
29995 where we expect a vector. To avoid crashing, use one of the vector
29996 clear instructions. */
29998 safe_vector_operand (rtx x
, enum machine_mode mode
)
30000 if (x
== const0_rtx
)
30001 x
= CONST0_RTX (mode
);
30005 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
30008 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
30011 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30012 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30013 rtx op0
= expand_normal (arg0
);
30014 rtx op1
= expand_normal (arg1
);
30015 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30016 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30017 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
30019 if (VECTOR_MODE_P (mode0
))
30020 op0
= safe_vector_operand (op0
, mode0
);
30021 if (VECTOR_MODE_P (mode1
))
30022 op1
= safe_vector_operand (op1
, mode1
);
30024 if (optimize
|| !target
30025 || GET_MODE (target
) != tmode
30026 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30027 target
= gen_reg_rtx (tmode
);
30029 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
30031 rtx x
= gen_reg_rtx (V4SImode
);
30032 emit_insn (gen_sse2_loadd (x
, op1
));
30033 op1
= gen_lowpart (TImode
, x
);
30036 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30037 op0
= copy_to_mode_reg (mode0
, op0
);
30038 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
30039 op1
= copy_to_mode_reg (mode1
, op1
);
30041 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30050 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
30053 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
30054 enum ix86_builtin_func_type m_type
,
30055 enum rtx_code sub_code
)
30060 bool comparison_p
= false;
30062 bool last_arg_constant
= false;
30063 int num_memory
= 0;
30066 enum machine_mode mode
;
30069 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30073 case MULTI_ARG_4_DF2_DI_I
:
30074 case MULTI_ARG_4_DF2_DI_I1
:
30075 case MULTI_ARG_4_SF2_SI_I
:
30076 case MULTI_ARG_4_SF2_SI_I1
:
30078 last_arg_constant
= true;
30081 case MULTI_ARG_3_SF
:
30082 case MULTI_ARG_3_DF
:
30083 case MULTI_ARG_3_SF2
:
30084 case MULTI_ARG_3_DF2
:
30085 case MULTI_ARG_3_DI
:
30086 case MULTI_ARG_3_SI
:
30087 case MULTI_ARG_3_SI_DI
:
30088 case MULTI_ARG_3_HI
:
30089 case MULTI_ARG_3_HI_SI
:
30090 case MULTI_ARG_3_QI
:
30091 case MULTI_ARG_3_DI2
:
30092 case MULTI_ARG_3_SI2
:
30093 case MULTI_ARG_3_HI2
:
30094 case MULTI_ARG_3_QI2
:
30098 case MULTI_ARG_2_SF
:
30099 case MULTI_ARG_2_DF
:
30100 case MULTI_ARG_2_DI
:
30101 case MULTI_ARG_2_SI
:
30102 case MULTI_ARG_2_HI
:
30103 case MULTI_ARG_2_QI
:
30107 case MULTI_ARG_2_DI_IMM
:
30108 case MULTI_ARG_2_SI_IMM
:
30109 case MULTI_ARG_2_HI_IMM
:
30110 case MULTI_ARG_2_QI_IMM
:
30112 last_arg_constant
= true;
30115 case MULTI_ARG_1_SF
:
30116 case MULTI_ARG_1_DF
:
30117 case MULTI_ARG_1_SF2
:
30118 case MULTI_ARG_1_DF2
:
30119 case MULTI_ARG_1_DI
:
30120 case MULTI_ARG_1_SI
:
30121 case MULTI_ARG_1_HI
:
30122 case MULTI_ARG_1_QI
:
30123 case MULTI_ARG_1_SI_DI
:
30124 case MULTI_ARG_1_HI_DI
:
30125 case MULTI_ARG_1_HI_SI
:
30126 case MULTI_ARG_1_QI_DI
:
30127 case MULTI_ARG_1_QI_SI
:
30128 case MULTI_ARG_1_QI_HI
:
30132 case MULTI_ARG_2_DI_CMP
:
30133 case MULTI_ARG_2_SI_CMP
:
30134 case MULTI_ARG_2_HI_CMP
:
30135 case MULTI_ARG_2_QI_CMP
:
30137 comparison_p
= true;
30140 case MULTI_ARG_2_SF_TF
:
30141 case MULTI_ARG_2_DF_TF
:
30142 case MULTI_ARG_2_DI_TF
:
30143 case MULTI_ARG_2_SI_TF
:
30144 case MULTI_ARG_2_HI_TF
:
30145 case MULTI_ARG_2_QI_TF
:
30151 gcc_unreachable ();
30154 if (optimize
|| !target
30155 || GET_MODE (target
) != tmode
30156 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30157 target
= gen_reg_rtx (tmode
);
30159 gcc_assert (nargs
<= 4);
30161 for (i
= 0; i
< nargs
; i
++)
30163 tree arg
= CALL_EXPR_ARG (exp
, i
);
30164 rtx op
= expand_normal (arg
);
30165 int adjust
= (comparison_p
) ? 1 : 0;
30166 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
30168 if (last_arg_constant
&& i
== nargs
- 1)
30170 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
30172 enum insn_code new_icode
= icode
;
30175 case CODE_FOR_xop_vpermil2v2df3
:
30176 case CODE_FOR_xop_vpermil2v4sf3
:
30177 case CODE_FOR_xop_vpermil2v4df3
:
30178 case CODE_FOR_xop_vpermil2v8sf3
:
30179 error ("the last argument must be a 2-bit immediate");
30180 return gen_reg_rtx (tmode
);
30181 case CODE_FOR_xop_rotlv2di3
:
30182 new_icode
= CODE_FOR_rotlv2di3
;
30184 case CODE_FOR_xop_rotlv4si3
:
30185 new_icode
= CODE_FOR_rotlv4si3
;
30187 case CODE_FOR_xop_rotlv8hi3
:
30188 new_icode
= CODE_FOR_rotlv8hi3
;
30190 case CODE_FOR_xop_rotlv16qi3
:
30191 new_icode
= CODE_FOR_rotlv16qi3
;
30193 if (CONST_INT_P (op
))
30195 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
30196 op
= GEN_INT (INTVAL (op
) & mask
);
30197 gcc_checking_assert
30198 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
30202 gcc_checking_assert
30204 && insn_data
[new_icode
].operand
[0].mode
== tmode
30205 && insn_data
[new_icode
].operand
[1].mode
== tmode
30206 && insn_data
[new_icode
].operand
[2].mode
== mode
30207 && insn_data
[new_icode
].operand
[0].predicate
30208 == insn_data
[icode
].operand
[0].predicate
30209 && insn_data
[new_icode
].operand
[1].predicate
30210 == insn_data
[icode
].operand
[1].predicate
);
30216 gcc_unreachable ();
30223 if (VECTOR_MODE_P (mode
))
30224 op
= safe_vector_operand (op
, mode
);
30226 /* If we aren't optimizing, only allow one memory operand to be
30228 if (memory_operand (op
, mode
))
30231 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
30234 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
30236 op
= force_reg (mode
, op
);
30240 args
[i
].mode
= mode
;
30246 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30251 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
30252 GEN_INT ((int)sub_code
));
30253 else if (! comparison_p
)
30254 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30257 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
30261 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
30266 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
30270 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
30274 gcc_unreachable ();
30284 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30285 insns with vec_merge. */
30288 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
30292 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30293 rtx op1
, op0
= expand_normal (arg0
);
30294 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30295 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30297 if (optimize
|| !target
30298 || GET_MODE (target
) != tmode
30299 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30300 target
= gen_reg_rtx (tmode
);
30302 if (VECTOR_MODE_P (mode0
))
30303 op0
= safe_vector_operand (op0
, mode0
);
30305 if ((optimize
&& !register_operand (op0
, mode0
))
30306 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30307 op0
= copy_to_mode_reg (mode0
, op0
);
30310 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
30311 op1
= copy_to_mode_reg (mode0
, op1
);
30313 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30320 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30323 ix86_expand_sse_compare (const struct builtin_description
*d
,
30324 tree exp
, rtx target
, bool swap
)
30327 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30328 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30329 rtx op0
= expand_normal (arg0
);
30330 rtx op1
= expand_normal (arg1
);
30332 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30333 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30334 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30335 enum rtx_code comparison
= d
->comparison
;
30337 if (VECTOR_MODE_P (mode0
))
30338 op0
= safe_vector_operand (op0
, mode0
);
30339 if (VECTOR_MODE_P (mode1
))
30340 op1
= safe_vector_operand (op1
, mode1
);
30342 /* Swap operands if we have a comparison that isn't available in
30346 rtx tmp
= gen_reg_rtx (mode1
);
30347 emit_move_insn (tmp
, op1
);
30352 if (optimize
|| !target
30353 || GET_MODE (target
) != tmode
30354 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30355 target
= gen_reg_rtx (tmode
);
30357 if ((optimize
&& !register_operand (op0
, mode0
))
30358 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
30359 op0
= copy_to_mode_reg (mode0
, op0
);
30360 if ((optimize
&& !register_operand (op1
, mode1
))
30361 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
30362 op1
= copy_to_mode_reg (mode1
, op1
);
30364 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
30365 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30372 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
30375 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
30379 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30380 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30381 rtx op0
= expand_normal (arg0
);
30382 rtx op1
= expand_normal (arg1
);
30383 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30384 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30385 enum rtx_code comparison
= d
->comparison
;
30387 if (VECTOR_MODE_P (mode0
))
30388 op0
= safe_vector_operand (op0
, mode0
);
30389 if (VECTOR_MODE_P (mode1
))
30390 op1
= safe_vector_operand (op1
, mode1
);
30392 /* Swap operands if we have a comparison that isn't available in
30394 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
30401 target
= gen_reg_rtx (SImode
);
30402 emit_move_insn (target
, const0_rtx
);
30403 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30405 if ((optimize
&& !register_operand (op0
, mode0
))
30406 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30407 op0
= copy_to_mode_reg (mode0
, op0
);
30408 if ((optimize
&& !register_operand (op1
, mode1
))
30409 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30410 op1
= copy_to_mode_reg (mode1
, op1
);
30412 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30416 emit_insn (gen_rtx_SET (VOIDmode
,
30417 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30418 gen_rtx_fmt_ee (comparison
, QImode
,
30422 return SUBREG_REG (target
);
30425 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
30428 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
30432 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30433 rtx op1
, op0
= expand_normal (arg0
);
30434 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30435 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30437 if (optimize
|| target
== 0
30438 || GET_MODE (target
) != tmode
30439 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30440 target
= gen_reg_rtx (tmode
);
30442 if (VECTOR_MODE_P (mode0
))
30443 op0
= safe_vector_operand (op0
, mode0
);
30445 if ((optimize
&& !register_operand (op0
, mode0
))
30446 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30447 op0
= copy_to_mode_reg (mode0
, op0
);
30449 op1
= GEN_INT (d
->comparison
);
30451 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
30459 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
30460 tree exp
, rtx target
)
30463 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30464 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30465 rtx op0
= expand_normal (arg0
);
30466 rtx op1
= expand_normal (arg1
);
30468 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30469 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30470 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30472 if (optimize
|| target
== 0
30473 || GET_MODE (target
) != tmode
30474 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30475 target
= gen_reg_rtx (tmode
);
30477 op0
= safe_vector_operand (op0
, mode0
);
30478 op1
= safe_vector_operand (op1
, mode1
);
30480 if ((optimize
&& !register_operand (op0
, mode0
))
30481 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30482 op0
= copy_to_mode_reg (mode0
, op0
);
30483 if ((optimize
&& !register_operand (op1
, mode1
))
30484 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30485 op1
= copy_to_mode_reg (mode1
, op1
);
30487 op2
= GEN_INT (d
->comparison
);
30489 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30496 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
30499 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
30503 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30504 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30505 rtx op0
= expand_normal (arg0
);
30506 rtx op1
= expand_normal (arg1
);
30507 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30508 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30509 enum rtx_code comparison
= d
->comparison
;
30511 if (VECTOR_MODE_P (mode0
))
30512 op0
= safe_vector_operand (op0
, mode0
);
30513 if (VECTOR_MODE_P (mode1
))
30514 op1
= safe_vector_operand (op1
, mode1
);
30516 target
= gen_reg_rtx (SImode
);
30517 emit_move_insn (target
, const0_rtx
);
30518 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30520 if ((optimize
&& !register_operand (op0
, mode0
))
30521 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30522 op0
= copy_to_mode_reg (mode0
, op0
);
30523 if ((optimize
&& !register_operand (op1
, mode1
))
30524 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30525 op1
= copy_to_mode_reg (mode1
, op1
);
30527 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30531 emit_insn (gen_rtx_SET (VOIDmode
,
30532 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30533 gen_rtx_fmt_ee (comparison
, QImode
,
30537 return SUBREG_REG (target
);
30540 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
30543 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
30544 tree exp
, rtx target
)
30547 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30548 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30549 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30550 tree arg3
= CALL_EXPR_ARG (exp
, 3);
30551 tree arg4
= CALL_EXPR_ARG (exp
, 4);
30552 rtx scratch0
, scratch1
;
30553 rtx op0
= expand_normal (arg0
);
30554 rtx op1
= expand_normal (arg1
);
30555 rtx op2
= expand_normal (arg2
);
30556 rtx op3
= expand_normal (arg3
);
30557 rtx op4
= expand_normal (arg4
);
30558 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
30560 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30561 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30562 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30563 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
30564 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
30565 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
30566 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
30568 if (VECTOR_MODE_P (modev2
))
30569 op0
= safe_vector_operand (op0
, modev2
);
30570 if (VECTOR_MODE_P (modev4
))
30571 op2
= safe_vector_operand (op2
, modev4
);
30573 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30574 op0
= copy_to_mode_reg (modev2
, op0
);
30575 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
30576 op1
= copy_to_mode_reg (modei3
, op1
);
30577 if ((optimize
&& !register_operand (op2
, modev4
))
30578 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
30579 op2
= copy_to_mode_reg (modev4
, op2
);
30580 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
30581 op3
= copy_to_mode_reg (modei5
, op3
);
30583 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
30585 error ("the fifth argument must be an 8-bit immediate");
30589 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
30591 if (optimize
|| !target
30592 || GET_MODE (target
) != tmode0
30593 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30594 target
= gen_reg_rtx (tmode0
);
30596 scratch1
= gen_reg_rtx (tmode1
);
30598 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30600 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
30602 if (optimize
|| !target
30603 || GET_MODE (target
) != tmode1
30604 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30605 target
= gen_reg_rtx (tmode1
);
30607 scratch0
= gen_reg_rtx (tmode0
);
30609 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
30613 gcc_assert (d
->flag
);
30615 scratch0
= gen_reg_rtx (tmode0
);
30616 scratch1
= gen_reg_rtx (tmode1
);
30618 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30628 target
= gen_reg_rtx (SImode
);
30629 emit_move_insn (target
, const0_rtx
);
30630 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30633 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30634 gen_rtx_fmt_ee (EQ
, QImode
,
30635 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30638 return SUBREG_REG (target
);
30645 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
30648 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
30649 tree exp
, rtx target
)
30652 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30653 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30654 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30655 rtx scratch0
, scratch1
;
30656 rtx op0
= expand_normal (arg0
);
30657 rtx op1
= expand_normal (arg1
);
30658 rtx op2
= expand_normal (arg2
);
30659 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
30661 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30662 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30663 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30664 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
30665 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
30667 if (VECTOR_MODE_P (modev2
))
30668 op0
= safe_vector_operand (op0
, modev2
);
30669 if (VECTOR_MODE_P (modev3
))
30670 op1
= safe_vector_operand (op1
, modev3
);
30672 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30673 op0
= copy_to_mode_reg (modev2
, op0
);
30674 if ((optimize
&& !register_operand (op1
, modev3
))
30675 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
30676 op1
= copy_to_mode_reg (modev3
, op1
);
30678 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
30680 error ("the third argument must be an 8-bit immediate");
30684 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
30686 if (optimize
|| !target
30687 || GET_MODE (target
) != tmode0
30688 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30689 target
= gen_reg_rtx (tmode0
);
30691 scratch1
= gen_reg_rtx (tmode1
);
30693 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
30695 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
30697 if (optimize
|| !target
30698 || GET_MODE (target
) != tmode1
30699 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30700 target
= gen_reg_rtx (tmode1
);
30702 scratch0
= gen_reg_rtx (tmode0
);
30704 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
30708 gcc_assert (d
->flag
);
30710 scratch0
= gen_reg_rtx (tmode0
);
30711 scratch1
= gen_reg_rtx (tmode1
);
30713 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
30723 target
= gen_reg_rtx (SImode
);
30724 emit_move_insn (target
, const0_rtx
);
30725 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30728 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30729 gen_rtx_fmt_ee (EQ
, QImode
,
30730 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30733 return SUBREG_REG (target
);
30739 /* Subroutine of ix86_expand_builtin to take care of insns with
30740 variable number of operands. */
30743 ix86_expand_args_builtin (const struct builtin_description
*d
,
30744 tree exp
, rtx target
)
30746 rtx pat
, real_target
;
30747 unsigned int i
, nargs
;
30748 unsigned int nargs_constant
= 0;
30749 int num_memory
= 0;
30753 enum machine_mode mode
;
30755 bool last_arg_count
= false;
30756 enum insn_code icode
= d
->icode
;
30757 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
30758 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
30759 enum machine_mode rmode
= VOIDmode
;
30761 enum rtx_code comparison
= d
->comparison
;
30763 switch ((enum ix86_builtin_func_type
) d
->flag
)
30765 case V2DF_FTYPE_V2DF_ROUND
:
30766 case V4DF_FTYPE_V4DF_ROUND
:
30767 case V4SF_FTYPE_V4SF_ROUND
:
30768 case V8SF_FTYPE_V8SF_ROUND
:
30769 case V4SI_FTYPE_V4SF_ROUND
:
30770 case V8SI_FTYPE_V8SF_ROUND
:
30771 return ix86_expand_sse_round (d
, exp
, target
);
30772 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
30773 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
30774 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
30775 case INT_FTYPE_V8SF_V8SF_PTEST
:
30776 case INT_FTYPE_V4DI_V4DI_PTEST
:
30777 case INT_FTYPE_V4DF_V4DF_PTEST
:
30778 case INT_FTYPE_V4SF_V4SF_PTEST
:
30779 case INT_FTYPE_V2DI_V2DI_PTEST
:
30780 case INT_FTYPE_V2DF_V2DF_PTEST
:
30781 return ix86_expand_sse_ptest (d
, exp
, target
);
30782 case FLOAT128_FTYPE_FLOAT128
:
30783 case FLOAT_FTYPE_FLOAT
:
30784 case INT_FTYPE_INT
:
30785 case UINT64_FTYPE_INT
:
30786 case UINT16_FTYPE_UINT16
:
30787 case INT64_FTYPE_INT64
:
30788 case INT64_FTYPE_V4SF
:
30789 case INT64_FTYPE_V2DF
:
30790 case INT_FTYPE_V16QI
:
30791 case INT_FTYPE_V8QI
:
30792 case INT_FTYPE_V8SF
:
30793 case INT_FTYPE_V4DF
:
30794 case INT_FTYPE_V4SF
:
30795 case INT_FTYPE_V2DF
:
30796 case INT_FTYPE_V32QI
:
30797 case V16QI_FTYPE_V16QI
:
30798 case V8SI_FTYPE_V8SF
:
30799 case V8SI_FTYPE_V4SI
:
30800 case V8HI_FTYPE_V8HI
:
30801 case V8HI_FTYPE_V16QI
:
30802 case V8QI_FTYPE_V8QI
:
30803 case V8SF_FTYPE_V8SF
:
30804 case V8SF_FTYPE_V8SI
:
30805 case V8SF_FTYPE_V4SF
:
30806 case V8SF_FTYPE_V8HI
:
30807 case V4SI_FTYPE_V4SI
:
30808 case V4SI_FTYPE_V16QI
:
30809 case V4SI_FTYPE_V4SF
:
30810 case V4SI_FTYPE_V8SI
:
30811 case V4SI_FTYPE_V8HI
:
30812 case V4SI_FTYPE_V4DF
:
30813 case V4SI_FTYPE_V2DF
:
30814 case V4HI_FTYPE_V4HI
:
30815 case V4DF_FTYPE_V4DF
:
30816 case V4DF_FTYPE_V4SI
:
30817 case V4DF_FTYPE_V4SF
:
30818 case V4DF_FTYPE_V2DF
:
30819 case V4SF_FTYPE_V4SF
:
30820 case V4SF_FTYPE_V4SI
:
30821 case V4SF_FTYPE_V8SF
:
30822 case V4SF_FTYPE_V4DF
:
30823 case V4SF_FTYPE_V8HI
:
30824 case V4SF_FTYPE_V2DF
:
30825 case V2DI_FTYPE_V2DI
:
30826 case V2DI_FTYPE_V16QI
:
30827 case V2DI_FTYPE_V8HI
:
30828 case V2DI_FTYPE_V4SI
:
30829 case V2DF_FTYPE_V2DF
:
30830 case V2DF_FTYPE_V4SI
:
30831 case V2DF_FTYPE_V4DF
:
30832 case V2DF_FTYPE_V4SF
:
30833 case V2DF_FTYPE_V2SI
:
30834 case V2SI_FTYPE_V2SI
:
30835 case V2SI_FTYPE_V4SF
:
30836 case V2SI_FTYPE_V2SF
:
30837 case V2SI_FTYPE_V2DF
:
30838 case V2SF_FTYPE_V2SF
:
30839 case V2SF_FTYPE_V2SI
:
30840 case V32QI_FTYPE_V32QI
:
30841 case V32QI_FTYPE_V16QI
:
30842 case V16HI_FTYPE_V16HI
:
30843 case V16HI_FTYPE_V8HI
:
30844 case V8SI_FTYPE_V8SI
:
30845 case V16HI_FTYPE_V16QI
:
30846 case V8SI_FTYPE_V16QI
:
30847 case V4DI_FTYPE_V16QI
:
30848 case V8SI_FTYPE_V8HI
:
30849 case V4DI_FTYPE_V8HI
:
30850 case V4DI_FTYPE_V4SI
:
30851 case V4DI_FTYPE_V2DI
:
30854 case V4SF_FTYPE_V4SF_VEC_MERGE
:
30855 case V2DF_FTYPE_V2DF_VEC_MERGE
:
30856 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
30857 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
30858 case V16QI_FTYPE_V16QI_V16QI
:
30859 case V16QI_FTYPE_V8HI_V8HI
:
30860 case V8QI_FTYPE_V8QI_V8QI
:
30861 case V8QI_FTYPE_V4HI_V4HI
:
30862 case V8HI_FTYPE_V8HI_V8HI
:
30863 case V8HI_FTYPE_V16QI_V16QI
:
30864 case V8HI_FTYPE_V4SI_V4SI
:
30865 case V8SF_FTYPE_V8SF_V8SF
:
30866 case V8SF_FTYPE_V8SF_V8SI
:
30867 case V4SI_FTYPE_V4SI_V4SI
:
30868 case V4SI_FTYPE_V8HI_V8HI
:
30869 case V4SI_FTYPE_V4SF_V4SF
:
30870 case V4SI_FTYPE_V2DF_V2DF
:
30871 case V4HI_FTYPE_V4HI_V4HI
:
30872 case V4HI_FTYPE_V8QI_V8QI
:
30873 case V4HI_FTYPE_V2SI_V2SI
:
30874 case V4DF_FTYPE_V4DF_V4DF
:
30875 case V4DF_FTYPE_V4DF_V4DI
:
30876 case V4SF_FTYPE_V4SF_V4SF
:
30877 case V4SF_FTYPE_V4SF_V4SI
:
30878 case V4SF_FTYPE_V4SF_V2SI
:
30879 case V4SF_FTYPE_V4SF_V2DF
:
30880 case V4SF_FTYPE_V4SF_DI
:
30881 case V4SF_FTYPE_V4SF_SI
:
30882 case V2DI_FTYPE_V2DI_V2DI
:
30883 case V2DI_FTYPE_V16QI_V16QI
:
30884 case V2DI_FTYPE_V4SI_V4SI
:
30885 case V2UDI_FTYPE_V4USI_V4USI
:
30886 case V2DI_FTYPE_V2DI_V16QI
:
30887 case V2DI_FTYPE_V2DF_V2DF
:
30888 case V2SI_FTYPE_V2SI_V2SI
:
30889 case V2SI_FTYPE_V4HI_V4HI
:
30890 case V2SI_FTYPE_V2SF_V2SF
:
30891 case V2DF_FTYPE_V2DF_V2DF
:
30892 case V2DF_FTYPE_V2DF_V4SF
:
30893 case V2DF_FTYPE_V2DF_V2DI
:
30894 case V2DF_FTYPE_V2DF_DI
:
30895 case V2DF_FTYPE_V2DF_SI
:
30896 case V2SF_FTYPE_V2SF_V2SF
:
30897 case V1DI_FTYPE_V1DI_V1DI
:
30898 case V1DI_FTYPE_V8QI_V8QI
:
30899 case V1DI_FTYPE_V2SI_V2SI
:
30900 case V32QI_FTYPE_V16HI_V16HI
:
30901 case V16HI_FTYPE_V8SI_V8SI
:
30902 case V32QI_FTYPE_V32QI_V32QI
:
30903 case V16HI_FTYPE_V32QI_V32QI
:
30904 case V16HI_FTYPE_V16HI_V16HI
:
30905 case V8SI_FTYPE_V4DF_V4DF
:
30906 case V8SI_FTYPE_V8SI_V8SI
:
30907 case V8SI_FTYPE_V16HI_V16HI
:
30908 case V4DI_FTYPE_V4DI_V4DI
:
30909 case V4DI_FTYPE_V8SI_V8SI
:
30910 case V4UDI_FTYPE_V8USI_V8USI
:
30911 if (comparison
== UNKNOWN
)
30912 return ix86_expand_binop_builtin (icode
, exp
, target
);
30915 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
30916 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
30917 gcc_assert (comparison
!= UNKNOWN
);
30921 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
30922 case V16HI_FTYPE_V16HI_SI_COUNT
:
30923 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
30924 case V8SI_FTYPE_V8SI_SI_COUNT
:
30925 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
30926 case V4DI_FTYPE_V4DI_INT_COUNT
:
30927 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
30928 case V8HI_FTYPE_V8HI_SI_COUNT
:
30929 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
30930 case V4SI_FTYPE_V4SI_SI_COUNT
:
30931 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
30932 case V4HI_FTYPE_V4HI_SI_COUNT
:
30933 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
30934 case V2DI_FTYPE_V2DI_SI_COUNT
:
30935 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
30936 case V2SI_FTYPE_V2SI_SI_COUNT
:
30937 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
30938 case V1DI_FTYPE_V1DI_SI_COUNT
:
30940 last_arg_count
= true;
30942 case UINT64_FTYPE_UINT64_UINT64
:
30943 case UINT_FTYPE_UINT_UINT
:
30944 case UINT_FTYPE_UINT_USHORT
:
30945 case UINT_FTYPE_UINT_UCHAR
:
30946 case UINT16_FTYPE_UINT16_INT
:
30947 case UINT8_FTYPE_UINT8_INT
:
30950 case V2DI_FTYPE_V2DI_INT_CONVERT
:
30953 nargs_constant
= 1;
30955 case V4DI_FTYPE_V4DI_INT_CONVERT
:
30958 nargs_constant
= 1;
30960 case V8HI_FTYPE_V8HI_INT
:
30961 case V8HI_FTYPE_V8SF_INT
:
30962 case V8HI_FTYPE_V4SF_INT
:
30963 case V8SF_FTYPE_V8SF_INT
:
30964 case V4SI_FTYPE_V4SI_INT
:
30965 case V4SI_FTYPE_V8SI_INT
:
30966 case V4HI_FTYPE_V4HI_INT
:
30967 case V4DF_FTYPE_V4DF_INT
:
30968 case V4SF_FTYPE_V4SF_INT
:
30969 case V4SF_FTYPE_V8SF_INT
:
30970 case V2DI_FTYPE_V2DI_INT
:
30971 case V2DF_FTYPE_V2DF_INT
:
30972 case V2DF_FTYPE_V4DF_INT
:
30973 case V16HI_FTYPE_V16HI_INT
:
30974 case V8SI_FTYPE_V8SI_INT
:
30975 case V4DI_FTYPE_V4DI_INT
:
30976 case V2DI_FTYPE_V4DI_INT
:
30978 nargs_constant
= 1;
30980 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
30981 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
30982 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
30983 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
30984 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
30985 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
30988 case V32QI_FTYPE_V32QI_V32QI_INT
:
30989 case V16HI_FTYPE_V16HI_V16HI_INT
:
30990 case V16QI_FTYPE_V16QI_V16QI_INT
:
30991 case V4DI_FTYPE_V4DI_V4DI_INT
:
30992 case V8HI_FTYPE_V8HI_V8HI_INT
:
30993 case V8SI_FTYPE_V8SI_V8SI_INT
:
30994 case V8SI_FTYPE_V8SI_V4SI_INT
:
30995 case V8SF_FTYPE_V8SF_V8SF_INT
:
30996 case V8SF_FTYPE_V8SF_V4SF_INT
:
30997 case V4SI_FTYPE_V4SI_V4SI_INT
:
30998 case V4DF_FTYPE_V4DF_V4DF_INT
:
30999 case V4DF_FTYPE_V4DF_V2DF_INT
:
31000 case V4SF_FTYPE_V4SF_V4SF_INT
:
31001 case V2DI_FTYPE_V2DI_V2DI_INT
:
31002 case V4DI_FTYPE_V4DI_V2DI_INT
:
31003 case V2DF_FTYPE_V2DF_V2DF_INT
:
31005 nargs_constant
= 1;
31007 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
31010 nargs_constant
= 1;
31012 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
31015 nargs_constant
= 1;
31017 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
31020 nargs_constant
= 1;
31022 case V2DI_FTYPE_V2DI_UINT_UINT
:
31024 nargs_constant
= 2;
31026 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
31027 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
31028 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
31029 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
31031 nargs_constant
= 1;
31033 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
31035 nargs_constant
= 2;
31037 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
31038 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
31042 gcc_unreachable ();
31045 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31047 if (comparison
!= UNKNOWN
)
31049 gcc_assert (nargs
== 2);
31050 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
31053 if (rmode
== VOIDmode
|| rmode
== tmode
)
31057 || GET_MODE (target
) != tmode
31058 || !insn_p
->operand
[0].predicate (target
, tmode
))
31059 target
= gen_reg_rtx (tmode
);
31060 real_target
= target
;
31064 target
= gen_reg_rtx (rmode
);
31065 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
31068 for (i
= 0; i
< nargs
; i
++)
31070 tree arg
= CALL_EXPR_ARG (exp
, i
);
31071 rtx op
= expand_normal (arg
);
31072 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31073 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31075 if (last_arg_count
&& (i
+ 1) == nargs
)
31077 /* SIMD shift insns take either an 8-bit immediate or
31078 register as count. But builtin functions take int as
31079 count. If count doesn't match, we put it in register. */
31082 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
31083 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
31084 op
= copy_to_reg (op
);
31087 else if ((nargs
- i
) <= nargs_constant
)
31092 case CODE_FOR_avx2_inserti128
:
31093 case CODE_FOR_avx2_extracti128
:
31094 error ("the last argument must be an 1-bit immediate");
31097 case CODE_FOR_sse4_1_roundsd
:
31098 case CODE_FOR_sse4_1_roundss
:
31100 case CODE_FOR_sse4_1_roundpd
:
31101 case CODE_FOR_sse4_1_roundps
:
31102 case CODE_FOR_avx_roundpd256
:
31103 case CODE_FOR_avx_roundps256
:
31105 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
31106 case CODE_FOR_sse4_1_roundps_sfix
:
31107 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
31108 case CODE_FOR_avx_roundps_sfix256
:
31110 case CODE_FOR_sse4_1_blendps
:
31111 case CODE_FOR_avx_blendpd256
:
31112 case CODE_FOR_avx_vpermilv4df
:
31113 error ("the last argument must be a 4-bit immediate");
31116 case CODE_FOR_sse4_1_blendpd
:
31117 case CODE_FOR_avx_vpermilv2df
:
31118 case CODE_FOR_xop_vpermil2v2df3
:
31119 case CODE_FOR_xop_vpermil2v4sf3
:
31120 case CODE_FOR_xop_vpermil2v4df3
:
31121 case CODE_FOR_xop_vpermil2v8sf3
:
31122 error ("the last argument must be a 2-bit immediate");
31125 case CODE_FOR_avx_vextractf128v4df
:
31126 case CODE_FOR_avx_vextractf128v8sf
:
31127 case CODE_FOR_avx_vextractf128v8si
:
31128 case CODE_FOR_avx_vinsertf128v4df
:
31129 case CODE_FOR_avx_vinsertf128v8sf
:
31130 case CODE_FOR_avx_vinsertf128v8si
:
31131 error ("the last argument must be a 1-bit immediate");
31134 case CODE_FOR_avx_vmcmpv2df3
:
31135 case CODE_FOR_avx_vmcmpv4sf3
:
31136 case CODE_FOR_avx_cmpv2df3
:
31137 case CODE_FOR_avx_cmpv4sf3
:
31138 case CODE_FOR_avx_cmpv4df3
:
31139 case CODE_FOR_avx_cmpv8sf3
:
31140 error ("the last argument must be a 5-bit immediate");
31144 switch (nargs_constant
)
31147 if ((nargs
- i
) == nargs_constant
)
31149 error ("the next to last argument must be an 8-bit immediate");
31153 error ("the last argument must be an 8-bit immediate");
31156 gcc_unreachable ();
31163 if (VECTOR_MODE_P (mode
))
31164 op
= safe_vector_operand (op
, mode
);
31166 /* If we aren't optimizing, only allow one memory operand to
31168 if (memory_operand (op
, mode
))
31171 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
31173 if (optimize
|| !match
|| num_memory
> 1)
31174 op
= copy_to_mode_reg (mode
, op
);
31178 op
= copy_to_reg (op
);
31179 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
31184 args
[i
].mode
= mode
;
31190 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
31193 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
31196 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31200 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31201 args
[2].op
, args
[3].op
);
31204 gcc_unreachable ();
31214 /* Subroutine of ix86_expand_builtin to take care of special insns
31215 with variable number of operands. */
31218 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
31219 tree exp
, rtx target
)
31223 unsigned int i
, nargs
, arg_adjust
, memory
;
31227 enum machine_mode mode
;
31229 enum insn_code icode
= d
->icode
;
31230 bool last_arg_constant
= false;
31231 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31232 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31233 enum { load
, store
} klass
;
31235 switch ((enum ix86_builtin_func_type
) d
->flag
)
31237 case VOID_FTYPE_VOID
:
31238 emit_insn (GEN_FCN (icode
) (target
));
31240 case VOID_FTYPE_UINT64
:
31241 case VOID_FTYPE_UNSIGNED
:
31247 case INT_FTYPE_VOID
:
31248 case UINT64_FTYPE_VOID
:
31249 case UNSIGNED_FTYPE_VOID
:
31254 case UINT64_FTYPE_PUNSIGNED
:
31255 case V2DI_FTYPE_PV2DI
:
31256 case V4DI_FTYPE_PV4DI
:
31257 case V32QI_FTYPE_PCCHAR
:
31258 case V16QI_FTYPE_PCCHAR
:
31259 case V8SF_FTYPE_PCV4SF
:
31260 case V8SF_FTYPE_PCFLOAT
:
31261 case V4SF_FTYPE_PCFLOAT
:
31262 case V4DF_FTYPE_PCV2DF
:
31263 case V4DF_FTYPE_PCDOUBLE
:
31264 case V2DF_FTYPE_PCDOUBLE
:
31265 case VOID_FTYPE_PVOID
:
31270 case VOID_FTYPE_PV2SF_V4SF
:
31271 case VOID_FTYPE_PV4DI_V4DI
:
31272 case VOID_FTYPE_PV2DI_V2DI
:
31273 case VOID_FTYPE_PCHAR_V32QI
:
31274 case VOID_FTYPE_PCHAR_V16QI
:
31275 case VOID_FTYPE_PFLOAT_V8SF
:
31276 case VOID_FTYPE_PFLOAT_V4SF
:
31277 case VOID_FTYPE_PDOUBLE_V4DF
:
31278 case VOID_FTYPE_PDOUBLE_V2DF
:
31279 case VOID_FTYPE_PLONGLONG_LONGLONG
:
31280 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
31281 case VOID_FTYPE_PINT_INT
:
31284 /* Reserve memory operand for target. */
31285 memory
= ARRAY_SIZE (args
);
31287 case V4SF_FTYPE_V4SF_PCV2SF
:
31288 case V2DF_FTYPE_V2DF_PCDOUBLE
:
31293 case V8SF_FTYPE_PCV8SF_V8SI
:
31294 case V4DF_FTYPE_PCV4DF_V4DI
:
31295 case V4SF_FTYPE_PCV4SF_V4SI
:
31296 case V2DF_FTYPE_PCV2DF_V2DI
:
31297 case V8SI_FTYPE_PCV8SI_V8SI
:
31298 case V4DI_FTYPE_PCV4DI_V4DI
:
31299 case V4SI_FTYPE_PCV4SI_V4SI
:
31300 case V2DI_FTYPE_PCV2DI_V2DI
:
31305 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
31306 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
31307 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
31308 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
31309 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
31310 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
31311 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
31312 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
31315 /* Reserve memory operand for target. */
31316 memory
= ARRAY_SIZE (args
);
31318 case VOID_FTYPE_UINT_UINT_UINT
:
31319 case VOID_FTYPE_UINT64_UINT_UINT
:
31320 case UCHAR_FTYPE_UINT_UINT_UINT
:
31321 case UCHAR_FTYPE_UINT64_UINT_UINT
:
31324 memory
= ARRAY_SIZE (args
);
31325 last_arg_constant
= true;
31328 gcc_unreachable ();
31331 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31333 if (klass
== store
)
31335 arg
= CALL_EXPR_ARG (exp
, 0);
31336 op
= expand_normal (arg
);
31337 gcc_assert (target
== 0);
31340 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31341 target
= gen_rtx_MEM (tmode
, op
);
31344 target
= force_reg (tmode
, op
);
31352 || !register_operand (target
, tmode
)
31353 || GET_MODE (target
) != tmode
)
31354 target
= gen_reg_rtx (tmode
);
31357 for (i
= 0; i
< nargs
; i
++)
31359 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31362 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
31363 op
= expand_normal (arg
);
31364 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31366 if (last_arg_constant
&& (i
+ 1) == nargs
)
31370 if (icode
== CODE_FOR_lwp_lwpvalsi3
31371 || icode
== CODE_FOR_lwp_lwpinssi3
31372 || icode
== CODE_FOR_lwp_lwpvaldi3
31373 || icode
== CODE_FOR_lwp_lwpinsdi3
)
31374 error ("the last argument must be a 32-bit immediate");
31376 error ("the last argument must be an 8-bit immediate");
31384 /* This must be the memory operand. */
31385 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31386 op
= gen_rtx_MEM (mode
, op
);
31387 gcc_assert (GET_MODE (op
) == mode
31388 || GET_MODE (op
) == VOIDmode
);
31392 /* This must be register. */
31393 if (VECTOR_MODE_P (mode
))
31394 op
= safe_vector_operand (op
, mode
);
31396 gcc_assert (GET_MODE (op
) == mode
31397 || GET_MODE (op
) == VOIDmode
);
31398 op
= copy_to_mode_reg (mode
, op
);
31403 args
[i
].mode
= mode
;
31409 pat
= GEN_FCN (icode
) (target
);
31412 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31415 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31418 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31421 gcc_unreachable ();
31427 return klass
== store
? 0 : target
;
31430 /* Return the integer constant in ARG. Constrain it to be in the range
31431 of the subparts of VEC_TYPE; issue an error if not. */
31434 get_element_number (tree vec_type
, tree arg
)
31436 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
31438 if (!host_integerp (arg
, 1)
31439 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
31441 error ("selector must be an integer constant in the range 0..%wi", max
);
31448 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31449 ix86_expand_vector_init. We DO have language-level syntax for this, in
31450 the form of (type){ init-list }. Except that since we can't place emms
31451 instructions from inside the compiler, we can't allow the use of MMX
31452 registers unless the user explicitly asks for it. So we do *not* define
31453 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
31454 we have builtins invoked by mmintrin.h that gives us license to emit
31455 these sorts of instructions. */
31458 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
31460 enum machine_mode tmode
= TYPE_MODE (type
);
31461 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
31462 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
31463 rtvec v
= rtvec_alloc (n_elt
);
31465 gcc_assert (VECTOR_MODE_P (tmode
));
31466 gcc_assert (call_expr_nargs (exp
) == n_elt
);
31468 for (i
= 0; i
< n_elt
; ++i
)
31470 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
31471 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
31474 if (!target
|| !register_operand (target
, tmode
))
31475 target
= gen_reg_rtx (tmode
);
31477 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
31481 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31482 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
31483 had a language-level syntax for referencing vector elements. */
31486 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
31488 enum machine_mode tmode
, mode0
;
31493 arg0
= CALL_EXPR_ARG (exp
, 0);
31494 arg1
= CALL_EXPR_ARG (exp
, 1);
31496 op0
= expand_normal (arg0
);
31497 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
31499 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31500 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
31501 gcc_assert (VECTOR_MODE_P (mode0
));
31503 op0
= force_reg (mode0
, op0
);
31505 if (optimize
|| !target
|| !register_operand (target
, tmode
))
31506 target
= gen_reg_rtx (tmode
);
31508 ix86_expand_vector_extract (true, target
, op0
, elt
);
31513 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31514 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
31515 a language-level syntax for referencing vector elements. */
31518 ix86_expand_vec_set_builtin (tree exp
)
31520 enum machine_mode tmode
, mode1
;
31521 tree arg0
, arg1
, arg2
;
31523 rtx op0
, op1
, target
;
31525 arg0
= CALL_EXPR_ARG (exp
, 0);
31526 arg1
= CALL_EXPR_ARG (exp
, 1);
31527 arg2
= CALL_EXPR_ARG (exp
, 2);
31529 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
31530 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31531 gcc_assert (VECTOR_MODE_P (tmode
));
31533 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
31534 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
31535 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
31537 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
31538 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
31540 op0
= force_reg (tmode
, op0
);
31541 op1
= force_reg (mode1
, op1
);
31543 /* OP0 is the source of these builtin functions and shouldn't be
31544 modified. Create a copy, use it and return it as target. */
31545 target
= gen_reg_rtx (tmode
);
31546 emit_move_insn (target
, op0
);
31547 ix86_expand_vector_set (true, target
, op1
, elt
);
31552 /* Expand an expression EXP that calls a built-in function,
31553 with result going to TARGET if that's convenient
31554 (and in mode MODE if that's convenient).
31555 SUBTARGET may be used as the target for computing one of EXP's operands.
31556 IGNORE is nonzero if the value is to be ignored. */
31559 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
31560 enum machine_mode mode ATTRIBUTE_UNUSED
,
31561 int ignore ATTRIBUTE_UNUSED
)
31563 const struct builtin_description
*d
;
31565 enum insn_code icode
;
31566 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
31567 tree arg0
, arg1
, arg2
, arg3
, arg4
;
31568 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
31569 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
31570 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
31572 /* For CPU builtins that can be folded, fold first and expand the fold. */
31575 case IX86_BUILTIN_CPU_INIT
:
31577 /* Make it call __cpu_indicator_init in libgcc. */
31578 tree call_expr
, fndecl
, type
;
31579 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
31580 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
31581 call_expr
= build_call_expr (fndecl
, 0);
31582 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
31584 case IX86_BUILTIN_CPU_IS
:
31585 case IX86_BUILTIN_CPU_SUPPORTS
:
31587 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31588 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
31589 gcc_assert (fold_expr
!= NULL_TREE
);
31590 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
31594 /* Determine whether the builtin function is available under the current ISA.
31595 Originally the builtin was not created if it wasn't applicable to the
31596 current ISA based on the command line switches. With function specific
31597 options, we need to check in the context of the function making the call
31598 whether it is supported. */
31599 if (ix86_builtins_isa
[fcode
].isa
31600 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
31602 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
31603 NULL
, (enum fpmath_unit
) 0, false);
31606 error ("%qE needs unknown isa option", fndecl
);
31609 gcc_assert (opts
!= NULL
);
31610 error ("%qE needs isa option %s", fndecl
, opts
);
31618 case IX86_BUILTIN_MASKMOVQ
:
31619 case IX86_BUILTIN_MASKMOVDQU
:
31620 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
31621 ? CODE_FOR_mmx_maskmovq
31622 : CODE_FOR_sse2_maskmovdqu
);
31623 /* Note the arg order is different from the operand order. */
31624 arg1
= CALL_EXPR_ARG (exp
, 0);
31625 arg2
= CALL_EXPR_ARG (exp
, 1);
31626 arg0
= CALL_EXPR_ARG (exp
, 2);
31627 op0
= expand_normal (arg0
);
31628 op1
= expand_normal (arg1
);
31629 op2
= expand_normal (arg2
);
31630 mode0
= insn_data
[icode
].operand
[0].mode
;
31631 mode1
= insn_data
[icode
].operand
[1].mode
;
31632 mode2
= insn_data
[icode
].operand
[2].mode
;
31634 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31635 op0
= gen_rtx_MEM (mode1
, op0
);
31637 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
31638 op0
= copy_to_mode_reg (mode0
, op0
);
31639 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
31640 op1
= copy_to_mode_reg (mode1
, op1
);
31641 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
31642 op2
= copy_to_mode_reg (mode2
, op2
);
31643 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31649 case IX86_BUILTIN_LDMXCSR
:
31650 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
31651 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31652 emit_move_insn (target
, op0
);
31653 emit_insn (gen_sse_ldmxcsr (target
));
31656 case IX86_BUILTIN_STMXCSR
:
31657 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31658 emit_insn (gen_sse_stmxcsr (target
));
31659 return copy_to_mode_reg (SImode
, target
);
31661 case IX86_BUILTIN_CLFLUSH
:
31662 arg0
= CALL_EXPR_ARG (exp
, 0);
31663 op0
= expand_normal (arg0
);
31664 icode
= CODE_FOR_sse2_clflush
;
31665 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31666 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31668 emit_insn (gen_sse2_clflush (op0
));
31671 case IX86_BUILTIN_MONITOR
:
31672 arg0
= CALL_EXPR_ARG (exp
, 0);
31673 arg1
= CALL_EXPR_ARG (exp
, 1);
31674 arg2
= CALL_EXPR_ARG (exp
, 2);
31675 op0
= expand_normal (arg0
);
31676 op1
= expand_normal (arg1
);
31677 op2
= expand_normal (arg2
);
31679 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31681 op1
= copy_to_mode_reg (SImode
, op1
);
31683 op2
= copy_to_mode_reg (SImode
, op2
);
31684 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
31687 case IX86_BUILTIN_MWAIT
:
31688 arg0
= CALL_EXPR_ARG (exp
, 0);
31689 arg1
= CALL_EXPR_ARG (exp
, 1);
31690 op0
= expand_normal (arg0
);
31691 op1
= expand_normal (arg1
);
31693 op0
= copy_to_mode_reg (SImode
, op0
);
31695 op1
= copy_to_mode_reg (SImode
, op1
);
31696 emit_insn (gen_sse3_mwait (op0
, op1
));
31699 case IX86_BUILTIN_VEC_INIT_V2SI
:
31700 case IX86_BUILTIN_VEC_INIT_V4HI
:
31701 case IX86_BUILTIN_VEC_INIT_V8QI
:
31702 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
31704 case IX86_BUILTIN_VEC_EXT_V2DF
:
31705 case IX86_BUILTIN_VEC_EXT_V2DI
:
31706 case IX86_BUILTIN_VEC_EXT_V4SF
:
31707 case IX86_BUILTIN_VEC_EXT_V4SI
:
31708 case IX86_BUILTIN_VEC_EXT_V8HI
:
31709 case IX86_BUILTIN_VEC_EXT_V2SI
:
31710 case IX86_BUILTIN_VEC_EXT_V4HI
:
31711 case IX86_BUILTIN_VEC_EXT_V16QI
:
31712 return ix86_expand_vec_ext_builtin (exp
, target
);
31714 case IX86_BUILTIN_VEC_SET_V2DI
:
31715 case IX86_BUILTIN_VEC_SET_V4SF
:
31716 case IX86_BUILTIN_VEC_SET_V4SI
:
31717 case IX86_BUILTIN_VEC_SET_V8HI
:
31718 case IX86_BUILTIN_VEC_SET_V4HI
:
31719 case IX86_BUILTIN_VEC_SET_V16QI
:
31720 return ix86_expand_vec_set_builtin (exp
);
31722 case IX86_BUILTIN_INFQ
:
31723 case IX86_BUILTIN_HUGE_VALQ
:
31725 REAL_VALUE_TYPE inf
;
31729 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
31731 tmp
= validize_mem (force_const_mem (mode
, tmp
));
31734 target
= gen_reg_rtx (mode
);
31736 emit_move_insn (target
, tmp
);
31740 case IX86_BUILTIN_RDPMC
:
31741 case IX86_BUILTIN_RDTSC
:
31742 case IX86_BUILTIN_RDTSCP
:
31744 op0
= gen_reg_rtx (DImode
);
31745 op1
= gen_reg_rtx (DImode
);
31747 if (fcode
== IX86_BUILTIN_RDPMC
)
31749 arg0
= CALL_EXPR_ARG (exp
, 0);
31750 op2
= expand_normal (arg0
);
31751 if (!register_operand (op2
, SImode
))
31752 op2
= copy_to_mode_reg (SImode
, op2
);
31754 insn
= (TARGET_64BIT
31755 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
31756 : gen_rdpmc (op0
, op2
));
31759 else if (fcode
== IX86_BUILTIN_RDTSC
)
31761 insn
= (TARGET_64BIT
31762 ? gen_rdtsc_rex64 (op0
, op1
)
31763 : gen_rdtsc (op0
));
31768 op2
= gen_reg_rtx (SImode
);
31770 insn
= (TARGET_64BIT
31771 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
31772 : gen_rdtscp (op0
, op2
));
31775 arg0
= CALL_EXPR_ARG (exp
, 0);
31776 op4
= expand_normal (arg0
);
31777 if (!address_operand (op4
, VOIDmode
))
31779 op4
= convert_memory_address (Pmode
, op4
);
31780 op4
= copy_addr_to_reg (op4
);
31782 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
31786 target
= gen_reg_rtx (mode
);
31790 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
31791 op1
, 1, OPTAB_DIRECT
);
31792 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
31793 op0
, 1, OPTAB_DIRECT
);
31796 emit_move_insn (target
, op0
);
31799 case IX86_BUILTIN_FXSAVE
:
31800 case IX86_BUILTIN_FXRSTOR
:
31801 case IX86_BUILTIN_FXSAVE64
:
31802 case IX86_BUILTIN_FXRSTOR64
:
31805 case IX86_BUILTIN_FXSAVE
:
31806 icode
= CODE_FOR_fxsave
;
31808 case IX86_BUILTIN_FXRSTOR
:
31809 icode
= CODE_FOR_fxrstor
;
31811 case IX86_BUILTIN_FXSAVE64
:
31812 icode
= CODE_FOR_fxsave64
;
31814 case IX86_BUILTIN_FXRSTOR64
:
31815 icode
= CODE_FOR_fxrstor64
;
31818 gcc_unreachable ();
31821 arg0
= CALL_EXPR_ARG (exp
, 0);
31822 op0
= expand_normal (arg0
);
31824 if (!address_operand (op0
, VOIDmode
))
31826 op0
= convert_memory_address (Pmode
, op0
);
31827 op0
= copy_addr_to_reg (op0
);
31829 op0
= gen_rtx_MEM (BLKmode
, op0
);
31831 pat
= GEN_FCN (icode
) (op0
);
31836 case IX86_BUILTIN_XSAVE
:
31837 case IX86_BUILTIN_XRSTOR
:
31838 case IX86_BUILTIN_XSAVE64
:
31839 case IX86_BUILTIN_XRSTOR64
:
31840 case IX86_BUILTIN_XSAVEOPT
:
31841 case IX86_BUILTIN_XSAVEOPT64
:
31842 arg0
= CALL_EXPR_ARG (exp
, 0);
31843 arg1
= CALL_EXPR_ARG (exp
, 1);
31844 op0
= expand_normal (arg0
);
31845 op1
= expand_normal (arg1
);
31847 if (!address_operand (op0
, VOIDmode
))
31849 op0
= convert_memory_address (Pmode
, op0
);
31850 op0
= copy_addr_to_reg (op0
);
31852 op0
= gen_rtx_MEM (BLKmode
, op0
);
31854 op1
= force_reg (DImode
, op1
);
31858 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
31859 NULL
, 1, OPTAB_DIRECT
);
31862 case IX86_BUILTIN_XSAVE
:
31863 icode
= CODE_FOR_xsave_rex64
;
31865 case IX86_BUILTIN_XRSTOR
:
31866 icode
= CODE_FOR_xrstor_rex64
;
31868 case IX86_BUILTIN_XSAVE64
:
31869 icode
= CODE_FOR_xsave64
;
31871 case IX86_BUILTIN_XRSTOR64
:
31872 icode
= CODE_FOR_xrstor64
;
31874 case IX86_BUILTIN_XSAVEOPT
:
31875 icode
= CODE_FOR_xsaveopt_rex64
;
31877 case IX86_BUILTIN_XSAVEOPT64
:
31878 icode
= CODE_FOR_xsaveopt64
;
31881 gcc_unreachable ();
31884 op2
= gen_lowpart (SImode
, op2
);
31885 op1
= gen_lowpart (SImode
, op1
);
31886 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31892 case IX86_BUILTIN_XSAVE
:
31893 icode
= CODE_FOR_xsave
;
31895 case IX86_BUILTIN_XRSTOR
:
31896 icode
= CODE_FOR_xrstor
;
31898 case IX86_BUILTIN_XSAVEOPT
:
31899 icode
= CODE_FOR_xsaveopt
;
31902 gcc_unreachable ();
31904 pat
= GEN_FCN (icode
) (op0
, op1
);
31911 case IX86_BUILTIN_LLWPCB
:
31912 arg0
= CALL_EXPR_ARG (exp
, 0);
31913 op0
= expand_normal (arg0
);
31914 icode
= CODE_FOR_lwp_llwpcb
;
31915 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31916 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31917 emit_insn (gen_lwp_llwpcb (op0
));
31920 case IX86_BUILTIN_SLWPCB
:
31921 icode
= CODE_FOR_lwp_slwpcb
;
31923 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
31924 target
= gen_reg_rtx (Pmode
);
31925 emit_insn (gen_lwp_slwpcb (target
));
31928 case IX86_BUILTIN_BEXTRI32
:
31929 case IX86_BUILTIN_BEXTRI64
:
31930 arg0
= CALL_EXPR_ARG (exp
, 0);
31931 arg1
= CALL_EXPR_ARG (exp
, 1);
31932 op0
= expand_normal (arg0
);
31933 op1
= expand_normal (arg1
);
31934 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
31935 ? CODE_FOR_tbm_bextri_si
31936 : CODE_FOR_tbm_bextri_di
);
31937 if (!CONST_INT_P (op1
))
31939 error ("last argument must be an immediate");
31944 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
31945 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
31946 op1
= GEN_INT (length
);
31947 op2
= GEN_INT (lsb_index
);
31948 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
31954 case IX86_BUILTIN_RDRAND16_STEP
:
31955 icode
= CODE_FOR_rdrandhi_1
;
31959 case IX86_BUILTIN_RDRAND32_STEP
:
31960 icode
= CODE_FOR_rdrandsi_1
;
31964 case IX86_BUILTIN_RDRAND64_STEP
:
31965 icode
= CODE_FOR_rdranddi_1
;
31969 op0
= gen_reg_rtx (mode0
);
31970 emit_insn (GEN_FCN (icode
) (op0
));
31972 arg0
= CALL_EXPR_ARG (exp
, 0);
31973 op1
= expand_normal (arg0
);
31974 if (!address_operand (op1
, VOIDmode
))
31976 op1
= convert_memory_address (Pmode
, op1
);
31977 op1
= copy_addr_to_reg (op1
);
31979 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
31981 op1
= gen_reg_rtx (SImode
);
31982 emit_move_insn (op1
, CONST1_RTX (SImode
));
31984 /* Emit SImode conditional move. */
31985 if (mode0
== HImode
)
31987 op2
= gen_reg_rtx (SImode
);
31988 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
31990 else if (mode0
== SImode
)
31993 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
31996 target
= gen_reg_rtx (SImode
);
31998 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32000 emit_insn (gen_rtx_SET (VOIDmode
, target
,
32001 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
32004 case IX86_BUILTIN_RDSEED16_STEP
:
32005 icode
= CODE_FOR_rdseedhi_1
;
32009 case IX86_BUILTIN_RDSEED32_STEP
:
32010 icode
= CODE_FOR_rdseedsi_1
;
32014 case IX86_BUILTIN_RDSEED64_STEP
:
32015 icode
= CODE_FOR_rdseeddi_1
;
32019 op0
= gen_reg_rtx (mode0
);
32020 emit_insn (GEN_FCN (icode
) (op0
));
32022 arg0
= CALL_EXPR_ARG (exp
, 0);
32023 op1
= expand_normal (arg0
);
32024 if (!address_operand (op1
, VOIDmode
))
32026 op1
= convert_memory_address (Pmode
, op1
);
32027 op1
= copy_addr_to_reg (op1
);
32029 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
32031 op2
= gen_reg_rtx (QImode
);
32033 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32035 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
32038 target
= gen_reg_rtx (SImode
);
32040 emit_insn (gen_zero_extendqisi2 (target
, op2
));
32043 case IX86_BUILTIN_ADDCARRYX32
:
32044 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
32048 case IX86_BUILTIN_ADDCARRYX64
:
32049 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
32053 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
32054 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
32055 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
32056 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
32058 op0
= gen_reg_rtx (QImode
);
32060 /* Generate CF from input operand. */
32061 op1
= expand_normal (arg0
);
32062 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
32063 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
32065 /* Gen ADCX instruction to compute X+Y+CF. */
32066 op2
= expand_normal (arg1
);
32067 op3
= expand_normal (arg2
);
32070 op2
= copy_to_mode_reg (mode0
, op2
);
32072 op3
= copy_to_mode_reg (mode0
, op3
);
32074 op0
= gen_reg_rtx (mode0
);
32076 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
32077 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
32078 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
32080 /* Store the result. */
32081 op4
= expand_normal (arg3
);
32082 if (!address_operand (op4
, VOIDmode
))
32084 op4
= convert_memory_address (Pmode
, op4
);
32085 op4
= copy_addr_to_reg (op4
);
32087 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
32089 /* Return current CF value. */
32091 target
= gen_reg_rtx (QImode
);
32093 PUT_MODE (pat
, QImode
);
32094 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
32097 case IX86_BUILTIN_GATHERSIV2DF
:
32098 icode
= CODE_FOR_avx2_gathersiv2df
;
32100 case IX86_BUILTIN_GATHERSIV4DF
:
32101 icode
= CODE_FOR_avx2_gathersiv4df
;
32103 case IX86_BUILTIN_GATHERDIV2DF
:
32104 icode
= CODE_FOR_avx2_gatherdiv2df
;
32106 case IX86_BUILTIN_GATHERDIV4DF
:
32107 icode
= CODE_FOR_avx2_gatherdiv4df
;
32109 case IX86_BUILTIN_GATHERSIV4SF
:
32110 icode
= CODE_FOR_avx2_gathersiv4sf
;
32112 case IX86_BUILTIN_GATHERSIV8SF
:
32113 icode
= CODE_FOR_avx2_gathersiv8sf
;
32115 case IX86_BUILTIN_GATHERDIV4SF
:
32116 icode
= CODE_FOR_avx2_gatherdiv4sf
;
32118 case IX86_BUILTIN_GATHERDIV8SF
:
32119 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32121 case IX86_BUILTIN_GATHERSIV2DI
:
32122 icode
= CODE_FOR_avx2_gathersiv2di
;
32124 case IX86_BUILTIN_GATHERSIV4DI
:
32125 icode
= CODE_FOR_avx2_gathersiv4di
;
32127 case IX86_BUILTIN_GATHERDIV2DI
:
32128 icode
= CODE_FOR_avx2_gatherdiv2di
;
32130 case IX86_BUILTIN_GATHERDIV4DI
:
32131 icode
= CODE_FOR_avx2_gatherdiv4di
;
32133 case IX86_BUILTIN_GATHERSIV4SI
:
32134 icode
= CODE_FOR_avx2_gathersiv4si
;
32136 case IX86_BUILTIN_GATHERSIV8SI
:
32137 icode
= CODE_FOR_avx2_gathersiv8si
;
32139 case IX86_BUILTIN_GATHERDIV4SI
:
32140 icode
= CODE_FOR_avx2_gatherdiv4si
;
32142 case IX86_BUILTIN_GATHERDIV8SI
:
32143 icode
= CODE_FOR_avx2_gatherdiv8si
;
32145 case IX86_BUILTIN_GATHERALTSIV4DF
:
32146 icode
= CODE_FOR_avx2_gathersiv4df
;
32148 case IX86_BUILTIN_GATHERALTDIV8SF
:
32149 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32151 case IX86_BUILTIN_GATHERALTSIV4DI
:
32152 icode
= CODE_FOR_avx2_gathersiv4di
;
32154 case IX86_BUILTIN_GATHERALTDIV8SI
:
32155 icode
= CODE_FOR_avx2_gatherdiv8si
;
32159 arg0
= CALL_EXPR_ARG (exp
, 0);
32160 arg1
= CALL_EXPR_ARG (exp
, 1);
32161 arg2
= CALL_EXPR_ARG (exp
, 2);
32162 arg3
= CALL_EXPR_ARG (exp
, 3);
32163 arg4
= CALL_EXPR_ARG (exp
, 4);
32164 op0
= expand_normal (arg0
);
32165 op1
= expand_normal (arg1
);
32166 op2
= expand_normal (arg2
);
32167 op3
= expand_normal (arg3
);
32168 op4
= expand_normal (arg4
);
32169 /* Note the arg order is different from the operand order. */
32170 mode0
= insn_data
[icode
].operand
[1].mode
;
32171 mode2
= insn_data
[icode
].operand
[3].mode
;
32172 mode3
= insn_data
[icode
].operand
[4].mode
;
32173 mode4
= insn_data
[icode
].operand
[5].mode
;
32175 if (target
== NULL_RTX
32176 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
32177 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
32179 subtarget
= target
;
32181 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
32182 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
32184 rtx half
= gen_reg_rtx (V4SImode
);
32185 if (!nonimmediate_operand (op2
, V8SImode
))
32186 op2
= copy_to_mode_reg (V8SImode
, op2
);
32187 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
32190 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
32191 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
32193 rtx (*gen
) (rtx
, rtx
);
32194 rtx half
= gen_reg_rtx (mode0
);
32195 if (mode0
== V4SFmode
)
32196 gen
= gen_vec_extract_lo_v8sf
;
32198 gen
= gen_vec_extract_lo_v8si
;
32199 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
32200 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
32201 emit_insn (gen (half
, op0
));
32203 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
32204 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
32205 emit_insn (gen (half
, op3
));
32209 /* Force memory operand only with base register here. But we
32210 don't want to do it on memory operand for other builtin
32212 op1
= force_reg (Pmode
, convert_to_mode (Pmode
, op1
, 1));
32214 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
32215 op0
= copy_to_mode_reg (mode0
, op0
);
32216 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
32217 op1
= copy_to_mode_reg (Pmode
, op1
);
32218 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
32219 op2
= copy_to_mode_reg (mode2
, op2
);
32220 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
32221 op3
= copy_to_mode_reg (mode3
, op3
);
32222 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
32224 error ("last argument must be scale 1, 2, 4, 8");
32228 /* Optimize. If mask is known to have all high bits set,
32229 replace op0 with pc_rtx to signal that the instruction
32230 overwrites the whole destination and doesn't use its
32231 previous contents. */
32234 if (TREE_CODE (arg3
) == VECTOR_CST
)
32236 unsigned int negative
= 0;
32237 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
32239 tree cst
= VECTOR_CST_ELT (arg3
, i
);
32240 if (TREE_CODE (cst
) == INTEGER_CST
32241 && tree_int_cst_sign_bit (cst
))
32243 else if (TREE_CODE (cst
) == REAL_CST
32244 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
32247 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
32250 else if (TREE_CODE (arg3
) == SSA_NAME
)
32252 /* Recognize also when mask is like:
32253 __v2df src = _mm_setzero_pd ();
32254 __v2df mask = _mm_cmpeq_pd (src, src);
32256 __v8sf src = _mm256_setzero_ps ();
32257 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
32258 as that is a cheaper way to load all ones into
32259 a register than having to load a constant from
32261 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
32262 if (is_gimple_call (def_stmt
))
32264 tree fndecl
= gimple_call_fndecl (def_stmt
);
32266 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
32267 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
32269 case IX86_BUILTIN_CMPPD
:
32270 case IX86_BUILTIN_CMPPS
:
32271 case IX86_BUILTIN_CMPPD256
:
32272 case IX86_BUILTIN_CMPPS256
:
32273 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
32276 case IX86_BUILTIN_CMPEQPD
:
32277 case IX86_BUILTIN_CMPEQPS
:
32278 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
32279 && initializer_zerop (gimple_call_arg (def_stmt
,
32290 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
32295 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
32296 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
32298 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
32299 ? V4SFmode
: V4SImode
;
32300 if (target
== NULL_RTX
)
32301 target
= gen_reg_rtx (tmode
);
32302 if (tmode
== V4SFmode
)
32303 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
32305 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
32308 target
= subtarget
;
32312 case IX86_BUILTIN_XABORT
:
32313 icode
= CODE_FOR_xabort
;
32314 arg0
= CALL_EXPR_ARG (exp
, 0);
32315 op0
= expand_normal (arg0
);
32316 mode0
= insn_data
[icode
].operand
[0].mode
;
32317 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32319 error ("the xabort's argument must be an 8-bit immediate");
32322 emit_insn (gen_xabort (op0
));
32329 for (i
= 0, d
= bdesc_special_args
;
32330 i
< ARRAY_SIZE (bdesc_special_args
);
32332 if (d
->code
== fcode
)
32333 return ix86_expand_special_args_builtin (d
, exp
, target
);
32335 for (i
= 0, d
= bdesc_args
;
32336 i
< ARRAY_SIZE (bdesc_args
);
32338 if (d
->code
== fcode
)
32341 case IX86_BUILTIN_FABSQ
:
32342 case IX86_BUILTIN_COPYSIGNQ
:
32344 /* Emit a normal call if SSE isn't available. */
32345 return expand_call (exp
, target
, ignore
);
32347 return ix86_expand_args_builtin (d
, exp
, target
);
32350 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
32351 if (d
->code
== fcode
)
32352 return ix86_expand_sse_comi (d
, exp
, target
);
32354 for (i
= 0, d
= bdesc_pcmpestr
;
32355 i
< ARRAY_SIZE (bdesc_pcmpestr
);
32357 if (d
->code
== fcode
)
32358 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
32360 for (i
= 0, d
= bdesc_pcmpistr
;
32361 i
< ARRAY_SIZE (bdesc_pcmpistr
);
32363 if (d
->code
== fcode
)
32364 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
32366 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
32367 if (d
->code
== fcode
)
32368 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
32369 (enum ix86_builtin_func_type
)
32370 d
->flag
, d
->comparison
);
32372 gcc_unreachable ();
32375 /* Returns a function decl for a vectorized version of the builtin function
32376 with builtin function code FN and the result vector type TYPE, or NULL_TREE
32377 if it is not available. */
32380 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
32383 enum machine_mode in_mode
, out_mode
;
32385 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
32387 if (TREE_CODE (type_out
) != VECTOR_TYPE
32388 || TREE_CODE (type_in
) != VECTOR_TYPE
32389 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
32392 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32393 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
32394 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32395 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32399 case BUILT_IN_SQRT
:
32400 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32402 if (out_n
== 2 && in_n
== 2)
32403 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
32404 else if (out_n
== 4 && in_n
== 4)
32405 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
32409 case BUILT_IN_SQRTF
:
32410 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32412 if (out_n
== 4 && in_n
== 4)
32413 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
32414 else if (out_n
== 8 && in_n
== 8)
32415 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
32419 case BUILT_IN_IFLOOR
:
32420 case BUILT_IN_LFLOOR
:
32421 case BUILT_IN_LLFLOOR
:
32422 /* The round insn does not trap on denormals. */
32423 if (flag_trapping_math
|| !TARGET_ROUND
)
32426 if (out_mode
== SImode
&& in_mode
== DFmode
)
32428 if (out_n
== 4 && in_n
== 2)
32429 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
32430 else if (out_n
== 8 && in_n
== 4)
32431 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
32435 case BUILT_IN_IFLOORF
:
32436 case BUILT_IN_LFLOORF
:
32437 case BUILT_IN_LLFLOORF
:
32438 /* The round insn does not trap on denormals. */
32439 if (flag_trapping_math
|| !TARGET_ROUND
)
32442 if (out_mode
== SImode
&& in_mode
== SFmode
)
32444 if (out_n
== 4 && in_n
== 4)
32445 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
32446 else if (out_n
== 8 && in_n
== 8)
32447 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
32451 case BUILT_IN_ICEIL
:
32452 case BUILT_IN_LCEIL
:
32453 case BUILT_IN_LLCEIL
:
32454 /* The round insn does not trap on denormals. */
32455 if (flag_trapping_math
|| !TARGET_ROUND
)
32458 if (out_mode
== SImode
&& in_mode
== DFmode
)
32460 if (out_n
== 4 && in_n
== 2)
32461 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
32462 else if (out_n
== 8 && in_n
== 4)
32463 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
32467 case BUILT_IN_ICEILF
:
32468 case BUILT_IN_LCEILF
:
32469 case BUILT_IN_LLCEILF
:
32470 /* The round insn does not trap on denormals. */
32471 if (flag_trapping_math
|| !TARGET_ROUND
)
32474 if (out_mode
== SImode
&& in_mode
== SFmode
)
32476 if (out_n
== 4 && in_n
== 4)
32477 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
32478 else if (out_n
== 8 && in_n
== 8)
32479 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
32483 case BUILT_IN_IRINT
:
32484 case BUILT_IN_LRINT
:
32485 case BUILT_IN_LLRINT
:
32486 if (out_mode
== SImode
&& in_mode
== DFmode
)
32488 if (out_n
== 4 && in_n
== 2)
32489 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
32490 else if (out_n
== 8 && in_n
== 4)
32491 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
32495 case BUILT_IN_IRINTF
:
32496 case BUILT_IN_LRINTF
:
32497 case BUILT_IN_LLRINTF
:
32498 if (out_mode
== SImode
&& in_mode
== SFmode
)
32500 if (out_n
== 4 && in_n
== 4)
32501 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
32502 else if (out_n
== 8 && in_n
== 8)
32503 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
32507 case BUILT_IN_IROUND
:
32508 case BUILT_IN_LROUND
:
32509 case BUILT_IN_LLROUND
:
32510 /* The round insn does not trap on denormals. */
32511 if (flag_trapping_math
|| !TARGET_ROUND
)
32514 if (out_mode
== SImode
&& in_mode
== DFmode
)
32516 if (out_n
== 4 && in_n
== 2)
32517 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
32518 else if (out_n
== 8 && in_n
== 4)
32519 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
32523 case BUILT_IN_IROUNDF
:
32524 case BUILT_IN_LROUNDF
:
32525 case BUILT_IN_LLROUNDF
:
32526 /* The round insn does not trap on denormals. */
32527 if (flag_trapping_math
|| !TARGET_ROUND
)
32530 if (out_mode
== SImode
&& in_mode
== SFmode
)
32532 if (out_n
== 4 && in_n
== 4)
32533 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
32534 else if (out_n
== 8 && in_n
== 8)
32535 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
32539 case BUILT_IN_COPYSIGN
:
32540 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32542 if (out_n
== 2 && in_n
== 2)
32543 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
32544 else if (out_n
== 4 && in_n
== 4)
32545 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
32549 case BUILT_IN_COPYSIGNF
:
32550 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32552 if (out_n
== 4 && in_n
== 4)
32553 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
32554 else if (out_n
== 8 && in_n
== 8)
32555 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
32559 case BUILT_IN_FLOOR
:
32560 /* The round insn does not trap on denormals. */
32561 if (flag_trapping_math
|| !TARGET_ROUND
)
32564 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32566 if (out_n
== 2 && in_n
== 2)
32567 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
32568 else if (out_n
== 4 && in_n
== 4)
32569 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
32573 case BUILT_IN_FLOORF
:
32574 /* The round insn does not trap on denormals. */
32575 if (flag_trapping_math
|| !TARGET_ROUND
)
32578 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32580 if (out_n
== 4 && in_n
== 4)
32581 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
32582 else if (out_n
== 8 && in_n
== 8)
32583 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
32587 case BUILT_IN_CEIL
:
32588 /* The round insn does not trap on denormals. */
32589 if (flag_trapping_math
|| !TARGET_ROUND
)
32592 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32594 if (out_n
== 2 && in_n
== 2)
32595 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
32596 else if (out_n
== 4 && in_n
== 4)
32597 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
32601 case BUILT_IN_CEILF
:
32602 /* The round insn does not trap on denormals. */
32603 if (flag_trapping_math
|| !TARGET_ROUND
)
32606 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32608 if (out_n
== 4 && in_n
== 4)
32609 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
32610 else if (out_n
== 8 && in_n
== 8)
32611 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
32615 case BUILT_IN_TRUNC
:
32616 /* The round insn does not trap on denormals. */
32617 if (flag_trapping_math
|| !TARGET_ROUND
)
32620 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32622 if (out_n
== 2 && in_n
== 2)
32623 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
32624 else if (out_n
== 4 && in_n
== 4)
32625 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
32629 case BUILT_IN_TRUNCF
:
32630 /* The round insn does not trap on denormals. */
32631 if (flag_trapping_math
|| !TARGET_ROUND
)
32634 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32636 if (out_n
== 4 && in_n
== 4)
32637 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
32638 else if (out_n
== 8 && in_n
== 8)
32639 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
32643 case BUILT_IN_RINT
:
32644 /* The round insn does not trap on denormals. */
32645 if (flag_trapping_math
|| !TARGET_ROUND
)
32648 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32650 if (out_n
== 2 && in_n
== 2)
32651 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
32652 else if (out_n
== 4 && in_n
== 4)
32653 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
32657 case BUILT_IN_RINTF
:
32658 /* The round insn does not trap on denormals. */
32659 if (flag_trapping_math
|| !TARGET_ROUND
)
32662 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32664 if (out_n
== 4 && in_n
== 4)
32665 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
32666 else if (out_n
== 8 && in_n
== 8)
32667 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
32671 case BUILT_IN_ROUND
:
32672 /* The round insn does not trap on denormals. */
32673 if (flag_trapping_math
|| !TARGET_ROUND
)
32676 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32678 if (out_n
== 2 && in_n
== 2)
32679 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
32680 else if (out_n
== 4 && in_n
== 4)
32681 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
32685 case BUILT_IN_ROUNDF
:
32686 /* The round insn does not trap on denormals. */
32687 if (flag_trapping_math
|| !TARGET_ROUND
)
32690 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32692 if (out_n
== 4 && in_n
== 4)
32693 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
32694 else if (out_n
== 8 && in_n
== 8)
32695 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
32700 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32702 if (out_n
== 2 && in_n
== 2)
32703 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
32704 if (out_n
== 4 && in_n
== 4)
32705 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
32709 case BUILT_IN_FMAF
:
32710 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32712 if (out_n
== 4 && in_n
== 4)
32713 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
32714 if (out_n
== 8 && in_n
== 8)
32715 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
32723 /* Dispatch to a handler for a vectorization library. */
32724 if (ix86_veclib_handler
)
32725 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
32731 /* Handler for an SVML-style interface to
32732 a library with vectorized intrinsics. */
32735 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
32738 tree fntype
, new_fndecl
, args
;
32741 enum machine_mode el_mode
, in_mode
;
32744 /* The SVML is suitable for unsafe math only. */
32745 if (!flag_unsafe_math_optimizations
)
32748 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32749 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32750 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32751 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32752 if (el_mode
!= in_mode
32760 case BUILT_IN_LOG10
:
32762 case BUILT_IN_TANH
:
32764 case BUILT_IN_ATAN
:
32765 case BUILT_IN_ATAN2
:
32766 case BUILT_IN_ATANH
:
32767 case BUILT_IN_CBRT
:
32768 case BUILT_IN_SINH
:
32770 case BUILT_IN_ASINH
:
32771 case BUILT_IN_ASIN
:
32772 case BUILT_IN_COSH
:
32774 case BUILT_IN_ACOSH
:
32775 case BUILT_IN_ACOS
:
32776 if (el_mode
!= DFmode
|| n
!= 2)
32780 case BUILT_IN_EXPF
:
32781 case BUILT_IN_LOGF
:
32782 case BUILT_IN_LOG10F
:
32783 case BUILT_IN_POWF
:
32784 case BUILT_IN_TANHF
:
32785 case BUILT_IN_TANF
:
32786 case BUILT_IN_ATANF
:
32787 case BUILT_IN_ATAN2F
:
32788 case BUILT_IN_ATANHF
:
32789 case BUILT_IN_CBRTF
:
32790 case BUILT_IN_SINHF
:
32791 case BUILT_IN_SINF
:
32792 case BUILT_IN_ASINHF
:
32793 case BUILT_IN_ASINF
:
32794 case BUILT_IN_COSHF
:
32795 case BUILT_IN_COSF
:
32796 case BUILT_IN_ACOSHF
:
32797 case BUILT_IN_ACOSF
:
32798 if (el_mode
!= SFmode
|| n
!= 4)
32806 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32808 if (fn
== BUILT_IN_LOGF
)
32809 strcpy (name
, "vmlsLn4");
32810 else if (fn
== BUILT_IN_LOG
)
32811 strcpy (name
, "vmldLn2");
32814 sprintf (name
, "vmls%s", bname
+10);
32815 name
[strlen (name
)-1] = '4';
32818 sprintf (name
, "vmld%s2", bname
+10);
32820 /* Convert to uppercase. */
32824 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32826 args
= TREE_CHAIN (args
))
32830 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32832 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32834 /* Build a function declaration for the vectorized function. */
32835 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32836 FUNCTION_DECL
, get_identifier (name
), fntype
);
32837 TREE_PUBLIC (new_fndecl
) = 1;
32838 DECL_EXTERNAL (new_fndecl
) = 1;
32839 DECL_IS_NOVOPS (new_fndecl
) = 1;
32840 TREE_READONLY (new_fndecl
) = 1;
32845 /* Handler for an ACML-style interface to
32846 a library with vectorized intrinsics. */
32849 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
32851 char name
[20] = "__vr.._";
32852 tree fntype
, new_fndecl
, args
;
32855 enum machine_mode el_mode
, in_mode
;
32858 /* The ACML is 64bits only and suitable for unsafe math only as
32859 it does not correctly support parts of IEEE with the required
32860 precision such as denormals. */
32862 || !flag_unsafe_math_optimizations
)
32865 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32866 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32867 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32868 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32869 if (el_mode
!= in_mode
32879 case BUILT_IN_LOG2
:
32880 case BUILT_IN_LOG10
:
32883 if (el_mode
!= DFmode
32888 case BUILT_IN_SINF
:
32889 case BUILT_IN_COSF
:
32890 case BUILT_IN_EXPF
:
32891 case BUILT_IN_POWF
:
32892 case BUILT_IN_LOGF
:
32893 case BUILT_IN_LOG2F
:
32894 case BUILT_IN_LOG10F
:
32897 if (el_mode
!= SFmode
32906 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32907 sprintf (name
+ 7, "%s", bname
+10);
32910 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32912 args
= TREE_CHAIN (args
))
32916 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32918 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32920 /* Build a function declaration for the vectorized function. */
32921 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32922 FUNCTION_DECL
, get_identifier (name
), fntype
);
32923 TREE_PUBLIC (new_fndecl
) = 1;
32924 DECL_EXTERNAL (new_fndecl
) = 1;
32925 DECL_IS_NOVOPS (new_fndecl
) = 1;
32926 TREE_READONLY (new_fndecl
) = 1;
32931 /* Returns a decl of a function that implements gather load with
32932 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
32933 Return NULL_TREE if it is not available. */
32936 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
32937 const_tree index_type
, int scale
)
32940 enum ix86_builtins code
;
32945 if ((TREE_CODE (index_type
) != INTEGER_TYPE
32946 && !POINTER_TYPE_P (index_type
))
32947 || (TYPE_MODE (index_type
) != SImode
32948 && TYPE_MODE (index_type
) != DImode
))
32951 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
32954 /* v*gather* insn sign extends index to pointer mode. */
32955 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
32956 && TYPE_UNSIGNED (index_type
))
32961 || (scale
& (scale
- 1)) != 0)
32964 si
= TYPE_MODE (index_type
) == SImode
;
32965 switch (TYPE_MODE (mem_vectype
))
32968 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
32971 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
32974 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
32977 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
32980 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
32983 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
32986 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
32989 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
32995 return ix86_builtins
[code
];
32998 /* Returns a code for a target-specific builtin that implements
32999 reciprocal of the function, or NULL_TREE if not available. */
33002 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
33003 bool sqrt ATTRIBUTE_UNUSED
)
33005 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
33006 && flag_finite_math_only
&& !flag_trapping_math
33007 && flag_unsafe_math_optimizations
))
33011 /* Machine dependent builtins. */
33014 /* Vectorized version of sqrt to rsqrt conversion. */
33015 case IX86_BUILTIN_SQRTPS_NR
:
33016 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
33018 case IX86_BUILTIN_SQRTPS_NR256
:
33019 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
33025 /* Normal builtins. */
33028 /* Sqrt to rsqrt conversion. */
33029 case BUILT_IN_SQRTF
:
33030 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
33037 /* Helper for avx_vpermilps256_operand et al. This is also used by
33038 the expansion functions to turn the parallel back into a mask.
33039 The return value is 0 for no match and the imm8+1 for a match. */
33042 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
33044 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
33046 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33048 if (XVECLEN (par
, 0) != (int) nelt
)
33051 /* Validate that all of the elements are constants, and not totally
33052 out of range. Copy the data into an integral array to make the
33053 subsequent checks easier. */
33054 for (i
= 0; i
< nelt
; ++i
)
33056 rtx er
= XVECEXP (par
, 0, i
);
33057 unsigned HOST_WIDE_INT ei
;
33059 if (!CONST_INT_P (er
))
33070 /* In the 256-bit DFmode case, we can only move elements within
33072 for (i
= 0; i
< 2; ++i
)
33076 mask
|= ipar
[i
] << i
;
33078 for (i
= 2; i
< 4; ++i
)
33082 mask
|= (ipar
[i
] - 2) << i
;
33087 /* In the 256-bit SFmode case, we have full freedom of movement
33088 within the low 128-bit lane, but the high 128-bit lane must
33089 mirror the exact same pattern. */
33090 for (i
= 0; i
< 4; ++i
)
33091 if (ipar
[i
] + 4 != ipar
[i
+ 4])
33098 /* In the 128-bit case, we've full freedom in the placement of
33099 the elements from the source operand. */
33100 for (i
= 0; i
< nelt
; ++i
)
33101 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
33105 gcc_unreachable ();
33108 /* Make sure success has a non-zero value by adding one. */
33112 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
33113 the expansion functions to turn the parallel back into a mask.
33114 The return value is 0 for no match and the imm8+1 for a match. */
33117 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
33119 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
33121 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33123 if (XVECLEN (par
, 0) != (int) nelt
)
33126 /* Validate that all of the elements are constants, and not totally
33127 out of range. Copy the data into an integral array to make the
33128 subsequent checks easier. */
33129 for (i
= 0; i
< nelt
; ++i
)
33131 rtx er
= XVECEXP (par
, 0, i
);
33132 unsigned HOST_WIDE_INT ei
;
33134 if (!CONST_INT_P (er
))
33137 if (ei
>= 2 * nelt
)
33142 /* Validate that the halves of the permute are halves. */
33143 for (i
= 0; i
< nelt2
- 1; ++i
)
33144 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33146 for (i
= nelt2
; i
< nelt
- 1; ++i
)
33147 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33150 /* Reconstruct the mask. */
33151 for (i
= 0; i
< 2; ++i
)
33153 unsigned e
= ipar
[i
* nelt2
];
33157 mask
|= e
<< (i
* 4);
33160 /* Make sure success has a non-zero value by adding one. */
33164 /* Store OPERAND to the memory after reload is completed. This means
33165 that we can't easily use assign_stack_local. */
33167 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
33171 gcc_assert (reload_completed
);
33172 if (ix86_using_red_zone ())
33174 result
= gen_rtx_MEM (mode
,
33175 gen_rtx_PLUS (Pmode
,
33177 GEN_INT (-RED_ZONE_SIZE
)));
33178 emit_move_insn (result
, operand
);
33180 else if (TARGET_64BIT
)
33186 operand
= gen_lowpart (DImode
, operand
);
33190 gen_rtx_SET (VOIDmode
,
33191 gen_rtx_MEM (DImode
,
33192 gen_rtx_PRE_DEC (DImode
,
33193 stack_pointer_rtx
)),
33197 gcc_unreachable ();
33199 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33208 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
33210 gen_rtx_SET (VOIDmode
,
33211 gen_rtx_MEM (SImode
,
33212 gen_rtx_PRE_DEC (Pmode
,
33213 stack_pointer_rtx
)),
33216 gen_rtx_SET (VOIDmode
,
33217 gen_rtx_MEM (SImode
,
33218 gen_rtx_PRE_DEC (Pmode
,
33219 stack_pointer_rtx
)),
33224 /* Store HImodes as SImodes. */
33225 operand
= gen_lowpart (SImode
, operand
);
33229 gen_rtx_SET (VOIDmode
,
33230 gen_rtx_MEM (GET_MODE (operand
),
33231 gen_rtx_PRE_DEC (SImode
,
33232 stack_pointer_rtx
)),
33236 gcc_unreachable ();
33238 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33243 /* Free operand from the memory. */
33245 ix86_free_from_memory (enum machine_mode mode
)
33247 if (!ix86_using_red_zone ())
33251 if (mode
== DImode
|| TARGET_64BIT
)
33255 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33256 to pop or add instruction if registers are available. */
33257 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
33258 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
33263 /* Return a register priority for hard reg REGNO. */
33265 ix86_register_priority (int hard_regno
)
33267 /* ebp and r13 as the base always wants a displacement, r12 as the
33268 base always wants an index. So discourage their usage in an
33270 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
33272 if (hard_regno
== BP_REG
)
33274 /* New x86-64 int registers result in bigger code size. Discourage
33276 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
33278 /* New x86-64 SSE registers result in bigger code size. Discourage
33280 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
33282 /* Usage of AX register results in smaller code. Prefer it. */
33283 if (hard_regno
== 0)
33288 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
33290 Put float CONST_DOUBLE in the constant pool instead of fp regs.
33291 QImode must go into class Q_REGS.
33292 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
33293 movdf to do mem-to-mem moves through integer regs. */
33296 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
33298 enum machine_mode mode
= GET_MODE (x
);
33300 /* We're only allowed to return a subclass of CLASS. Many of the
33301 following checks fail for NO_REGS, so eliminate that early. */
33302 if (regclass
== NO_REGS
)
33305 /* All classes can load zeros. */
33306 if (x
== CONST0_RTX (mode
))
33309 /* Force constants into memory if we are loading a (nonzero) constant into
33310 an MMX or SSE register. This is because there are no MMX/SSE instructions
33311 to load from a constant. */
33313 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
33316 /* Prefer SSE regs only, if we can use them for math. */
33317 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
33318 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33320 /* Floating-point constants need more complex checks. */
33321 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
33323 /* General regs can load everything. */
33324 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
33327 /* Floats can load 0 and 1 plus some others. Note that we eliminated
33328 zero above. We only want to wind up preferring 80387 registers if
33329 we plan on doing computation with them. */
33331 && standard_80387_constant_p (x
) > 0)
33333 /* Limit class to non-sse. */
33334 if (regclass
== FLOAT_SSE_REGS
)
33336 if (regclass
== FP_TOP_SSE_REGS
)
33338 if (regclass
== FP_SECOND_SSE_REGS
)
33339 return FP_SECOND_REG
;
33340 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
33347 /* Generally when we see PLUS here, it's the function invariant
33348 (plus soft-fp const_int). Which can only be computed into general
33350 if (GET_CODE (x
) == PLUS
)
33351 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
33353 /* QImode constants are easy to load, but non-constant QImode data
33354 must go into Q_REGS. */
33355 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
33357 if (reg_class_subset_p (regclass
, Q_REGS
))
33359 if (reg_class_subset_p (Q_REGS
, regclass
))
33367 /* Discourage putting floating-point values in SSE registers unless
33368 SSE math is being used, and likewise for the 387 registers. */
33370 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
33372 enum machine_mode mode
= GET_MODE (x
);
33374 /* Restrict the output reload class to the register bank that we are doing
33375 math on. If we would like not to return a subset of CLASS, reject this
33376 alternative: if reload cannot do this, it will still use its choice. */
33377 mode
= GET_MODE (x
);
33378 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
33379 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
33381 if (X87_FLOAT_MODE_P (mode
))
33383 if (regclass
== FP_TOP_SSE_REGS
)
33385 else if (regclass
== FP_SECOND_SSE_REGS
)
33386 return FP_SECOND_REG
;
33388 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33395 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
33396 enum machine_mode mode
, secondary_reload_info
*sri
)
33398 /* Double-word spills from general registers to non-offsettable memory
33399 references (zero-extended addresses) require special handling. */
33402 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
33403 && rclass
== GENERAL_REGS
33404 && !offsettable_memref_p (x
))
33407 ? CODE_FOR_reload_noff_load
33408 : CODE_FOR_reload_noff_store
);
33409 /* Add the cost of moving address to a temporary. */
33410 sri
->extra_cost
= 1;
33415 /* QImode spills from non-QI registers require
33416 intermediate register on 32bit targets. */
33418 && !in_p
&& mode
== QImode
33419 && (rclass
== GENERAL_REGS
33420 || rclass
== LEGACY_REGS
33421 || rclass
== NON_Q_REGS
33424 || rclass
== INDEX_REGS
))
33433 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
33434 regno
= true_regnum (x
);
33436 /* Return Q_REGS if the operand is in memory. */
33441 /* This condition handles corner case where an expression involving
33442 pointers gets vectorized. We're trying to use the address of a
33443 stack slot as a vector initializer.
33445 (set (reg:V2DI 74 [ vect_cst_.2 ])
33446 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
33448 Eventually frame gets turned into sp+offset like this:
33450 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33451 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33452 (const_int 392 [0x188]))))
33454 That later gets turned into:
33456 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33457 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33458 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
33460 We'll have the following reload recorded:
33462 Reload 0: reload_in (DI) =
33463 (plus:DI (reg/f:DI 7 sp)
33464 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
33465 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33466 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
33467 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
33468 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33469 reload_reg_rtx: (reg:V2DI 22 xmm1)
33471 Which isn't going to work since SSE instructions can't handle scalar
33472 additions. Returning GENERAL_REGS forces the addition into integer
33473 register and reload can handle subsequent reloads without problems. */
33475 if (in_p
&& GET_CODE (x
) == PLUS
33476 && SSE_CLASS_P (rclass
)
33477 && SCALAR_INT_MODE_P (mode
))
33478 return GENERAL_REGS
;
33483 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
33486 ix86_class_likely_spilled_p (reg_class_t rclass
)
33497 case SSE_FIRST_REG
:
33499 case FP_SECOND_REG
:
33509 /* If we are copying between general and FP registers, we need a memory
33510 location. The same is true for SSE and MMX registers.
33512 To optimize register_move_cost performance, allow inline variant.
33514 The macro can't work reliably when one of the CLASSES is class containing
33515 registers from multiple units (SSE, MMX, integer). We avoid this by never
33516 combining those units in single alternative in the machine description.
33517 Ensure that this constraint holds to avoid unexpected surprises.
33519 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
33520 enforce these sanity checks. */
33523 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33524 enum machine_mode mode
, int strict
)
33526 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
33527 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
33528 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
33529 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
33530 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
33531 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
33533 gcc_assert (!strict
|| lra_in_progress
);
33537 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
33540 /* ??? This is a lie. We do have moves between mmx/general, and for
33541 mmx/sse2. But by saying we need secondary memory we discourage the
33542 register allocator from using the mmx registers unless needed. */
33543 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
33546 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33548 /* SSE1 doesn't have any direct moves from other classes. */
33552 /* If the target says that inter-unit moves are more expensive
33553 than moving through memory, then don't generate them. */
33554 if (!TARGET_INTER_UNIT_MOVES
)
33557 /* Between SSE and general, we have moves no larger than word size. */
33558 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
33566 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33567 enum machine_mode mode
, int strict
)
33569 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
33572 /* Implement the TARGET_CLASS_MAX_NREGS hook.
33574 On the 80386, this is the size of MODE in words,
33575 except in the FP regs, where a single reg is always enough. */
33577 static unsigned char
33578 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
33580 if (MAYBE_INTEGER_CLASS_P (rclass
))
33582 if (mode
== XFmode
)
33583 return (TARGET_64BIT
? 2 : 3);
33584 else if (mode
== XCmode
)
33585 return (TARGET_64BIT
? 4 : 6);
33587 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
33591 if (COMPLEX_MODE_P (mode
))
33598 /* Return true if the registers in CLASS cannot represent the change from
33599 modes FROM to TO. */
33602 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
33603 enum reg_class regclass
)
33608 /* x87 registers can't do subreg at all, as all values are reformatted
33609 to extended precision. */
33610 if (MAYBE_FLOAT_CLASS_P (regclass
))
33613 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
33615 /* Vector registers do not support QI or HImode loads. If we don't
33616 disallow a change to these modes, reload will assume it's ok to
33617 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
33618 the vec_dupv4hi pattern. */
33619 if (GET_MODE_SIZE (from
) < 4)
33622 /* Vector registers do not support subreg with nonzero offsets, which
33623 are otherwise valid for integer registers. Since we can't see
33624 whether we have a nonzero offset from here, prohibit all
33625 nonparadoxical subregs changing size. */
33626 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
33633 /* Return the cost of moving data of mode M between a
33634 register and memory. A value of 2 is the default; this cost is
33635 relative to those in `REGISTER_MOVE_COST'.
33637 This function is used extensively by register_move_cost that is used to
33638 build tables at startup. Make it inline in this case.
33639 When IN is 2, return maximum of in and out move cost.
33641 If moving between registers and memory is more expensive than
33642 between two registers, you should define this macro to express the
33645 Model also increased moving costs of QImode registers in non
33649 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
33653 if (FLOAT_CLASS_P (regclass
))
33671 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
33672 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
33674 if (SSE_CLASS_P (regclass
))
33677 switch (GET_MODE_SIZE (mode
))
33692 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
33693 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
33695 if (MMX_CLASS_P (regclass
))
33698 switch (GET_MODE_SIZE (mode
))
33710 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
33711 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
33713 switch (GET_MODE_SIZE (mode
))
33716 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
33719 return ix86_cost
->int_store
[0];
33720 if (TARGET_PARTIAL_REG_DEPENDENCY
33721 && optimize_function_for_speed_p (cfun
))
33722 cost
= ix86_cost
->movzbl_load
;
33724 cost
= ix86_cost
->int_load
[0];
33726 return MAX (cost
, ix86_cost
->int_store
[0]);
33732 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
33734 return ix86_cost
->movzbl_load
;
33736 return ix86_cost
->int_store
[0] + 4;
33741 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
33742 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
33744 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
33745 if (mode
== TFmode
)
33748 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
33750 cost
= ix86_cost
->int_load
[2];
33752 cost
= ix86_cost
->int_store
[2];
33753 return (cost
* (((int) GET_MODE_SIZE (mode
)
33754 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
33759 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
33762 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
33766 /* Return the cost of moving data from a register in class CLASS1 to
33767 one in class CLASS2.
33769 It is not required that the cost always equal 2 when FROM is the same as TO;
33770 on some machines it is expensive to move between registers if they are not
33771 general registers. */
33774 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
33775 reg_class_t class2_i
)
/* Convert the target-hook reg_class_t arguments to enum reg_class for
   use with the class predicates below.  */
33777 enum reg_class class1
= (enum reg_class
) class1_i
;
33778 enum reg_class class2
= (enum reg_class
) class2_i
;
33780 /* In case we require secondary memory, compute cost of the store followed
33781 by load. In order to avoid bad register allocation choices, we need
33782 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
33784 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
/* Charge a store out of CLASS1 plus a load into CLASS2 (the "2" asks
   inline_memory_move_cost for the max of load and store cost).  */
33788 cost
+= inline_memory_move_cost (mode
, class1
, 2);
33789 cost
+= inline_memory_move_cost (mode
, class2
, 2);
33791 /* In case of copying from general_purpose_register we may emit multiple
33792 stores followed by single load causing memory size mismatch stall.
33793 Count this as arbitrarily high cost of 20. */
33794 if (targetm
.class_max_nregs (class1
, mode
)
33795 > targetm
.class_max_nregs (class2
, mode
))
33798 /* In the case of FP/MMX moves, the registers actually overlap, and we
33799 have to switch modes in order to treat them differently. */
33800 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
33801 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
33807 /* Moves between SSE/MMX and integer unit are expensive. */
33808 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
33809 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33811 /* ??? By keeping returned value relatively high, we limit the number
33812 of moves between integer and MMX/SSE registers for all targets.
33813 Additionally, high value prevents problem with x86_modes_tieable_p(),
33814 where integer modes in MMX/SSE registers are not tieable
33815 because of missing QImode and HImode moves to, from or between
33816 MMX/SSE registers. */
33817 return MAX (8, ix86_cost
->mmxsse_to_integer
);
/* Same-unit moves: pick the per-unit move cost from the active cost
   table.  */
33819 if (MAYBE_FLOAT_CLASS_P (class1
))
33820 return ix86_cost
->fp_move
;
33821 if (MAYBE_SSE_CLASS_P (class1
))
33822 return ix86_cost
->sse_move
;
33823 if (MAYBE_MMX_CLASS_P (class1
))
33824 return ix86_cost
->mmx_move
;
33828 /* Return TRUE if hard register REGNO can hold a value of machine-mode
/* MODE (per-register-file validity check used by the register
   allocator).  */
33832 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
33834 /* Flags and only flags can only hold CCmode values. */
33835 if (CC_REGNO_P (regno
))
33836 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* Conversely, CC/random/partial-int modes live nowhere but flags.  */
33837 if (GET_MODE_CLASS (mode
) == MODE_CC
33838 || GET_MODE_CLASS (mode
) == MODE_RANDOM
33839 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
33841 if (STACK_REGNO_P (regno
))
33842 return VALID_FP_MODE_P (mode
);
33843 if (SSE_REGNO_P (regno
))
33845 /* We implement the move patterns for all vector modes into and
33846 out of SSE registers, even when no operation instructions
33847 are available. OImode move is available only when AVX is
33849 return ((TARGET_AVX
&& mode
== OImode
)
33850 || VALID_AVX256_REG_MODE (mode
)
33851 || VALID_SSE_REG_MODE (mode
)
33852 || VALID_SSE2_REG_MODE (mode
)
33853 || VALID_MMX_REG_MODE (mode
)
33854 || VALID_MMX_REG_MODE_3DNOW (mode
));
33856 if (MMX_REGNO_P (regno
))
33858 /* We implement the move patterns for 3DNOW modes even in MMX mode,
33859 so if the register is available at all, then we can move data of
33860 the given mode into or out of it. */
33861 return (VALID_MMX_REG_MODE (mode
)
33862 || VALID_MMX_REG_MODE_3DNOW (mode
))
;
/* From here down REGNO is a general-purpose register.  */
33865 if (mode
== QImode
)
33867 /* Take care for QImode values - they can be in non-QI regs,
33868 but then they do cause partial register stalls. */
33869 if (TARGET_64BIT
|| QI_REGNO_P (regno
))
33871 if (!TARGET_PARTIAL_REG_STALL
)
/* During register allocation (no new pseudos) permit QImode in
   non-QI regs even on partial-reg-stall targets.  */
33873 return !can_create_pseudo_p ();
33875 /* We handle both integer and floats in the general purpose registers. */
33876 else if (VALID_INT_MODE_P (mode
))
33878 else if (VALID_FP_MODE_P (mode
))
33880 else if (VALID_DFP_MODE_P (mode
))
33882 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
33883 on to use that value in smaller contexts, this can easily force a
33884 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
33885 supporting DImode, allow it. */
33886 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
33892 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
33893 tieable integer mode. */
33896 ix86_tieable_integer_mode_p (enum machine_mode mode
)
/* NOTE(review): the switch/case lines selecting on MODE are missing
   from this extraction; only two of the return arms are visible.  */
33905 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
33908 return TARGET_64BIT
;
33915 /* Return true if MODE1 is accessible in a register that can hold MODE2
33916 without copying. That is, all register classes that can hold MODE2
33917 can also hold MODE1. */
33920 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
/* Identical modes are trivially tieable.  */
33922 if (mode1
== mode2
)
/* Two tieable integer modes tie with each other.  */
33925 if (ix86_tieable_integer_mode_p (mode1
)
33926 && ix86_tieable_integer_mode_p (mode2
))
33929 /* MODE2 being XFmode implies fp stack or general regs, which means we
33930 can tie any smaller floating point modes to it. Note that we do not
33931 tie this with TFmode. */
33932 if (mode2
== XFmode
)
33933 return mode1
== SFmode
|| mode1
== DFmode
;
33935 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
33936 that we can tie it with SFmode. */
33937 if (mode2
== DFmode
)
33938 return mode1
== SFmode
;
33940 /* If MODE2 is only appropriate for an SSE register, then tie with
33941 any other mode acceptable to SSE registers. */
33942 if (GET_MODE_SIZE (mode2
) == 32
33943 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33944 return (GET_MODE_SIZE (mode1
) == 32
33945 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33946 if (GET_MODE_SIZE (mode2
) == 16
33947 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33948 return (GET_MODE_SIZE (mode1
) == 16
33949 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33951 /* If MODE2 is appropriate for an MMX register, then tie
33952 with any other mode acceptable to MMX registers. */
33953 if (GET_MODE_SIZE (mode2
) == 8
33954 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
33955 return (GET_MODE_SIZE (mode1
) == 8
33956 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
33961 /* Return the cost of moving between two registers of mode MODE. */
33964 ix86_set_reg_reg_cost (enum machine_mode mode
)
/* UNITS is the widest chunk a single move can transfer; it stays at
   UNITS_PER_WORD unless a case below proves the target can move the
   whole mode in one instruction.  */
33966 unsigned int units
= UNITS_PER_WORD
;
33968 switch (GET_MODE_CLASS (mode
))
/* Condition-code modes move as one CCmode-sized unit.  */
33974 units
= GET_MODE_SIZE (CCmode
);
/* Scalar float: a single move covers the mode when the relevant FP
   unit (x87 or SSE) supports it.  */
33978 if ((TARGET_SSE
&& mode
== TFmode
)
33979 || (TARGET_80387
&& mode
== XFmode
)
33980 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
33981 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
33982 units
= GET_MODE_SIZE (mode
);
33985 case MODE_COMPLEX_FLOAT
:
33986 if ((TARGET_SSE
&& mode
== TCmode
)
33987 || (TARGET_80387
&& mode
== XCmode
)
33988 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
33989 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
33990 units
= GET_MODE_SIZE (mode
);
33993 case MODE_VECTOR_INT
:
33994 case MODE_VECTOR_FLOAT
:
/* Vector modes move in one instruction when an enabled vector ISA
   level supports the mode.  */
33995 if ((TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
33996 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
33997 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
33998 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
33999 units
= GET_MODE_SIZE (mode
);
34002 /* Return the cost of moving between two registers of mode MODE,
34003 assuming that the move will be in pieces of at most UNITS bytes. */
34004 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
34007 /* Compute a (partial) cost for rtx X. Return true if the complete
34008 cost has been computed, and false if subexpressions should be
34009 scanned. In either case, *TOTAL contains the cost result. */
34012 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
/* Recover the rtx codes from the int-typed hook parameters, and pick
   the speed or size cost table per SPEED.  */
34015 enum rtx_code code
= (enum rtx_code
) code_i
;
34016 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
34017 enum machine_mode mode
= GET_MODE (x
);
34018 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
/* SET of a register from a register or zero: plain reg-reg move.  */
34023 if (register_operand (SET_DEST (x
), VOIDmode
)
34024 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
34026 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
/* Constant operands: grade immediates by how cheaply they encode.  */
34035 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
34037 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
34039 else if (flag_pic
&& SYMBOLIC_CONST (x
)
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" below negates the rtx
   code before comparing, so it compares 0/1 against LABEL_REF --
   almost certainly a typo for "GET_CODE (x) != LABEL_REF"; confirm
   against current upstream GCC before relying on this branch.  */
34041 || (!GET_CODE (x
) != LABEL_REF
34042 && (GET_CODE (x
) != SYMBOL_REF
34043 || !SYMBOL_REF_LOCAL_P (x
)))))
34050 if (mode
== VOIDmode
)
/* Floating-point constants: cheap if loadable by fld1/fldz etc.  */
34055 switch (standard_80387_constant_p (x
))
34060 default: /* Other constants */
34067 if (SSE_FLOAT_MODE_P (mode
))
34070 switch (standard_sse_constant_p (x
))
34074 case 1: /* 0: xor eliminates false dependency */
34077 default: /* -1: cmp contains false dependency */
34082 /* Fall back to (MEM (SYMBOL_REF)), since that's where
34083 it'll probably end up. Add a penalty for size. */
34084 *total
= (COSTS_N_INSNS (1)
34085 + (flag_pic
!= 0 && !TARGET_64BIT
)
34086 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
34090 /* The zero extensions is often completely free on x86_64, so make
34091 it as cheap as possible. */
34092 if (TARGET_64BIT
&& mode
== DImode
34093 && GET_MODE (XEXP (x
, 0)) == SImode
)
34095 else if (TARGET_ZERO_EXTEND_WITH_AND
)
34096 *total
= cost
->add
;
34098 *total
= cost
->movzx
;
/* Sign extension is a movsx.  */
34102 *total
= cost
->movsx
;
/* Shift/rotate costs.  */
34106 if (SCALAR_INT_MODE_P (mode
)
34107 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
34108 && CONST_INT_P (XEXP (x
, 1)))
34110 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34113 *total
= cost
->add
;
/* Shift by 2 or 3 can be done as an lea when lea is cheaper.  */
34116 if ((value
== 2 || value
== 3)
34117 && cost
->lea
<= cost
->shift_const
)
34119 *total
= cost
->lea
;
34129 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34131 /* ??? Should be SSE vector operation cost. */
34132 /* At least for published AMD latencies, this really is the same
34133 as the latency for a simple fpu operation like fabs. */
34134 /* V*QImode is emulated with 1-11 insns. */
34135 if (mode
== V16QImode
|| mode
== V32QImode
)
34138 if (TARGET_XOP
&& mode
== V16QImode
)
34140 /* For XOP we use vpshab, which requires a broadcast of the
34141 value to the variable shift insn. For constants this
34142 means a V16Q const in mem; even when we can perform the
34143 shift with one insn set the cost to prefer paddb. */
34144 if (CONSTANT_P (XEXP (x
, 1)))
34146 *total
= (cost
->fabs
34147 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
34148 + (speed
? 2 : COSTS_N_BYTES (16)));
34153 else if (TARGET_SSSE3
)
34155 *total
= cost
->fabs
* count
;
34158 *total
= cost
->fabs
;
/* Multiword scalar shifts.  */
34160 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34162 if (CONST_INT_P (XEXP (x
, 1)))
34164 if (INTVAL (XEXP (x
, 1)) > 32)
34165 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
34167 *total
= cost
->shift_const
* 2;
34171 if (GET_CODE (XEXP (x
, 1)) == AND
)
34172 *total
= cost
->shift_var
* 2;
34174 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
/* Single-word shift: constant vs. variable count.  */
34179 if (CONST_INT_P (XEXP (x
, 1)))
34180 *total
= cost
->shift_const
;
34182 *total
= cost
->shift_var
;
/* FMA: priced like an fmul plus operand costs; negation of op0/op2
   is folded into the FMS/FNMA/FNMS forms for free.  */
34190 gcc_assert (FLOAT_MODE_P (mode
));
34191 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
34193 /* ??? SSE scalar/vector cost should be used here. */
34194 /* ??? Bald assumption that fma has the same cost as fmul. */
34195 *total
= cost
->fmul
;
34196 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
34198 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34200 if (GET_CODE (sub
) == NEG
)
34201 sub
= XEXP (sub
, 0);
34202 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
34205 if (GET_CODE (sub
) == NEG
)
34206 sub
= XEXP (sub
, 0);
34207 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
/* MULT: float multiply by unit.  */
34212 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34214 /* ??? SSE scalar cost should be used here. */
34215 *total
= cost
->fmul
;
34218 else if (X87_FLOAT_MODE_P (mode
))
34220 *total
= cost
->fmul
;
34223 else if (FLOAT_MODE_P (mode
))
34225 /* ??? SSE vector cost should be used here. */
34226 *total
= cost
->fmul
;
34229 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34231 /* V*QImode is emulated with 7-13 insns. */
34232 if (mode
== V16QImode
|| mode
== V32QImode
)
34235 if (TARGET_XOP
&& mode
== V16QImode
)
34237 else if (TARGET_SSSE3
)
34239 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
34241 /* V*DImode is emulated with 5-8 insns. */
34242 else if (mode
== V2DImode
|| mode
== V4DImode
)
34244 if (TARGET_XOP
&& mode
== V2DImode
)
34245 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
34247 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
34249 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
34250 insns, including two PMULUDQ. */
34251 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
34252 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
34254 *total
= cost
->fmul
;
/* Scalar integer multiply: count set bits of a constant multiplier
   (NBITS) and recognize widening multiplies.  */
34259 rtx op0
= XEXP (x
, 0);
34260 rtx op1
= XEXP (x
, 1);
34262 if (CONST_INT_P (XEXP (x
, 1)))
34264 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34265 for (nbits
= 0; value
!= 0; value
&= value
- 1)
34269 /* This is arbitrary. */
34272 /* Compute costs correctly for widening multiplication. */
34273 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
34274 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
34275 == GET_MODE_SIZE (mode
))
34277 int is_mulwiden
= 0;
34278 enum machine_mode inner_mode
= GET_MODE (op0
);
34280 if (GET_CODE (op0
) == GET_CODE (op1
))
34281 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
34282 else if (CONST_INT_P (op1
))
34284 if (GET_CODE (op0
) == SIGN_EXTEND
)
34285 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
34288 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
34292 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
34295 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
34296 + nbits
* cost
->mult_bit
34297 + rtx_cost (op0
, outer_code
, opno
, speed
)
34298 + rtx_cost (op1
, outer_code
, opno
, speed
));
/* DIV/MOD costs.  */
34307 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34308 /* ??? SSE cost should be used here. */
34309 *total
= cost
->fdiv
;
34310 else if (X87_FLOAT_MODE_P (mode
))
34311 *total
= cost
->fdiv
;
34312 else if (FLOAT_MODE_P (mode
))
34313 /* ??? SSE vector cost should be used here. */
34314 *total
= cost
->fdiv
;
34316 *total
= cost
->divide
[MODE_INDEX (mode
)];
/* PLUS: detect shapes that a single lea can compute.  */
34320 if (GET_MODE_CLASS (mode
) == MODE_INT
34321 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
34323 if (GET_CODE (XEXP (x
, 0)) == PLUS
34324 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
34325 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
34326 && CONSTANT_P (XEXP (x
, 1)))
34328 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
34329 if (val
== 2 || val
== 4 || val
== 8)
34331 *total
= cost
->lea
;
34332 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34333 outer_code
, opno
, speed
);
34334 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
34335 outer_code
, opno
, speed
);
34336 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34340 else if (GET_CODE (XEXP (x
, 0)) == MULT
34341 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
34343 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
34344 if (val
== 2 || val
== 4 || val
== 8)
34346 *total
= cost
->lea
;
34347 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34348 outer_code
, opno
, speed
);
34349 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34353 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
34355 *total
= cost
->lea
;
34356 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34357 outer_code
, opno
, speed
);
34358 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34359 outer_code
, opno
, speed
);
34360 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
/* Float add/sub.  */
34367 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34369 /* ??? SSE cost should be used here. */
34370 *total
= cost
->fadd
;
34373 else if (X87_FLOAT_MODE_P (mode
))
34375 *total
= cost
->fadd
;
34378 else if (FLOAT_MODE_P (mode
))
34380 /* ??? SSE vector cost should be used here. */
34381 *total
= cost
->fadd
;
/* Multiword integer logic/arith: two adds plus doubled operand cost
   for operands not already DImode.  */
34389 if (GET_MODE_CLASS (mode
) == MODE_INT
34390 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34392 *total
= (cost
->add
* 2
34393 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
34394 << (GET_MODE (XEXP (x
, 0)) != DImode
))
34395 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
34396 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
/* NEG.  */
34402 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34404 /* ??? SSE cost should be used here. */
34405 *total
= cost
->fchs
;
34408 else if (X87_FLOAT_MODE_P (mode
))
34410 *total
= cost
->fchs
;
34413 else if (FLOAT_MODE_P (mode
))
34415 /* ??? SSE vector cost should be used here. */
34416 *total
= cost
->fchs
;
34422 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34424 /* ??? Should be SSE vector operation cost. */
34425 /* At least for published AMD latencies, this really is the same
34426 as the latency for a simple fpu operation like fabs. */
34427 *total
= cost
->fabs
;
34429 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34430 *total
= cost
->add
* 2;
34432 *total
= cost
->add
;
/* COMPARE of a single-bit ZERO_EXTRACT against zero is a test insn.  */
34436 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
34437 && XEXP (XEXP (x
, 0), 1) == const1_rtx
34438 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
34439 && XEXP (x
, 1) == const0_rtx
)
34441 /* This kind of construct is implemented using test[bwl].
34442 Treat it as if we had an AND. */
34443 *total
= (cost
->add
34444 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
34445 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
34451 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
/* ABS.  */
34456 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34457 /* ??? SSE cost should be used here. */
34458 *total
= cost
->fabs
;
34459 else if (X87_FLOAT_MODE_P (mode
))
34460 *total
= cost
->fabs
;
34461 else if (FLOAT_MODE_P (mode
))
34462 /* ??? SSE vector cost should be used here. */
34463 *total
= cost
->fabs
;
/* SQRT.  */
34467 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34468 /* ??? SSE cost should be used here. */
34469 *total
= cost
->fsqrt
;
34470 else if (X87_FLOAT_MODE_P (mode
))
34471 *total
= cost
->fsqrt
;
34472 else if (FLOAT_MODE_P (mode
))
34473 /* ??? SSE vector cost should be used here. */
34474 *total
= cost
->fsqrt
;
/* UNSPEC: thread-pointer reads are free.  */
34478 if (XINT (x
, 1) == UNSPEC_TP
)
34485 case VEC_DUPLICATE
:
34486 /* ??? Assume all of these vector manipulation patterns are
34487 recognizable. In which case they all pretty much have the
34489 *total
= cost
->fabs
;
/* Monotone counter used to generate unique "L<n>$lz" lazy-pointer
   labels, one per emitted stub.  */
34499 static int current_machopic_label_num
;
34501 /* Given a symbol name and its associated stub, write out the
34502 definition of the stub. */
34505 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
34507 unsigned int length
;
34508 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
34509 int label
= ++current_machopic_label_num
;
34511 /* For 64-bit we shouldn't get here. */
34512 gcc_assert (!TARGET_64BIT
);
34514 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34515 symb
= targetm
.strip_name_encoding (symb
);
/* Build the binder, symbol and lazy-pointer names on the stack.  */
34517 length
= strlen (stub
);
34518 binder_name
= XALLOCAVEC (char, length
+ 32);
34519 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
34521 length
= strlen (symb
);
34522 symbol_name
= XALLOCAVEC (char, length
+ 32);
34523 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
34525 sprintf (lazy_ptr_name
, "L%d$lz", label
);
/* Select the stub section matching the PIC flavor.  */
34527 if (MACHOPIC_ATT_STUB
)
34528 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
34529 else if (MACHOPIC_PURE
)
34530 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
34532 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
34534 fprintf (file
, "%s:\n", stub
);
34535 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
34537 if (MACHOPIC_ATT_STUB
)
34539 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
34541 else if (MACHOPIC_PURE
)
34544 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34545 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
34546 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
34547 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
34548 label
, lazy_ptr_name
, label
);
34549 fprintf (file
, "\tjmp\t*%%ecx\n");
34552 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
34554 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
34555 it needs no stub-binding-helper. */
34556 if (MACHOPIC_ATT_STUB
)
34559 fprintf (file
, "%s:\n", binder_name
);
34563 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
34564 fprintf (file
, "\tpushl\t%%ecx\n");
34567 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
34569 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
34571 /* N.B. Keep the correspondence of these
34572 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
34573 old-pic/new-pic/non-pic stubs; altering this will break
34574 compatibility with existing dylibs. */
34577 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34578 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
34581 /* 16-byte -mdynamic-no-pic stub. */
34582 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
/* Emit the lazy pointer: initially points at the binder.  */
34584 fprintf (file
, "%s:\n", lazy_ptr_name
);
34585 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
34586 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
34588 #endif /* TARGET_MACHO */
34590 /* Order the registers for register allocator. */
34593 x86_order_regs_for_local_alloc (void)
34598 /* First allocate the local general purpose registers. */
34599 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34600 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
34601 reg_alloc_order
[pos
++] = i
;
34603 /* Global general purpose registers. */
34604 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34605 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
34606 reg_alloc_order
[pos
++] = i
;
34608 /* x87 registers come first in case we are doing FP math
34610 if (!TARGET_SSE_MATH
)
34611 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34612 reg_alloc_order
[pos
++] = i
;
34614 /* SSE registers. */
34615 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
34616 reg_alloc_order
[pos
++] = i
;
34617 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
34618 reg_alloc_order
[pos
++] = i
;
34620 /* x87 registers. */
/* When doing SSE math the x87 stack regs are least preferred, so they
   are appended here instead of before the SSE regs above.  */
34621 if (TARGET_SSE_MATH
)
34622 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34623 reg_alloc_order
[pos
++] = i
;
34625 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
34626 reg_alloc_order
[pos
++] = i
;
34628 /* Initialize the rest of array as we do not allocate some registers
34630 while (pos
< FIRST_PSEUDO_REGISTER
)
34631 reg_alloc_order
[pos
++] = 0;
34634 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
34635 in struct attribute_spec handler. */
34637 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
34639 int flags ATTRIBUTE_UNUSED
,
34640 bool *no_add_attrs
)
/* Reject the attribute on anything that is not function-typed.  */
34642 if (TREE_CODE (*node
) != FUNCTION_TYPE
34643 && TREE_CODE (*node
) != METHOD_TYPE
34644 && TREE_CODE (*node
) != FIELD_DECL
34645 && TREE_CODE (*node
) != TYPE_DECL
)
34647 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34649 *no_add_attrs
= true;
/* The attribute is meaningless in 64-bit mode.  */
34654 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
34656 *no_add_attrs
= true;
/* The single argument must be the integer constant 0 or 1.  */
34659 if (is_attribute_p ("callee_pop_aggregate_return", name
))
34663 cst
= TREE_VALUE (args
);
34664 if (TREE_CODE (cst
) != INTEGER_CST
)
34666 warning (OPT_Wattributes
,
34667 "%qE attribute requires an integer constant argument",
34669 *no_add_attrs
= true;
34671 else if (compare_tree_int (cst
, 0) != 0
34672 && compare_tree_int (cst
, 1) != 0)
34674 warning (OPT_Wattributes
,
34675 "argument to %qE attribute is neither zero, nor one",
34677 *no_add_attrs
= true;
34686 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
34687 struct attribute_spec.handler. */
34689 ix86_handle_abi_attribute (tree
*node
, tree name
,
34690 tree args ATTRIBUTE_UNUSED
,
34691 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
/* Only function-typed entities may carry an ABI attribute.  */
34693 if (TREE_CODE (*node
) != FUNCTION_TYPE
34694 && TREE_CODE (*node
) != METHOD_TYPE
34695 && TREE_CODE (*node
) != FIELD_DECL
34696 && TREE_CODE (*node
) != TYPE_DECL
)
34698 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34700 *no_add_attrs
= true;
34704 /* Can combine regparm with all attributes but fastcall. */
/* ms_abi and sysv_abi are mutually exclusive on the same type.  */
34705 if (is_attribute_p ("ms_abi", name
))
34707 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
34709 error ("ms_abi and sysv_abi attributes are not compatible");
34714 else if (is_attribute_p ("sysv_abi", name
))
34716 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
34718 error ("ms_abi and sysv_abi attributes are not compatible");
34727 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34728 struct attribute_spec.handler. */
34730 ix86_handle_struct_attribute (tree
*node
, tree name
,
34731 tree args ATTRIBUTE_UNUSED
,
34732 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
/* Resolve NODE to the underlying type when given a declaration.  */
34735 if (DECL_P (*node
))
34737 if (TREE_CODE (*node
) == TYPE_DECL
)
34738 type
= &TREE_TYPE (*node
);
/* The attribute is only meaningful on record/union types.  */
34743 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
34745 warning (OPT_Wattributes
, "%qE attribute ignored",
34747 *no_add_attrs
= true;
/* ms_struct and gcc_struct may not both be applied.  */
34750 else if ((is_attribute_p ("ms_struct", name
)
34751 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
34752 || ((is_attribute_p ("gcc_struct", name
)
34753 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
34755 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
34757 *no_add_attrs
= true;
/* Attribute handler for attributes that are valid only on function
   declarations: warn and drop the attribute otherwise.  */
34764 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
34765 tree args ATTRIBUTE_UNUSED
,
34766 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34768 if (TREE_CODE (*node
) != FUNCTION_DECL
)
34770 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34772 *no_add_attrs
= true;
/* Return true if RECORD_TYPE should be laid out with MS bitfield
   rules: either the target default is MS layout and the type is not
   marked gcc_struct, or the type is explicitly marked ms_struct.  */
34778 ix86_ms_bitfield_layout_p (const_tree record_type
)
34780 return ((TARGET_MS_BITFIELD_LAYOUT
34781 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
34782 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
34785 /* Returns an expression indicating where the this parameter is
34786 located on entry to the FUNCTION. */
34789 x86_this_parameter (tree function
)
34791 tree type
= TREE_TYPE (function
);
/* AGGR is nonzero when the function returns an aggregate in memory,
   shifting "this" to the next parameter slot/register.  */
34792 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
/* 64-bit: "this" arrives in an integer parameter register chosen by
   the function's ABI (MS vs. SysV).  */
34797 const int *parm_regs
;
34799 if (ix86_function_type_abi (type
) == MS_ABI
)
34800 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
34802 parm_regs
= x86_64_int_parameter_registers
;
34803 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
/* 32-bit: depends on regparm count and calling convention.  */
34806 nregs
= ix86_function_regparm (type
, function
);
34808 if (nregs
> 0 && !stdarg_p (type
))
34811 unsigned int ccvt
= ix86_get_callcvt (type
);
34813 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
34814 regno
= aggr
? DX_REG
: CX_REG
;
34815 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
/* thiscall with aggregate return: "this" is on the stack at sp+4.  */
34819 return gen_rtx_MEM (SImode
,
34820 plus_constant (Pmode
, stack_pointer_rtx
, 4));
34829 return gen_rtx_MEM (SImode
,
34830 plus_constant (Pmode
,
34831 stack_pointer_rtx
, 4));
34834 return gen_rtx_REG (SImode
, regno
);
/* Default: "this" is the first (or, with AGGR, second) stack slot.  */
34837 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
34841 /* Determine whether x86_output_mi_thunk can succeed. */
34844 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
34845 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
34846 HOST_WIDE_INT vcall_offset
, const_tree function
)
34848 /* 64-bit can handle anything. */
34852 /* For 32-bit, everything's fine if we have one free register. */
34853 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
34856 /* Need a free register for vcall_offset. */
34860 /* Need a free register for GOT references. */
34861 if (flag_pic
&& !targetm
.binds_local_p (function
))
34864 /* Otherwise ok. */
34868 /* Output the assembler code for a thunk function. THUNK_DECL is the
34869 declaration for the thunk function itself, FUNCTION is the decl for
34870 the target function. DELTA is an immediate constant offset to be
34871 added to THIS. If VCALL_OFFSET is nonzero, the word at
34872 *(*this + vcall_offset) should be added to THIS. */
34875 x86_output_mi_thunk (FILE *file
,
34876 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
34877 HOST_WIDE_INT vcall_offset
, tree function
)
34879 rtx this_param
= x86_this_parameter (function
);
34880 rtx this_reg
, tmp
, fnaddr
;
34881 unsigned int tmp_regno
;
/* Pick a scratch register the target's calling convention does not
   use for arguments: R10 on 64-bit, else AX/DX/CX per convention.  */
34884 tmp_regno
= R10_REG
;
34887 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
34888 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
34889 tmp_regno
= AX_REG
;
34890 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
34891 tmp_regno
= DX_REG
;
34893 tmp_regno
= CX_REG
;
34896 emit_note (NOTE_INSN_PROLOGUE_END
);
34898 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
34899 pull it in now and let DELTA benefit. */
34900 if (REG_P (this_param
))
34901 this_reg
= this_param
;
34902 else if (vcall_offset
)
34904 /* Put the this parameter into %eax. */
34905 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
34906 emit_move_insn (this_reg
, this_param
);
34909 this_reg
= NULL_RTX
;
34911 /* Adjust the this parameter by a fixed constant. */
34914 rtx delta_rtx
= GEN_INT (delta
);
34915 rtx delta_dst
= this_reg
? this_reg
: this_param
;
/* On 64-bit an out-of-range immediate must be materialized in the
   scratch register first.  */
34919 if (!x86_64_general_operand (delta_rtx
, Pmode
))
34921 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
34922 emit_move_insn (tmp
, delta_rtx
);
34927 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
34930 /* Adjust the this parameter by a value stored in the vtable. */
34933 rtx vcall_addr
, vcall_mem
, this_mem
;
34935 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
/* Load the vtable pointer (*this); zero-extend when pointers are
   narrower than Pmode (x32).  */
34937 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
34938 if (Pmode
!= ptr_mode
)
34939 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
34940 emit_move_insn (tmp
, this_mem
);
34942 /* Adjust the this parameter. */
34943 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
34945 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
34947 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
34948 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
34949 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
34952 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
34953 if (Pmode
!= ptr_mode
)
34954 emit_insn (gen_addsi_1_zext (this_reg
,
34955 gen_rtx_REG (ptr_mode
,
34959 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
34962 /* If necessary, drop THIS back to its stack slot. */
34963 if (this_reg
&& this_reg
!= this_param
)
34964 emit_move_insn (this_param
, this_reg
);
/* Compute the address to tail-call; non-local PIC targets need a
   GOT-based form.  */
34966 fnaddr
= XEXP (DECL_RTL (function
), 0);
34969 if (!flag_pic
|| targetm
.binds_local_p (function
)
34970 || cfun
->machine
->call_abi
== MS_ABI
)
34974 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
34975 tmp
= gen_rtx_CONST (Pmode
, tmp
);
34976 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
34981 if (!flag_pic
|| targetm
.binds_local_p (function
))
34984 else if (TARGET_MACHO
)
34986 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
34987 fnaddr
= XEXP (fnaddr
, 0);
34989 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT base in %ecx and form a GOT load.  */
34992 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
34993 output_set_got (tmp
, NULL_RTX
);
34995 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
34996 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
34997 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
35001 /* Our sibling call patterns do not allow memories, because we have no
35002 predicate that can distinguish between frame and non-frame memory.
35003 For our purposes here, we can get away with (ab)using a jump pattern,
35004 because we're going to do no optimization. */
35005 if (MEM_P (fnaddr
))
35006 emit_jump_insn (gen_indirect_jump (fnaddr
));
35009 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
35010 fnaddr
= legitimize_pic_address (fnaddr
,
35011 gen_rtx_REG (Pmode
, tmp_regno
));
35013 if (!sibcall_insn_operand (fnaddr
, word_mode
))
35015 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
35016 if (GET_MODE (fnaddr
) != word_mode
)
35017 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
35018 emit_move_insn (tmp
, fnaddr
);
/* Emit the tail call itself as a sibling call insn.  */
35022 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
35023 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
35024 tmp
= emit_call_insn (tmp
);
35025 SIBLING_CALL_P (tmp
) = 1;
35029 /* Emit just enough of rest_of_compilation to get the insns emitted.
35030 Note that use_thunk calls assemble_start_function et al. */
35031 tmp
= get_insns ();
35032 shorten_branches (tmp
);
35033 final_start_function (tmp
, file
, 1);
35034 final (tmp
, file
, 1);
35035 final_end_function ();
35039 x86_file_start (void)
35041 default_file_start ();
35043 darwin_file_start ();
35045 if (X86_FILE_START_VERSION_DIRECTIVE
)
35046 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
35047 if (X86_FILE_START_FLTUSED
)
35048 fputs ("\t.global\t__fltused\n", asm_out_file
);
35049 if (ix86_asm_dialect
== ASM_INTEL
)
35050 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
35054 x86_field_alignment (tree field
, int computed
)
35056 enum machine_mode mode
;
35057 tree type
= TREE_TYPE (field
);
35059 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
35061 mode
= TYPE_MODE (strip_array_types (type
));
35062 if (mode
== DFmode
|| mode
== DCmode
35063 || GET_MODE_CLASS (mode
) == MODE_INT
35064 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
35065 return MIN (32, computed
);
35069 /* Output assembler code to FILE to increment profiler label # LABELNO
35070 for profiling a function entry. */
35072 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
35074 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
35079 #ifndef NO_PROFILE_COUNTERS
35080 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
35083 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
35084 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
35086 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35090 #ifndef NO_PROFILE_COUNTERS
35091 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
35094 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
35098 #ifndef NO_PROFILE_COUNTERS
35099 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
35102 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35106 /* We don't have exact information about the insn sizes, but we may assume
35107 quite safely that we are informed about all 1 byte insns and memory
35108 address sizes. This is enough to eliminate unnecessary padding in
35112 min_insn_size (rtx insn
)
35116 if (!INSN_P (insn
) || !active_insn_p (insn
))
35119 /* Discard alignments we've emit and jump instructions. */
35120 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
35121 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
35123 if (JUMP_TABLE_DATA_P (insn
))
35126 /* Important case - calls are always 5 bytes.
35127 It is common to have many calls in the row. */
35129 && symbolic_reference_mentioned_p (PATTERN (insn
))
35130 && !SIBLING_CALL_P (insn
))
35132 len
= get_attr_length (insn
);
35136 /* For normal instructions we rely on get_attr_length being exact,
35137 with a few exceptions. */
35138 if (!JUMP_P (insn
))
35140 enum attr_type type
= get_attr_type (insn
);
35145 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
35146 || asm_noperands (PATTERN (insn
)) >= 0)
35153 /* Otherwise trust get_attr_length. */
35157 l
= get_attr_length_address (insn
);
35158 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Insert padding (via gen_pad) before an insn that would become
   the 4th jump in a 16-byte window, pushing it into the next window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      /* The label may be replaced by up to MAX_SKIP bytes of
		 padding; retire insns from the window accordingly.  */
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      /* Shrink the interval from the front until it holds at most
	 3 jumps again.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
35271 /* AMD Athlon works faster
35272 when RET is not destination of conditional jump or directly preceded
35273 by other jump instruction. We avoid the penalty by inserting NOP just
35274 before the RET instructions in such cases. */
35276 ix86_pad_returns (void)
35281 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35283 basic_block bb
= e
->src
;
35284 rtx ret
= BB_END (bb
);
35286 bool replace
= false;
35288 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
35289 || optimize_bb_for_size_p (bb
))
35291 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
35292 if (active_insn_p (prev
) || LABEL_P (prev
))
35294 if (prev
&& LABEL_P (prev
))
35299 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35300 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
35301 && !(e
->flags
& EDGE_FALLTHRU
))
35306 prev
= prev_active_insn (ret
);
35308 && ((JUMP_P (prev
) && any_condjump_p (prev
))
35311 /* Empty functions get branch mispredict even when
35312 the jump destination is not visible to us. */
35313 if (!prev
&& !optimize_function_for_size_p (cfun
))
35318 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
35324 /* Count the minimum number of instructions in BB. Return 4 if the
35325 number of instructions >= 4. */
35328 ix86_count_insn_bb (basic_block bb
)
35331 int insn_count
= 0;
35333 /* Count number of instructions in this block. Return 4 if the number
35334 of instructions >= 4. */
35335 FOR_BB_INSNS (bb
, insn
)
35337 /* Only happen in exit blocks. */
35339 && ANY_RETURN_P (PATTERN (insn
)))
35342 if (NONDEBUG_INSN_P (insn
)
35343 && GET_CODE (PATTERN (insn
)) != USE
35344 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
35347 if (insn_count
>= 4)
35356 /* Count the minimum number of instructions in code path in BB.
35357 Return 4 if the number of instructions >= 4. */
35360 ix86_count_insn (basic_block bb
)
35364 int min_prev_count
;
35366 /* Only bother counting instructions along paths with no
35367 more than 2 basic blocks between entry and exit. Given
35368 that BB has an edge to exit, determine if a predecessor
35369 of BB has an edge from entry. If so, compute the number
35370 of instructions in the predecessor block. If there
35371 happen to be multiple such blocks, compute the minimum. */
35372 min_prev_count
= 4;
35373 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35376 edge_iterator prev_ei
;
35378 if (e
->src
== ENTRY_BLOCK_PTR
)
35380 min_prev_count
= 0;
35383 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
35385 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
35387 int count
= ix86_count_insn_bb (e
->src
);
35388 if (count
< min_prev_count
)
35389 min_prev_count
= count
;
35395 if (min_prev_count
< 4)
35396 min_prev_count
+= ix86_count_insn_bb (bb
);
35398 return min_prev_count
;
35401 /* Pad short function to 4 instructions. */
35404 ix86_pad_short_function (void)
35409 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35411 rtx ret
= BB_END (e
->src
);
35412 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
35414 int insn_count
= ix86_count_insn (e
->src
);
35416 /* Pad short function. */
35417 if (insn_count
< 4)
35421 /* Find epilogue. */
35424 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
35425 insn
= PREV_INSN (insn
);
35430 /* Two NOPs count as one instruction. */
35431 insn_count
= 2 * (4 - insn_count
);
35432 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
35438 /* Implement machine specific optimizations. We implement padding of returns
35439 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
35443 /* We are freeing block_for_insn in the toplev to keep compatibility
35444 with old MDEP_REORGS that are not CFG based. Recompute it now. */
35445 compute_bb_for_insn ();
35447 if (optimize
&& optimize_function_for_speed_p (cfun
))
35449 if (TARGET_PAD_SHORT_FUNCTION
)
35450 ix86_pad_short_function ();
35451 else if (TARGET_PAD_RETURNS
)
35452 ix86_pad_returns ();
35453 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35454 if (TARGET_FOUR_JUMP_LIMIT
)
35455 ix86_avoid_jump_mispredicts ();
35460 /* Return nonzero when QImode register that must be represented via REX prefix
35463 x86_extended_QIreg_mentioned_p (rtx insn
)
35466 extract_insn_cached (insn
);
35467 for (i
= 0; i
< recog_data
.n_operands
; i
++)
35468 if (GENERAL_REG_P (recog_data
.operand
[i
])
35469 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
35474 /* Return nonzero when P points to register encoded via REX prefix.
35475 Called via for_each_rtx. */
35477 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
35479 unsigned int regno
;
35482 regno
= REGNO (*p
);
35483 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
35486 /* Return true when INSN mentions register that must be encoded using REX
35489 x86_extended_reg_mentioned_p (rtx insn
)
35491 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
35492 extended_reg_mentioned_1
, NULL
);
35495 /* If profitable, negate (without causing overflow) integer constant
35496 of mode MODE at location LOC. Return true in this case. */
35498 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
35502 if (!CONST_INT_P (*loc
))
35508 /* DImode x86_64 constants must fit in 32 bits. */
35509 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
35520 gcc_unreachable ();
35523 /* Avoid overflows. */
35524 if (mode_signbit_p (mode
, *loc
))
35527 val
= INTVAL (*loc
);
35529 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
35530 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
35531 if ((val
< 0 && val
!= -128)
35534 *loc
= GEN_INT (-val
);
35541 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
35542 optabs would emit if we didn't have TFmode patterns. */
35545 x86_emit_floatuns (rtx operands
[2])
35547 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
35548 enum machine_mode mode
, inmode
;
35550 inmode
= GET_MODE (operands
[1]);
35551 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
35554 in
= force_reg (inmode
, operands
[1]);
35555 mode
= GET_MODE (out
);
35556 neglab
= gen_label_rtx ();
35557 donelab
= gen_label_rtx ();
35558 f0
= gen_reg_rtx (mode
);
35560 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
35562 expand_float (out
, in
, 0);
35564 emit_jump_insn (gen_jump (donelab
));
35567 emit_label (neglab
);
35569 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
35571 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
35573 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
35575 expand_float (f0
, i0
, 0);
35577 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
35579 emit_label (donelab
);
35582 /* AVX2 does support 32-byte integer vector operations,
35583 thus the longest vector we are faced with is V32QImode. */
35584 #define MAX_VECT_LEN 32
35586 struct expand_vec_perm_d
35588 rtx target
, op0
, op1
;
35589 unsigned char perm
[MAX_VECT_LEN
];
35590 enum machine_mode vmode
;
35591 unsigned char nelt
;
35592 bool one_operand_p
;
35596 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
35597 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
35598 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
35600 /* Get a vector mode of the same size as the original but with elements
35601 twice as wide. This is only guaranteed to apply to integral vectors. */
35603 static inline enum machine_mode
35604 get_mode_wider_vector (enum machine_mode o
)
35606 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
35607 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
35608 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
35609 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
35613 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35614 with all elements equal to VAR. Return true if successful. */
35617 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
35618 rtx target
, rtx val
)
35641 /* First attempt to recognize VAL as-is. */
35642 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
35643 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
35644 if (recog_memoized (insn
) < 0)
35647 /* If that fails, force VAL into a register. */
35650 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
35651 seq
= get_insns ();
35654 emit_insn_before (seq
, insn
);
35656 ok
= recog_memoized (insn
) >= 0;
35665 if (TARGET_SSE
|| TARGET_3DNOW_A
)
35669 val
= gen_lowpart (SImode
, val
);
35670 x
= gen_rtx_TRUNCATE (HImode
, val
);
35671 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
35672 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35685 struct expand_vec_perm_d dperm
;
35689 memset (&dperm
, 0, sizeof (dperm
));
35690 dperm
.target
= target
;
35691 dperm
.vmode
= mode
;
35692 dperm
.nelt
= GET_MODE_NUNITS (mode
);
35693 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
35694 dperm
.one_operand_p
= true;
35696 /* Extend to SImode using a paradoxical SUBREG. */
35697 tmp1
= gen_reg_rtx (SImode
);
35698 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
35700 /* Insert the SImode value as low element of a V4SImode vector. */
35701 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
35702 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
35704 ok
= (expand_vec_perm_1 (&dperm
)
35705 || expand_vec_perm_broadcast_1 (&dperm
));
35717 /* Replicate the value once into the next wider mode and recurse. */
35719 enum machine_mode smode
, wsmode
, wvmode
;
35722 smode
= GET_MODE_INNER (mode
);
35723 wvmode
= get_mode_wider_vector (mode
);
35724 wsmode
= GET_MODE_INNER (wvmode
);
35726 val
= convert_modes (wsmode
, smode
, val
, true);
35727 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
35728 GEN_INT (GET_MODE_BITSIZE (smode
)),
35729 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35730 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
35732 x
= gen_lowpart (wvmode
, target
);
35733 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
35741 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
35742 rtx x
= gen_reg_rtx (hvmode
);
35744 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
35747 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
35748 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35757 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35758 whose ONE_VAR element is VAR, and other elements are zero. Return true
35762 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
35763 rtx target
, rtx var
, int one_var
)
35765 enum machine_mode vsimode
;
35768 bool use_vector_set
= false;
35773 /* For SSE4.1, we normally use vector set. But if the second
35774 element is zero and inter-unit moves are OK, we use movq
35776 use_vector_set
= (TARGET_64BIT
35778 && !(TARGET_INTER_UNIT_MOVES
35784 use_vector_set
= TARGET_SSE4_1
;
35787 use_vector_set
= TARGET_SSE2
;
35790 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
35797 use_vector_set
= TARGET_AVX
;
35800 /* Use ix86_expand_vector_set in 64bit mode only. */
35801 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
35807 if (use_vector_set
)
35809 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
35810 var
= force_reg (GET_MODE_INNER (mode
), var
);
35811 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
35827 var
= force_reg (GET_MODE_INNER (mode
), var
);
35828 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
35829 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35834 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
35835 new_target
= gen_reg_rtx (mode
);
35837 new_target
= target
;
35838 var
= force_reg (GET_MODE_INNER (mode
), var
);
35839 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
35840 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
35841 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
35844 /* We need to shuffle the value to the correct position, so
35845 create a new pseudo to store the intermediate result. */
35847 /* With SSE2, we can use the integer shuffle insns. */
35848 if (mode
!= V4SFmode
&& TARGET_SSE2
)
35850 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
35852 GEN_INT (one_var
== 1 ? 0 : 1),
35853 GEN_INT (one_var
== 2 ? 0 : 1),
35854 GEN_INT (one_var
== 3 ? 0 : 1)));
35855 if (target
!= new_target
)
35856 emit_move_insn (target
, new_target
);
35860 /* Otherwise convert the intermediate result to V4SFmode and
35861 use the SSE1 shuffle instructions. */
35862 if (mode
!= V4SFmode
)
35864 tmp
= gen_reg_rtx (V4SFmode
);
35865 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
35870 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
35872 GEN_INT (one_var
== 1 ? 0 : 1),
35873 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
35874 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
35876 if (mode
!= V4SFmode
)
35877 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
35878 else if (tmp
!= target
)
35879 emit_move_insn (target
, tmp
);
35881 else if (target
!= new_target
)
35882 emit_move_insn (target
, new_target
);
35887 vsimode
= V4SImode
;
35893 vsimode
= V2SImode
;
35899 /* Zero extend the variable element to SImode and recurse. */
35900 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
35902 x
= gen_reg_rtx (vsimode
);
35903 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
35905 gcc_unreachable ();
35907 emit_move_insn (target
, gen_lowpart (mode
, x
));
35915 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35916 consisting of the values in VALS. It is known that all elements
35917 except ONE_VAR are constants. Return true if successful. */
35920 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
35921 rtx target
, rtx vals
, int one_var
)
35923 rtx var
= XVECEXP (vals
, 0, one_var
);
35924 enum machine_mode wmode
;
35927 const_vec
= copy_rtx (vals
);
35928 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
35929 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
35937 /* For the two element vectors, it's just as easy to use
35938 the general case. */
35942 /* Use ix86_expand_vector_set in 64bit mode only. */
35965 /* There's no way to set one QImode entry easily. Combine
35966 the variable value with its adjacent constant value, and
35967 promote to an HImode set. */
35968 x
= XVECEXP (vals
, 0, one_var
^ 1);
35971 var
= convert_modes (HImode
, QImode
, var
, true);
35972 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
35973 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35974 x
= GEN_INT (INTVAL (x
) & 0xff);
35978 var
= convert_modes (HImode
, QImode
, var
, true);
35979 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
35981 if (x
!= const0_rtx
)
35982 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
35983 1, OPTAB_LIB_WIDEN
);
35985 x
= gen_reg_rtx (wmode
);
35986 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
35987 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
35989 emit_move_insn (target
, gen_lowpart (mode
, x
));
35996 emit_move_insn (target
, const_vec
);
35997 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
36001 /* A subroutine of ix86_expand_vector_init_general. Use vector
36002 concatenate to handle the most general case: all values variable,
36003 and none identical. */
36006 ix86_expand_vector_init_concat (enum machine_mode mode
,
36007 rtx target
, rtx
*ops
, int n
)
36009 enum machine_mode cmode
, hmode
= VOIDmode
;
36010 rtx first
[8], second
[4];
36050 gcc_unreachable ();
36053 if (!register_operand (ops
[1], cmode
))
36054 ops
[1] = force_reg (cmode
, ops
[1]);
36055 if (!register_operand (ops
[0], cmode
))
36056 ops
[0] = force_reg (cmode
, ops
[0]);
36057 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36058 gen_rtx_VEC_CONCAT (mode
, ops
[0],
36078 gcc_unreachable ();
36094 gcc_unreachable ();
36099 /* FIXME: We process inputs backward to help RA. PR 36222. */
36102 for (; i
> 0; i
-= 2, j
--)
36104 first
[j
] = gen_reg_rtx (cmode
);
36105 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
36106 ix86_expand_vector_init (false, first
[j
],
36107 gen_rtx_PARALLEL (cmode
, v
));
36113 gcc_assert (hmode
!= VOIDmode
);
36114 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36116 second
[j
] = gen_reg_rtx (hmode
);
36117 ix86_expand_vector_init_concat (hmode
, second
[j
],
36121 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
36124 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
36128 gcc_unreachable ();
36132 /* A subroutine of ix86_expand_vector_init_general. Use vector
36133 interleave to handle the most general case: all values variable,
36134 and none identical. */
36137 ix86_expand_vector_init_interleave (enum machine_mode mode
,
36138 rtx target
, rtx
*ops
, int n
)
36140 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
36143 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
36144 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
36145 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
36150 gen_load_even
= gen_vec_setv8hi
;
36151 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
36152 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36153 inner_mode
= HImode
;
36154 first_imode
= V4SImode
;
36155 second_imode
= V2DImode
;
36156 third_imode
= VOIDmode
;
36159 gen_load_even
= gen_vec_setv16qi
;
36160 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
36161 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
36162 inner_mode
= QImode
;
36163 first_imode
= V8HImode
;
36164 second_imode
= V4SImode
;
36165 third_imode
= V2DImode
;
36168 gcc_unreachable ();
36171 for (i
= 0; i
< n
; i
++)
36173 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
36174 op0
= gen_reg_rtx (SImode
);
36175 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
36177 /* Insert the SImode value as low element of V4SImode vector. */
36178 op1
= gen_reg_rtx (V4SImode
);
36179 op0
= gen_rtx_VEC_MERGE (V4SImode
,
36180 gen_rtx_VEC_DUPLICATE (V4SImode
,
36182 CONST0_RTX (V4SImode
),
36184 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
36186 /* Cast the V4SImode vector back to a vector in orignal mode. */
36187 op0
= gen_reg_rtx (mode
);
36188 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
36190 /* Load even elements into the second positon. */
36191 emit_insn (gen_load_even (op0
,
36192 force_reg (inner_mode
,
36196 /* Cast vector to FIRST_IMODE vector. */
36197 ops
[i
] = gen_reg_rtx (first_imode
);
36198 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
36201 /* Interleave low FIRST_IMODE vectors. */
36202 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36204 op0
= gen_reg_rtx (first_imode
);
36205 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
36207 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36208 ops
[j
] = gen_reg_rtx (second_imode
);
36209 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
36212 /* Interleave low SECOND_IMODE vectors. */
36213 switch (second_imode
)
36216 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
36218 op0
= gen_reg_rtx (second_imode
);
36219 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
36222 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
36224 ops
[j
] = gen_reg_rtx (third_imode
);
36225 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
36227 second_imode
= V2DImode
;
36228 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36232 op0
= gen_reg_rtx (second_imode
);
36233 emit_insn (gen_interleave_second_low (op0
, ops
[0],
36236 /* Cast the SECOND_IMODE vector back to a vector on original
36238 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36239 gen_lowpart (mode
, op0
)));
36243 gcc_unreachable ();
36247 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
36248 all values variable, and none identical. */
36251 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
36252 rtx target
, rtx vals
)
36254 rtx ops
[32], op0
, op1
;
36255 enum machine_mode half_mode
= VOIDmode
;
36262 if (!mmx_ok
&& !TARGET_SSE
)
36274 n
= GET_MODE_NUNITS (mode
);
36275 for (i
= 0; i
< n
; i
++)
36276 ops
[i
] = XVECEXP (vals
, 0, i
);
36277 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
36281 half_mode
= V16QImode
;
36285 half_mode
= V8HImode
;
36289 n
= GET_MODE_NUNITS (mode
);
36290 for (i
= 0; i
< n
; i
++)
36291 ops
[i
] = XVECEXP (vals
, 0, i
);
36292 op0
= gen_reg_rtx (half_mode
);
36293 op1
= gen_reg_rtx (half_mode
);
36294 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
36296 ix86_expand_vector_init_interleave (half_mode
, op1
,
36297 &ops
[n
>> 1], n
>> 2);
36298 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36299 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
36303 if (!TARGET_SSE4_1
)
36311 /* Don't use ix86_expand_vector_init_interleave if we can't
36312 move from GPR to SSE register directly. */
36313 if (!TARGET_INTER_UNIT_MOVES
)
36316 n
= GET_MODE_NUNITS (mode
);
36317 for (i
= 0; i
< n
; i
++)
36318 ops
[i
] = XVECEXP (vals
, 0, i
);
36319 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
36327 gcc_unreachable ();
36331 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
36332 enum machine_mode inner_mode
;
36333 rtx words
[4], shift
;
36335 inner_mode
= GET_MODE_INNER (mode
);
36336 n_elts
= GET_MODE_NUNITS (mode
);
36337 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
36338 n_elt_per_word
= n_elts
/ n_words
;
36339 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
36341 for (i
= 0; i
< n_words
; ++i
)
36343 rtx word
= NULL_RTX
;
36345 for (j
= 0; j
< n_elt_per_word
; ++j
)
36347 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
36348 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
36354 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
36355 word
, 1, OPTAB_LIB_WIDEN
);
36356 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
36357 word
, 1, OPTAB_LIB_WIDEN
);
36365 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
36366 else if (n_words
== 2)
36368 rtx tmp
= gen_reg_rtx (mode
);
36369 emit_clobber (tmp
);
36370 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
36371 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
36372 emit_move_insn (target
, tmp
);
36374 else if (n_words
== 4)
36376 rtx tmp
= gen_reg_rtx (V4SImode
);
36377 gcc_assert (word_mode
== SImode
);
36378 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
36379 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
36380 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
36383 gcc_unreachable ();
36387 /* Initialize vector TARGET via VALS. Suppress the use of MMX
36388 instructions unless MMX_OK is true. */
36391 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
36393 enum machine_mode mode
= GET_MODE (target
);
36394 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36395 int n_elts
= GET_MODE_NUNITS (mode
);
36396 int n_var
= 0, one_var
= -1;
36397 bool all_same
= true, all_const_zero
= true;
36401 for (i
= 0; i
< n_elts
; ++i
)
36403 x
= XVECEXP (vals
, 0, i
);
36404 if (!(CONST_INT_P (x
)
36405 || GET_CODE (x
) == CONST_DOUBLE
36406 || GET_CODE (x
) == CONST_FIXED
))
36407 n_var
++, one_var
= i
;
36408 else if (x
!= CONST0_RTX (inner_mode
))
36409 all_const_zero
= false;
36410 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
36414 /* Constants are best loaded from the constant pool. */
36417 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
36421 /* If all values are identical, broadcast the value. */
36423 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
36424 XVECEXP (vals
, 0, 0)))
36427 /* Values where only one field is non-constant are best loaded from
36428 the pool and overwritten via move later. */
36432 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
36433 XVECEXP (vals
, 0, one_var
),
36437 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
36441 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
36445 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
36447 enum machine_mode mode
= GET_MODE (target
);
36448 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36449 enum machine_mode half_mode
;
36450 bool use_vec_merge
= false;
36452 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
36454 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
36455 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
36456 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
36457 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
36458 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
36459 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
36461 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
36463 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
36464 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
36465 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
36466 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
36467 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
36468 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
36478 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36479 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
36481 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36483 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36484 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36490 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
36494 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36495 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
36497 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36499 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36500 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36507 /* For the two element vectors, we implement a VEC_CONCAT with
36508 the extraction of the other element. */
36510 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
36511 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
36514 op0
= val
, op1
= tmp
;
36516 op0
= tmp
, op1
= val
;
36518 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
36519 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36524 use_vec_merge
= TARGET_SSE4_1
;
36531 use_vec_merge
= true;
36535 /* tmp = target = A B C D */
36536 tmp
= copy_to_reg (target
);
36537 /* target = A A B B */
36538 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
36539 /* target = X A B B */
36540 ix86_expand_vector_set (false, target
, val
, 0);
36541 /* target = A X C D */
36542 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36543 const1_rtx
, const0_rtx
,
36544 GEN_INT (2+4), GEN_INT (3+4)));
36548 /* tmp = target = A B C D */
36549 tmp
= copy_to_reg (target
);
36550 /* tmp = X B C D */
36551 ix86_expand_vector_set (false, tmp
, val
, 0);
36552 /* target = A B X D */
36553 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36554 const0_rtx
, const1_rtx
,
36555 GEN_INT (0+4), GEN_INT (3+4)));
36559 /* tmp = target = A B C D */
36560 tmp
= copy_to_reg (target
);
36561 /* tmp = X B C D */
36562 ix86_expand_vector_set (false, tmp
, val
, 0);
36563 /* target = A B X D */
36564 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36565 const0_rtx
, const1_rtx
,
36566 GEN_INT (2+4), GEN_INT (0+4)));
36570 gcc_unreachable ();
36575 use_vec_merge
= TARGET_SSE4_1
;
36579 /* Element 0 handled by vec_merge below. */
36582 use_vec_merge
= true;
36588 /* With SSE2, use integer shuffles to swap element 0 and ELT,
36589 store into element 0, then shuffle them back. */
36593 order
[0] = GEN_INT (elt
);
36594 order
[1] = const1_rtx
;
36595 order
[2] = const2_rtx
;
36596 order
[3] = GEN_INT (3);
36597 order
[elt
] = const0_rtx
;
36599 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36600 order
[1], order
[2], order
[3]));
36602 ix86_expand_vector_set (false, target
, val
, 0);
36604 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36605 order
[1], order
[2], order
[3]));
36609 /* For SSE1, we have to reuse the V4SF code. */
36610 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
36611 gen_lowpart (SFmode
, val
), elt
);
36616 use_vec_merge
= TARGET_SSE2
;
36619 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36623 use_vec_merge
= TARGET_SSE4_1
;
36630 half_mode
= V16QImode
;
36636 half_mode
= V8HImode
;
36642 half_mode
= V4SImode
;
36648 half_mode
= V2DImode
;
36654 half_mode
= V4SFmode
;
36660 half_mode
= V2DFmode
;
36666 /* Compute offset. */
36670 gcc_assert (i
<= 1);
36672 /* Extract the half. */
36673 tmp
= gen_reg_rtx (half_mode
);
36674 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
36676 /* Put val in tmp at elt. */
36677 ix86_expand_vector_set (false, tmp
, val
, elt
);
36680 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
36689 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36690 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
36691 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36695 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36697 emit_move_insn (mem
, target
);
36699 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36700 emit_move_insn (tmp
, val
);
36702 emit_move_insn (target
, mem
);
36707 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
36709 enum machine_mode mode
= GET_MODE (vec
);
36710 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36711 bool use_vec_extr
= false;
36724 use_vec_extr
= true;
36728 use_vec_extr
= TARGET_SSE4_1
;
36740 tmp
= gen_reg_rtx (mode
);
36741 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
36742 GEN_INT (elt
), GEN_INT (elt
),
36743 GEN_INT (elt
+4), GEN_INT (elt
+4)));
36747 tmp
= gen_reg_rtx (mode
);
36748 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
36752 gcc_unreachable ();
36755 use_vec_extr
= true;
36760 use_vec_extr
= TARGET_SSE4_1
;
36774 tmp
= gen_reg_rtx (mode
);
36775 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
36776 GEN_INT (elt
), GEN_INT (elt
),
36777 GEN_INT (elt
), GEN_INT (elt
)));
36781 tmp
= gen_reg_rtx (mode
);
36782 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
36786 gcc_unreachable ();
36789 use_vec_extr
= true;
36794 /* For SSE1, we have to reuse the V4SF code. */
36795 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
36796 gen_lowpart (V4SFmode
, vec
), elt
);
36802 use_vec_extr
= TARGET_SSE2
;
36805 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36809 use_vec_extr
= TARGET_SSE4_1
;
36815 tmp
= gen_reg_rtx (V4SFmode
);
36817 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
36819 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
36820 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36828 tmp
= gen_reg_rtx (V2DFmode
);
36830 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
36832 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
36833 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36841 tmp
= gen_reg_rtx (V16QImode
);
36843 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
36845 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
36846 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
36854 tmp
= gen_reg_rtx (V8HImode
);
36856 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
36858 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
36859 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
36867 tmp
= gen_reg_rtx (V4SImode
);
36869 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
36871 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
36872 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36880 tmp
= gen_reg_rtx (V2DImode
);
36882 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
36884 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
36885 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36891 /* ??? Could extract the appropriate HImode element and shift. */
36898 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
36899 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
36901 /* Let the rtl optimizers know about the zero extension performed. */
36902 if (inner_mode
== QImode
|| inner_mode
== HImode
)
36904 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
36905 target
= gen_lowpart (SImode
, target
);
36908 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36912 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36914 emit_move_insn (mem
, vec
);
36916 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36917 emit_move_insn (target
, tmp
);
36921 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
36922 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
36923 The upper bits of DEST are undefined, though they shouldn't cause
36924 exceptions (some bits from src or all zeros are ok). */
36927 emit_reduc_half (rtx dest
, rtx src
, int i
)
36930 switch (GET_MODE (src
))
36934 tem
= gen_sse_movhlps (dest
, src
, src
);
36936 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
36937 GEN_INT (1 + 4), GEN_INT (1 + 4));
36940 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
36946 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
36947 gen_lowpart (V1TImode
, src
),
36952 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
36954 tem
= gen_avx_shufps256 (dest
, src
, src
,
36955 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
36959 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
36961 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
36968 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
36969 gen_lowpart (V4DImode
, src
),
36970 gen_lowpart (V4DImode
, src
),
36973 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
36974 gen_lowpart (V2TImode
, src
),
36978 gcc_unreachable ();
36983 /* Expand a vector reduction. FN is the binary pattern to reduce;
36984 DEST is the destination; IN is the input vector. */
36987 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
36989 rtx half
, dst
, vec
= in
;
36990 enum machine_mode mode
= GET_MODE (in
);
36993 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
36995 && mode
== V8HImode
36996 && fn
== gen_uminv8hi3
)
36998 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
37002 for (i
= GET_MODE_BITSIZE (mode
);
37003 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
37006 half
= gen_reg_rtx (mode
);
37007 emit_reduc_half (half
, vec
, i
);
37008 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
37011 dst
= gen_reg_rtx (mode
);
37012 emit_insn (fn (dst
, half
, vec
));
37017 /* Target hook for scalar_mode_supported_p. */
37019 ix86_scalar_mode_supported_p (enum machine_mode mode
)
37021 if (DECIMAL_FLOAT_MODE_P (mode
))
37022 return default_decimal_float_supported_p ();
37023 else if (mode
== TFmode
)
37026 return default_scalar_mode_supported_p (mode
);
37029 /* Implements target hook vector_mode_supported_p. */
37031 ix86_vector_mode_supported_p (enum machine_mode mode
)
37033 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
37035 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
37037 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
37039 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
37041 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
37046 /* Target hook for c_mode_for_suffix. */
37047 static enum machine_mode
37048 ix86_c_mode_for_suffix (char suffix
)
37058 /* Worker function for TARGET_MD_ASM_CLOBBERS.
37060 We do this in the new i386 backend to maintain source compatibility
37061 with the old cc0-based compiler. */
37064 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
37065 tree inputs ATTRIBUTE_UNUSED
,
37068 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
37070 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
37075 /* Implements target vector targetm.asm.encode_section_info. */
37077 static void ATTRIBUTE_UNUSED
37078 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
37080 default_encode_section_info (decl
, rtl
, first
);
37082 if (TREE_CODE (decl
) == VAR_DECL
37083 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
37084 && ix86_in_large_data_p (decl
))
37085 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
37088 /* Worker function for REVERSE_CONDITION. */
37091 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
37093 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
37094 ? reverse_condition (code
)
37095 : reverse_condition_maybe_unordered (code
));
37098 /* Output code to perform an x87 FP register move, from OPERANDS[1]
37102 output_387_reg_move (rtx insn
, rtx
*operands
)
37104 if (REG_P (operands
[0]))
37106 if (REG_P (operands
[1])
37107 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37109 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
37110 return output_387_ffreep (operands
, 0);
37111 return "fstp\t%y0";
37113 if (STACK_TOP_P (operands
[0]))
37114 return "fld%Z1\t%y1";
37117 else if (MEM_P (operands
[0]))
37119 gcc_assert (REG_P (operands
[1]));
37120 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37121 return "fstp%Z0\t%y0";
37124 /* There is no non-popping store to memory for XFmode.
37125 So if we need one, follow the store with a load. */
37126 if (GET_MODE (operands
[0]) == XFmode
)
37127 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
37129 return "fst%Z0\t%y0";
37136 /* Output code to perform a conditional jump to LABEL, if C2 flag in
37137 FP status register is set. */
37140 ix86_emit_fp_unordered_jump (rtx label
)
37142 rtx reg
= gen_reg_rtx (HImode
);
37145 emit_insn (gen_x86_fnstsw_1 (reg
));
37147 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
37149 emit_insn (gen_x86_sahf_1 (reg
));
37151 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
37152 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
37156 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
37158 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37159 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
37162 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
37163 gen_rtx_LABEL_REF (VOIDmode
, label
),
37165 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
37167 emit_jump_insn (temp
);
37168 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
37171 /* Output code to perform a log1p XFmode calculation. */
37173 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
37175 rtx label1
= gen_label_rtx ();
37176 rtx label2
= gen_label_rtx ();
37178 rtx tmp
= gen_reg_rtx (XFmode
);
37179 rtx tmp2
= gen_reg_rtx (XFmode
);
37182 emit_insn (gen_absxf2 (tmp
, op1
));
37183 test
= gen_rtx_GE (VOIDmode
, tmp
,
37184 CONST_DOUBLE_FROM_REAL_VALUE (
37185 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
37187 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
37189 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37190 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
37191 emit_jump (label2
);
37193 emit_label (label1
);
37194 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
37195 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
37196 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37197 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
37199 emit_label (label2
);
37202 /* Emit code for round calculation. */
37203 void ix86_emit_i387_round (rtx op0
, rtx op1
)
37205 enum machine_mode inmode
= GET_MODE (op1
);
37206 enum machine_mode outmode
= GET_MODE (op0
);
37207 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
37208 rtx scratch
= gen_reg_rtx (HImode
);
37209 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37210 rtx jump_label
= gen_label_rtx ();
37212 rtx (*gen_abs
) (rtx
, rtx
);
37213 rtx (*gen_neg
) (rtx
, rtx
);
37218 gen_abs
= gen_abssf2
;
37221 gen_abs
= gen_absdf2
;
37224 gen_abs
= gen_absxf2
;
37227 gcc_unreachable ();
37233 gen_neg
= gen_negsf2
;
37236 gen_neg
= gen_negdf2
;
37239 gen_neg
= gen_negxf2
;
37242 gen_neg
= gen_neghi2
;
37245 gen_neg
= gen_negsi2
;
37248 gen_neg
= gen_negdi2
;
37251 gcc_unreachable ();
37254 e1
= gen_reg_rtx (inmode
);
37255 e2
= gen_reg_rtx (inmode
);
37256 res
= gen_reg_rtx (outmode
);
37258 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
37260 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
37262 /* scratch = fxam(op1) */
37263 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
37264 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
37266 /* e1 = fabs(op1) */
37267 emit_insn (gen_abs (e1
, op1
));
37269 /* e2 = e1 + 0.5 */
37270 half
= force_reg (inmode
, half
);
37271 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37272 gen_rtx_PLUS (inmode
, e1
, half
)));
37274 /* res = floor(e2) */
37275 if (inmode
!= XFmode
)
37277 tmp1
= gen_reg_rtx (XFmode
);
37279 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
37280 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
37290 rtx tmp0
= gen_reg_rtx (XFmode
);
37292 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
37294 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37295 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
37296 UNSPEC_TRUNC_NOOP
)));
37300 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
37303 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
37306 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
37309 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
37312 gcc_unreachable ();
37315 /* flags = signbit(a) */
37316 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
37318 /* if (flags) then res = -res */
37319 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
37320 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
37321 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
37323 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37324 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
37325 JUMP_LABEL (insn
) = jump_label
;
37327 emit_insn (gen_neg (res
, res
));
37329 emit_label (jump_label
);
37330 LABEL_NUSES (jump_label
) = 1;
37332 emit_move_insn (op0
, res
);
37335 /* Output code to perform a Newton-Rhapson approximation of a single precision
37336 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
37338 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
37340 rtx x0
, x1
, e0
, e1
;
37342 x0
= gen_reg_rtx (mode
);
37343 e0
= gen_reg_rtx (mode
);
37344 e1
= gen_reg_rtx (mode
);
37345 x1
= gen_reg_rtx (mode
);
37347 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
37349 b
= force_reg (mode
, b
);
37351 /* x0 = rcp(b) estimate */
37352 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37353 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
37356 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37357 gen_rtx_MULT (mode
, x0
, b
)));
37360 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37361 gen_rtx_MULT (mode
, x0
, e0
)));
37364 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37365 gen_rtx_PLUS (mode
, x0
, x0
)));
37368 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
37369 gen_rtx_MINUS (mode
, e1
, e0
)));
37372 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37373 gen_rtx_MULT (mode
, a
, x1
)));
37376 /* Output code to perform a Newton-Rhapson approximation of a
37377 single precision floating point [reciprocal] square root. */
37379 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
37382 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
37385 x0
= gen_reg_rtx (mode
);
37386 e0
= gen_reg_rtx (mode
);
37387 e1
= gen_reg_rtx (mode
);
37388 e2
= gen_reg_rtx (mode
);
37389 e3
= gen_reg_rtx (mode
);
37391 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
37392 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37394 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
37395 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37397 if (VECTOR_MODE_P (mode
))
37399 mthree
= ix86_build_const_vector (mode
, true, mthree
);
37400 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
37403 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
37404 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
37406 a
= force_reg (mode
, a
);
37408 /* x0 = rsqrt(a) estimate */
37409 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37410 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
37413 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
37418 zero
= gen_reg_rtx (mode
);
37419 mask
= gen_reg_rtx (mode
);
37421 zero
= force_reg (mode
, CONST0_RTX(mode
));
37422 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
37423 gen_rtx_NE (mode
, zero
, a
)));
37425 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37426 gen_rtx_AND (mode
, x0
, mask
)));
37430 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37431 gen_rtx_MULT (mode
, x0
, a
)));
37433 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37434 gen_rtx_MULT (mode
, e0
, x0
)));
37437 mthree
= force_reg (mode
, mthree
);
37438 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37439 gen_rtx_PLUS (mode
, e1
, mthree
)));
37441 mhalf
= force_reg (mode
, mhalf
);
37443 /* e3 = -.5 * x0 */
37444 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37445 gen_rtx_MULT (mode
, x0
, mhalf
)));
37447 /* e3 = -.5 * e0 */
37448 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37449 gen_rtx_MULT (mode
, e0
, mhalf
)));
37450 /* ret = e2 * e3 */
37451 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37452 gen_rtx_MULT (mode
, e2
, e3
)));
37455 #ifdef TARGET_SOLARIS
37456 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
37459 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
37462 /* With Binutils 2.15, the "@unwind" marker must be specified on
37463 every occurrence of the ".eh_frame" section, not just the first
37466 && strcmp (name
, ".eh_frame") == 0)
37468 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
37469 flags
& SECTION_WRITE
? "aw" : "a");
37474 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
37476 solaris_elf_asm_comdat_section (name
, flags
, decl
);
37481 default_elf_asm_named_section (name
, flags
, decl
);
37483 #endif /* TARGET_SOLARIS */
37485 /* Return the mangling of TYPE if it is an extended fundamental type. */
37487 static const char *
37488 ix86_mangle_type (const_tree type
)
37490 type
= TYPE_MAIN_VARIANT (type
);
37492 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
37493 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
37496 switch (TYPE_MODE (type
))
37499 /* __float128 is "g". */
37502 /* "long double" or __float80 is "e". */
37509 /* For 32-bit code we can save PIC register setup by using
37510 __stack_chk_fail_local hidden function instead of calling
37511 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
37512 register, so it is better to call __stack_chk_fail directly. */
37514 static tree ATTRIBUTE_UNUSED
37515 ix86_stack_protect_fail (void)
37517 return TARGET_64BIT
37518 ? default_external_stack_protect_fail ()
37519 : default_hidden_stack_protect_fail ();
37522 /* Select a format to encode pointers in exception handling data. CODE
37523 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
37524 true if the symbol may be affected by dynamic relocations.
37526 ??? All x86 object file formats are capable of representing this.
37527 After all, the relocation needed is the same as for the call insn.
37528 Whether or not a particular assembler allows us to enter such, I
37529 guess we'll have to see. */
37531 asm_preferred_eh_data_format (int code
, int global
)
37535 int type
= DW_EH_PE_sdata8
;
37537 || ix86_cmodel
== CM_SMALL_PIC
37538 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
37539 type
= DW_EH_PE_sdata4
;
37540 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
37542 if (ix86_cmodel
== CM_SMALL
37543 || (ix86_cmodel
== CM_MEDIUM
&& code
))
37544 return DW_EH_PE_udata4
;
37545 return DW_EH_PE_absptr
;
37548 /* Expand copysign from SIGN to the positive value ABS_VALUE
37549 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
37552 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
37554 enum machine_mode mode
= GET_MODE (sign
);
37555 rtx sgn
= gen_reg_rtx (mode
);
37556 if (mask
== NULL_RTX
)
37558 enum machine_mode vmode
;
37560 if (mode
== SFmode
)
37562 else if (mode
== DFmode
)
37567 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
37568 if (!VECTOR_MODE_P (mode
))
37570 /* We need to generate a scalar mode mask in this case. */
37571 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37572 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37573 mask
= gen_reg_rtx (mode
);
37574 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
37578 mask
= gen_rtx_NOT (mode
, mask
);
37579 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
37580 gen_rtx_AND (mode
, mask
, sign
)));
37581 emit_insn (gen_rtx_SET (VOIDmode
, result
,
37582 gen_rtx_IOR (mode
, abs_value
, sgn
)));
37585 /* Expand fabs (OP0) and return a new rtx that holds the result. The
37586 mask for masking out the sign-bit is stored in *SMASK, if that is
37589 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
37591 enum machine_mode vmode
, mode
= GET_MODE (op0
);
37594 xa
= gen_reg_rtx (mode
);
37595 if (mode
== SFmode
)
37597 else if (mode
== DFmode
)
37601 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
37602 if (!VECTOR_MODE_P (mode
))
37604 /* We need to generate a scalar mode mask in this case. */
37605 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37606 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37607 mask
= gen_reg_rtx (mode
);
37608 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
37610 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
37611 gen_rtx_AND (mode
, op0
, mask
)));
37619 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
37620 swapping the operands if SWAP_OPERANDS is true. The expanded
37621 code is a forward jump to a newly created label in case the
37622 comparison is true. The generated label rtx is returned. */
37624 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
37625 bool swap_operands
)
37636 label
= gen_label_rtx ();
37637 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
37638 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37639 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
37640 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
37641 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
37642 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
37643 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37644 JUMP_LABEL (tmp
) = label
;
37649 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
37650 using comparison code CODE. Operands are swapped for the comparison if
37651 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
37653 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
37654 bool swap_operands
)
37656 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
37657 enum machine_mode mode
= GET_MODE (op0
);
37658 rtx mask
= gen_reg_rtx (mode
);
37667 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
37669 emit_insn (insn (mask
, op0
, op1
,
37670 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
37674 /* Generate and return a rtx of mode MODE for 2**n where n is the number
37675 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
37677 ix86_gen_TWO52 (enum machine_mode mode
)
37679 REAL_VALUE_TYPE TWO52r
;
37682 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
37683 TWO52
= const_double_from_real_value (TWO52r
, mode
);
37684 TWO52
= force_reg (mode
, TWO52
);
37689 /* Expand SSE sequence for computing lround from OP1 storing
37692 ix86_expand_lround (rtx op0
, rtx op1
)
37694 /* C code for the stuff we're doing below:
37695 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
37698 enum machine_mode mode
= GET_MODE (op1
);
37699 const struct real_format
*fmt
;
37700 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
37703 /* load nextafter (0.5, 0.0) */
37704 fmt
= REAL_MODE_FORMAT (mode
);
37705 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
37706 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
37708 /* adj = copysign (0.5, op1) */
37709 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
37710 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
37712 /* adj = op1 + adj */
37713 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
37715 /* op0 = (imode)adj */
37716 expand_fix (op0
, adj
, 0);
37719 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
37722 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
37724 /* C code for the stuff we're doing below (for do_floor):
37726 xi -= (double)xi > op1 ? 1 : 0;
37729 enum machine_mode fmode
= GET_MODE (op1
);
37730 enum machine_mode imode
= GET_MODE (op0
);
37731 rtx ireg
, freg
, label
, tmp
;
37733 /* reg = (long)op1 */
37734 ireg
= gen_reg_rtx (imode
);
37735 expand_fix (ireg
, op1
, 0);
37737 /* freg = (double)reg */
37738 freg
= gen_reg_rtx (fmode
);
37739 expand_float (freg
, ireg
, 0);
37741 /* ireg = (freg > op1) ? ireg - 1 : ireg */
37742 label
= ix86_expand_sse_compare_and_jump (UNLE
,
37743 freg
, op1
, !do_floor
);
37744 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
37745 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
37746 emit_move_insn (ireg
, tmp
);
37748 emit_label (label
);
37749 LABEL_NUSES (label
) = 1;
37751 emit_move_insn (op0
, ireg
);
37754 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
37755 result in OPERAND0. */
37757 ix86_expand_rint (rtx operand0
, rtx operand1
)
37759 /* C code for the stuff we're doing below:
37760 xa = fabs (operand1);
37761 if (!isless (xa, 2**52))
37763 xa = xa + 2**52 - 2**52;
37764 return copysign (xa, operand1);
37766 enum machine_mode mode
= GET_MODE (operand0
);
37767 rtx res
, xa
, label
, TWO52
, mask
;
37769 res
= gen_reg_rtx (mode
);
37770 emit_move_insn (res
, operand1
);
37772 /* xa = abs (operand1) */
37773 xa
= ix86_expand_sse_fabs (res
, &mask
);
37775 /* if (!isless (xa, TWO52)) goto label; */
37776 TWO52
= ix86_gen_TWO52 (mode
);
37777 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37779 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37780 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37782 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
37784 emit_label (label
);
37785 LABEL_NUSES (label
) = 1;
37787 emit_move_insn (operand0
, res
);
37790 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
37793 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
37795 /* C code for the stuff we expand below.
37796 double xa = fabs (x), x2;
37797 if (!isless (xa, TWO52))
37799 xa = xa + TWO52 - TWO52;
37800 x2 = copysign (xa, x);
37809 enum machine_mode mode
= GET_MODE (operand0
);
37810 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
37812 TWO52
= ix86_gen_TWO52 (mode
);
37814 /* Temporary for holding the result, initialized to the input
37815 operand to ease control flow. */
37816 res
= gen_reg_rtx (mode
);
37817 emit_move_insn (res
, operand1
);
37819 /* xa = abs (operand1) */
37820 xa
= ix86_expand_sse_fabs (res
, &mask
);
37822 /* if (!isless (xa, TWO52)) goto label; */
37823 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37825 /* xa = xa + TWO52 - TWO52; */
37826 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37827 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37829 /* xa = copysign (xa, operand1) */
37830 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
37832 /* generate 1.0 or -1.0 */
37833 one
= force_reg (mode
,
37834 const_double_from_real_value (do_floor
37835 ? dconst1
: dconstm1
, mode
));
37837 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37838 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37839 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37840 gen_rtx_AND (mode
, one
, tmp
)));
37841 /* We always need to subtract here to preserve signed zero. */
37842 tmp
= expand_simple_binop (mode
, MINUS
,
37843 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37844 emit_move_insn (res
, tmp
);
37846 emit_label (label
);
37847 LABEL_NUSES (label
) = 1;
37849 emit_move_insn (operand0
, res
);
37852 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
37855 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
37857 /* C code for the stuff we expand below.
37858 double xa = fabs (x), x2;
37859 if (!isless (xa, TWO52))
37861 x2 = (double)(long)x;
37868 if (HONOR_SIGNED_ZEROS (mode))
37869 return copysign (x2, x);
37872 enum machine_mode mode
= GET_MODE (operand0
);
37873 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
37875 TWO52
= ix86_gen_TWO52 (mode
);
37877 /* Temporary for holding the result, initialized to the input
37878 operand to ease control flow. */
37879 res
= gen_reg_rtx (mode
);
37880 emit_move_insn (res
, operand1
);
37882 /* xa = abs (operand1) */
37883 xa
= ix86_expand_sse_fabs (res
, &mask
);
37885 /* if (!isless (xa, TWO52)) goto label; */
37886 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37888 /* xa = (double)(long)x */
37889 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
37890 expand_fix (xi
, res
, 0);
37891 expand_float (xa
, xi
, 0);
37894 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
37896 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37897 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37898 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37899 gen_rtx_AND (mode
, one
, tmp
)));
37900 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
37901 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37902 emit_move_insn (res
, tmp
);
37904 if (HONOR_SIGNED_ZEROS (mode
))
37905 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
37907 emit_label (label
);
37908 LABEL_NUSES (label
) = 1;
37910 emit_move_insn (operand0
, res
);
37913 /* Expand SSE sequence for computing round from OPERAND1 storing
37914 into OPERAND0. Sequence that works without relying on DImode truncation
37915 via cvttsd2siq that is only available on 64bit targets. */
37917 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
37919 /* C code for the stuff we expand below.
37920 double xa = fabs (x), xa2, x2;
37921 if (!isless (xa, TWO52))
37923 Using the absolute value and copying back sign makes
37924 -0.0 -> -0.0 correct.
37925 xa2 = xa + TWO52 - TWO52;
37930 else if (dxa > 0.5)
37932 x2 = copysign (xa2, x);
37935 enum machine_mode mode
= GET_MODE (operand0
);
37936 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
37938 TWO52
= ix86_gen_TWO52 (mode
);
37940 /* Temporary for holding the result, initialized to the input
37941 operand to ease control flow. */
37942 res
= gen_reg_rtx (mode
);
37943 emit_move_insn (res
, operand1
);
37945 /* xa = abs (operand1) */
37946 xa
= ix86_expand_sse_fabs (res
, &mask
);
37948 /* if (!isless (xa, TWO52)) goto label; */
37949 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37951 /* xa2 = xa + TWO52 - TWO52; */
37952 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37953 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
37955 /* dxa = xa2 - xa; */
37956 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
37958 /* generate 0.5, 1.0 and -0.5 */
37959 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
37960 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
37961 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
37965 tmp
= gen_reg_rtx (mode
);
37966 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
37967 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
37968 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37969 gen_rtx_AND (mode
, one
, tmp
)));
37970 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37971 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
37972 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
37973 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37974 gen_rtx_AND (mode
, one
, tmp
)));
37975 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37977 /* res = copysign (xa2, operand1) */
37978 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
37980 emit_label (label
);
37981 LABEL_NUSES (label
) = 1;
37983 emit_move_insn (operand0
, res
);
37986 /* Expand SSE sequence for computing trunc from OPERAND1 storing
37989 ix86_expand_trunc (rtx operand0
, rtx operand1
)
37991 /* C code for SSE variant we expand below.
37992 double xa = fabs (x), x2;
37993 if (!isless (xa, TWO52))
37995 x2 = (double)(long)x;
37996 if (HONOR_SIGNED_ZEROS (mode))
37997 return copysign (x2, x);
38000 enum machine_mode mode
= GET_MODE (operand0
);
38001 rtx xa
, xi
, TWO52
, label
, res
, mask
;
38003 TWO52
= ix86_gen_TWO52 (mode
);
38005 /* Temporary for holding the result, initialized to the input
38006 operand to ease control flow. */
38007 res
= gen_reg_rtx (mode
);
38008 emit_move_insn (res
, operand1
);
38010 /* xa = abs (operand1) */
38011 xa
= ix86_expand_sse_fabs (res
, &mask
);
38013 /* if (!isless (xa, TWO52)) goto label; */
38014 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38016 /* x = (double)(long)x */
38017 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38018 expand_fix (xi
, res
, 0);
38019 expand_float (res
, xi
, 0);
38021 if (HONOR_SIGNED_ZEROS (mode
))
38022 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
38024 emit_label (label
);
38025 LABEL_NUSES (label
) = 1;
38027 emit_move_insn (operand0
, res
);
38030 /* Expand SSE sequence for computing trunc from OPERAND1 storing
38033 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
38035 enum machine_mode mode
= GET_MODE (operand0
);
38036 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
38038 /* C code for SSE variant we expand below.
38039 double xa = fabs (x), x2;
38040 if (!isless (xa, TWO52))
38042 xa2 = xa + TWO52 - TWO52;
38046 x2 = copysign (xa2, x);
38050 TWO52
= ix86_gen_TWO52 (mode
);
38052 /* Temporary for holding the result, initialized to the input
38053 operand to ease control flow. */
38054 res
= gen_reg_rtx (mode
);
38055 emit_move_insn (res
, operand1
);
38057 /* xa = abs (operand1) */
38058 xa
= ix86_expand_sse_fabs (res
, &smask
);
38060 /* if (!isless (xa, TWO52)) goto label; */
38061 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38063 /* res = xa + TWO52 - TWO52; */
38064 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38065 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
38066 emit_move_insn (res
, tmp
);
38069 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
38071 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
38072 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
38073 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38074 gen_rtx_AND (mode
, mask
, one
)));
38075 tmp
= expand_simple_binop (mode
, MINUS
,
38076 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
38077 emit_move_insn (res
, tmp
);
38079 /* res = copysign (res, operand1) */
38080 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
38082 emit_label (label
);
38083 LABEL_NUSES (label
) = 1;
38085 emit_move_insn (operand0
, res
);
38088 /* Expand SSE sequence for computing round from OPERAND1 storing
38091 ix86_expand_round (rtx operand0
, rtx operand1
)
38093 /* C code for the stuff we're doing below:
38094 double xa = fabs (x);
38095 if (!isless (xa, TWO52))
38097 xa = (double)(long)(xa + nextafter (0.5, 0.0));
38098 return copysign (xa, x);
38100 enum machine_mode mode
= GET_MODE (operand0
);
38101 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
38102 const struct real_format
*fmt
;
38103 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38105 /* Temporary for holding the result, initialized to the input
38106 operand to ease control flow. */
38107 res
= gen_reg_rtx (mode
);
38108 emit_move_insn (res
, operand1
);
38110 TWO52
= ix86_gen_TWO52 (mode
);
38111 xa
= ix86_expand_sse_fabs (res
, &mask
);
38112 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38114 /* load nextafter (0.5, 0.0) */
38115 fmt
= REAL_MODE_FORMAT (mode
);
38116 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38117 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38119 /* xa = xa + 0.5 */
38120 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38121 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38123 /* xa = (double)(int64_t)xa */
38124 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38125 expand_fix (xi
, xa
, 0);
38126 expand_float (xa
, xi
, 0);
38128 /* res = copysign (xa, operand1) */
38129 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
38131 emit_label (label
);
38132 LABEL_NUSES (label
) = 1;
38134 emit_move_insn (operand0
, res
);
38137 /* Expand SSE sequence for computing round
38138 from OP1 storing into OP0 using sse4 round insn. */
38140 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
38142 enum machine_mode mode
= GET_MODE (op0
);
38143 rtx e1
, e2
, res
, half
;
38144 const struct real_format
*fmt
;
38145 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38146 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
38147 rtx (*gen_round
) (rtx
, rtx
, rtx
);
38152 gen_copysign
= gen_copysignsf3
;
38153 gen_round
= gen_sse4_1_roundsf2
;
38156 gen_copysign
= gen_copysigndf3
;
38157 gen_round
= gen_sse4_1_rounddf2
;
38160 gcc_unreachable ();
38163 /* round (a) = trunc (a + copysign (0.5, a)) */
38165 /* load nextafter (0.5, 0.0) */
38166 fmt
= REAL_MODE_FORMAT (mode
);
38167 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38168 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38169 half
= const_double_from_real_value (pred_half
, mode
);
38171 /* e1 = copysign (0.5, op1) */
38172 e1
= gen_reg_rtx (mode
);
38173 emit_insn (gen_copysign (e1
, half
, op1
));
38175 /* e2 = op1 + e1 */
38176 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38178 /* res = trunc (e2) */
38179 res
= gen_reg_rtx (mode
);
38180 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
38182 emit_move_insn (op0
, res
);
38186 /* Table of valid machine attributes. */
38187 static const struct attribute_spec ix86_attribute_table
[] =
38189 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38190 affects_type_identity } */
38191 /* Stdcall attribute says callee is responsible for popping arguments
38192 if they are not variable. */
38193 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38195 /* Fastcall attribute says callee is responsible for popping arguments
38196 if they are not variable. */
38197 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38199 /* Thiscall attribute says callee is responsible for popping arguments
38200 if they are not variable. */
38201 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38203 /* Cdecl attribute says the callee is a normal C declaration */
38204 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38206 /* Regparm attribute specifies how many integer arguments are to be
38207 passed in registers. */
38208 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
38210 /* Sseregparm attribute says we are using x86_64 calling conventions
38211 for FP arguments. */
38212 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38214 /* The transactional memory builtins are implicitly regparm or fastcall
38215 depending on the ABI. Override the generic do-nothing attribute that
38216 these builtins were declared with. */
38217 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
38219 /* force_align_arg_pointer says this function realigns the stack at entry. */
38220 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
38221 false, true, true, ix86_handle_cconv_attribute
, false },
38222 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38223 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
38224 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
38225 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
38228 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38230 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38232 #ifdef SUBTARGET_ATTRIBUTE_TABLE
38233 SUBTARGET_ATTRIBUTE_TABLE
,
38235 /* ms_abi and sysv_abi calling convention function attributes. */
38236 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38237 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38238 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
38240 { "callee_pop_aggregate_return", 1, 1, false, true, true,
38241 ix86_handle_callee_pop_aggregate_return
, true },
38243 { NULL
, 0, 0, false, false, false, NULL
, false }
38246 /* Implement targetm.vectorize.builtin_vectorization_cost. */
38248 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
38250 int misalign ATTRIBUTE_UNUSED
)
38254 switch (type_of_cost
)
38257 return ix86_cost
->scalar_stmt_cost
;
38260 return ix86_cost
->scalar_load_cost
;
38263 return ix86_cost
->scalar_store_cost
;
38266 return ix86_cost
->vec_stmt_cost
;
38269 return ix86_cost
->vec_align_load_cost
;
38272 return ix86_cost
->vec_store_cost
;
38274 case vec_to_scalar
:
38275 return ix86_cost
->vec_to_scalar_cost
;
38277 case scalar_to_vec
:
38278 return ix86_cost
->scalar_to_vec_cost
;
38280 case unaligned_load
:
38281 case unaligned_store
:
38282 return ix86_cost
->vec_unalign_load_cost
;
38284 case cond_branch_taken
:
38285 return ix86_cost
->cond_taken_branch_cost
;
38287 case cond_branch_not_taken
:
38288 return ix86_cost
->cond_not_taken_branch_cost
;
38291 case vec_promote_demote
:
38292 return ix86_cost
->vec_stmt_cost
;
38294 case vec_construct
:
38295 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
38296 return elements
/ 2 + 1;
38299 gcc_unreachable ();
38303 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
38304 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
38305 insn every time. */
38307 static GTY(()) rtx vselect_insn
;
38309 /* Initialize vselect_insn. */
38312 init_vselect_insn (void)
38317 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
38318 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
38319 XVECEXP (x
, 0, i
) = const0_rtx
;
38320 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
38322 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
38324 vselect_insn
= emit_insn (x
);
38328 /* Construct (set target (vec_select op0 (parallel perm))) and
38329 return true if that's a valid instruction in the active ISA. */
38332 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
38333 unsigned nelt
, bool testing_p
)
38336 rtx x
, save_vconcat
;
38339 if (vselect_insn
== NULL_RTX
)
38340 init_vselect_insn ();
38342 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
38343 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
38344 for (i
= 0; i
< nelt
; ++i
)
38345 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
38346 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38347 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
38348 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
38349 SET_DEST (PATTERN (vselect_insn
)) = target
;
38350 icode
= recog_memoized (vselect_insn
);
38352 if (icode
>= 0 && !testing_p
)
38353 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
38355 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
38356 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
38357 INSN_CODE (vselect_insn
) = -1;
38362 /* Similar, but generate a vec_concat from op0 and op1 as well. */
38365 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
38366 const unsigned char *perm
, unsigned nelt
,
38369 enum machine_mode v2mode
;
38373 if (vselect_insn
== NULL_RTX
)
38374 init_vselect_insn ();
38376 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
38377 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38378 PUT_MODE (x
, v2mode
);
38381 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
38382 XEXP (x
, 0) = const0_rtx
;
38383 XEXP (x
, 1) = const0_rtx
;
38387 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38388 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
38391 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
38393 enum machine_mode vmode
= d
->vmode
;
38394 unsigned i
, mask
, nelt
= d
->nelt
;
38395 rtx target
, op0
, op1
, x
;
38396 rtx rperm
[32], vperm
;
38398 if (d
->one_operand_p
)
38400 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
38402 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
38404 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
38409 /* This is a blend, not a permute. Elements must stay in their
38410 respective lanes. */
38411 for (i
= 0; i
< nelt
; ++i
)
38413 unsigned e
= d
->perm
[i
];
38414 if (!(e
== i
|| e
== i
+ nelt
))
38421 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
38422 decision should be extracted elsewhere, so that we only try that
38423 sequence once all budget==3 options have been tried. */
38424 target
= d
->target
;
38437 for (i
= 0; i
< nelt
; ++i
)
38438 mask
|= (d
->perm
[i
] >= nelt
) << i
;
38442 for (i
= 0; i
< 2; ++i
)
38443 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
38448 for (i
= 0; i
< 4; ++i
)
38449 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38454 /* See if bytes move in pairs so we can use pblendw with
38455 an immediate argument, rather than pblendvb with a vector
38457 for (i
= 0; i
< 16; i
+= 2)
38458 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38461 for (i
= 0; i
< nelt
; ++i
)
38462 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
38465 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
38466 vperm
= force_reg (vmode
, vperm
);
38468 if (GET_MODE_SIZE (vmode
) == 16)
38469 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
38471 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
38475 for (i
= 0; i
< 8; ++i
)
38476 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38481 target
= gen_lowpart (vmode
, target
);
38482 op0
= gen_lowpart (vmode
, op0
);
38483 op1
= gen_lowpart (vmode
, op1
);
38487 /* See if bytes move in pairs. If not, vpblendvb must be used. */
38488 for (i
= 0; i
< 32; i
+= 2)
38489 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38491 /* See if bytes move in quadruplets. If yes, vpblendd
38492 with immediate can be used. */
38493 for (i
= 0; i
< 32; i
+= 4)
38494 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
38498 /* See if bytes move the same in both lanes. If yes,
38499 vpblendw with immediate can be used. */
38500 for (i
= 0; i
< 16; i
+= 2)
38501 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
38504 /* Use vpblendw. */
38505 for (i
= 0; i
< 16; ++i
)
38506 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
38511 /* Use vpblendd. */
38512 for (i
= 0; i
< 8; ++i
)
38513 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
38518 /* See if words move in pairs. If yes, vpblendd can be used. */
38519 for (i
= 0; i
< 16; i
+= 2)
38520 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38524 /* See if words move the same in both lanes. If not,
38525 vpblendvb must be used. */
38526 for (i
= 0; i
< 8; i
++)
38527 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
38529 /* Use vpblendvb. */
38530 for (i
= 0; i
< 32; ++i
)
38531 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
38535 target
= gen_lowpart (vmode
, target
);
38536 op0
= gen_lowpart (vmode
, op0
);
38537 op1
= gen_lowpart (vmode
, op1
);
38538 goto finish_pblendvb
;
38541 /* Use vpblendw. */
38542 for (i
= 0; i
< 16; ++i
)
38543 mask
|= (d
->perm
[i
] >= 16) << i
;
38547 /* Use vpblendd. */
38548 for (i
= 0; i
< 8; ++i
)
38549 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38554 /* Use vpblendd. */
38555 for (i
= 0; i
< 4; ++i
)
38556 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38561 gcc_unreachable ();
38564 /* This matches five different patterns with the different modes. */
38565 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
38566 x
= gen_rtx_SET (VOIDmode
, target
, x
);
38572 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38573 in terms of the variable form of vpermilps.
38575 Note that we will have already failed the immediate input vpermilps,
38576 which requires that the high and low part shuffle be identical; the
38577 variable form doesn't require that. */
38580 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
38582 rtx rperm
[8], vperm
;
38585 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
38588 /* We can only permute within the 128-bit lane. */
38589 for (i
= 0; i
< 8; ++i
)
38591 unsigned e
= d
->perm
[i
];
38592 if (i
< 4 ? e
>= 4 : e
< 4)
38599 for (i
= 0; i
< 8; ++i
)
38601 unsigned e
= d
->perm
[i
];
38603 /* Within each 128-bit lane, the elements of op0 are numbered
38604 from 0 and the elements of op1 are numbered from 4. */
38610 rperm
[i
] = GEN_INT (e
);
38613 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
38614 vperm
= force_reg (V8SImode
, vperm
);
38615 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
38620 /* Return true if permutation D can be performed as VMODE permutation
38624 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
38626 unsigned int i
, j
, chunk
;
38628 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
38629 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
38630 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
38633 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
38636 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
38637 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
38638 if (d
->perm
[i
] & (chunk
- 1))
38641 for (j
= 1; j
< chunk
; ++j
)
38642 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
38648 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38649 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
38652 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
38654 unsigned i
, nelt
, eltsz
, mask
;
38655 unsigned char perm
[32];
38656 enum machine_mode vmode
= V16QImode
;
38657 rtx rperm
[32], vperm
, target
, op0
, op1
;
38661 if (!d
->one_operand_p
)
38663 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
38666 && valid_perm_using_mode_p (V2TImode
, d
))
38671 /* Use vperm2i128 insn. The pattern uses
38672 V4DImode instead of V2TImode. */
38673 target
= gen_lowpart (V4DImode
, d
->target
);
38674 op0
= gen_lowpart (V4DImode
, d
->op0
);
38675 op1
= gen_lowpart (V4DImode
, d
->op1
);
38677 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
38678 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
38679 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
38687 if (GET_MODE_SIZE (d
->vmode
) == 16)
38692 else if (GET_MODE_SIZE (d
->vmode
) == 32)
38697 /* V4DImode should be already handled through
38698 expand_vselect by vpermq instruction. */
38699 gcc_assert (d
->vmode
!= V4DImode
);
38702 if (d
->vmode
== V8SImode
38703 || d
->vmode
== V16HImode
38704 || d
->vmode
== V32QImode
)
38706 /* First see if vpermq can be used for
38707 V8SImode/V16HImode/V32QImode. */
38708 if (valid_perm_using_mode_p (V4DImode
, d
))
38710 for (i
= 0; i
< 4; i
++)
38711 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
38714 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
38715 gen_lowpart (V4DImode
, d
->op0
),
38719 /* Next see if vpermd can be used. */
38720 if (valid_perm_using_mode_p (V8SImode
, d
))
38723 /* Or if vpermps can be used. */
38724 else if (d
->vmode
== V8SFmode
)
38727 if (vmode
== V32QImode
)
38729 /* vpshufb only works intra lanes, it is not
38730 possible to shuffle bytes in between the lanes. */
38731 for (i
= 0; i
< nelt
; ++i
)
38732 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
38743 if (vmode
== V8SImode
)
38744 for (i
= 0; i
< 8; ++i
)
38745 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
38748 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38749 if (!d
->one_operand_p
)
38750 mask
= 2 * nelt
- 1;
38751 else if (vmode
== V16QImode
)
38754 mask
= nelt
/ 2 - 1;
38756 for (i
= 0; i
< nelt
; ++i
)
38758 unsigned j
, e
= d
->perm
[i
] & mask
;
38759 for (j
= 0; j
< eltsz
; ++j
)
38760 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
38764 vperm
= gen_rtx_CONST_VECTOR (vmode
,
38765 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
38766 vperm
= force_reg (vmode
, vperm
);
38768 target
= gen_lowpart (vmode
, d
->target
);
38769 op0
= gen_lowpart (vmode
, d
->op0
);
38770 if (d
->one_operand_p
)
38772 if (vmode
== V16QImode
)
38773 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
38774 else if (vmode
== V32QImode
)
38775 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
38776 else if (vmode
== V8SFmode
)
38777 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
38779 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
38783 op1
= gen_lowpart (vmode
, d
->op1
);
38784 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
38790 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
38791 in a single instruction. */
38794 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
38796 unsigned i
, nelt
= d
->nelt
;
38797 unsigned char perm2
[MAX_VECT_LEN
];
38799 /* Check plain VEC_SELECT first, because AVX has instructions that could
38800 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
38801 input where SEL+CONCAT may not. */
38802 if (d
->one_operand_p
)
38804 int mask
= nelt
- 1;
38805 bool identity_perm
= true;
38806 bool broadcast_perm
= true;
38808 for (i
= 0; i
< nelt
; i
++)
38810 perm2
[i
] = d
->perm
[i
] & mask
;
38812 identity_perm
= false;
38814 broadcast_perm
= false;
38820 emit_move_insn (d
->target
, d
->op0
);
38823 else if (broadcast_perm
&& TARGET_AVX2
)
38825 /* Use vpbroadcast{b,w,d}. */
38826 rtx (*gen
) (rtx
, rtx
) = NULL
;
38830 gen
= gen_avx2_pbroadcastv32qi_1
;
38833 gen
= gen_avx2_pbroadcastv16hi_1
;
38836 gen
= gen_avx2_pbroadcastv8si_1
;
38839 gen
= gen_avx2_pbroadcastv16qi
;
38842 gen
= gen_avx2_pbroadcastv8hi
;
38845 gen
= gen_avx2_vec_dupv8sf_1
;
38847 /* For other modes prefer other shuffles this function creates. */
38853 emit_insn (gen (d
->target
, d
->op0
));
38858 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
38861 /* There are plenty of patterns in sse.md that are written for
38862 SEL+CONCAT and are not replicated for a single op. Perhaps
38863 that should be changed, to avoid the nastiness here. */
38865 /* Recognize interleave style patterns, which means incrementing
38866 every other permutation operand. */
38867 for (i
= 0; i
< nelt
; i
+= 2)
38869 perm2
[i
] = d
->perm
[i
] & mask
;
38870 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
38872 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38876 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
38879 for (i
= 0; i
< nelt
; i
+= 4)
38881 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
38882 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
38883 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
38884 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
38887 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38893 /* Finally, try the fully general two operand permute. */
38894 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
38898 /* Recognize interleave style patterns with reversed operands. */
38899 if (!d
->one_operand_p
)
38901 for (i
= 0; i
< nelt
; ++i
)
38903 unsigned e
= d
->perm
[i
];
38911 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
38916 /* Try the SSE4.1 blend variable merge instructions. */
38917 if (expand_vec_perm_blend (d
))
38920 /* Try one of the AVX vpermil variable permutations. */
38921 if (expand_vec_perm_vpermil (d
))
38924 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
38925 vpshufb, vpermd, vpermps or vpermq variable permutation. */
38926 if (expand_vec_perm_pshufb (d
))
38932 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38933 in terms of a pair of pshuflw + pshufhw instructions. */
38936 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
38938 unsigned char perm2
[MAX_VECT_LEN
];
38942 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
38945 /* The two permutations only operate in 64-bit lanes. */
38946 for (i
= 0; i
< 4; ++i
)
38947 if (d
->perm
[i
] >= 4)
38949 for (i
= 4; i
< 8; ++i
)
38950 if (d
->perm
[i
] < 4)
38956 /* Emit the pshuflw. */
38957 memcpy (perm2
, d
->perm
, 4);
38958 for (i
= 4; i
< 8; ++i
)
38960 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
38963 /* Emit the pshufhw. */
38964 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
38965 for (i
= 0; i
< 4; ++i
)
38967 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
38973 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38974 the permutation using the SSSE3 palignr instruction. This succeeds
38975 when all of the elements in PERM fit within one vector and we merely
38976 need to shift them down so that a single vector permutation has a
38977 chance to succeed. */
38980 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
38982 unsigned i
, nelt
= d
->nelt
;
38987 /* Even with AVX, palignr only operates on 128-bit vectors. */
38988 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
38991 min
= nelt
, max
= 0;
38992 for (i
= 0; i
< nelt
; ++i
)
38994 unsigned e
= d
->perm
[i
];
39000 if (min
== 0 || max
- min
>= nelt
)
39003 /* Given that we have SSSE3, we know we'll be able to implement the
39004 single operand permutation after the palignr with pshufb. */
39008 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
39009 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
39010 gen_lowpart (TImode
, d
->op1
),
39011 gen_lowpart (TImode
, d
->op0
), shift
));
39013 d
->op0
= d
->op1
= d
->target
;
39014 d
->one_operand_p
= true;
39017 for (i
= 0; i
< nelt
; ++i
)
39019 unsigned e
= d
->perm
[i
] - min
;
39025 /* Test for the degenerate case where the alignment by itself
39026 produces the desired permutation. */
39030 ok
= expand_vec_perm_1 (d
);
39036 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
39038 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39039 a two vector permutation into a single vector permutation by using
39040 an interleave operation to merge the vectors. */
39043 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
39045 struct expand_vec_perm_d dremap
, dfinal
;
39046 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39047 unsigned HOST_WIDE_INT contents
;
39048 unsigned char remap
[2 * MAX_VECT_LEN
];
39050 bool ok
, same_halves
= false;
39052 if (GET_MODE_SIZE (d
->vmode
) == 16)
39054 if (d
->one_operand_p
)
39057 else if (GET_MODE_SIZE (d
->vmode
) == 32)
39061 /* For 32-byte modes allow even d->one_operand_p.
39062 The lack of cross-lane shuffling in some instructions
39063 might prevent a single insn shuffle. */
39065 dfinal
.testing_p
= true;
39066 /* If expand_vec_perm_interleave3 can expand this into
39067 a 3 insn sequence, give up and let it be expanded as
39068 3 insn sequence. While that is one insn longer,
39069 it doesn't need a memory operand and in the common
39070 case that both interleave low and high permutations
39071 with the same operands are adjacent needs 4 insns
39072 for both after CSE. */
39073 if (expand_vec_perm_interleave3 (&dfinal
))
39079 /* Examine from whence the elements come. */
39081 for (i
= 0; i
< nelt
; ++i
)
39082 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
39084 memset (remap
, 0xff, sizeof (remap
));
39087 if (GET_MODE_SIZE (d
->vmode
) == 16)
39089 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
39091 /* Split the two input vectors into 4 halves. */
39092 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
39097 /* If the elements from the low halves use interleave low, and similarly
39098 for interleave high. If the elements are from mis-matched halves, we
39099 can use shufps for V4SF/V4SI or do a DImode shuffle. */
39100 if ((contents
& (h1
| h3
)) == contents
)
39103 for (i
= 0; i
< nelt2
; ++i
)
39106 remap
[i
+ nelt
] = i
* 2 + 1;
39107 dremap
.perm
[i
* 2] = i
;
39108 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39110 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39111 dremap
.vmode
= V4SFmode
;
39113 else if ((contents
& (h2
| h4
)) == contents
)
39116 for (i
= 0; i
< nelt2
; ++i
)
39118 remap
[i
+ nelt2
] = i
* 2;
39119 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
39120 dremap
.perm
[i
* 2] = i
+ nelt2
;
39121 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
39123 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39124 dremap
.vmode
= V4SFmode
;
39126 else if ((contents
& (h1
| h4
)) == contents
)
39129 for (i
= 0; i
< nelt2
; ++i
)
39132 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
39133 dremap
.perm
[i
] = i
;
39134 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
39139 dremap
.vmode
= V2DImode
;
39141 dremap
.perm
[0] = 0;
39142 dremap
.perm
[1] = 3;
39145 else if ((contents
& (h2
| h3
)) == contents
)
39148 for (i
= 0; i
< nelt2
; ++i
)
39150 remap
[i
+ nelt2
] = i
;
39151 remap
[i
+ nelt
] = i
+ nelt2
;
39152 dremap
.perm
[i
] = i
+ nelt2
;
39153 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
39158 dremap
.vmode
= V2DImode
;
39160 dremap
.perm
[0] = 1;
39161 dremap
.perm
[1] = 2;
39169 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
39170 unsigned HOST_WIDE_INT q
[8];
39171 unsigned int nonzero_halves
[4];
39173 /* Split the two input vectors into 8 quarters. */
39174 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
39175 for (i
= 1; i
< 8; ++i
)
39176 q
[i
] = q
[0] << (nelt4
* i
);
39177 for (i
= 0; i
< 4; ++i
)
39178 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
39180 nonzero_halves
[nzcnt
] = i
;
39186 gcc_assert (d
->one_operand_p
);
39187 nonzero_halves
[1] = nonzero_halves
[0];
39188 same_halves
= true;
39190 else if (d
->one_operand_p
)
39192 gcc_assert (nonzero_halves
[0] == 0);
39193 gcc_assert (nonzero_halves
[1] == 1);
39198 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
39200 /* Attempt to increase the likelihood that dfinal
39201 shuffle will be intra-lane. */
39202 char tmph
= nonzero_halves
[0];
39203 nonzero_halves
[0] = nonzero_halves
[1];
39204 nonzero_halves
[1] = tmph
;
39207 /* vperm2f128 or vperm2i128. */
39208 for (i
= 0; i
< nelt2
; ++i
)
39210 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
39211 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
39212 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
39213 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
39216 if (d
->vmode
!= V8SFmode
39217 && d
->vmode
!= V4DFmode
39218 && d
->vmode
!= V8SImode
)
39220 dremap
.vmode
= V8SImode
;
39222 for (i
= 0; i
< 4; ++i
)
39224 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
39225 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
39229 else if (d
->one_operand_p
)
39231 else if (TARGET_AVX2
39232 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
39235 for (i
= 0; i
< nelt4
; ++i
)
39238 remap
[i
+ nelt
] = i
* 2 + 1;
39239 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
39240 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
39241 dremap
.perm
[i
* 2] = i
;
39242 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39243 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
39244 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
39247 else if (TARGET_AVX2
39248 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
39251 for (i
= 0; i
< nelt4
; ++i
)
39253 remap
[i
+ nelt4
] = i
* 2;
39254 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
39255 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
39256 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
39257 dremap
.perm
[i
* 2] = i
+ nelt4
;
39258 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
39259 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
39260 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
39267 /* Use the remapping array set up above to move the elements from their
39268 swizzled locations into their final destinations. */
39270 for (i
= 0; i
< nelt
; ++i
)
39272 unsigned e
= remap
[d
->perm
[i
]];
39273 gcc_assert (e
< nelt
);
39274 /* If same_halves is true, both halves of the remapped vector are the
39275 same. Avoid cross-lane accesses if possible. */
39276 if (same_halves
&& i
>= nelt2
)
39278 gcc_assert (e
< nelt2
);
39279 dfinal
.perm
[i
] = e
+ nelt2
;
39282 dfinal
.perm
[i
] = e
;
39284 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
39285 dfinal
.op1
= dfinal
.op0
;
39286 dfinal
.one_operand_p
= true;
39287 dremap
.target
= dfinal
.op0
;
39289 /* Test if the final remap can be done with a single insn. For V4SFmode or
39290 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
39292 ok
= expand_vec_perm_1 (&dfinal
);
39293 seq
= get_insns ();
39302 if (dremap
.vmode
!= dfinal
.vmode
)
39304 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
39305 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
39306 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
39309 ok
= expand_vec_perm_1 (&dremap
);
39316 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39317 a single vector cross-lane permutation into vpermq followed
39318 by any of the single insn permutations. */
39321 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
39323 struct expand_vec_perm_d dremap
, dfinal
;
39324 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
39325 unsigned contents
[2];
39329 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
39330 && d
->one_operand_p
))
39335 for (i
= 0; i
< nelt2
; ++i
)
39337 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
39338 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
39341 for (i
= 0; i
< 2; ++i
)
39343 unsigned int cnt
= 0;
39344 for (j
= 0; j
< 4; ++j
)
39345 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
39353 dremap
.vmode
= V4DImode
;
39355 dremap
.target
= gen_reg_rtx (V4DImode
);
39356 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
39357 dremap
.op1
= dremap
.op0
;
39358 dremap
.one_operand_p
= true;
39359 for (i
= 0; i
< 2; ++i
)
39361 unsigned int cnt
= 0;
39362 for (j
= 0; j
< 4; ++j
)
39363 if ((contents
[i
] & (1u << j
)) != 0)
39364 dremap
.perm
[2 * i
+ cnt
++] = j
;
39365 for (; cnt
< 2; ++cnt
)
39366 dremap
.perm
[2 * i
+ cnt
] = 0;
39370 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
39371 dfinal
.op1
= dfinal
.op0
;
39372 dfinal
.one_operand_p
= true;
39373 for (i
= 0, j
= 0; i
< nelt
; ++i
)
39377 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
39378 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
39380 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
39381 dfinal
.perm
[i
] |= nelt4
;
39383 gcc_unreachable ();
39386 ok
= expand_vec_perm_1 (&dremap
);
39389 ok
= expand_vec_perm_1 (&dfinal
);
39395 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
39396 a vector permutation using two instructions, vperm2f128 resp.
39397 vperm2i128 followed by any single in-lane permutation. */
39400 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
39402 struct expand_vec_perm_d dfirst
, dsecond
;
39403 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
39407 || GET_MODE_SIZE (d
->vmode
) != 32
39408 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
39412 dsecond
.one_operand_p
= false;
39413 dsecond
.testing_p
= true;
39415 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
39416 immediate. For perm < 16 the second permutation uses
39417 d->op0 as first operand, for perm >= 16 it uses d->op1
39418 as first operand. The second operand is the result of
39420 for (perm
= 0; perm
< 32; perm
++)
39422 /* Ignore permutations which do not move anything cross-lane. */
39425 /* The second shuffle for e.g. V4DFmode has
39426 0123 and ABCD operands.
39427 Ignore AB23, as 23 is already in the second lane
39428 of the first operand. */
39429 if ((perm
& 0xc) == (1 << 2)) continue;
39430 /* And 01CD, as 01 is in the first lane of the first
39432 if ((perm
& 3) == 0) continue;
39433 /* And 4567, as then the vperm2[fi]128 doesn't change
39434 anything on the original 4567 second operand. */
39435 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
39439 /* The second shuffle for e.g. V4DFmode has
39440 4567 and ABCD operands.
39441 Ignore AB67, as 67 is already in the second lane
39442 of the first operand. */
39443 if ((perm
& 0xc) == (3 << 2)) continue;
39444 /* And 45CD, as 45 is in the first lane of the first
39446 if ((perm
& 3) == 2) continue;
39447 /* And 0123, as then the vperm2[fi]128 doesn't change
39448 anything on the original 0123 first operand. */
39449 if ((perm
& 0xf) == (1 << 2)) continue;
39452 for (i
= 0; i
< nelt
; i
++)
39454 j
= d
->perm
[i
] / nelt2
;
39455 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
39456 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
39457 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
39458 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
39466 ok
= expand_vec_perm_1 (&dsecond
);
39477 /* Found a usable second shuffle. dfirst will be
39478 vperm2f128 on d->op0 and d->op1. */
39479 dsecond
.testing_p
= false;
39481 dfirst
.target
= gen_reg_rtx (d
->vmode
);
39482 for (i
= 0; i
< nelt
; i
++)
39483 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
39484 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
39486 ok
= expand_vec_perm_1 (&dfirst
);
39489 /* And dsecond is some single insn shuffle, taking
39490 d->op0 and result of vperm2f128 (if perm < 16) or
39491 d->op1 and result of vperm2f128 (otherwise). */
39492 dsecond
.op1
= dfirst
.target
;
39494 dsecond
.op0
= dfirst
.op1
;
39496 ok
= expand_vec_perm_1 (&dsecond
);
39502 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
39503 if (d
->one_operand_p
)
39510 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39511 a two vector permutation using 2 intra-lane interleave insns
39512 and cross-lane shuffle for 32-byte vectors. */
39515 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
39518 rtx (*gen
) (rtx
, rtx
, rtx
);
39520 if (d
->one_operand_p
)
39522 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
39524 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
39530 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
39532 for (i
= 0; i
< nelt
; i
+= 2)
39533 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
39534 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
39544 gen
= gen_vec_interleave_highv32qi
;
39546 gen
= gen_vec_interleave_lowv32qi
;
39550 gen
= gen_vec_interleave_highv16hi
;
39552 gen
= gen_vec_interleave_lowv16hi
;
39556 gen
= gen_vec_interleave_highv8si
;
39558 gen
= gen_vec_interleave_lowv8si
;
39562 gen
= gen_vec_interleave_highv4di
;
39564 gen
= gen_vec_interleave_lowv4di
;
39568 gen
= gen_vec_interleave_highv8sf
;
39570 gen
= gen_vec_interleave_lowv8sf
;
39574 gen
= gen_vec_interleave_highv4df
;
39576 gen
= gen_vec_interleave_lowv4df
;
39579 gcc_unreachable ();
39582 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
39586 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
39587 a single vector permutation using a single intra-lane vector
39588 permutation, vperm2f128 swapping the lanes and vblend* insn blending
39589 the non-swapped and swapped vectors together. */
39592 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
39594 struct expand_vec_perm_d dfirst
, dsecond
;
39595 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39598 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
39602 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
39603 || !d
->one_operand_p
)
39607 for (i
= 0; i
< nelt
; i
++)
39608 dfirst
.perm
[i
] = 0xff;
39609 for (i
= 0, msk
= 0; i
< nelt
; i
++)
39611 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
39612 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
39614 dfirst
.perm
[j
] = d
->perm
[i
];
39618 for (i
= 0; i
< nelt
; i
++)
39619 if (dfirst
.perm
[i
] == 0xff)
39620 dfirst
.perm
[i
] = i
;
39623 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39626 ok
= expand_vec_perm_1 (&dfirst
);
39627 seq
= get_insns ();
39639 dsecond
.op0
= dfirst
.target
;
39640 dsecond
.op1
= dfirst
.target
;
39641 dsecond
.one_operand_p
= true;
39642 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39643 for (i
= 0; i
< nelt
; i
++)
39644 dsecond
.perm
[i
] = i
^ nelt2
;
39646 ok
= expand_vec_perm_1 (&dsecond
);
39649 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
39650 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
39654 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
39655 permutation using two vperm2f128, followed by a vshufpd insn blending
39656 the two vectors together. */
39659 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
39661 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
39664 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
39674 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
39675 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
39676 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
39677 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
39678 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
39679 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
39680 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
39681 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
39682 dthird
.perm
[0] = (d
->perm
[0] % 2);
39683 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
39684 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
39685 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
39687 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39688 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39689 dthird
.op0
= dfirst
.target
;
39690 dthird
.op1
= dsecond
.target
;
39691 dthird
.one_operand_p
= false;
39693 canonicalize_perm (&dfirst
);
39694 canonicalize_perm (&dsecond
);
39696 ok
= expand_vec_perm_1 (&dfirst
)
39697 && expand_vec_perm_1 (&dsecond
)
39698 && expand_vec_perm_1 (&dthird
);
39705 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
39706 permutation with two pshufb insns and an ior. We should have already
39707 failed all two instruction sequences. */
39710 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
39712 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
39713 unsigned int i
, nelt
, eltsz
;
39715 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
39717 gcc_assert (!d
->one_operand_p
);
39720 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39722 /* Generate two permutation masks. If the required element is within
39723 the given vector it is shuffled into the proper lane. If the required
39724 element is in the other vector, force a zero into the lane by setting
39725 bit 7 in the permutation mask. */
39726 m128
= GEN_INT (-128);
39727 for (i
= 0; i
< nelt
; ++i
)
39729 unsigned j
, e
= d
->perm
[i
];
39730 unsigned which
= (e
>= nelt
);
39734 for (j
= 0; j
< eltsz
; ++j
)
39736 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
39737 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
39741 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
39742 vperm
= force_reg (V16QImode
, vperm
);
39744 l
= gen_reg_rtx (V16QImode
);
39745 op
= gen_lowpart (V16QImode
, d
->op0
);
39746 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
39748 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
39749 vperm
= force_reg (V16QImode
, vperm
);
39751 h
= gen_reg_rtx (V16QImode
);
39752 op
= gen_lowpart (V16QImode
, d
->op1
);
39753 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
39755 op
= gen_lowpart (V16QImode
, d
->target
);
39756 emit_insn (gen_iorv16qi3 (op
, l
, h
));
39761 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
39762 with two vpshufb insns, vpermq and vpor. We should have already failed
39763 all two or three instruction sequences. */
39766 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
39768 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
39769 unsigned int i
, nelt
, eltsz
;
39772 || !d
->one_operand_p
39773 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39780 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39782 /* Generate two permutation masks. If the required element is within
39783 the same lane, it is shuffled in. If the required element from the
39784 other lane, force a zero by setting bit 7 in the permutation mask.
39785 In the other mask the mask has non-negative elements if element
39786 is requested from the other lane, but also moved to the other lane,
39787 so that the result of vpshufb can have the two V2TImode halves
39789 m128
= GEN_INT (-128);
39790 for (i
= 0; i
< nelt
; ++i
)
39792 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39793 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
39795 for (j
= 0; j
< eltsz
; ++j
)
39797 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
39798 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
39802 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39803 vperm
= force_reg (V32QImode
, vperm
);
39805 h
= gen_reg_rtx (V32QImode
);
39806 op
= gen_lowpart (V32QImode
, d
->op0
);
39807 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39809 /* Swap the 128-byte lanes of h into hp. */
39810 hp
= gen_reg_rtx (V4DImode
);
39811 op
= gen_lowpart (V4DImode
, h
);
39812 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
39815 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39816 vperm
= force_reg (V32QImode
, vperm
);
39818 l
= gen_reg_rtx (V32QImode
);
39819 op
= gen_lowpart (V32QImode
, d
->op0
);
39820 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39822 op
= gen_lowpart (V32QImode
, d
->target
);
39823 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
39828 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
39829 and extract-odd permutations of two V32QImode and V16QImode operand
39830 with two vpshufb insns, vpor and vpermq. We should have already
39831 failed all two or three instruction sequences. */
39834 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
39836 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
39837 unsigned int i
, nelt
, eltsz
;
39840 || d
->one_operand_p
39841 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39844 for (i
= 0; i
< d
->nelt
; ++i
)
39845 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
39852 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39854 /* Generate two permutation masks. In the first permutation mask
39855 the first quarter will contain indexes for the first half
39856 of the op0, the second quarter will contain bit 7 set, third quarter
39857 will contain indexes for the second half of the op0 and the
39858 last quarter bit 7 set. In the second permutation mask
39859 the first quarter will contain bit 7 set, the second quarter
39860 indexes for the first half of the op1, the third quarter bit 7 set
39861 and last quarter indexes for the second half of the op1.
39862 I.e. the first mask e.g. for V32QImode extract even will be:
39863 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
39864 (all values masked with 0xf except for -128) and second mask
39865 for extract even will be
39866 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
39867 m128
= GEN_INT (-128);
39868 for (i
= 0; i
< nelt
; ++i
)
39870 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39871 unsigned which
= d
->perm
[i
] >= nelt
;
39872 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
39874 for (j
= 0; j
< eltsz
; ++j
)
39876 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
39877 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
39881 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39882 vperm
= force_reg (V32QImode
, vperm
);
39884 l
= gen_reg_rtx (V32QImode
);
39885 op
= gen_lowpart (V32QImode
, d
->op0
);
39886 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39888 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39889 vperm
= force_reg (V32QImode
, vperm
);
39891 h
= gen_reg_rtx (V32QImode
);
39892 op
= gen_lowpart (V32QImode
, d
->op1
);
39893 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39895 ior
= gen_reg_rtx (V32QImode
);
39896 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
39898 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
39899 op
= gen_lowpart (V4DImode
, d
->target
);
39900 ior
= gen_lowpart (V4DImode
, ior
);
39901 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
39902 const1_rtx
, GEN_INT (3)));
39907 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
39908 and extract-odd permutations. */
39911 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
39918 t1
= gen_reg_rtx (V4DFmode
);
39919 t2
= gen_reg_rtx (V4DFmode
);
39921 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
39922 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
39923 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
39925 /* Now an unpck[lh]pd will produce the result required. */
39927 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
39929 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
39935 int mask
= odd
? 0xdd : 0x88;
39937 t1
= gen_reg_rtx (V8SFmode
);
39938 t2
= gen_reg_rtx (V8SFmode
);
39939 t3
= gen_reg_rtx (V8SFmode
);
39941 /* Shuffle within the 128-bit lanes to produce:
39942 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
39943 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
39946 /* Shuffle the lanes around to produce:
39947 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
39948 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
39951 /* Shuffle within the 128-bit lanes to produce:
39952 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
39953 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
39955 /* Shuffle within the 128-bit lanes to produce:
39956 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
39957 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
39959 /* Shuffle the lanes around to produce:
39960 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
39961 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
39970 /* These are always directly implementable by expand_vec_perm_1. */
39971 gcc_unreachable ();
39975 return expand_vec_perm_pshufb2 (d
);
39978 /* We need 2*log2(N)-1 operations to achieve odd/even
39979 with interleave. */
39980 t1
= gen_reg_rtx (V8HImode
);
39981 t2
= gen_reg_rtx (V8HImode
);
39982 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
39983 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
39984 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
39985 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
39987 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
39989 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
39996 return expand_vec_perm_pshufb2 (d
);
39999 t1
= gen_reg_rtx (V16QImode
);
40000 t2
= gen_reg_rtx (V16QImode
);
40001 t3
= gen_reg_rtx (V16QImode
);
40002 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
40003 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
40004 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
40005 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
40006 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
40007 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
40009 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
40011 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
40018 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
40023 struct expand_vec_perm_d d_copy
= *d
;
40024 d_copy
.vmode
= V4DFmode
;
40025 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
40026 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
40027 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
40028 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40031 t1
= gen_reg_rtx (V4DImode
);
40032 t2
= gen_reg_rtx (V4DImode
);
40034 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40035 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
40036 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
40038 /* Now an vpunpck[lh]qdq will produce the result required. */
40040 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
40042 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
40049 struct expand_vec_perm_d d_copy
= *d
;
40050 d_copy
.vmode
= V8SFmode
;
40051 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
40052 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
40053 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
40054 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40057 t1
= gen_reg_rtx (V8SImode
);
40058 t2
= gen_reg_rtx (V8SImode
);
40060 /* Shuffle the lanes around into
40061 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
40062 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
40063 gen_lowpart (V4DImode
, d
->op0
),
40064 gen_lowpart (V4DImode
, d
->op1
),
40066 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
40067 gen_lowpart (V4DImode
, d
->op0
),
40068 gen_lowpart (V4DImode
, d
->op1
),
40071 /* Swap the 2nd and 3rd position in each lane into
40072 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
40073 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
40074 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40075 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
40076 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40078 /* Now an vpunpck[lh]qdq will produce
40079 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
40081 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
40082 gen_lowpart (V4DImode
, t1
),
40083 gen_lowpart (V4DImode
, t2
));
40085 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
40086 gen_lowpart (V4DImode
, t1
),
40087 gen_lowpart (V4DImode
, t2
));
40092 gcc_unreachable ();
40098 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40099 extract-even and extract-odd permutations. */
40102 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
40104 unsigned i
, odd
, nelt
= d
->nelt
;
40107 if (odd
!= 0 && odd
!= 1)
40110 for (i
= 1; i
< nelt
; ++i
)
40111 if (d
->perm
[i
] != 2 * i
+ odd
)
40114 return expand_vec_perm_even_odd_1 (d
, odd
);
40117 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
40118 permutations. We assume that expand_vec_perm_1 has already failed. */
40121 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
40123 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
40124 enum machine_mode vmode
= d
->vmode
;
40125 unsigned char perm2
[4];
40133 /* These are special-cased in sse.md so that we can optionally
40134 use the vbroadcast instruction. They expand to two insns
40135 if the input happens to be in a register. */
40136 gcc_unreachable ();
40142 /* These are always implementable using standard shuffle patterns. */
40143 gcc_unreachable ();
40147 /* These can be implemented via interleave. We save one insn by
40148 stopping once we have promoted to V4SImode and then use pshufd. */
40152 rtx (*gen
) (rtx
, rtx
, rtx
)
40153 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
40154 : gen_vec_interleave_lowv8hi
;
40158 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
40159 : gen_vec_interleave_highv8hi
;
40164 dest
= gen_reg_rtx (vmode
);
40165 emit_insn (gen (dest
, op0
, op0
));
40166 vmode
= get_mode_wider_vector (vmode
);
40167 op0
= gen_lowpart (vmode
, dest
);
40169 while (vmode
!= V4SImode
);
40171 memset (perm2
, elt
, 4);
40172 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
40181 /* For AVX2 broadcasts of the first element vpbroadcast* or
40182 vpermq should be used by expand_vec_perm_1. */
40183 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
40187 gcc_unreachable ();
40191 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40192 broadcast permutations. */
40195 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
40197 unsigned i
, elt
, nelt
= d
->nelt
;
40199 if (!d
->one_operand_p
)
40203 for (i
= 1; i
< nelt
; ++i
)
40204 if (d
->perm
[i
] != elt
)
40207 return expand_vec_perm_broadcast_1 (d
);
40210 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
40211 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
40212 all the shorter instruction sequences. */
40215 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
40217 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
40218 unsigned int i
, nelt
, eltsz
;
40222 || d
->one_operand_p
40223 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40230 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40232 /* Generate 4 permutation masks. If the required element is within
40233 the same lane, it is shuffled in. If the required element from the
40234 other lane, force a zero by setting bit 7 in the permutation mask.
40235 In the other mask the mask has non-negative elements if element
40236 is requested from the other lane, but also moved to the other lane,
40237 so that the result of vpshufb can have the two V2TImode halves
40239 m128
= GEN_INT (-128);
40240 for (i
= 0; i
< 32; ++i
)
40242 rperm
[0][i
] = m128
;
40243 rperm
[1][i
] = m128
;
40244 rperm
[2][i
] = m128
;
40245 rperm
[3][i
] = m128
;
40251 for (i
= 0; i
< nelt
; ++i
)
40253 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40254 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40255 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
40257 for (j
= 0; j
< eltsz
; ++j
)
40258 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
40259 used
[which
] = true;
40262 for (i
= 0; i
< 2; ++i
)
40264 if (!used
[2 * i
+ 1])
40269 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
40270 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
40271 vperm
= force_reg (V32QImode
, vperm
);
40272 h
[i
] = gen_reg_rtx (V32QImode
);
40273 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40274 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
40277 /* Swap the 128-byte lanes of h[X]. */
40278 for (i
= 0; i
< 2; ++i
)
40280 if (h
[i
] == NULL_RTX
)
40282 op
= gen_reg_rtx (V4DImode
);
40283 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
40284 const2_rtx
, GEN_INT (3), const0_rtx
,
40286 h
[i
] = gen_lowpart (V32QImode
, op
);
40289 for (i
= 0; i
< 2; ++i
)
40296 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
40297 vperm
= force_reg (V32QImode
, vperm
);
40298 l
[i
] = gen_reg_rtx (V32QImode
);
40299 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40300 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
40303 for (i
= 0; i
< 2; ++i
)
40307 op
= gen_reg_rtx (V32QImode
);
40308 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
40315 gcc_assert (l
[0] && l
[1]);
40316 op
= gen_lowpart (V32QImode
, d
->target
);
40317 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
40321 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
40322 With all of the interface bits taken care of, perform the expansion
40323 in D and return true on success. */
40326 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
40328 /* Try a single instruction expansion. */
40329 if (expand_vec_perm_1 (d
))
40332 /* Try sequences of two instructions. */
40334 if (expand_vec_perm_pshuflw_pshufhw (d
))
40337 if (expand_vec_perm_palignr (d
))
40340 if (expand_vec_perm_interleave2 (d
))
40343 if (expand_vec_perm_broadcast (d
))
40346 if (expand_vec_perm_vpermq_perm_1 (d
))
40349 if (expand_vec_perm_vperm2f128 (d
))
40352 /* Try sequences of three instructions. */
40354 if (expand_vec_perm_2vperm2f128_vshuf (d
))
40357 if (expand_vec_perm_pshufb2 (d
))
40360 if (expand_vec_perm_interleave3 (d
))
40363 if (expand_vec_perm_vperm2f128_vblend (d
))
40366 /* Try sequences of four instructions. */
40368 if (expand_vec_perm_vpshufb2_vpermq (d
))
40371 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
40374 /* ??? Look for narrow permutations whose element orderings would
40375 allow the promotion to a wider mode. */
40377 /* ??? Look for sequences of interleave or a wider permute that place
40378 the data into the correct lanes for a half-vector shuffle like
40379 pshuf[lh]w or vpermilps. */
40381 /* ??? Look for sequences of interleave that produce the desired results.
40382 The combinatorics of punpck[lh] get pretty ugly... */
40384 if (expand_vec_perm_even_odd (d
))
40387 /* Even longer sequences. */
40388 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
40394 /* If a permutation only uses one operand, make it clear. Returns true
40395 if the permutation references both operands. */
40398 canonicalize_perm (struct expand_vec_perm_d
*d
)
40400 int i
, which
, nelt
= d
->nelt
;
40402 for (i
= which
= 0; i
< nelt
; ++i
)
40403 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
40405 d
->one_operand_p
= true;
40412 if (!rtx_equal_p (d
->op0
, d
->op1
))
40414 d
->one_operand_p
= false;
40417 /* The elements of PERM do not suggest that only the first operand
40418 is used, but both operands are identical. Allow easier matching
40419 of the permutation by folding the permutation into the single
40424 for (i
= 0; i
< nelt
; ++i
)
40425 d
->perm
[i
] &= nelt
- 1;
40434 return (which
== 3);
40438 ix86_expand_vec_perm_const (rtx operands
[4])
40440 struct expand_vec_perm_d d
;
40441 unsigned char perm
[MAX_VECT_LEN
];
40446 d
.target
= operands
[0];
40447 d
.op0
= operands
[1];
40448 d
.op1
= operands
[2];
40451 d
.vmode
= GET_MODE (d
.target
);
40452 gcc_assert (VECTOR_MODE_P (d
.vmode
));
40453 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40454 d
.testing_p
= false;
40456 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
40457 gcc_assert (XVECLEN (sel
, 0) == nelt
);
40458 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
40460 for (i
= 0; i
< nelt
; ++i
)
40462 rtx e
= XVECEXP (sel
, 0, i
);
40463 int ei
= INTVAL (e
) & (2 * nelt
- 1);
40468 two_args
= canonicalize_perm (&d
);
40470 if (ix86_expand_vec_perm_const_1 (&d
))
40473 /* If the selector says both arguments are needed, but the operands are the
40474 same, the above tried to expand with one_operand_p and flattened selector.
40475 If that didn't work, retry without one_operand_p; we succeeded with that
40477 if (two_args
&& d
.one_operand_p
)
40479 d
.one_operand_p
= false;
40480 memcpy (d
.perm
, perm
, sizeof (perm
));
40481 return ix86_expand_vec_perm_const_1 (&d
);
40487 /* Implement targetm.vectorize.vec_perm_const_ok. */
40490 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
40491 const unsigned char *sel
)
40493 struct expand_vec_perm_d d
;
40494 unsigned int i
, nelt
, which
;
40498 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40499 d
.testing_p
= true;
40501 /* Given sufficient ISA support we can just return true here
40502 for selected vector modes. */
40503 if (GET_MODE_SIZE (d
.vmode
) == 16)
40505 /* All implementable with a single vpperm insn. */
40508 /* All implementable with 2 pshufb + 1 ior. */
40511 /* All implementable with shufpd or unpck[lh]pd. */
40516 /* Extract the values from the vector CST into the permutation
40518 memcpy (d
.perm
, sel
, nelt
);
40519 for (i
= which
= 0; i
< nelt
; ++i
)
40521 unsigned char e
= d
.perm
[i
];
40522 gcc_assert (e
< 2 * nelt
);
40523 which
|= (e
< nelt
? 1 : 2);
40526 /* For all elements from second vector, fold the elements to first. */
40528 for (i
= 0; i
< nelt
; ++i
)
40531 /* Check whether the mask can be applied to the vector type. */
40532 d
.one_operand_p
= (which
!= 3);
40534 /* Implementable with shufps or pshufd. */
40535 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
40538 /* Otherwise we have to go through the motions and see if we can
40539 figure out how to generate the requested permutation. */
40540 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
40541 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
40542 if (!d
.one_operand_p
)
40543 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
40546 ret
= ix86_expand_vec_perm_const_1 (&d
);
40553 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
40555 struct expand_vec_perm_d d
;
40561 d
.vmode
= GET_MODE (targ
);
40562 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40563 d
.one_operand_p
= false;
40564 d
.testing_p
= false;
40566 for (i
= 0; i
< nelt
; ++i
)
40567 d
.perm
[i
] = i
* 2 + odd
;
40569 /* We'll either be able to implement the permutation directly... */
40570 if (expand_vec_perm_1 (&d
))
40573 /* ... or we use the special-case patterns. */
40574 expand_vec_perm_even_odd_1 (&d
, odd
);
40578 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
40580 struct expand_vec_perm_d d
;
40581 unsigned i
, nelt
, base
;
40587 d
.vmode
= GET_MODE (targ
);
40588 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40589 d
.one_operand_p
= false;
40590 d
.testing_p
= false;
40592 base
= high_p
? nelt
/ 2 : 0;
40593 for (i
= 0; i
< nelt
/ 2; ++i
)
40595 d
.perm
[i
* 2] = i
+ base
;
40596 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
40599 /* Note that for AVX this isn't one instruction. */
40600 ok
= ix86_expand_vec_perm_const_1 (&d
);
40605 /* Expand a vector operation CODE for a V*QImode in terms of the
40606 same operation on V*HImode. */
40609 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
40611 enum machine_mode qimode
= GET_MODE (dest
);
40612 enum machine_mode himode
;
40613 rtx (*gen_il
) (rtx
, rtx
, rtx
);
40614 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
40615 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
40616 struct expand_vec_perm_d d
;
40617 bool ok
, full_interleave
;
40618 bool uns_p
= false;
40625 gen_il
= gen_vec_interleave_lowv16qi
;
40626 gen_ih
= gen_vec_interleave_highv16qi
;
40629 himode
= V16HImode
;
40630 gen_il
= gen_avx2_interleave_lowv32qi
;
40631 gen_ih
= gen_avx2_interleave_highv32qi
;
40634 gcc_unreachable ();
40637 op2_l
= op2_h
= op2
;
40641 /* Unpack data such that we've got a source byte in each low byte of
40642 each word. We don't care what goes into the high byte of each word.
40643 Rather than trying to get zero in there, most convenient is to let
40644 it be a copy of the low byte. */
40645 op2_l
= gen_reg_rtx (qimode
);
40646 op2_h
= gen_reg_rtx (qimode
);
40647 emit_insn (gen_il (op2_l
, op2
, op2
));
40648 emit_insn (gen_ih (op2_h
, op2
, op2
));
40651 op1_l
= gen_reg_rtx (qimode
);
40652 op1_h
= gen_reg_rtx (qimode
);
40653 emit_insn (gen_il (op1_l
, op1
, op1
));
40654 emit_insn (gen_ih (op1_h
, op1
, op1
));
40655 full_interleave
= qimode
== V16QImode
;
40663 op1_l
= gen_reg_rtx (himode
);
40664 op1_h
= gen_reg_rtx (himode
);
40665 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
40666 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
40667 full_interleave
= true;
40670 gcc_unreachable ();
40673 /* Perform the operation. */
40674 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
40676 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
40678 gcc_assert (res_l
&& res_h
);
40680 /* Merge the data back into the right place. */
40682 d
.op0
= gen_lowpart (qimode
, res_l
);
40683 d
.op1
= gen_lowpart (qimode
, res_h
);
40685 d
.nelt
= GET_MODE_NUNITS (qimode
);
40686 d
.one_operand_p
= false;
40687 d
.testing_p
= false;
40689 if (full_interleave
)
40691 /* For SSE2, we used an full interleave, so the desired
40692 results are in the even elements. */
40693 for (i
= 0; i
< 32; ++i
)
40698 /* For AVX, the interleave used above was not cross-lane. So the
40699 extraction is evens but with the second and third quarter swapped.
40700 Happily, that is even one insn shorter than even extraction. */
40701 for (i
= 0; i
< 32; ++i
)
40702 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
40705 ok
= ix86_expand_vec_perm_const_1 (&d
);
40708 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40709 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
40713 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
40714 bool uns_p
, bool odd_p
)
40716 enum machine_mode mode
= GET_MODE (op1
);
40717 enum machine_mode wmode
= GET_MODE (dest
);
40720 /* We only play even/odd games with vectors of SImode. */
40721 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
40723 /* If we're looking for the odd results, shift those members down to
40724 the even slots. For some cpus this is faster than a PSHUFD. */
40727 if (TARGET_XOP
&& mode
== V4SImode
)
40729 x
= force_reg (wmode
, CONST0_RTX (wmode
));
40730 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
40734 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
40735 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
40736 x
, NULL
, 1, OPTAB_DIRECT
);
40737 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
40738 x
, NULL
, 1, OPTAB_DIRECT
);
40739 op1
= gen_lowpart (mode
, op1
);
40740 op2
= gen_lowpart (mode
, op2
);
40743 if (mode
== V8SImode
)
40746 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
40748 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
40751 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
40752 else if (TARGET_SSE4_1
)
40753 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
40756 rtx s1
, s2
, t0
, t1
, t2
;
40758 /* The easiest way to implement this without PMULDQ is to go through
40759 the motions as if we are performing a full 64-bit multiply. With
40760 the exception that we need to do less shuffling of the elements. */
40762 /* Compute the sign-extension, aka highparts, of the two operands. */
40763 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40764 op1
, pc_rtx
, pc_rtx
);
40765 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40766 op2
, pc_rtx
, pc_rtx
);
40768 /* Multiply LO(A) * HI(B), and vice-versa. */
40769 t1
= gen_reg_rtx (wmode
);
40770 t2
= gen_reg_rtx (wmode
);
40771 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
40772 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
40774 /* Multiply LO(A) * LO(B). */
40775 t0
= gen_reg_rtx (wmode
);
40776 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
40778 /* Combine and shift the highparts into place. */
40779 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
40780 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
40783 /* Combine high and low parts. */
40784 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
40791 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
40792 bool uns_p
, bool high_p
)
40794 enum machine_mode wmode
= GET_MODE (dest
);
40795 enum machine_mode mode
= GET_MODE (op1
);
40796 rtx t1
, t2
, t3
, t4
, mask
;
40801 t1
= gen_reg_rtx (mode
);
40802 t2
= gen_reg_rtx (mode
);
40803 if (TARGET_XOP
&& !uns_p
)
40805 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
40806 shuffle the elements once so that all elements are in the right
40807 place for immediate use: { A C B D }. */
40808 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
40809 const1_rtx
, GEN_INT (3)));
40810 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
40811 const1_rtx
, GEN_INT (3)));
40815 /* Put the elements into place for the multiply. */
40816 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
40817 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
40820 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
40824 /* Shuffle the elements between the lanes. After this we
40825 have { A B E F | C D G H } for each operand. */
40826 t1
= gen_reg_rtx (V4DImode
);
40827 t2
= gen_reg_rtx (V4DImode
);
40828 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
40829 const0_rtx
, const2_rtx
,
40830 const1_rtx
, GEN_INT (3)));
40831 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
40832 const0_rtx
, const2_rtx
,
40833 const1_rtx
, GEN_INT (3)));
40835 /* Shuffle the elements within the lanes. After this we
40836 have { A A B B | C C D D } or { E E F F | G G H H }. */
40837 t3
= gen_reg_rtx (V8SImode
);
40838 t4
= gen_reg_rtx (V8SImode
);
40839 mask
= GEN_INT (high_p
40840 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
40841 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
40842 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
40843 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
40845 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
40850 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
40851 uns_p
, OPTAB_DIRECT
);
40852 t2
= expand_binop (mode
,
40853 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
40854 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
40855 gcc_assert (t1
&& t2
);
40857 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
40862 t1
= gen_reg_rtx (wmode
);
40863 t2
= gen_reg_rtx (wmode
);
40864 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
40865 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
40867 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
40871 gcc_unreachable ();
40876 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
40880 res_1
= gen_reg_rtx (V4SImode
);
40881 res_2
= gen_reg_rtx (V4SImode
);
40882 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
40883 op1
, op2
, true, false);
40884 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
40885 op1
, op2
, true, true);
40887 /* Move the results in element 2 down to element 1; we don't care
40888 what goes in elements 2 and 3. Then we can merge the parts
40889 back together with an interleave.
40891 Note that two other sequences were tried:
40892 (1) Use interleaves at the start instead of psrldq, which allows
40893 us to use a single shufps to merge things back at the end.
40894 (2) Use shufps here to combine the two vectors, then pshufd to
40895 put the elements in the correct order.
40896 In both cases the cost of the reformatting stall was too high
40897 and the overall sequence slower. */
40899 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
40900 const0_rtx
, const0_rtx
));
40901 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
40902 const0_rtx
, const0_rtx
));
40903 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
40905 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
40909 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
40911 enum machine_mode mode
= GET_MODE (op0
);
40912 rtx t1
, t2
, t3
, t4
, t5
, t6
;
40914 if (TARGET_XOP
&& mode
== V2DImode
)
40916 /* op1: A,B,C,D, op2: E,F,G,H */
40917 op1
= gen_lowpart (V4SImode
, op1
);
40918 op2
= gen_lowpart (V4SImode
, op2
);
40920 t1
= gen_reg_rtx (V4SImode
);
40921 t2
= gen_reg_rtx (V4SImode
);
40922 t3
= gen_reg_rtx (V2DImode
);
40923 t4
= gen_reg_rtx (V2DImode
);
40926 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
40932 /* t2: (B*E),(A*F),(D*G),(C*H) */
40933 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
40935 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
40936 emit_insn (gen_xop_phadddq (t3
, t2
));
40938 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
40939 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
40941 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
40942 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
40946 enum machine_mode nmode
;
40947 rtx (*umul
) (rtx
, rtx
, rtx
);
40949 if (mode
== V2DImode
)
40951 umul
= gen_vec_widen_umult_even_v4si
;
40954 else if (mode
== V4DImode
)
40956 umul
= gen_vec_widen_umult_even_v8si
;
40960 gcc_unreachable ();
40963 /* Multiply low parts. */
40964 t1
= gen_reg_rtx (mode
);
40965 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
40967 /* Shift input vectors right 32 bits so we can multiply high parts. */
40969 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
40970 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
40972 /* Multiply high parts by low parts. */
40973 t4
= gen_reg_rtx (mode
);
40974 t5
= gen_reg_rtx (mode
);
40975 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
40976 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
40978 /* Combine and shift the highparts back. */
40979 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
40980 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
40982 /* Combine high and low parts. */
40983 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
40986 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40987 gen_rtx_MULT (mode
, op1
, op2
));
40990 /* Expand an insert into a vector register through pinsr insn.
40991 Return true if successful. */
40994 ix86_expand_pinsr (rtx
*operands
)
40996 rtx dst
= operands
[0];
40997 rtx src
= operands
[3];
40999 unsigned int size
= INTVAL (operands
[1]);
41000 unsigned int pos
= INTVAL (operands
[2]);
41002 if (GET_CODE (dst
) == SUBREG
)
41004 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
41005 dst
= SUBREG_REG (dst
);
41008 if (GET_CODE (src
) == SUBREG
)
41009 src
= SUBREG_REG (src
);
41011 switch (GET_MODE (dst
))
41018 enum machine_mode srcmode
, dstmode
;
41019 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
41021 srcmode
= mode_for_size (size
, MODE_INT
, 0);
41026 if (!TARGET_SSE4_1
)
41028 dstmode
= V16QImode
;
41029 pinsr
= gen_sse4_1_pinsrb
;
41035 dstmode
= V8HImode
;
41036 pinsr
= gen_sse2_pinsrw
;
41040 if (!TARGET_SSE4_1
)
41042 dstmode
= V4SImode
;
41043 pinsr
= gen_sse4_1_pinsrd
;
41047 gcc_assert (TARGET_64BIT
);
41048 if (!TARGET_SSE4_1
)
41050 dstmode
= V2DImode
;
41051 pinsr
= gen_sse4_1_pinsrq
;
41058 dst
= gen_lowpart (dstmode
, dst
);
41059 src
= gen_lowpart (srcmode
, src
);
41063 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
41072 /* This function returns the calling abi specific va_list type node.
41073 It returns the FNDECL specific va_list type. */
41076 ix86_fn_abi_va_list (tree fndecl
)
41079 return va_list_type_node
;
41080 gcc_assert (fndecl
!= NULL_TREE
);
41082 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
41083 return ms_va_list_type_node
;
41085 return sysv_va_list_type_node
;
41088 /* Returns the canonical va_list type specified by TYPE. If there
41089 is no valid TYPE provided, it return NULL_TREE. */
41092 ix86_canonical_va_list_type (tree type
)
41096 /* Resolve references and pointers to va_list type. */
41097 if (TREE_CODE (type
) == MEM_REF
)
41098 type
= TREE_TYPE (type
);
41099 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
41100 type
= TREE_TYPE (type
);
41101 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
41102 type
= TREE_TYPE (type
);
41104 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
41106 wtype
= va_list_type_node
;
41107 gcc_assert (wtype
!= NULL_TREE
);
41109 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41111 /* If va_list is an array type, the argument may have decayed
41112 to a pointer type, e.g. by being passed to another function.
41113 In that case, unwrap both types so that we can compare the
41114 underlying records. */
41115 if (TREE_CODE (htype
) == ARRAY_TYPE
41116 || POINTER_TYPE_P (htype
))
41118 wtype
= TREE_TYPE (wtype
);
41119 htype
= TREE_TYPE (htype
);
41122 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41123 return va_list_type_node
;
41124 wtype
= sysv_va_list_type_node
;
41125 gcc_assert (wtype
!= NULL_TREE
);
41127 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41129 /* If va_list is an array type, the argument may have decayed
41130 to a pointer type, e.g. by being passed to another function.
41131 In that case, unwrap both types so that we can compare the
41132 underlying records. */
41133 if (TREE_CODE (htype
) == ARRAY_TYPE
41134 || POINTER_TYPE_P (htype
))
41136 wtype
= TREE_TYPE (wtype
);
41137 htype
= TREE_TYPE (htype
);
41140 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41141 return sysv_va_list_type_node
;
41142 wtype
= ms_va_list_type_node
;
41143 gcc_assert (wtype
!= NULL_TREE
);
41145 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41147 /* If va_list is an array type, the argument may have decayed
41148 to a pointer type, e.g. by being passed to another function.
41149 In that case, unwrap both types so that we can compare the
41150 underlying records. */
41151 if (TREE_CODE (htype
) == ARRAY_TYPE
41152 || POINTER_TYPE_P (htype
))
41154 wtype
= TREE_TYPE (wtype
);
41155 htype
= TREE_TYPE (htype
);
41158 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41159 return ms_va_list_type_node
;
41162 return std_canonical_va_list_type (type
);
41165 /* Iterate through the target-specific builtin types for va_list.
41166 IDX denotes the iterator, *PTREE is set to the result type of
41167 the va_list builtin, and *PNAME to its internal type.
41168 Returns zero if there is no element for this index, otherwise
41169 IDX should be increased upon the next call.
41170 Note, do not iterate a base builtin's name like __builtin_va_list.
41171 Used from c_common_nodes_and_builtins. */
41174 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
41184 *ptree
= ms_va_list_type_node
;
41185 *pname
= "__builtin_ms_va_list";
41189 *ptree
= sysv_va_list_type_node
;
41190 *pname
= "__builtin_sysv_va_list";
41198 #undef TARGET_SCHED_DISPATCH
41199 #define TARGET_SCHED_DISPATCH has_dispatch
41200 #undef TARGET_SCHED_DISPATCH_DO
41201 #define TARGET_SCHED_DISPATCH_DO do_dispatch
41202 #undef TARGET_SCHED_REASSOCIATION_WIDTH
41203 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
41204 #undef TARGET_SCHED_REORDER
41205 #define TARGET_SCHED_REORDER ix86_sched_reorder
41206 #undef TARGET_SCHED_ADJUST_PRIORITY
41207 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
41208 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
41209 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ix86_dependencies_evaluation_hook
41211 /* The size of the dispatch window is the total number of bytes of
41212 object code allowed in a window. */
41213 #define DISPATCH_WINDOW_SIZE 16
41215 /* Number of dispatch windows considered for scheduling. */
41216 #define MAX_DISPATCH_WINDOWS 3
41218 /* Maximum number of instructions in a window. */
41221 /* Maximum number of immediate operands in a window. */
41224 /* Maximum number of immediate bits allowed in a window. */
41225 #define MAX_IMM_SIZE 128
41227 /* Maximum number of 32 bit immediates allowed in a window. */
41228 #define MAX_IMM_32 4
41230 /* Maximum number of 64 bit immediates allowed in a window. */
41231 #define MAX_IMM_64 2
41233 /* Maximum total of loads or prefetches allowed in a window. */
41236 /* Maximum total of stores allowed in a window. */
41237 #define MAX_STORE 1
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
41244 enum dispatch_group
{
41259 /* Number of allowable groups in a dispatch window. It is an array
41260 indexed by dispatch_group enum. 100 is used as a big number,
41261 because the number of these kind of operations does not have any
41262 effect in dispatch window, but we need them for other reasons in
41264 static unsigned int num_allowable_groups
[disp_last
] = {
41265 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
41268 char group_name
[disp_last
+ 1][16] = {
41269 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
41270 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
41271 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
41274 /* Instruction path. */
41277 path_single
, /* Single micro op. */
41278 path_double
, /* Double micro op. */
41279 path_multi
, /* Instructions with more than 2 micro op.. */
41283 /* sched_insn_info defines a window to the instructions scheduled in
41284 the basic block. It contains a pointer to the insn_info table and
41285 the instruction scheduled.
41287 Windows are allocated for each basic block and are linked
41289 typedef struct sched_insn_info_s
{
41291 enum dispatch_group group
;
41292 enum insn_path path
;
41297 /* Linked list of dispatch windows. This is a two way list of
41298 dispatch windows of a basic block. It contains information about
41299 the number of uops in the window and the total number of
41300 instructions and of bytes in the object code for this dispatch
41302 typedef struct dispatch_windows_s
{
41303 int num_insn
; /* Number of insn in the window. */
41304 int num_uops
; /* Number of uops in the window. */
41305 int window_size
; /* Number of bytes in the window. */
41306 int window_num
; /* Window number between 0 or 1. */
41307 int num_imm
; /* Number of immediates in an insn. */
41308 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
41309 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
41310 int imm_size
; /* Total immediates in the window. */
41311 int num_loads
; /* Total memory loads in the window. */
41312 int num_stores
; /* Total memory stores in the window. */
41313 int violation
; /* Violation exists in window. */
41314 sched_insn_info
*window
; /* Pointer to the window. */
41315 struct dispatch_windows_s
*next
;
41316 struct dispatch_windows_s
*prev
;
41317 } dispatch_windows
;
/* Immediate values used in an insn.  */
41320 typedef struct imm_info_s
41327 static dispatch_windows
*dispatch_window_list
;
41328 static dispatch_windows
*dispatch_window_list1
;
41330 /* Get dispatch group of insn. */
41332 static enum dispatch_group
41333 get_mem_group (rtx insn
)
41335 enum attr_memory memory
;
41337 if (INSN_CODE (insn
) < 0)
41338 return disp_no_group
;
41339 memory
= get_attr_memory (insn
);
41340 if (memory
== MEMORY_STORE
)
41343 if (memory
== MEMORY_LOAD
)
41346 if (memory
== MEMORY_BOTH
)
41347 return disp_load_store
;
41349 return disp_no_group
;
41352 /* Return true if insn is a compare instruction. */
41357 enum attr_type type
;
41359 type
= get_attr_type (insn
);
41360 return (type
== TYPE_TEST
41361 || type
== TYPE_ICMP
41362 || type
== TYPE_FCMP
41363 || GET_CODE (PATTERN (insn
)) == COMPARE
);
41366 /* Return true if a dispatch violation encountered. */
41369 dispatch_violation (void)
41371 if (dispatch_window_list
->next
)
41372 return dispatch_window_list
->next
->violation
;
41373 return dispatch_window_list
->violation
;
41376 /* Return true if insn is a branch instruction. */
41379 is_branch (rtx insn
)
41381 return (CALL_P (insn
) || JUMP_P (insn
));
41384 /* Return true if insn is a prefetch instruction. */
41387 is_prefetch (rtx insn
)
41389 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
41392 /* This function initializes a dispatch window and the list container holding a
41393 pointer to the window. */
41396 init_window (int window_num
)
41399 dispatch_windows
*new_list
;
41401 if (window_num
== 0)
41402 new_list
= dispatch_window_list
;
41404 new_list
= dispatch_window_list1
;
41406 new_list
->num_insn
= 0;
41407 new_list
->num_uops
= 0;
41408 new_list
->window_size
= 0;
41409 new_list
->next
= NULL
;
41410 new_list
->prev
= NULL
;
41411 new_list
->window_num
= window_num
;
41412 new_list
->num_imm
= 0;
41413 new_list
->num_imm_32
= 0;
41414 new_list
->num_imm_64
= 0;
41415 new_list
->imm_size
= 0;
41416 new_list
->num_loads
= 0;
41417 new_list
->num_stores
= 0;
41418 new_list
->violation
= false;
41420 for (i
= 0; i
< MAX_INSN
; i
++)
41422 new_list
->window
[i
].insn
= NULL
;
41423 new_list
->window
[i
].group
= disp_no_group
;
41424 new_list
->window
[i
].path
= no_path
;
41425 new_list
->window
[i
].byte_len
= 0;
41426 new_list
->window
[i
].imm_bytes
= 0;
41431 /* This function allocates and initializes a dispatch window and the
41432 list container holding a pointer to the window. */
41434 static dispatch_windows
*
41435 allocate_window (void)
41437 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
41438 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
41443 /* This routine initializes the dispatch scheduling information. It
41444 initiates building dispatch scheduler tables and constructs the
41445 first dispatch window. */
41448 init_dispatch_sched (void)
41450 /* Allocate a dispatch list and a window. */
41451 dispatch_window_list
= allocate_window ();
41452 dispatch_window_list1
= allocate_window ();
41457 /* This function returns true if a branch is detected. End of a basic block
41458 does not have to be a branch, but here we assume only branches end a
41462 is_end_basic_block (enum dispatch_group group
)
41464 return group
== disp_branch
;
41467 /* This function is called when the end of a window processing is reached. */
41470 process_end_window (void)
41472 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
41473 if (dispatch_window_list
->next
)
41475 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
41476 gcc_assert (dispatch_window_list
->window_size
41477 + dispatch_window_list1
->window_size
<= 48);
41483 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
41484 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
41485 for 48 bytes of instructions. Note that these windows are not dispatch
41486 windows that their sizes are DISPATCH_WINDOW_SIZE. */
41488 static dispatch_windows
*
41489 allocate_next_window (int window_num
)
41491 if (window_num
== 0)
41493 if (dispatch_window_list
->next
)
41496 return dispatch_window_list
;
41499 dispatch_window_list
->next
= dispatch_window_list1
;
41500 dispatch_window_list1
->prev
= dispatch_window_list
;
41502 return dispatch_window_list1
;
41505 /* Increment the number of immediate operands of an instruction. */
41508 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
41513 switch ( GET_CODE (*in_rtx
))
41518 (imm_values
->imm
)++;
41519 if (x86_64_immediate_operand (*in_rtx
, SImode
))
41520 (imm_values
->imm32
)++;
41522 (imm_values
->imm64
)++;
41526 (imm_values
->imm
)++;
41527 (imm_values
->imm64
)++;
41531 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
41533 (imm_values
->imm
)++;
41534 (imm_values
->imm32
)++;
41545 /* Compute number of immediate operands of an instruction. */
41548 find_constant (rtx in_rtx
, imm_info
*imm_values
)
41550 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
41551 (rtx_function
) find_constant_1
, (void *) imm_values
);
41554 /* Return total size of immediate operands of an instruction along with number
41555 of corresponding immediate-operands. It initializes its parameters to zero
41556 befor calling FIND_CONSTANT.
41557 INSN is the input instruction. IMM is the total of immediates.
41558 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
41562 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
41564 imm_info imm_values
= {0, 0, 0};
41566 find_constant (insn
, &imm_values
);
41567 *imm
= imm_values
.imm
;
41568 *imm32
= imm_values
.imm32
;
41569 *imm64
= imm_values
.imm64
;
41570 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
41573 /* This function indicates if an operand of an instruction is an
41577 has_immediate (rtx insn
)
41579 int num_imm_operand
;
41580 int num_imm32_operand
;
41581 int num_imm64_operand
;
41584 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41585 &num_imm64_operand
);
41589 /* Return single or double path for instructions. */
41591 static enum insn_path
41592 get_insn_path (rtx insn
)
41594 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
41596 if ((int)path
== 0)
41597 return path_single
;
41599 if ((int)path
== 1)
41600 return path_double
;
41605 /* Return insn dispatch group. */
41607 static enum dispatch_group
41608 get_insn_group (rtx insn
)
41610 enum dispatch_group group
= get_mem_group (insn
);
41614 if (is_branch (insn
))
41615 return disp_branch
;
41620 if (has_immediate (insn
))
41623 if (is_prefetch (insn
))
41624 return disp_prefetch
;
41626 return disp_no_group
;
41629 /* Count number of GROUP restricted instructions in a dispatch
41630 window WINDOW_LIST. */
41633 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
41635 enum dispatch_group group
= get_insn_group (insn
);
41637 int num_imm_operand
;
41638 int num_imm32_operand
;
41639 int num_imm64_operand
;
41641 if (group
== disp_no_group
)
41644 if (group
== disp_imm
)
41646 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41647 &num_imm64_operand
);
41648 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
41649 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
41650 || (num_imm32_operand
> 0
41651 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
41652 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
41653 || (num_imm64_operand
> 0
41654 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
41655 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
41656 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
41657 && num_imm64_operand
> 0
41658 && ((window_list
->num_imm_64
> 0
41659 && window_list
->num_insn
>= 2)
41660 || window_list
->num_insn
>= 3)))
41666 if ((group
== disp_load_store
41667 && (window_list
->num_loads
>= MAX_LOAD
41668 || window_list
->num_stores
>= MAX_STORE
))
41669 || ((group
== disp_load
41670 || group
== disp_prefetch
)
41671 && window_list
->num_loads
>= MAX_LOAD
)
41672 || (group
== disp_store
41673 && window_list
->num_stores
>= MAX_STORE
))
41679 /* This function returns true if insn satisfies dispatch rules on the
41680 last window scheduled. */
41683 fits_dispatch_window (rtx insn
)
41685 dispatch_windows
*window_list
= dispatch_window_list
;
41686 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
41687 unsigned int num_restrict
;
41688 enum dispatch_group group
= get_insn_group (insn
);
41689 enum insn_path path
= get_insn_path (insn
);
41692 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
41693 instructions should be given the lowest priority in the
41694 scheduling process in Haifa scheduler to make sure they will be
41695 scheduled in the same dispatch window as the reference to them. */
41696 if (group
== disp_jcc
|| group
== disp_cmp
)
41699 /* Check nonrestricted. */
41700 if (group
== disp_no_group
|| group
== disp_branch
)
41703 /* Get last dispatch window. */
41704 if (window_list_next
)
41705 window_list
= window_list_next
;
41707 if (window_list
->window_num
== 1)
41709 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
41712 || (min_insn_size (insn
) + sum
) >= 48)
41713 /* Window 1 is full. Go for next window. */
41717 num_restrict
= count_num_restricted (insn
, window_list
);
41719 if (num_restrict
> num_allowable_groups
[group
])
41722 /* See if it fits in the first window. */
41723 if (window_list
->window_num
== 0)
41725 /* The first widow should have only single and double path
41727 if (path
== path_double
41728 && (window_list
->num_uops
+ 2) > MAX_INSN
)
41730 else if (path
!= path_single
)
41736 /* Add an instruction INSN with NUM_UOPS micro-operations to the
41737 dispatch window WINDOW_LIST. */
41740 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
41742 int byte_len
= min_insn_size (insn
);
41743 int num_insn
= window_list
->num_insn
;
41745 sched_insn_info
*window
= window_list
->window
;
41746 enum dispatch_group group
= get_insn_group (insn
);
41747 enum insn_path path
= get_insn_path (insn
);
41748 int num_imm_operand
;
41749 int num_imm32_operand
;
41750 int num_imm64_operand
;
41752 if (!window_list
->violation
&& group
!= disp_cmp
41753 && !fits_dispatch_window (insn
))
41754 window_list
->violation
= true;
41756 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41757 &num_imm64_operand
);
41759 /* Initialize window with new instruction. */
41760 window
[num_insn
].insn
= insn
;
41761 window
[num_insn
].byte_len
= byte_len
;
41762 window
[num_insn
].group
= group
;
41763 window
[num_insn
].path
= path
;
41764 window
[num_insn
].imm_bytes
= imm_size
;
41766 window_list
->window_size
+= byte_len
;
41767 window_list
->num_insn
= num_insn
+ 1;
41768 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
41769 window_list
->imm_size
+= imm_size
;
41770 window_list
->num_imm
+= num_imm_operand
;
41771 window_list
->num_imm_32
+= num_imm32_operand
;
41772 window_list
->num_imm_64
+= num_imm64_operand
;
41774 if (group
== disp_store
)
41775 window_list
->num_stores
+= 1;
41776 else if (group
== disp_load
41777 || group
== disp_prefetch
)
41778 window_list
->num_loads
+= 1;
41779 else if (group
== disp_load_store
)
41781 window_list
->num_stores
+= 1;
41782 window_list
->num_loads
+= 1;
41786 /* Adds a scheduled instruction, INSN, to the current dispatch window.
41787 If the total bytes of instructions or the number of instructions in
41788 the window exceed allowable, it allocates a new window. */
41791 add_to_dispatch_window (rtx insn
)
41794 dispatch_windows
*window_list
;
41795 dispatch_windows
*next_list
;
41796 dispatch_windows
*window0_list
;
41797 enum insn_path path
;
41798 enum dispatch_group insn_group
;
41806 if (INSN_CODE (insn
) < 0)
41809 byte_len
= min_insn_size (insn
);
41810 window_list
= dispatch_window_list
;
41811 next_list
= window_list
->next
;
41812 path
= get_insn_path (insn
);
41813 insn_group
= get_insn_group (insn
);
41815 /* Get the last dispatch window. */
41817 window_list
= dispatch_window_list
->next
;
41819 if (path
== path_single
)
41821 else if (path
== path_double
)
41824 insn_num_uops
= (int) path
;
41826 /* If current window is full, get a new window.
41827 Window number zero is full, if MAX_INSN uops are scheduled in it.
41828 Window number one is full, if window zero's bytes plus window
41829 one's bytes is 32, or if the bytes of the new instruction added
41830 to the total makes it greater than 48, or it has already MAX_INSN
41831 instructions in it. */
41832 num_insn
= window_list
->num_insn
;
41833 num_uops
= window_list
->num_uops
;
41834 window_num
= window_list
->window_num
;
41835 insn_fits
= fits_dispatch_window (insn
);
41837 if (num_insn
>= MAX_INSN
41838 || num_uops
+ insn_num_uops
> MAX_INSN
41841 window_num
= ~window_num
& 1;
41842 window_list
= allocate_next_window (window_num
);
41845 if (window_num
== 0)
41847 add_insn_window (insn
, window_list
, insn_num_uops
);
41848 if (window_list
->num_insn
>= MAX_INSN
41849 && insn_group
== disp_branch
)
41851 process_end_window ();
41855 else if (window_num
== 1)
41857 window0_list
= window_list
->prev
;
41858 sum
= window0_list
->window_size
+ window_list
->window_size
;
41860 || (byte_len
+ sum
) >= 48)
41862 process_end_window ();
41863 window_list
= dispatch_window_list
;
41866 add_insn_window (insn
, window_list
, insn_num_uops
);
41869 gcc_unreachable ();
41871 if (is_end_basic_block (insn_group
))
41873 /* End of basic block is reached do end-basic-block process. */
41874 process_end_window ();
41879 /* Print the dispatch window, WINDOW_NUM, to FILE. */
41881 DEBUG_FUNCTION
static void
41882 debug_dispatch_window_file (FILE *file
, int window_num
)
41884 dispatch_windows
*list
;
41887 if (window_num
== 0)
41888 list
= dispatch_window_list
;
41890 list
= dispatch_window_list1
;
41892 fprintf (file
, "Window #%d:\n", list
->window_num
);
41893 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
41894 list
->num_insn
, list
->num_uops
, list
->window_size
);
41895 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41896 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
41898 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
41900 fprintf (file
, " insn info:\n");
41902 for (i
= 0; i
< MAX_INSN
; i
++)
41904 if (!list
->window
[i
].insn
)
41906 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
41907 i
, group_name
[list
->window
[i
].group
],
41908 i
, (void *)list
->window
[i
].insn
,
41909 i
, list
->window
[i
].path
,
41910 i
, list
->window
[i
].byte_len
,
41911 i
, list
->window
[i
].imm_bytes
);
41915 /* Print to stdout a dispatch window. */
41917 DEBUG_FUNCTION
void
41918 debug_dispatch_window (int window_num
)
41920 debug_dispatch_window_file (stdout
, window_num
);
41923 /* Print INSN dispatch information to FILE. */
41925 DEBUG_FUNCTION
static void
41926 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
41929 enum insn_path path
;
41930 enum dispatch_group group
;
41932 int num_imm_operand
;
41933 int num_imm32_operand
;
41934 int num_imm64_operand
;
41936 if (INSN_CODE (insn
) < 0)
41939 byte_len
= min_insn_size (insn
);
41940 path
= get_insn_path (insn
);
41941 group
= get_insn_group (insn
);
41942 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41943 &num_imm64_operand
);
41945 fprintf (file
, " insn info:\n");
41946 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
41947 group_name
[group
], path
, byte_len
);
41948 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41949 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
41952 /* Print to STDERR the status of the ready list with respect to
41953 dispatch windows. */
41955 DEBUG_FUNCTION
void
41956 debug_ready_dispatch (void)
41959 int no_ready
= number_in_ready ();
41961 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
41963 for (i
= 0; i
< no_ready
; i
++)
41964 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
41967 /* This routine is the driver of the dispatch scheduler. */
41970 do_dispatch (rtx insn
, int mode
)
41972 if (mode
== DISPATCH_INIT
)
41973 init_dispatch_sched ();
41974 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
41975 add_to_dispatch_window (insn
);
41978 /* Return TRUE if Dispatch Scheduling is supported. */
41981 has_dispatch (rtx insn
, int action
)
41983 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
)
41984 && flag_dispatch_scheduler
)
41990 case IS_DISPATCH_ON
:
41995 return is_cmp (insn
);
41997 case DISPATCH_VIOLATION
:
41998 return dispatch_violation ();
42000 case FITS_DISPATCH_WINDOW
:
42001 return fits_dispatch_window (insn
);
42007 /* Implementation of reassociation_width target hook used by
42008 reassoc phase to identify parallelism level in reassociated
42009 tree. Statements tree_code is passed in OPC. Arguments type
42012 Currently parallel reassociation is enabled for Atom
42013 processors only and we set reassociation width to be 2
42014 because Atom may issue up to 2 instructions per cycle.
42016 Return value should be fixed if parallel reassociation is
42017 enabled for other processors. */
42020 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
42021 enum machine_mode mode
)
42025 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
42027 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
42033 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
42034 place emms and femms instructions. */
42036 static enum machine_mode
42037 ix86_preferred_simd_mode (enum machine_mode mode
)
42045 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
42047 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
42049 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
42051 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
42054 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42060 if (!TARGET_VECTORIZE_DOUBLE
)
42062 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42064 else if (TARGET_SSE2
)
42073 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
42076 static unsigned int
42077 ix86_autovectorize_vector_sizes (void)
42079 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
42084 /* Return class of registers which could be used for pseudo of MODE
42085 and of class RCLASS for spilling instead of memory. Return NO_REGS
42086 if it is not possible or non-profitable. */
42088 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
42090 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
42091 && hard_reg_set_subset_p (reg_class_contents
[rclass
],
42092 reg_class_contents
[GENERAL_REGS
])
42093 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
)))
42098 /* Implement targetm.vectorize.init_cost. */
42101 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
42103 unsigned *cost
= XNEWVEC (unsigned, 3);
42104 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
42108 /* Implement targetm.vectorize.add_stmt_cost. */
42111 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
42112 struct _stmt_vec_info
*stmt_info
, int misalign
,
42113 enum vect_cost_model_location where
)
42115 unsigned *cost
= (unsigned *) data
;
42116 unsigned retval
= 0;
42118 if (flag_vect_cost_model
)
42120 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
42121 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
42123 /* Statements in an inner loop relative to the loop being
42124 vectorized are weighted more heavily. The value here is
42125 arbitrary and could potentially be improved with analysis. */
42126 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
42127 count
*= 50; /* FIXME. */
42129 retval
= (unsigned) (count
* stmt_cost
);
42130 cost
[where
] += retval
;
42136 /* Implement targetm.vectorize.finish_cost. */
42139 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
42140 unsigned *body_cost
, unsigned *epilogue_cost
)
42142 unsigned *cost
= (unsigned *) data
;
42143 *prologue_cost
= cost
[vect_prologue
];
42144 *body_cost
= cost
[vect_body
];
42145 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Releases the
   accumulator allocated by ix86_init_cost.
   NOTE(review): the body was missing from the damaged source;
   restored as free (data) per upstream — verify.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
42156 /* Validate target specific memory model bits in VAL. */
42158 static unsigned HOST_WIDE_INT
42159 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
42161 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
42164 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
42166 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
42168 warning (OPT_Winvalid_memory_model
,
42169 "Unknown architecture specific memory model");
42170 return MEMMODEL_SEQ_CST
;
42172 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
42173 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
42175 warning (OPT_Winvalid_memory_model
,
42176 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42177 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42179 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
42181 warning (OPT_Winvalid_memory_model
,
42182 "HLE_RELEASE not used with RELEASE or stronger memory model");
42183 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
42188 /* Initialize the GCC target structure. */
42189 #undef TARGET_RETURN_IN_MEMORY
42190 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42192 #undef TARGET_LEGITIMIZE_ADDRESS
42193 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42195 #undef TARGET_ATTRIBUTE_TABLE
42196 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42197 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42198 # undef TARGET_MERGE_DECL_ATTRIBUTES
42199 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42202 #undef TARGET_COMP_TYPE_ATTRIBUTES
42203 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42205 #undef TARGET_INIT_BUILTINS
42206 #define TARGET_INIT_BUILTINS ix86_init_builtins
42207 #undef TARGET_BUILTIN_DECL
42208 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42209 #undef TARGET_EXPAND_BUILTIN
42210 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42212 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42213 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42214 ix86_builtin_vectorized_function
42216 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42217 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42219 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42220 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42222 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42223 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42225 #undef TARGET_BUILTIN_RECIPROCAL
42226 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42228 #undef TARGET_ASM_FUNCTION_EPILOGUE
42229 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42231 #undef TARGET_ENCODE_SECTION_INFO
42232 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42233 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42235 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42238 #undef TARGET_ASM_OPEN_PAREN
42239 #define TARGET_ASM_OPEN_PAREN ""
42240 #undef TARGET_ASM_CLOSE_PAREN
42241 #define TARGET_ASM_CLOSE_PAREN ""
42243 #undef TARGET_ASM_BYTE_OP
42244 #define TARGET_ASM_BYTE_OP ASM_BYTE
42246 #undef TARGET_ASM_ALIGNED_HI_OP
42247 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42248 #undef TARGET_ASM_ALIGNED_SI_OP
42249 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42251 #undef TARGET_ASM_ALIGNED_DI_OP
42252 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42255 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42256 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42258 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
42259 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
42261 #undef TARGET_ASM_UNALIGNED_HI_OP
42262 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
42263 #undef TARGET_ASM_UNALIGNED_SI_OP
42264 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
42265 #undef TARGET_ASM_UNALIGNED_DI_OP
42266 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
42268 #undef TARGET_PRINT_OPERAND
42269 #define TARGET_PRINT_OPERAND ix86_print_operand
42270 #undef TARGET_PRINT_OPERAND_ADDRESS
42271 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
42272 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
42273 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
42274 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
42275 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
42277 #undef TARGET_SCHED_INIT_GLOBAL
42278 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
42279 #undef TARGET_SCHED_ADJUST_COST
42280 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
42281 #undef TARGET_SCHED_ISSUE_RATE
42282 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
42283 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
42284 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
42285 ia32_multipass_dfa_lookahead
42287 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
42288 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
42290 #undef TARGET_MEMMODEL_CHECK
42291 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
42294 #undef TARGET_HAVE_TLS
42295 #define TARGET_HAVE_TLS true
42297 #undef TARGET_CANNOT_FORCE_CONST_MEM
42298 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
42299 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
42300 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
42302 #undef TARGET_DELEGITIMIZE_ADDRESS
42303 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
42305 #undef TARGET_MS_BITFIELD_LAYOUT_P
42306 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
42309 #undef TARGET_BINDS_LOCAL_P
42310 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
42312 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42313 #undef TARGET_BINDS_LOCAL_P
42314 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
42317 #undef TARGET_ASM_OUTPUT_MI_THUNK
42318 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
42319 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
42320 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
42322 #undef TARGET_ASM_FILE_START
42323 #define TARGET_ASM_FILE_START x86_file_start
42325 #undef TARGET_OPTION_OVERRIDE
42326 #define TARGET_OPTION_OVERRIDE ix86_option_override
42328 #undef TARGET_REGISTER_MOVE_COST
42329 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
42330 #undef TARGET_MEMORY_MOVE_COST
42331 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
42332 #undef TARGET_RTX_COSTS
42333 #define TARGET_RTX_COSTS ix86_rtx_costs
42334 #undef TARGET_ADDRESS_COST
42335 #define TARGET_ADDRESS_COST ix86_address_cost
42337 #undef TARGET_FIXED_CONDITION_CODE_REGS
42338 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
42339 #undef TARGET_CC_MODES_COMPATIBLE
42340 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
42342 #undef TARGET_MACHINE_DEPENDENT_REORG
42343 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
42345 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
42346 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
42348 #undef TARGET_BUILD_BUILTIN_VA_LIST
42349 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
42351 #undef TARGET_FOLD_BUILTIN
42352 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
42354 #undef TARGET_COMPARE_VERSION_PRIORITY
42355 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
42357 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
42358 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
42359 ix86_generate_version_dispatcher_body
42361 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
42362 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
42363 ix86_get_function_versions_dispatcher
42365 #undef TARGET_ENUM_VA_LIST_P
42366 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
42368 #undef TARGET_FN_ABI_VA_LIST
42369 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
42371 #undef TARGET_CANONICAL_VA_LIST_TYPE
42372 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
42374 #undef TARGET_EXPAND_BUILTIN_VA_START
42375 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
42377 #undef TARGET_MD_ASM_CLOBBERS
42378 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
42380 #undef TARGET_PROMOTE_PROTOTYPES
42381 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
42382 #undef TARGET_STRUCT_VALUE_RTX
42383 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
42384 #undef TARGET_SETUP_INCOMING_VARARGS
42385 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
42386 #undef TARGET_MUST_PASS_IN_STACK
42387 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
42388 #undef TARGET_FUNCTION_ARG_ADVANCE
42389 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
42390 #undef TARGET_FUNCTION_ARG
42391 #define TARGET_FUNCTION_ARG ix86_function_arg
42392 #undef TARGET_FUNCTION_ARG_BOUNDARY
42393 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
42394 #undef TARGET_PASS_BY_REFERENCE
42395 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
42396 #undef TARGET_INTERNAL_ARG_POINTER
42397 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
42398 #undef TARGET_UPDATE_STACK_BOUNDARY
42399 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
42400 #undef TARGET_GET_DRAP_RTX
42401 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
42402 #undef TARGET_STRICT_ARGUMENT_NAMING
42403 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
42404 #undef TARGET_STATIC_CHAIN
42405 #define TARGET_STATIC_CHAIN ix86_static_chain
42406 #undef TARGET_TRAMPOLINE_INIT
42407 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
42408 #undef TARGET_RETURN_POPS_ARGS
42409 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
42411 #undef TARGET_LEGITIMATE_COMBINED_INSN
42412 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
42414 #undef TARGET_ASAN_SHADOW_OFFSET
42415 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
42417 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
42418 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
42420 #undef TARGET_SCALAR_MODE_SUPPORTED_P
42421 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
42423 #undef TARGET_VECTOR_MODE_SUPPORTED_P
42424 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
42426 #undef TARGET_C_MODE_FOR_SUFFIX
42427 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
42430 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
42431 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
42434 #ifdef SUBTARGET_INSERT_ATTRIBUTES
42435 #undef TARGET_INSERT_ATTRIBUTES
42436 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
42439 #undef TARGET_MANGLE_TYPE
42440 #define TARGET_MANGLE_TYPE ix86_mangle_type
42443 #undef TARGET_STACK_PROTECT_FAIL
42444 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
42447 #undef TARGET_FUNCTION_VALUE
42448 #define TARGET_FUNCTION_VALUE ix86_function_value
42450 #undef TARGET_FUNCTION_VALUE_REGNO_P
42451 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
42453 #undef TARGET_PROMOTE_FUNCTION_MODE
42454 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
42456 #undef TARGET_MEMBER_TYPE_FORCES_BLK
42457 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
42459 #undef TARGET_INSTANTIATE_DECLS
42460 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
42462 #undef TARGET_SECONDARY_RELOAD
42463 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
42465 #undef TARGET_CLASS_MAX_NREGS
42466 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
42468 #undef TARGET_PREFERRED_RELOAD_CLASS
42469 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
42470 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
42471 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
42472 #undef TARGET_CLASS_LIKELY_SPILLED_P
42473 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
42475 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
42476 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
42477 ix86_builtin_vectorization_cost
42478 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
42479 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
42480 ix86_vectorize_vec_perm_const_ok
42481 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
42482 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
42483 ix86_preferred_simd_mode
42484 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
42485 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
42486 ix86_autovectorize_vector_sizes
42487 #undef TARGET_VECTORIZE_INIT_COST
42488 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
42489 #undef TARGET_VECTORIZE_ADD_STMT_COST
42490 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
42491 #undef TARGET_VECTORIZE_FINISH_COST
42492 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
42493 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
42494 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
42496 #undef TARGET_SET_CURRENT_FUNCTION
42497 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
42499 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
42500 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
42502 #undef TARGET_OPTION_SAVE
42503 #define TARGET_OPTION_SAVE ix86_function_specific_save
42505 #undef TARGET_OPTION_RESTORE
42506 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
42508 #undef TARGET_OPTION_PRINT
42509 #define TARGET_OPTION_PRINT ix86_function_specific_print
42511 #undef TARGET_OPTION_FUNCTION_VERSIONS
42512 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
42514 #undef TARGET_CAN_INLINE_P
42515 #define TARGET_CAN_INLINE_P ix86_can_inline_p
42517 #undef TARGET_EXPAND_TO_RTL_HOOK
42518 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
42520 #undef TARGET_LEGITIMATE_ADDRESS_P
42521 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
42523 #undef TARGET_LRA_P
42524 #define TARGET_LRA_P hook_bool_void_true
42526 #undef TARGET_REGISTER_PRIORITY
42527 #define TARGET_REGISTER_PRIORITY ix86_register_priority
42529 #undef TARGET_LEGITIMATE_CONSTANT_P
42530 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
42532 #undef TARGET_FRAME_POINTER_REQUIRED
42533 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
42535 #undef TARGET_CAN_ELIMINATE
42536 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
42538 #undef TARGET_EXTRA_LIVE_ON_ENTRY
42539 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
42541 #undef TARGET_ASM_CODE_END
42542 #define TARGET_ASM_CODE_END ix86_code_end
42544 #undef TARGET_CONDITIONAL_REGISTER_USAGE
42545 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
42548 #undef TARGET_INIT_LIBFUNCS
42549 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
42552 #undef TARGET_SPILL_CLASS
42553 #define TARGET_SPILL_CLASS ix86_spill_class
42555 struct gcc_target targetm
= TARGET_INITIALIZER
;
42557 #include "gt-i386.h"