/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "insn-config.h"
30 #include "conditions.h"
32 #include "insn-codes.h"
33 #include "insn-attr.h"
40 #include "diagnostic-core.h"
42 #include "basic-block.h"
45 #include "target-def.h"
46 #include "common/common-target.h"
47 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "sched-int.h"
61 #include "diagnostic.h"
63 #include "tree-pass.h"
65 #include "pass_manager.h"
67 static rtx
legitimize_dllimport_symbol (rtx
, bool);
68 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
69 static rtx
legitimize_pe_coff_symbol (rtx
, bool);
/* Default stack-probe limit; -1 means "no limit".  The conditional was
   missing its #endif in the source as received.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The macro
   was truncated in the source as received; the final ": 4)" arm (the
   "other" bucket, matching the 5-entry cost arrays below) is restored.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder algorithm table used where a 64-bit variant does not apply.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
89 static stringop_algs ix86_size_memcpy
[2] = {
90 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
91 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
92 static stringop_algs ix86_size_memset
[2] = {
93 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
94 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
97 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
98 COSTS_N_BYTES (2), /* cost of an add instruction */
99 COSTS_N_BYTES (3), /* cost of a lea instruction */
100 COSTS_N_BYTES (2), /* variable shift costs */
101 COSTS_N_BYTES (3), /* constant shift costs */
102 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
103 COSTS_N_BYTES (3), /* HI */
104 COSTS_N_BYTES (3), /* SI */
105 COSTS_N_BYTES (3), /* DI */
106 COSTS_N_BYTES (5)}, /* other */
107 0, /* cost of multiply per each bit set */
108 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
109 COSTS_N_BYTES (3), /* HI */
110 COSTS_N_BYTES (3), /* SI */
111 COSTS_N_BYTES (3), /* DI */
112 COSTS_N_BYTES (5)}, /* other */
113 COSTS_N_BYTES (3), /* cost of movsx */
114 COSTS_N_BYTES (3), /* cost of movzx */
115 0, /* "large" insn */
117 2, /* cost for loading QImode using movzbl */
118 {2, 2, 2}, /* cost of loading integer registers
119 in QImode, HImode and SImode.
120 Relative to reg-reg move (2). */
121 {2, 2, 2}, /* cost of storing integer registers */
122 2, /* cost of reg,reg fld/fst */
123 {2, 2, 2}, /* cost of loading fp registers
124 in SFmode, DFmode and XFmode */
125 {2, 2, 2}, /* cost of storing fp registers
126 in SFmode, DFmode and XFmode */
127 3, /* cost of moving MMX register */
128 {3, 3}, /* cost of loading MMX registers
129 in SImode and DImode */
130 {3, 3}, /* cost of storing MMX registers
131 in SImode and DImode */
132 3, /* cost of moving SSE register */
133 {3, 3, 3}, /* cost of loading SSE registers
134 in SImode, DImode and TImode */
135 {3, 3, 3}, /* cost of storing SSE registers
136 in SImode, DImode and TImode */
137 3, /* MMX or SSE register to integer */
138 0, /* size of l1 cache */
139 0, /* size of l2 cache */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
144 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
145 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
146 COSTS_N_BYTES (2), /* cost of FABS instruction. */
147 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
148 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
151 1, /* scalar_stmt_cost. */
152 1, /* scalar load_cost. */
153 1, /* scalar_store_cost. */
154 1, /* vec_stmt_cost. */
155 1, /* vec_to_scalar_cost. */
156 1, /* scalar_to_vec_cost. */
157 1, /* vec_align_load_cost. */
158 1, /* vec_unalign_load_cost. */
159 1, /* vec_store_cost. */
160 1, /* cond_taken_branch_cost. */
161 1, /* cond_not_taken_branch_cost. */
164 /* Processor costs (relative to an add) */
165 static stringop_algs i386_memcpy
[2] = {
166 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
167 DUMMY_STRINGOP_ALGS
};
168 static stringop_algs i386_memset
[2] = {
169 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
170 DUMMY_STRINGOP_ALGS
};
173 struct processor_costs i386_cost
= { /* 386 specific costs */
174 COSTS_N_INSNS (1), /* cost of an add instruction */
175 COSTS_N_INSNS (1), /* cost of a lea instruction */
176 COSTS_N_INSNS (3), /* variable shift costs */
177 COSTS_N_INSNS (2), /* constant shift costs */
178 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
179 COSTS_N_INSNS (6), /* HI */
180 COSTS_N_INSNS (6), /* SI */
181 COSTS_N_INSNS (6), /* DI */
182 COSTS_N_INSNS (6)}, /* other */
183 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
184 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
185 COSTS_N_INSNS (23), /* HI */
186 COSTS_N_INSNS (23), /* SI */
187 COSTS_N_INSNS (23), /* DI */
188 COSTS_N_INSNS (23)}, /* other */
189 COSTS_N_INSNS (3), /* cost of movsx */
190 COSTS_N_INSNS (2), /* cost of movzx */
191 15, /* "large" insn */
193 4, /* cost for loading QImode using movzbl */
194 {2, 4, 2}, /* cost of loading integer registers
195 in QImode, HImode and SImode.
196 Relative to reg-reg move (2). */
197 {2, 4, 2}, /* cost of storing integer registers */
198 2, /* cost of reg,reg fld/fst */
199 {8, 8, 8}, /* cost of loading fp registers
200 in SFmode, DFmode and XFmode */
201 {8, 8, 8}, /* cost of storing fp registers
202 in SFmode, DFmode and XFmode */
203 2, /* cost of moving MMX register */
204 {4, 8}, /* cost of loading MMX registers
205 in SImode and DImode */
206 {4, 8}, /* cost of storing MMX registers
207 in SImode and DImode */
208 2, /* cost of moving SSE register */
209 {4, 8, 16}, /* cost of loading SSE registers
210 in SImode, DImode and TImode */
211 {4, 8, 16}, /* cost of storing SSE registers
212 in SImode, DImode and TImode */
213 3, /* MMX or SSE register to integer */
214 0, /* size of l1 cache */
215 0, /* size of l2 cache */
216 0, /* size of prefetch block */
217 0, /* number of parallel prefetches */
219 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
220 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
221 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
222 COSTS_N_INSNS (22), /* cost of FABS instruction. */
223 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
224 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
227 1, /* scalar_stmt_cost. */
228 1, /* scalar load_cost. */
229 1, /* scalar_store_cost. */
230 1, /* vec_stmt_cost. */
231 1, /* vec_to_scalar_cost. */
232 1, /* scalar_to_vec_cost. */
233 1, /* vec_align_load_cost. */
234 2, /* vec_unalign_load_cost. */
235 1, /* vec_store_cost. */
236 3, /* cond_taken_branch_cost. */
237 1, /* cond_not_taken_branch_cost. */
240 static stringop_algs i486_memcpy
[2] = {
241 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
242 DUMMY_STRINGOP_ALGS
};
243 static stringop_algs i486_memset
[2] = {
244 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
245 DUMMY_STRINGOP_ALGS
};
248 struct processor_costs i486_cost
= { /* 486 specific costs */
249 COSTS_N_INSNS (1), /* cost of an add instruction */
250 COSTS_N_INSNS (1), /* cost of a lea instruction */
251 COSTS_N_INSNS (3), /* variable shift costs */
252 COSTS_N_INSNS (2), /* constant shift costs */
253 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
254 COSTS_N_INSNS (12), /* HI */
255 COSTS_N_INSNS (12), /* SI */
256 COSTS_N_INSNS (12), /* DI */
257 COSTS_N_INSNS (12)}, /* other */
258 1, /* cost of multiply per each bit set */
259 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
260 COSTS_N_INSNS (40), /* HI */
261 COSTS_N_INSNS (40), /* SI */
262 COSTS_N_INSNS (40), /* DI */
263 COSTS_N_INSNS (40)}, /* other */
264 COSTS_N_INSNS (3), /* cost of movsx */
265 COSTS_N_INSNS (2), /* cost of movzx */
266 15, /* "large" insn */
268 4, /* cost for loading QImode using movzbl */
269 {2, 4, 2}, /* cost of loading integer registers
270 in QImode, HImode and SImode.
271 Relative to reg-reg move (2). */
272 {2, 4, 2}, /* cost of storing integer registers */
273 2, /* cost of reg,reg fld/fst */
274 {8, 8, 8}, /* cost of loading fp registers
275 in SFmode, DFmode and XFmode */
276 {8, 8, 8}, /* cost of storing fp registers
277 in SFmode, DFmode and XFmode */
278 2, /* cost of moving MMX register */
279 {4, 8}, /* cost of loading MMX registers
280 in SImode and DImode */
281 {4, 8}, /* cost of storing MMX registers
282 in SImode and DImode */
283 2, /* cost of moving SSE register */
284 {4, 8, 16}, /* cost of loading SSE registers
285 in SImode, DImode and TImode */
286 {4, 8, 16}, /* cost of storing SSE registers
287 in SImode, DImode and TImode */
288 3, /* MMX or SSE register to integer */
289 4, /* size of l1 cache. 486 has 8kB cache
290 shared for code and data, so 4kB is
291 not really precise. */
292 4, /* size of l2 cache */
293 0, /* size of prefetch block */
294 0, /* number of parallel prefetches */
296 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
297 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
298 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
299 COSTS_N_INSNS (3), /* cost of FABS instruction. */
300 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
301 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
304 1, /* scalar_stmt_cost. */
305 1, /* scalar load_cost. */
306 1, /* scalar_store_cost. */
307 1, /* vec_stmt_cost. */
308 1, /* vec_to_scalar_cost. */
309 1, /* scalar_to_vec_cost. */
310 1, /* vec_align_load_cost. */
311 2, /* vec_unalign_load_cost. */
312 1, /* vec_store_cost. */
313 3, /* cond_taken_branch_cost. */
314 1, /* cond_not_taken_branch_cost. */
317 static stringop_algs pentium_memcpy
[2] = {
318 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
319 DUMMY_STRINGOP_ALGS
};
320 static stringop_algs pentium_memset
[2] = {
321 {libcall
, {{-1, rep_prefix_4_byte
, false}}},
322 DUMMY_STRINGOP_ALGS
};
325 struct processor_costs pentium_cost
= {
326 COSTS_N_INSNS (1), /* cost of an add instruction */
327 COSTS_N_INSNS (1), /* cost of a lea instruction */
328 COSTS_N_INSNS (4), /* variable shift costs */
329 COSTS_N_INSNS (1), /* constant shift costs */
330 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
331 COSTS_N_INSNS (11), /* HI */
332 COSTS_N_INSNS (11), /* SI */
333 COSTS_N_INSNS (11), /* DI */
334 COSTS_N_INSNS (11)}, /* other */
335 0, /* cost of multiply per each bit set */
336 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
337 COSTS_N_INSNS (25), /* HI */
338 COSTS_N_INSNS (25), /* SI */
339 COSTS_N_INSNS (25), /* DI */
340 COSTS_N_INSNS (25)}, /* other */
341 COSTS_N_INSNS (3), /* cost of movsx */
342 COSTS_N_INSNS (2), /* cost of movzx */
343 8, /* "large" insn */
345 6, /* cost for loading QImode using movzbl */
346 {2, 4, 2}, /* cost of loading integer registers
347 in QImode, HImode and SImode.
348 Relative to reg-reg move (2). */
349 {2, 4, 2}, /* cost of storing integer registers */
350 2, /* cost of reg,reg fld/fst */
351 {2, 2, 6}, /* cost of loading fp registers
352 in SFmode, DFmode and XFmode */
353 {4, 4, 6}, /* cost of storing fp registers
354 in SFmode, DFmode and XFmode */
355 8, /* cost of moving MMX register */
356 {8, 8}, /* cost of loading MMX registers
357 in SImode and DImode */
358 {8, 8}, /* cost of storing MMX registers
359 in SImode and DImode */
360 2, /* cost of moving SSE register */
361 {4, 8, 16}, /* cost of loading SSE registers
362 in SImode, DImode and TImode */
363 {4, 8, 16}, /* cost of storing SSE registers
364 in SImode, DImode and TImode */
365 3, /* MMX or SSE register to integer */
366 8, /* size of l1 cache. */
367 8, /* size of l2 cache */
368 0, /* size of prefetch block */
369 0, /* number of parallel prefetches */
371 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
372 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
373 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
374 COSTS_N_INSNS (1), /* cost of FABS instruction. */
375 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
376 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
379 1, /* scalar_stmt_cost. */
380 1, /* scalar load_cost. */
381 1, /* scalar_store_cost. */
382 1, /* vec_stmt_cost. */
383 1, /* vec_to_scalar_cost. */
384 1, /* scalar_to_vec_cost. */
385 1, /* vec_align_load_cost. */
386 2, /* vec_unalign_load_cost. */
387 1, /* vec_store_cost. */
388 3, /* cond_taken_branch_cost. */
389 1, /* cond_not_taken_branch_cost. */
392 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
393 (we ensure the alignment). For small blocks inline loop is still a
394 noticeable win, for bigger blocks either rep movsl or rep movsb is
395 way to go. Rep movsb has apparently more expensive startup time in CPU,
396 but after 4K the difference is down in the noise. */
397 static stringop_algs pentiumpro_memcpy
[2] = {
398 {rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
399 {8192, rep_prefix_4_byte
, false},
400 {-1, rep_prefix_1_byte
, false}}},
401 DUMMY_STRINGOP_ALGS
};
402 static stringop_algs pentiumpro_memset
[2] = {
403 {rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
404 {8192, rep_prefix_4_byte
, false},
405 {-1, libcall
, false}}},
406 DUMMY_STRINGOP_ALGS
};
408 struct processor_costs pentiumpro_cost
= {
409 COSTS_N_INSNS (1), /* cost of an add instruction */
410 COSTS_N_INSNS (1), /* cost of a lea instruction */
411 COSTS_N_INSNS (1), /* variable shift costs */
412 COSTS_N_INSNS (1), /* constant shift costs */
413 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
414 COSTS_N_INSNS (4), /* HI */
415 COSTS_N_INSNS (4), /* SI */
416 COSTS_N_INSNS (4), /* DI */
417 COSTS_N_INSNS (4)}, /* other */
418 0, /* cost of multiply per each bit set */
419 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
420 COSTS_N_INSNS (17), /* HI */
421 COSTS_N_INSNS (17), /* SI */
422 COSTS_N_INSNS (17), /* DI */
423 COSTS_N_INSNS (17)}, /* other */
424 COSTS_N_INSNS (1), /* cost of movsx */
425 COSTS_N_INSNS (1), /* cost of movzx */
426 8, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 4, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 2, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of storing fp registers
437 in SFmode, DFmode and XFmode */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 2, /* cost of moving SSE register */
444 {2, 2, 8}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 3, /* MMX or SSE register to integer */
449 8, /* size of l1 cache. */
450 256, /* size of l2 cache */
451 32, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
455 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
456 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
459 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
462 1, /* scalar_stmt_cost. */
463 1, /* scalar load_cost. */
464 1, /* scalar_store_cost. */
465 1, /* vec_stmt_cost. */
466 1, /* vec_to_scalar_cost. */
467 1, /* scalar_to_vec_cost. */
468 1, /* vec_align_load_cost. */
469 2, /* vec_unalign_load_cost. */
470 1, /* vec_store_cost. */
471 3, /* cond_taken_branch_cost. */
472 1, /* cond_not_taken_branch_cost. */
475 static stringop_algs geode_memcpy
[2] = {
476 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
477 DUMMY_STRINGOP_ALGS
};
478 static stringop_algs geode_memset
[2] = {
479 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
480 DUMMY_STRINGOP_ALGS
};
482 struct processor_costs geode_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (1), /* cost of a lea instruction */
485 COSTS_N_INSNS (2), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (4), /* HI */
489 COSTS_N_INSNS (7), /* SI */
490 COSTS_N_INSNS (7), /* DI */
491 COSTS_N_INSNS (7)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (23), /* HI */
495 COSTS_N_INSNS (39), /* SI */
496 COSTS_N_INSNS (39), /* DI */
497 COSTS_N_INSNS (39)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 1, /* cost for loading QImode using movzbl */
503 {1, 1, 1}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {1, 1, 1}, /* cost of storing integer registers */
507 1, /* cost of reg,reg fld/fst */
508 {1, 1, 1}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {4, 6, 6}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
513 1, /* cost of moving MMX register */
514 {1, 1}, /* cost of loading MMX registers
515 in SImode and DImode */
516 {1, 1}, /* cost of storing MMX registers
517 in SImode and DImode */
518 1, /* cost of moving SSE register */
519 {1, 1, 1}, /* cost of loading SSE registers
520 in SImode, DImode and TImode */
521 {1, 1, 1}, /* cost of storing SSE registers
522 in SImode, DImode and TImode */
523 1, /* MMX or SSE register to integer */
524 64, /* size of l1 cache. */
525 128, /* size of l2 cache. */
526 32, /* size of prefetch block */
527 1, /* number of parallel prefetches */
529 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
530 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
531 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
532 COSTS_N_INSNS (1), /* cost of FABS instruction. */
533 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
534 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
537 1, /* scalar_stmt_cost. */
538 1, /* scalar load_cost. */
539 1, /* scalar_store_cost. */
540 1, /* vec_stmt_cost. */
541 1, /* vec_to_scalar_cost. */
542 1, /* scalar_to_vec_cost. */
543 1, /* vec_align_load_cost. */
544 2, /* vec_unalign_load_cost. */
545 1, /* vec_store_cost. */
546 3, /* cond_taken_branch_cost. */
547 1, /* cond_not_taken_branch_cost. */
550 static stringop_algs k6_memcpy
[2] = {
551 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
552 DUMMY_STRINGOP_ALGS
};
553 static stringop_algs k6_memset
[2] = {
554 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
555 DUMMY_STRINGOP_ALGS
};
557 struct processor_costs k6_cost
= {
558 COSTS_N_INSNS (1), /* cost of an add instruction */
559 COSTS_N_INSNS (2), /* cost of a lea instruction */
560 COSTS_N_INSNS (1), /* variable shift costs */
561 COSTS_N_INSNS (1), /* constant shift costs */
562 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
563 COSTS_N_INSNS (3), /* HI */
564 COSTS_N_INSNS (3), /* SI */
565 COSTS_N_INSNS (3), /* DI */
566 COSTS_N_INSNS (3)}, /* other */
567 0, /* cost of multiply per each bit set */
568 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
569 COSTS_N_INSNS (18), /* HI */
570 COSTS_N_INSNS (18), /* SI */
571 COSTS_N_INSNS (18), /* DI */
572 COSTS_N_INSNS (18)}, /* other */
573 COSTS_N_INSNS (2), /* cost of movsx */
574 COSTS_N_INSNS (2), /* cost of movzx */
575 8, /* "large" insn */
577 3, /* cost for loading QImode using movzbl */
578 {4, 5, 4}, /* cost of loading integer registers
579 in QImode, HImode and SImode.
580 Relative to reg-reg move (2). */
581 {2, 3, 2}, /* cost of storing integer registers */
582 4, /* cost of reg,reg fld/fst */
583 {6, 6, 6}, /* cost of loading fp registers
584 in SFmode, DFmode and XFmode */
585 {4, 4, 4}, /* cost of storing fp registers
586 in SFmode, DFmode and XFmode */
587 2, /* cost of moving MMX register */
588 {2, 2}, /* cost of loading MMX registers
589 in SImode and DImode */
590 {2, 2}, /* cost of storing MMX registers
591 in SImode and DImode */
592 2, /* cost of moving SSE register */
593 {2, 2, 8}, /* cost of loading SSE registers
594 in SImode, DImode and TImode */
595 {2, 2, 8}, /* cost of storing SSE registers
596 in SImode, DImode and TImode */
597 6, /* MMX or SSE register to integer */
598 32, /* size of l1 cache. */
599 32, /* size of l2 cache. Some models
600 have integrated l2 cache, but
601 optimizing for k6 is not important
602 enough to worry about that. */
603 32, /* size of prefetch block */
604 1, /* number of parallel prefetches */
606 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
607 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
608 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
609 COSTS_N_INSNS (2), /* cost of FABS instruction. */
610 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
611 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
614 1, /* scalar_stmt_cost. */
615 1, /* scalar load_cost. */
616 1, /* scalar_store_cost. */
617 1, /* vec_stmt_cost. */
618 1, /* vec_to_scalar_cost. */
619 1, /* scalar_to_vec_cost. */
620 1, /* vec_align_load_cost. */
621 2, /* vec_unalign_load_cost. */
622 1, /* vec_store_cost. */
623 3, /* cond_taken_branch_cost. */
624 1, /* cond_not_taken_branch_cost. */
627 /* For some reason, Athlon deals better with REP prefix (relative to loops)
628 compared to K8. Alignment becomes important after 8 bytes for memcpy and
629 128 bytes for memset. */
630 static stringop_algs athlon_memcpy
[2] = {
631 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
632 DUMMY_STRINGOP_ALGS
};
633 static stringop_algs athlon_memset
[2] = {
634 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
635 DUMMY_STRINGOP_ALGS
};
637 struct processor_costs athlon_cost
= {
638 COSTS_N_INSNS (1), /* cost of an add instruction */
639 COSTS_N_INSNS (2), /* cost of a lea instruction */
640 COSTS_N_INSNS (1), /* variable shift costs */
641 COSTS_N_INSNS (1), /* constant shift costs */
642 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
643 COSTS_N_INSNS (5), /* HI */
644 COSTS_N_INSNS (5), /* SI */
645 COSTS_N_INSNS (5), /* DI */
646 COSTS_N_INSNS (5)}, /* other */
647 0, /* cost of multiply per each bit set */
648 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
649 COSTS_N_INSNS (26), /* HI */
650 COSTS_N_INSNS (42), /* SI */
651 COSTS_N_INSNS (74), /* DI */
652 COSTS_N_INSNS (74)}, /* other */
653 COSTS_N_INSNS (1), /* cost of movsx */
654 COSTS_N_INSNS (1), /* cost of movzx */
655 8, /* "large" insn */
657 4, /* cost for loading QImode using movzbl */
658 {3, 4, 3}, /* cost of loading integer registers
659 in QImode, HImode and SImode.
660 Relative to reg-reg move (2). */
661 {3, 4, 3}, /* cost of storing integer registers */
662 4, /* cost of reg,reg fld/fst */
663 {4, 4, 12}, /* cost of loading fp registers
664 in SFmode, DFmode and XFmode */
665 {6, 6, 8}, /* cost of storing fp registers
666 in SFmode, DFmode and XFmode */
667 2, /* cost of moving MMX register */
668 {4, 4}, /* cost of loading MMX registers
669 in SImode and DImode */
670 {4, 4}, /* cost of storing MMX registers
671 in SImode and DImode */
672 2, /* cost of moving SSE register */
673 {4, 4, 6}, /* cost of loading SSE registers
674 in SImode, DImode and TImode */
675 {4, 4, 5}, /* cost of storing SSE registers
676 in SImode, DImode and TImode */
677 5, /* MMX or SSE register to integer */
678 64, /* size of l1 cache. */
679 256, /* size of l2 cache. */
680 64, /* size of prefetch block */
681 6, /* number of parallel prefetches */
683 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
684 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
685 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
686 COSTS_N_INSNS (2), /* cost of FABS instruction. */
687 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
688 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
691 1, /* scalar_stmt_cost. */
692 1, /* scalar load_cost. */
693 1, /* scalar_store_cost. */
694 1, /* vec_stmt_cost. */
695 1, /* vec_to_scalar_cost. */
696 1, /* scalar_to_vec_cost. */
697 1, /* vec_align_load_cost. */
698 2, /* vec_unalign_load_cost. */
699 1, /* vec_store_cost. */
700 3, /* cond_taken_branch_cost. */
701 1, /* cond_not_taken_branch_cost. */
704 /* K8 has optimized REP instruction for medium sized blocks, but for very
705 small blocks it is better to use loop. For large blocks, libcall can
706 do nontemporary accesses and beat inline considerably. */
707 static stringop_algs k8_memcpy
[2] = {
708 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
709 {-1, rep_prefix_4_byte
, false}}},
710 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
711 {-1, libcall
, false}}}};
712 static stringop_algs k8_memset
[2] = {
713 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
714 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
715 {libcall
, {{48, unrolled_loop
, false},
716 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
718 struct processor_costs k8_cost
= {
719 COSTS_N_INSNS (1), /* cost of an add instruction */
720 COSTS_N_INSNS (2), /* cost of a lea instruction */
721 COSTS_N_INSNS (1), /* variable shift costs */
722 COSTS_N_INSNS (1), /* constant shift costs */
723 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
724 COSTS_N_INSNS (4), /* HI */
725 COSTS_N_INSNS (3), /* SI */
726 COSTS_N_INSNS (4), /* DI */
727 COSTS_N_INSNS (5)}, /* other */
728 0, /* cost of multiply per each bit set */
729 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
730 COSTS_N_INSNS (26), /* HI */
731 COSTS_N_INSNS (42), /* SI */
732 COSTS_N_INSNS (74), /* DI */
733 COSTS_N_INSNS (74)}, /* other */
734 COSTS_N_INSNS (1), /* cost of movsx */
735 COSTS_N_INSNS (1), /* cost of movzx */
736 8, /* "large" insn */
738 4, /* cost for loading QImode using movzbl */
739 {3, 4, 3}, /* cost of loading integer registers
740 in QImode, HImode and SImode.
741 Relative to reg-reg move (2). */
742 {3, 4, 3}, /* cost of storing integer registers */
743 4, /* cost of reg,reg fld/fst */
744 {4, 4, 12}, /* cost of loading fp registers
745 in SFmode, DFmode and XFmode */
746 {6, 6, 8}, /* cost of storing fp registers
747 in SFmode, DFmode and XFmode */
748 2, /* cost of moving MMX register */
749 {3, 3}, /* cost of loading MMX registers
750 in SImode and DImode */
751 {4, 4}, /* cost of storing MMX registers
752 in SImode and DImode */
753 2, /* cost of moving SSE register */
754 {4, 3, 6}, /* cost of loading SSE registers
755 in SImode, DImode and TImode */
756 {4, 4, 5}, /* cost of storing SSE registers
757 in SImode, DImode and TImode */
758 5, /* MMX or SSE register to integer */
759 64, /* size of l1 cache. */
760 512, /* size of l2 cache. */
761 64, /* size of prefetch block */
762 /* New AMD processors never drop prefetches; if they cannot be performed
763 immediately, they are queued. We set number of simultaneous prefetches
764 to a large constant to reflect this (it probably is not a good idea not
765 to limit number of prefetches at all, as their execution also takes some
767 100, /* number of parallel prefetches */
769 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
770 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
771 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
772 COSTS_N_INSNS (2), /* cost of FABS instruction. */
773 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
774 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
778 4, /* scalar_stmt_cost. */
779 2, /* scalar load_cost. */
780 2, /* scalar_store_cost. */
781 5, /* vec_stmt_cost. */
782 0, /* vec_to_scalar_cost. */
783 2, /* scalar_to_vec_cost. */
784 2, /* vec_align_load_cost. */
785 3, /* vec_unalign_load_cost. */
786 3, /* vec_store_cost. */
787 3, /* cond_taken_branch_cost. */
788 2, /* cond_not_taken_branch_cost. */
791 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
792 very small blocks it is better to use loop. For large blocks, libcall can
793 do nontemporary accesses and beat inline considerably. */
794 static stringop_algs amdfam10_memcpy
[2] = {
795 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
796 {-1, rep_prefix_4_byte
, false}}},
797 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
798 {-1, libcall
, false}}}};
799 static stringop_algs amdfam10_memset
[2] = {
800 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
801 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
802 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
803 {-1, libcall
, false}}}};
804 struct processor_costs amdfam10_cost
= {
805 COSTS_N_INSNS (1), /* cost of an add instruction */
806 COSTS_N_INSNS (2), /* cost of a lea instruction */
807 COSTS_N_INSNS (1), /* variable shift costs */
808 COSTS_N_INSNS (1), /* constant shift costs */
809 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
810 COSTS_N_INSNS (4), /* HI */
811 COSTS_N_INSNS (3), /* SI */
812 COSTS_N_INSNS (4), /* DI */
813 COSTS_N_INSNS (5)}, /* other */
814 0, /* cost of multiply per each bit set */
815 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
816 COSTS_N_INSNS (35), /* HI */
817 COSTS_N_INSNS (51), /* SI */
818 COSTS_N_INSNS (83), /* DI */
819 COSTS_N_INSNS (83)}, /* other */
820 COSTS_N_INSNS (1), /* cost of movsx */
821 COSTS_N_INSNS (1), /* cost of movzx */
822 8, /* "large" insn */
824 4, /* cost for loading QImode using movzbl */
825 {3, 4, 3}, /* cost of loading integer registers
826 in QImode, HImode and SImode.
827 Relative to reg-reg move (2). */
828 {3, 4, 3}, /* cost of storing integer registers */
829 4, /* cost of reg,reg fld/fst */
830 {4, 4, 12}, /* cost of loading fp registers
831 in SFmode, DFmode and XFmode */
832 {6, 6, 8}, /* cost of storing fp registers
833 in SFmode, DFmode and XFmode */
834 2, /* cost of moving MMX register */
835 {3, 3}, /* cost of loading MMX registers
836 in SImode and DImode */
837 {4, 4}, /* cost of storing MMX registers
838 in SImode and DImode */
839 2, /* cost of moving SSE register */
840 {4, 4, 3}, /* cost of loading SSE registers
841 in SImode, DImode and TImode */
842 {4, 4, 5}, /* cost of storing SSE registers
843 in SImode, DImode and TImode */
844 3, /* MMX or SSE register to integer */
846 MOVD reg64, xmmreg Double FSTORE 4
847 MOVD reg32, xmmreg Double FSTORE 4
849 MOVD reg64, xmmreg Double FADD 3
851 MOVD reg32, xmmreg Double FADD 3
853 64, /* size of l1 cache. */
854 512, /* size of l2 cache. */
855 64, /* size of prefetch block */
856 /* New AMD processors never drop prefetches; if they cannot be performed
857 immediately, they are queued. We set number of simultaneous prefetches
858 to a large constant to reflect this (it probably is not a good idea not
859 to limit number of prefetches at all, as their execution also takes some
861 100, /* number of parallel prefetches */
863 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
864 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
865 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
866 COSTS_N_INSNS (2), /* cost of FABS instruction. */
867 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
868 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
872 4, /* scalar_stmt_cost. */
873 2, /* scalar load_cost. */
874 2, /* scalar_store_cost. */
875 6, /* vec_stmt_cost. */
876 0, /* vec_to_scalar_cost. */
877 2, /* scalar_to_vec_cost. */
878 2, /* vec_align_load_cost. */
879 2, /* vec_unalign_load_cost. */
880 2, /* vec_store_cost. */
881 2, /* cond_taken_branch_cost. */
882 1, /* cond_not_taken_branch_cost. */
885 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
886 very small blocks it is better to use loop. For large blocks, libcall
887 can do nontemporary accesses and beat inline considerably. */
888 static stringop_algs bdver1_memcpy
[2] = {
889 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
890 {-1, rep_prefix_4_byte
, false}}},
891 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
892 {-1, libcall
, false}}}};
893 static stringop_algs bdver1_memset
[2] = {
894 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
895 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
896 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
897 {-1, libcall
, false}}}};
899 const struct processor_costs bdver1_cost
= {
900 COSTS_N_INSNS (1), /* cost of an add instruction */
901 COSTS_N_INSNS (1), /* cost of a lea instruction */
902 COSTS_N_INSNS (1), /* variable shift costs */
903 COSTS_N_INSNS (1), /* constant shift costs */
904 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
905 COSTS_N_INSNS (4), /* HI */
906 COSTS_N_INSNS (4), /* SI */
907 COSTS_N_INSNS (6), /* DI */
908 COSTS_N_INSNS (6)}, /* other */
909 0, /* cost of multiply per each bit set */
910 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
911 COSTS_N_INSNS (35), /* HI */
912 COSTS_N_INSNS (51), /* SI */
913 COSTS_N_INSNS (83), /* DI */
914 COSTS_N_INSNS (83)}, /* other */
915 COSTS_N_INSNS (1), /* cost of movsx */
916 COSTS_N_INSNS (1), /* cost of movzx */
917 8, /* "large" insn */
919 4, /* cost for loading QImode using movzbl */
920 {5, 5, 4}, /* cost of loading integer registers
921 in QImode, HImode and SImode.
922 Relative to reg-reg move (2). */
923 {4, 4, 4}, /* cost of storing integer registers */
924 2, /* cost of reg,reg fld/fst */
925 {5, 5, 12}, /* cost of loading fp registers
926 in SFmode, DFmode and XFmode */
927 {4, 4, 8}, /* cost of storing fp registers
928 in SFmode, DFmode and XFmode */
929 2, /* cost of moving MMX register */
930 {4, 4}, /* cost of loading MMX registers
931 in SImode and DImode */
932 {4, 4}, /* cost of storing MMX registers
933 in SImode and DImode */
934 2, /* cost of moving SSE register */
935 {4, 4, 4}, /* cost of loading SSE registers
936 in SImode, DImode and TImode */
937 {4, 4, 4}, /* cost of storing SSE registers
938 in SImode, DImode and TImode */
939 2, /* MMX or SSE register to integer */
941 MOVD reg64, xmmreg Double FSTORE 4
942 MOVD reg32, xmmreg Double FSTORE 4
944 MOVD reg64, xmmreg Double FADD 3
946 MOVD reg32, xmmreg Double FADD 3
948 16, /* size of l1 cache. */
949 2048, /* size of l2 cache. */
950 64, /* size of prefetch block */
951 /* New AMD processors never drop prefetches; if they cannot be performed
952 immediately, they are queued. We set number of simultaneous prefetches
953 to a large constant to reflect this (it probably is not a good idea not
954 to limit number of prefetches at all, as their execution also takes some
956 100, /* number of parallel prefetches */
958 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
959 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
960 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
961 COSTS_N_INSNS (2), /* cost of FABS instruction. */
962 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
963 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
967 6, /* scalar_stmt_cost. */
968 4, /* scalar load_cost. */
969 4, /* scalar_store_cost. */
970 6, /* vec_stmt_cost. */
971 0, /* vec_to_scalar_cost. */
972 2, /* scalar_to_vec_cost. */
973 4, /* vec_align_load_cost. */
974 4, /* vec_unalign_load_cost. */
975 4, /* vec_store_cost. */
976 2, /* cond_taken_branch_cost. */
977 1, /* cond_not_taken_branch_cost. */
980 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
981 very small blocks it is better to use loop. For large blocks, libcall
982 can do nontemporary accesses and beat inline considerably. */
984 static stringop_algs bdver2_memcpy
[2] = {
985 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
986 {-1, rep_prefix_4_byte
, false}}},
987 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
988 {-1, libcall
, false}}}};
989 static stringop_algs bdver2_memset
[2] = {
990 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
991 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
992 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
993 {-1, libcall
, false}}}};
995 const struct processor_costs bdver2_cost
= {
996 COSTS_N_INSNS (1), /* cost of an add instruction */
997 COSTS_N_INSNS (1), /* cost of a lea instruction */
998 COSTS_N_INSNS (1), /* variable shift costs */
999 COSTS_N_INSNS (1), /* constant shift costs */
1000 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1001 COSTS_N_INSNS (4), /* HI */
1002 COSTS_N_INSNS (4), /* SI */
1003 COSTS_N_INSNS (6), /* DI */
1004 COSTS_N_INSNS (6)}, /* other */
1005 0, /* cost of multiply per each bit set */
1006 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1007 COSTS_N_INSNS (35), /* HI */
1008 COSTS_N_INSNS (51), /* SI */
1009 COSTS_N_INSNS (83), /* DI */
1010 COSTS_N_INSNS (83)}, /* other */
1011 COSTS_N_INSNS (1), /* cost of movsx */
1012 COSTS_N_INSNS (1), /* cost of movzx */
1013 8, /* "large" insn */
1015 4, /* cost for loading QImode using movzbl */
1016 {5, 5, 4}, /* cost of loading integer registers
1017 in QImode, HImode and SImode.
1018 Relative to reg-reg move (2). */
1019 {4, 4, 4}, /* cost of storing integer registers */
1020 2, /* cost of reg,reg fld/fst */
1021 {5, 5, 12}, /* cost of loading fp registers
1022 in SFmode, DFmode and XFmode */
1023 {4, 4, 8}, /* cost of storing fp registers
1024 in SFmode, DFmode and XFmode */
1025 2, /* cost of moving MMX register */
1026 {4, 4}, /* cost of loading MMX registers
1027 in SImode and DImode */
1028 {4, 4}, /* cost of storing MMX registers
1029 in SImode and DImode */
1030 2, /* cost of moving SSE register */
1031 {4, 4, 4}, /* cost of loading SSE registers
1032 in SImode, DImode and TImode */
1033 {4, 4, 4}, /* cost of storing SSE registers
1034 in SImode, DImode and TImode */
1035 2, /* MMX or SSE register to integer */
1037 MOVD reg64, xmmreg Double FSTORE 4
1038 MOVD reg32, xmmreg Double FSTORE 4
1040 MOVD reg64, xmmreg Double FADD 3
1042 MOVD reg32, xmmreg Double FADD 3
1044 16, /* size of l1 cache. */
1045 2048, /* size of l2 cache. */
1046 64, /* size of prefetch block */
1047 /* New AMD processors never drop prefetches; if they cannot be performed
1048 immediately, they are queued. We set number of simultaneous prefetches
1049 to a large constant to reflect this (it probably is not a good idea not
1050 to limit number of prefetches at all, as their execution also takes some
1052 100, /* number of parallel prefetches */
1053 2, /* Branch cost */
1054 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1055 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1056 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1057 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1058 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1059 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1063 6, /* scalar_stmt_cost. */
1064 4, /* scalar load_cost. */
1065 4, /* scalar_store_cost. */
1066 6, /* vec_stmt_cost. */
1067 0, /* vec_to_scalar_cost. */
1068 2, /* scalar_to_vec_cost. */
1069 4, /* vec_align_load_cost. */
1070 4, /* vec_unalign_load_cost. */
1071 4, /* vec_store_cost. */
1072 2, /* cond_taken_branch_cost. */
1073 1, /* cond_not_taken_branch_cost. */
1077 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1078 very small blocks it is better to use loop. For large blocks, libcall
1079 can do nontemporary accesses and beat inline considerably. */
1080 static stringop_algs bdver3_memcpy
[2] = {
1081 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1082 {-1, rep_prefix_4_byte
, false}}},
1083 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1084 {-1, libcall
, false}}}};
1085 static stringop_algs bdver3_memset
[2] = {
1086 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1087 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1088 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1089 {-1, libcall
, false}}}};
1090 struct processor_costs bdver3_cost
= {
1091 COSTS_N_INSNS (1), /* cost of an add instruction */
1092 COSTS_N_INSNS (1), /* cost of a lea instruction */
1093 COSTS_N_INSNS (1), /* variable shift costs */
1094 COSTS_N_INSNS (1), /* constant shift costs */
1095 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1096 COSTS_N_INSNS (4), /* HI */
1097 COSTS_N_INSNS (4), /* SI */
1098 COSTS_N_INSNS (6), /* DI */
1099 COSTS_N_INSNS (6)}, /* other */
1100 0, /* cost of multiply per each bit set */
1101 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1102 COSTS_N_INSNS (35), /* HI */
1103 COSTS_N_INSNS (51), /* SI */
1104 COSTS_N_INSNS (83), /* DI */
1105 COSTS_N_INSNS (83)}, /* other */
1106 COSTS_N_INSNS (1), /* cost of movsx */
1107 COSTS_N_INSNS (1), /* cost of movzx */
1108 8, /* "large" insn */
1110 4, /* cost for loading QImode using movzbl */
1111 {5, 5, 4}, /* cost of loading integer registers
1112 in QImode, HImode and SImode.
1113 Relative to reg-reg move (2). */
1114 {4, 4, 4}, /* cost of storing integer registers */
1115 2, /* cost of reg,reg fld/fst */
1116 {5, 5, 12}, /* cost of loading fp registers
1117 in SFmode, DFmode and XFmode */
1118 {4, 4, 8}, /* cost of storing fp registers
1119 in SFmode, DFmode and XFmode */
1120 2, /* cost of moving MMX register */
1121 {4, 4}, /* cost of loading MMX registers
1122 in SImode and DImode */
1123 {4, 4}, /* cost of storing MMX registers
1124 in SImode and DImode */
1125 2, /* cost of moving SSE register */
1126 {4, 4, 4}, /* cost of loading SSE registers
1127 in SImode, DImode and TImode */
1128 {4, 4, 4}, /* cost of storing SSE registers
1129 in SImode, DImode and TImode */
1130 2, /* MMX or SSE register to integer */
1131 16, /* size of l1 cache. */
1132 2048, /* size of l2 cache. */
1133 64, /* size of prefetch block */
1134 /* New AMD processors never drop prefetches; if they cannot be performed
1135 immediately, they are queued. We set number of simultaneous prefetches
1136 to a large constant to reflect this (it probably is not a good idea not
1137 to limit number of prefetches at all, as their execution also takes some
1139 100, /* number of parallel prefetches */
1140 2, /* Branch cost */
1141 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1142 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1143 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1144 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1145 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1146 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1150 6, /* scalar_stmt_cost. */
1151 4, /* scalar load_cost. */
1152 4, /* scalar_store_cost. */
1153 6, /* vec_stmt_cost. */
1154 0, /* vec_to_scalar_cost. */
1155 2, /* scalar_to_vec_cost. */
1156 4, /* vec_align_load_cost. */
1157 4, /* vec_unalign_load_cost. */
1158 4, /* vec_store_cost. */
1159 2, /* cond_taken_branch_cost. */
1160 1, /* cond_not_taken_branch_cost. */
1163 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1164 very small blocks it is better to use loop. For large blocks, libcall can
1165 do nontemporary accesses and beat inline considerably. */
1166 static stringop_algs btver1_memcpy
[2] = {
1167 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1168 {-1, rep_prefix_4_byte
, false}}},
1169 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1170 {-1, libcall
, false}}}};
1171 static stringop_algs btver1_memset
[2] = {
1172 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1173 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1174 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1175 {-1, libcall
, false}}}};
1176 const struct processor_costs btver1_cost
= {
1177 COSTS_N_INSNS (1), /* cost of an add instruction */
1178 COSTS_N_INSNS (2), /* cost of a lea instruction */
1179 COSTS_N_INSNS (1), /* variable shift costs */
1180 COSTS_N_INSNS (1), /* constant shift costs */
1181 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1182 COSTS_N_INSNS (4), /* HI */
1183 COSTS_N_INSNS (3), /* SI */
1184 COSTS_N_INSNS (4), /* DI */
1185 COSTS_N_INSNS (5)}, /* other */
1186 0, /* cost of multiply per each bit set */
1187 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1188 COSTS_N_INSNS (35), /* HI */
1189 COSTS_N_INSNS (51), /* SI */
1190 COSTS_N_INSNS (83), /* DI */
1191 COSTS_N_INSNS (83)}, /* other */
1192 COSTS_N_INSNS (1), /* cost of movsx */
1193 COSTS_N_INSNS (1), /* cost of movzx */
1194 8, /* "large" insn */
1196 4, /* cost for loading QImode using movzbl */
1197 {3, 4, 3}, /* cost of loading integer registers
1198 in QImode, HImode and SImode.
1199 Relative to reg-reg move (2). */
1200 {3, 4, 3}, /* cost of storing integer registers */
1201 4, /* cost of reg,reg fld/fst */
1202 {4, 4, 12}, /* cost of loading fp registers
1203 in SFmode, DFmode and XFmode */
1204 {6, 6, 8}, /* cost of storing fp registers
1205 in SFmode, DFmode and XFmode */
1206 2, /* cost of moving MMX register */
1207 {3, 3}, /* cost of loading MMX registers
1208 in SImode and DImode */
1209 {4, 4}, /* cost of storing MMX registers
1210 in SImode and DImode */
1211 2, /* cost of moving SSE register */
1212 {4, 4, 3}, /* cost of loading SSE registers
1213 in SImode, DImode and TImode */
1214 {4, 4, 5}, /* cost of storing SSE registers
1215 in SImode, DImode and TImode */
1216 3, /* MMX or SSE register to integer */
1218 MOVD reg64, xmmreg Double FSTORE 4
1219 MOVD reg32, xmmreg Double FSTORE 4
1221 MOVD reg64, xmmreg Double FADD 3
1223 MOVD reg32, xmmreg Double FADD 3
1225 32, /* size of l1 cache. */
1226 512, /* size of l2 cache. */
1227 64, /* size of prefetch block */
1228 100, /* number of parallel prefetches */
1229 2, /* Branch cost */
1230 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1231 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1232 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1233 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1234 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1235 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1239 4, /* scalar_stmt_cost. */
1240 2, /* scalar load_cost. */
1241 2, /* scalar_store_cost. */
1242 6, /* vec_stmt_cost. */
1243 0, /* vec_to_scalar_cost. */
1244 2, /* scalar_to_vec_cost. */
1245 2, /* vec_align_load_cost. */
1246 2, /* vec_unalign_load_cost. */
1247 2, /* vec_store_cost. */
1248 2, /* cond_taken_branch_cost. */
1249 1, /* cond_not_taken_branch_cost. */
1252 static stringop_algs btver2_memcpy
[2] = {
1253 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1254 {-1, rep_prefix_4_byte
, false}}},
1255 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1256 {-1, libcall
, false}}}};
1257 static stringop_algs btver2_memset
[2] = {
1258 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1259 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1260 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1261 {-1, libcall
, false}}}};
1262 const struct processor_costs btver2_cost
= {
1263 COSTS_N_INSNS (1), /* cost of an add instruction */
1264 COSTS_N_INSNS (2), /* cost of a lea instruction */
1265 COSTS_N_INSNS (1), /* variable shift costs */
1266 COSTS_N_INSNS (1), /* constant shift costs */
1267 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1268 COSTS_N_INSNS (4), /* HI */
1269 COSTS_N_INSNS (3), /* SI */
1270 COSTS_N_INSNS (4), /* DI */
1271 COSTS_N_INSNS (5)}, /* other */
1272 0, /* cost of multiply per each bit set */
1273 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1274 COSTS_N_INSNS (35), /* HI */
1275 COSTS_N_INSNS (51), /* SI */
1276 COSTS_N_INSNS (83), /* DI */
1277 COSTS_N_INSNS (83)}, /* other */
1278 COSTS_N_INSNS (1), /* cost of movsx */
1279 COSTS_N_INSNS (1), /* cost of movzx */
1280 8, /* "large" insn */
1282 4, /* cost for loading QImode using movzbl */
1283 {3, 4, 3}, /* cost of loading integer registers
1284 in QImode, HImode and SImode.
1285 Relative to reg-reg move (2). */
1286 {3, 4, 3}, /* cost of storing integer registers */
1287 4, /* cost of reg,reg fld/fst */
1288 {4, 4, 12}, /* cost of loading fp registers
1289 in SFmode, DFmode and XFmode */
1290 {6, 6, 8}, /* cost of storing fp registers
1291 in SFmode, DFmode and XFmode */
1292 2, /* cost of moving MMX register */
1293 {3, 3}, /* cost of loading MMX registers
1294 in SImode and DImode */
1295 {4, 4}, /* cost of storing MMX registers
1296 in SImode and DImode */
1297 2, /* cost of moving SSE register */
1298 {4, 4, 3}, /* cost of loading SSE registers
1299 in SImode, DImode and TImode */
1300 {4, 4, 5}, /* cost of storing SSE registers
1301 in SImode, DImode and TImode */
1302 3, /* MMX or SSE register to integer */
1304 MOVD reg64, xmmreg Double FSTORE 4
1305 MOVD reg32, xmmreg Double FSTORE 4
1307 MOVD reg64, xmmreg Double FADD 3
1309 MOVD reg32, xmmreg Double FADD 3
1311 32, /* size of l1 cache. */
1312 2048, /* size of l2 cache. */
1313 64, /* size of prefetch block */
1314 100, /* number of parallel prefetches */
1315 2, /* Branch cost */
1316 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1317 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1318 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1319 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1320 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1321 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1324 4, /* scalar_stmt_cost. */
1325 2, /* scalar load_cost. */
1326 2, /* scalar_store_cost. */
1327 6, /* vec_stmt_cost. */
1328 0, /* vec_to_scalar_cost. */
1329 2, /* scalar_to_vec_cost. */
1330 2, /* vec_align_load_cost. */
1331 2, /* vec_unalign_load_cost. */
1332 2, /* vec_store_cost. */
1333 2, /* cond_taken_branch_cost. */
1334 1, /* cond_not_taken_branch_cost. */
1337 static stringop_algs pentium4_memcpy
[2] = {
1338 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1339 DUMMY_STRINGOP_ALGS
};
1340 static stringop_algs pentium4_memset
[2] = {
1341 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1342 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1343 DUMMY_STRINGOP_ALGS
};
1346 struct processor_costs pentium4_cost
= {
1347 COSTS_N_INSNS (1), /* cost of an add instruction */
1348 COSTS_N_INSNS (3), /* cost of a lea instruction */
1349 COSTS_N_INSNS (4), /* variable shift costs */
1350 COSTS_N_INSNS (4), /* constant shift costs */
1351 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1352 COSTS_N_INSNS (15), /* HI */
1353 COSTS_N_INSNS (15), /* SI */
1354 COSTS_N_INSNS (15), /* DI */
1355 COSTS_N_INSNS (15)}, /* other */
1356 0, /* cost of multiply per each bit set */
1357 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1358 COSTS_N_INSNS (56), /* HI */
1359 COSTS_N_INSNS (56), /* SI */
1360 COSTS_N_INSNS (56), /* DI */
1361 COSTS_N_INSNS (56)}, /* other */
1362 COSTS_N_INSNS (1), /* cost of movsx */
1363 COSTS_N_INSNS (1), /* cost of movzx */
1364 16, /* "large" insn */
1366 2, /* cost for loading QImode using movzbl */
1367 {4, 5, 4}, /* cost of loading integer registers
1368 in QImode, HImode and SImode.
1369 Relative to reg-reg move (2). */
1370 {2, 3, 2}, /* cost of storing integer registers */
1371 2, /* cost of reg,reg fld/fst */
1372 {2, 2, 6}, /* cost of loading fp registers
1373 in SFmode, DFmode and XFmode */
1374 {4, 4, 6}, /* cost of storing fp registers
1375 in SFmode, DFmode and XFmode */
1376 2, /* cost of moving MMX register */
1377 {2, 2}, /* cost of loading MMX registers
1378 in SImode and DImode */
1379 {2, 2}, /* cost of storing MMX registers
1380 in SImode and DImode */
1381 12, /* cost of moving SSE register */
1382 {12, 12, 12}, /* cost of loading SSE registers
1383 in SImode, DImode and TImode */
1384 {2, 2, 8}, /* cost of storing SSE registers
1385 in SImode, DImode and TImode */
1386 10, /* MMX or SSE register to integer */
1387 8, /* size of l1 cache. */
1388 256, /* size of l2 cache. */
1389 64, /* size of prefetch block */
1390 6, /* number of parallel prefetches */
1391 2, /* Branch cost */
1392 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1393 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1394 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1395 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1396 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1397 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1400 1, /* scalar_stmt_cost. */
1401 1, /* scalar load_cost. */
1402 1, /* scalar_store_cost. */
1403 1, /* vec_stmt_cost. */
1404 1, /* vec_to_scalar_cost. */
1405 1, /* scalar_to_vec_cost. */
1406 1, /* vec_align_load_cost. */
1407 2, /* vec_unalign_load_cost. */
1408 1, /* vec_store_cost. */
1409 3, /* cond_taken_branch_cost. */
1410 1, /* cond_not_taken_branch_cost. */
1413 static stringop_algs nocona_memcpy
[2] = {
1414 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1415 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1416 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}};
1418 static stringop_algs nocona_memset
[2] = {
1419 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1420 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1421 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1422 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1425 struct processor_costs nocona_cost
= {
1426 COSTS_N_INSNS (1), /* cost of an add instruction */
1427 COSTS_N_INSNS (1), /* cost of a lea instruction */
1428 COSTS_N_INSNS (1), /* variable shift costs */
1429 COSTS_N_INSNS (1), /* constant shift costs */
1430 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1431 COSTS_N_INSNS (10), /* HI */
1432 COSTS_N_INSNS (10), /* SI */
1433 COSTS_N_INSNS (10), /* DI */
1434 COSTS_N_INSNS (10)}, /* other */
1435 0, /* cost of multiply per each bit set */
1436 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1437 COSTS_N_INSNS (66), /* HI */
1438 COSTS_N_INSNS (66), /* SI */
1439 COSTS_N_INSNS (66), /* DI */
1440 COSTS_N_INSNS (66)}, /* other */
1441 COSTS_N_INSNS (1), /* cost of movsx */
1442 COSTS_N_INSNS (1), /* cost of movzx */
1443 16, /* "large" insn */
1444 17, /* MOVE_RATIO */
1445 4, /* cost for loading QImode using movzbl */
1446 {4, 4, 4}, /* cost of loading integer registers
1447 in QImode, HImode and SImode.
1448 Relative to reg-reg move (2). */
1449 {4, 4, 4}, /* cost of storing integer registers */
1450 3, /* cost of reg,reg fld/fst */
1451 {12, 12, 12}, /* cost of loading fp registers
1452 in SFmode, DFmode and XFmode */
1453 {4, 4, 4}, /* cost of storing fp registers
1454 in SFmode, DFmode and XFmode */
1455 6, /* cost of moving MMX register */
1456 {12, 12}, /* cost of loading MMX registers
1457 in SImode and DImode */
1458 {12, 12}, /* cost of storing MMX registers
1459 in SImode and DImode */
1460 6, /* cost of moving SSE register */
1461 {12, 12, 12}, /* cost of loading SSE registers
1462 in SImode, DImode and TImode */
1463 {12, 12, 12}, /* cost of storing SSE registers
1464 in SImode, DImode and TImode */
1465 8, /* MMX or SSE register to integer */
1466 8, /* size of l1 cache. */
1467 1024, /* size of l2 cache. */
1468 128, /* size of prefetch block */
1469 8, /* number of parallel prefetches */
1470 1, /* Branch cost */
1471 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1472 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1473 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1474 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1475 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1476 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1479 1, /* scalar_stmt_cost. */
1480 1, /* scalar load_cost. */
1481 1, /* scalar_store_cost. */
1482 1, /* vec_stmt_cost. */
1483 1, /* vec_to_scalar_cost. */
1484 1, /* scalar_to_vec_cost. */
1485 1, /* vec_align_load_cost. */
1486 2, /* vec_unalign_load_cost. */
1487 1, /* vec_store_cost. */
1488 3, /* cond_taken_branch_cost. */
1489 1, /* cond_not_taken_branch_cost. */
1492 static stringop_algs atom_memcpy
[2] = {
1493 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1494 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1495 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1496 static stringop_algs atom_memset
[2] = {
1497 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1498 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1499 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1500 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1502 struct processor_costs atom_cost
= {
1503 COSTS_N_INSNS (1), /* cost of an add instruction */
1504 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1505 COSTS_N_INSNS (1), /* variable shift costs */
1506 COSTS_N_INSNS (1), /* constant shift costs */
1507 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1508 COSTS_N_INSNS (4), /* HI */
1509 COSTS_N_INSNS (3), /* SI */
1510 COSTS_N_INSNS (4), /* DI */
1511 COSTS_N_INSNS (2)}, /* other */
1512 0, /* cost of multiply per each bit set */
1513 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1514 COSTS_N_INSNS (26), /* HI */
1515 COSTS_N_INSNS (42), /* SI */
1516 COSTS_N_INSNS (74), /* DI */
1517 COSTS_N_INSNS (74)}, /* other */
1518 COSTS_N_INSNS (1), /* cost of movsx */
1519 COSTS_N_INSNS (1), /* cost of movzx */
1520 8, /* "large" insn */
1521 17, /* MOVE_RATIO */
1522 4, /* cost for loading QImode using movzbl */
1523 {4, 4, 4}, /* cost of loading integer registers
1524 in QImode, HImode and SImode.
1525 Relative to reg-reg move (2). */
1526 {4, 4, 4}, /* cost of storing integer registers */
1527 4, /* cost of reg,reg fld/fst */
1528 {12, 12, 12}, /* cost of loading fp registers
1529 in SFmode, DFmode and XFmode */
1530 {6, 6, 8}, /* cost of storing fp registers
1531 in SFmode, DFmode and XFmode */
1532 2, /* cost of moving MMX register */
1533 {8, 8}, /* cost of loading MMX registers
1534 in SImode and DImode */
1535 {8, 8}, /* cost of storing MMX registers
1536 in SImode and DImode */
1537 2, /* cost of moving SSE register */
1538 {8, 8, 8}, /* cost of loading SSE registers
1539 in SImode, DImode and TImode */
1540 {8, 8, 8}, /* cost of storing SSE registers
1541 in SImode, DImode and TImode */
1542 5, /* MMX or SSE register to integer */
1543 32, /* size of l1 cache. */
1544 256, /* size of l2 cache. */
1545 64, /* size of prefetch block */
1546 6, /* number of parallel prefetches */
1547 3, /* Branch cost */
1548 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1549 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1550 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1551 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1552 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1553 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1556 1, /* scalar_stmt_cost. */
1557 1, /* scalar load_cost. */
1558 1, /* scalar_store_cost. */
1559 1, /* vec_stmt_cost. */
1560 1, /* vec_to_scalar_cost. */
1561 1, /* scalar_to_vec_cost. */
1562 1, /* vec_align_load_cost. */
1563 2, /* vec_unalign_load_cost. */
1564 1, /* vec_store_cost. */
1565 3, /* cond_taken_branch_cost. */
1566 1, /* cond_not_taken_branch_cost. */
1569 static stringop_algs slm_memcpy
[2] = {
1570 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1571 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1572 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1573 static stringop_algs slm_memset
[2] = {
1574 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1575 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1576 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1577 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1579 struct processor_costs slm_cost
= {
1580 COSTS_N_INSNS (1), /* cost of an add instruction */
1581 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1582 COSTS_N_INSNS (1), /* variable shift costs */
1583 COSTS_N_INSNS (1), /* constant shift costs */
1584 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1585 COSTS_N_INSNS (4), /* HI */
1586 COSTS_N_INSNS (3), /* SI */
1587 COSTS_N_INSNS (4), /* DI */
1588 COSTS_N_INSNS (2)}, /* other */
1589 0, /* cost of multiply per each bit set */
1590 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1591 COSTS_N_INSNS (26), /* HI */
1592 COSTS_N_INSNS (42), /* SI */
1593 COSTS_N_INSNS (74), /* DI */
1594 COSTS_N_INSNS (74)}, /* other */
1595 COSTS_N_INSNS (1), /* cost of movsx */
1596 COSTS_N_INSNS (1), /* cost of movzx */
1597 8, /* "large" insn */
1598 17, /* MOVE_RATIO */
1599 4, /* cost for loading QImode using movzbl */
1600 {4, 4, 4}, /* cost of loading integer registers
1601 in QImode, HImode and SImode.
1602 Relative to reg-reg move (2). */
1603 {4, 4, 4}, /* cost of storing integer registers */
1604 4, /* cost of reg,reg fld/fst */
1605 {12, 12, 12}, /* cost of loading fp registers
1606 in SFmode, DFmode and XFmode */
1607 {6, 6, 8}, /* cost of storing fp registers
1608 in SFmode, DFmode and XFmode */
1609 2, /* cost of moving MMX register */
1610 {8, 8}, /* cost of loading MMX registers
1611 in SImode and DImode */
1612 {8, 8}, /* cost of storing MMX registers
1613 in SImode and DImode */
1614 2, /* cost of moving SSE register */
1615 {8, 8, 8}, /* cost of loading SSE registers
1616 in SImode, DImode and TImode */
1617 {8, 8, 8}, /* cost of storing SSE registers
1618 in SImode, DImode and TImode */
1619 5, /* MMX or SSE register to integer */
1620 32, /* size of l1 cache. */
1621 256, /* size of l2 cache. */
1622 64, /* size of prefetch block */
1623 6, /* number of parallel prefetches */
1624 3, /* Branch cost */
1625 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1626 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1627 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1628 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1629 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1630 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1633 1, /* scalar_stmt_cost. */
1634 1, /* scalar load_cost. */
1635 1, /* scalar_store_cost. */
1636 1, /* vec_stmt_cost. */
1637 1, /* vec_to_scalar_cost. */
1638 1, /* scalar_to_vec_cost. */
1639 1, /* vec_align_load_cost. */
1640 2, /* vec_unalign_load_cost. */
1641 1, /* vec_store_cost. */
1642 3, /* cond_taken_branch_cost. */
1643 1, /* cond_not_taken_branch_cost. */
1646 /* Generic should produce code tuned for Core-i7 (and newer chips)
1647 and btver1 (and newer chips). */
1649 static stringop_algs generic_memcpy
[2] = {
1650 {libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1651 {-1, libcall
, false}}},
1652 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1653 {-1, libcall
, false}}}};
1654 static stringop_algs generic_memset
[2] = {
1655 {libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1656 {-1, libcall
, false}}},
1657 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1658 {-1, libcall
, false}}}};
1660 struct processor_costs generic_cost
= {
1661 COSTS_N_INSNS (1), /* cost of an add instruction */
1662 /* On all chips taken into consideration lea is 2 cycles and more. With
1663 this cost however our current implementation of synth_mult results in
1664 use of unnecessary temporary registers causing regression on several
1665 SPECfp benchmarks. */
1666 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1667 COSTS_N_INSNS (1), /* variable shift costs */
1668 COSTS_N_INSNS (1), /* constant shift costs */
1669 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1670 COSTS_N_INSNS (4), /* HI */
1671 COSTS_N_INSNS (3), /* SI */
1672 COSTS_N_INSNS (4), /* DI */
1673 COSTS_N_INSNS (2)}, /* other */
1674 0, /* cost of multiply per each bit set */
1675 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1676 COSTS_N_INSNS (26), /* HI */
1677 COSTS_N_INSNS (42), /* SI */
1678 COSTS_N_INSNS (74), /* DI */
1679 COSTS_N_INSNS (74)}, /* other */
1680 COSTS_N_INSNS (1), /* cost of movsx */
1681 COSTS_N_INSNS (1), /* cost of movzx */
1682 8, /* "large" insn */
1683 17, /* MOVE_RATIO */
1684 4, /* cost for loading QImode using movzbl */
1685 {4, 4, 4}, /* cost of loading integer registers
1686 in QImode, HImode and SImode.
1687 Relative to reg-reg move (2). */
1688 {4, 4, 4}, /* cost of storing integer registers */
1689 4, /* cost of reg,reg fld/fst */
1690 {12, 12, 12}, /* cost of loading fp registers
1691 in SFmode, DFmode and XFmode */
1692 {6, 6, 8}, /* cost of storing fp registers
1693 in SFmode, DFmode and XFmode */
1694 2, /* cost of moving MMX register */
1695 {8, 8}, /* cost of loading MMX registers
1696 in SImode and DImode */
1697 {8, 8}, /* cost of storing MMX registers
1698 in SImode and DImode */
1699 2, /* cost of moving SSE register */
1700 {8, 8, 8}, /* cost of loading SSE registers
1701 in SImode, DImode and TImode */
1702 {8, 8, 8}, /* cost of storing SSE registers
1703 in SImode, DImode and TImode */
1704 5, /* MMX or SSE register to integer */
1705 32, /* size of l1 cache. */
1706 512, /* size of l2 cache. */
1707 64, /* size of prefetch block */
1708 6, /* number of parallel prefetches */
1709 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1710 value is increased to perhaps more appropriate value of 5. */
1711 3, /* Branch cost */
1712 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1713 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1714 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1715 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1716 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1717 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1720 1, /* scalar_stmt_cost. */
1721 1, /* scalar load_cost. */
1722 1, /* scalar_store_cost. */
1723 1, /* vec_stmt_cost. */
1724 1, /* vec_to_scalar_cost. */
1725 1, /* scalar_to_vec_cost. */
1726 1, /* vec_align_load_cost. */
1727 2, /* vec_unalign_load_cost. */
1728 1, /* vec_store_cost. */
1729 3, /* cond_taken_branch_cost. */
1730 1, /* cond_not_taken_branch_cost. */
1733 /* core_cost should produce code tuned for Core familly of CPUs. */
1734 static stringop_algs core_memcpy
[2] = {
1735 {libcall
, {{1024, rep_prefix_4_byte
, true}, {-1, libcall
, false}}},
1736 {libcall
, {{24, loop
, true}, {128, rep_prefix_8_byte
, true},
1737 {-1, libcall
, false}}}};
1738 static stringop_algs core_memset
[2] = {
1739 {libcall
, {{6, loop_1_byte
, true},
1741 {8192, rep_prefix_4_byte
, true},
1742 {-1, libcall
, false}}},
1743 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1744 {-1, libcall
, false}}}};
1747 struct processor_costs core_cost
= {
1748 COSTS_N_INSNS (1), /* cost of an add instruction */
1749 /* On all chips taken into consideration lea is 2 cycles and more. With
1750 this cost however our current implementation of synth_mult results in
1751 use of unnecessary temporary registers causing regression on several
1752 SPECfp benchmarks. */
1753 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1754 COSTS_N_INSNS (1), /* variable shift costs */
1755 COSTS_N_INSNS (1), /* constant shift costs */
1756 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1757 COSTS_N_INSNS (4), /* HI */
1758 COSTS_N_INSNS (3), /* SI */
1759 COSTS_N_INSNS (4), /* DI */
1760 COSTS_N_INSNS (2)}, /* other */
1761 0, /* cost of multiply per each bit set */
1762 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1763 COSTS_N_INSNS (26), /* HI */
1764 COSTS_N_INSNS (42), /* SI */
1765 COSTS_N_INSNS (74), /* DI */
1766 COSTS_N_INSNS (74)}, /* other */
1767 COSTS_N_INSNS (1), /* cost of movsx */
1768 COSTS_N_INSNS (1), /* cost of movzx */
1769 8, /* "large" insn */
1770 17, /* MOVE_RATIO */
1771 4, /* cost for loading QImode using movzbl */
1772 {4, 4, 4}, /* cost of loading integer registers
1773 in QImode, HImode and SImode.
1774 Relative to reg-reg move (2). */
1775 {4, 4, 4}, /* cost of storing integer registers */
1776 4, /* cost of reg,reg fld/fst */
1777 {12, 12, 12}, /* cost of loading fp registers
1778 in SFmode, DFmode and XFmode */
1779 {6, 6, 8}, /* cost of storing fp registers
1780 in SFmode, DFmode and XFmode */
1781 2, /* cost of moving MMX register */
1782 {8, 8}, /* cost of loading MMX registers
1783 in SImode and DImode */
1784 {8, 8}, /* cost of storing MMX registers
1785 in SImode and DImode */
1786 2, /* cost of moving SSE register */
1787 {8, 8, 8}, /* cost of loading SSE registers
1788 in SImode, DImode and TImode */
1789 {8, 8, 8}, /* cost of storing SSE registers
1790 in SImode, DImode and TImode */
1791 5, /* MMX or SSE register to integer */
1792 64, /* size of l1 cache. */
1793 512, /* size of l2 cache. */
1794 64, /* size of prefetch block */
1795 6, /* number of parallel prefetches */
1796 /* FIXME perhaps more appropriate value is 5. */
1797 3, /* Branch cost */
1798 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1799 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1800 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1801 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1802 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1803 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1806 1, /* scalar_stmt_cost. */
1807 1, /* scalar load_cost. */
1808 1, /* scalar_store_cost. */
1809 1, /* vec_stmt_cost. */
1810 1, /* vec_to_scalar_cost. */
1811 1, /* scalar_to_vec_cost. */
1812 1, /* vec_align_load_cost. */
1813 2, /* vec_unalign_load_cost. */
1814 1, /* vec_store_cost. */
1815 3, /* cond_taken_branch_cost. */
1816 1, /* cond_not_taken_branch_cost. */
1820 /* Set by -mtune. */
1821 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1823 /* Set by -mtune or -Os. */
1824 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks, one bit per PROCESSOR_* value.
   Used to build the tuning/architecture feature tables below.  */

/* Intel CPUs.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_COREI7 (1<<PROCESSOR_COREI7)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)
#define m_SLM (1<<PROCESSOR_SLM)

/* AMD CPUs.  */
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC (1<<PROCESSOR_GENERIC)
1859 const char* ix86_tune_feature_names
[X86_TUNE_LAST
] = {
1861 #define DEF_TUNE(tune, name, selector) name,
1862 #include "x86-tune.def"
1866 /* Feature tests against the various tunings. */
1867 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1869 /* Feature tests against the various tunings used to create ix86_tune_features
1870 based on the processor mask. */
1871 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1873 #define DEF_TUNE(tune, name, selector) selector,
1874 #include "x86-tune.def"
1878 /* Feature tests against the various architecture variations. */
1879 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
1881 /* Feature tests against the various architecture variations, used to create
1882 ix86_arch_features based on the processor mask. */
1883 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
1884 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
1885 ~(m_386
| m_486
| m_PENT
| m_K6
),
1887 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1890 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1893 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1896 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1900 /* In case the average insn count for single function invocation is
1901 lower than this constant, emit fast (but longer) prologue and
1903 #define FAST_PROLOGUE_INSN_COUNT 20
1905 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1906 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1907 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1908 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1910 /* Array of the smallest class containing reg number REGNO, indexed by
1911 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1913 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1915 /* ax, dx, cx, bx */
1916 AREG
, DREG
, CREG
, BREG
,
1917 /* si, di, bp, sp */
1918 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1920 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1921 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1924 /* flags, fpsr, fpcr, frame */
1925 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1927 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1930 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1933 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1934 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1935 /* SSE REX registers */
1936 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1938 /* AVX-512 SSE registers */
1939 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
1940 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
1941 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
1942 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
1943 /* Mask registers. */
1944 MASK_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
1945 MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
1948 /* The "default" register map used in 32bit mode. */
1950 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1952 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1953 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1954 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1955 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1956 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1957 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1958 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1959 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
1960 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
1961 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
1964 /* The "default" register map used in 64bit mode. */
1966 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1968 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1969 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1970 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1971 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1972 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1973 8,9,10,11,12,13,14,15, /* extended integer registers */
1974 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1975 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
1976 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
1977 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
1980 /* Define the register numbers to be used in Dwarf debugging information.
1981 The SVR4 reference port C compiler uses the following register numbers
1982 in its Dwarf output code:
1983 0 for %eax (gcc regno = 0)
1984 1 for %ecx (gcc regno = 2)
1985 2 for %edx (gcc regno = 1)
1986 3 for %ebx (gcc regno = 3)
1987 4 for %esp (gcc regno = 7)
1988 5 for %ebp (gcc regno = 6)
1989 6 for %esi (gcc regno = 4)
1990 7 for %edi (gcc regno = 5)
1991 The following three DWARF register numbers are never generated by
1992 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1993 believes these numbers have these meanings.
1994 8 for %eip (no gcc equivalent)
1995 9 for %eflags (gcc regno = 17)
1996 10 for %trapno (no gcc equivalent)
1997 It is not at all clear how we should number the FP stack registers
1998 for the x86 architecture. If the version of SDB on x86/svr4 were
1999 a bit less brain dead with respect to floating-point then we would
2000 have a precedent to follow with respect to DWARF register numbers
2001 for x86 FP registers, but the SDB on x86/svr4 is so completely
2002 broken with respect to FP registers that it is hardly worth thinking
2003 of it as something to strive for compatibility with.
2004 The version of x86/svr4 SDB I have at the moment does (partially)
2005 seem to believe that DWARF register number 11 is associated with
2006 the x86 register %st(0), but that's about all. Higher DWARF
2007 register numbers don't seem to be associated with anything in
2008 particular, and even for DWARF regno 11, SDB only seems to under-
2009 stand that it should say that a variable lives in %st(0) (when
2010 asked via an `=' command) if we said it was in DWARF regno 11,
2011 but SDB still prints garbage when asked for the value of the
2012 variable in question (via a `/' command).
2013 (Also note that the labels SDB prints for various FP stack regs
2014 when doing an `x' command are all wrong.)
2015 Note that these problems generally don't affect the native SVR4
2016 C compiler because it doesn't allow the use of -O with -g and
2017 because when it is *not* optimizing, it allocates a memory
2018 location for each floating-point variable, and the memory
2019 location is what gets described in the DWARF AT_location
2020 attribute for the variable in question.
2021 Regardless of the severe mental illness of the x86/svr4 SDB, we
2022 do something sensible here and we use the following DWARF
2023 register numbers. Note that these are all stack-top-relative
2025 11 for %st(0) (gcc regno = 8)
2026 12 for %st(1) (gcc regno = 9)
2027 13 for %st(2) (gcc regno = 10)
2028 14 for %st(3) (gcc regno = 11)
2029 15 for %st(4) (gcc regno = 12)
2030 16 for %st(5) (gcc regno = 13)
2031 17 for %st(6) (gcc regno = 14)
2032 18 for %st(7) (gcc regno = 15)
2034 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2036 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2037 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2038 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2039 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2040 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2041 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2042 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2043 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2044 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2045 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2048 /* Define parameter passing and return registers. */
2050 static int const x86_64_int_parameter_registers
[6] =
2052 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2055 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2057 CX_REG
, DX_REG
, R8_REG
, R9_REG
2060 static int const x86_64_int_return_registers
[4] =
2062 AX_REG
, DX_REG
, DI_REG
, SI_REG
2065 /* Additional registers that are clobbered by SYSV calls. */
2067 int const x86_64_ms_sysv_extra_clobbered_registers
[12] =
2071 XMM8_REG
, XMM9_REG
, XMM10_REG
, XMM11_REG
,
2072 XMM12_REG
, XMM13_REG
, XMM14_REG
, XMM15_REG
2075 /* Define the structure for the machine field in struct function. */
2077 struct GTY(()) stack_local_entry
{
2078 unsigned short mode
;
2081 struct stack_local_entry
*next
;
2084 /* Structure describing stack frame layout.
2085 Stack grows downward:
2091 saved static chain if ix86_static_chain_on_stack
2093 saved frame pointer if frame_pointer_needed
2094 <- HARD_FRAME_POINTER
2100 <- sse_regs_save_offset
2103 [va_arg registers] |
2107 [padding2] | = to_allocate
2116 int outgoing_arguments_size
;
2118 /* The offsets relative to ARG_POINTER. */
2119 HOST_WIDE_INT frame_pointer_offset
;
2120 HOST_WIDE_INT hard_frame_pointer_offset
;
2121 HOST_WIDE_INT stack_pointer_offset
;
2122 HOST_WIDE_INT hfp_save_offset
;
2123 HOST_WIDE_INT reg_save_offset
;
2124 HOST_WIDE_INT sse_reg_save_offset
;
2126 /* When save_regs_using_mov is set, emit prologue using
2127 move instead of push instructions. */
2128 bool save_regs_using_mov
;
2131 /* Which cpu are we scheduling for. */
2132 enum attr_cpu ix86_schedule
;
2134 /* Which cpu are we optimizing for. */
2135 enum processor_type ix86_tune
;
2137 /* Which instruction set architecture to use. */
2138 enum processor_type ix86_arch
;
2140 /* True if processor has SSE prefetch instruction. */
2141 unsigned char x86_prefetch_sse
;
/* Attribute name corresponding to the -mstackrealign option.  */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
2147 static rtx (*ix86_gen_leave
) (void);
2148 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2149 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2150 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2151 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2152 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2153 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2154 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2155 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2156 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2157 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2158 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2160 /* Preferred alignment for stack boundary in bits. */
2161 unsigned int ix86_preferred_stack_boundary
;
2163 /* Alignment for incoming stack boundary in bits specified at
2165 static unsigned int ix86_user_incoming_stack_boundary
;
2167 /* Default alignment for incoming stack boundary in bits. */
2168 static unsigned int ix86_default_incoming_stack_boundary
;
2170 /* Alignment for incoming stack boundary in bits. */
2171 unsigned int ix86_incoming_stack_boundary
;
2173 /* Calling abi specific va_list type nodes. */
2174 static GTY(()) tree sysv_va_list_type_node
;
2175 static GTY(()) tree ms_va_list_type_node
;
2177 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2178 char internal_label_prefix
[16];
2179 int internal_label_prefix_len
;
2181 /* Fence to use after loop using movnt. */
2184 /* Register class used for passing given 64bit part of the argument.
2185 These represent classes as documented by the PS ABI, with the exception
2186 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2187 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2189 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2190 whenever possible (upper half does contain padding). */
2191 enum x86_64_reg_class
2194 X86_64_INTEGER_CLASS
,
2195 X86_64_INTEGERSI_CLASS
,
2202 X86_64_COMPLEX_X87_CLASS
,
2206 #define MAX_CLASSES 4
2208 /* Table of constants used by fldpi, fldln2, etc.... */
2209 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2210 static bool ext_80387_constants_init
= 0;
2213 static struct machine_function
* ix86_init_machine_status (void);
2214 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2215 static bool ix86_function_value_regno_p (const unsigned int);
2216 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2218 static rtx
ix86_static_chain (const_tree
, bool);
2219 static int ix86_function_regparm (const_tree
, const_tree
);
2220 static void ix86_compute_frame_layout (struct ix86_frame
*);
2221 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2223 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2224 static tree
ix86_canonical_va_list_type (tree
);
2225 static void predict_jump (int);
2226 static unsigned int split_stack_prologue_scratch_regno (void);
2227 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2229 enum ix86_function_specific_strings
2231 IX86_FUNCTION_SPECIFIC_ARCH
,
2232 IX86_FUNCTION_SPECIFIC_TUNE
,
2233 IX86_FUNCTION_SPECIFIC_MAX
2236 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2237 const char *, enum fpmath_unit
, bool);
2238 static void ix86_function_specific_save (struct cl_target_option
*,
2239 struct gcc_options
*opts
);
2240 static void ix86_function_specific_restore (struct gcc_options
*opts
,
2241 struct cl_target_option
*);
2242 static void ix86_function_specific_print (FILE *, int,
2243 struct cl_target_option
*);
2244 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2245 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2246 struct gcc_options
*,
2247 struct gcc_options
*,
2248 struct gcc_options
*);
2249 static bool ix86_can_inline_p (tree
, tree
);
2250 static void ix86_set_current_function (tree
);
2251 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2253 static enum calling_abi
ix86_function_abi (const_tree
);
2256 #ifndef SUBTARGET32_DEFAULT_CPU
2257 #define SUBTARGET32_DEFAULT_CPU "i386"
/* Whether -mtune= or -march= were specified on the command line.  */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
2264 /* Vectorization library interface and handlers. */
2265 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2267 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2268 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2270 /* Processor target table, indexed by processor number */
2273 const struct processor_costs
*cost
; /* Processor costs */
2274 const int align_loop
; /* Default alignments. */
2275 const int align_loop_max_skip
;
2276 const int align_jump
;
2277 const int align_jump_max_skip
;
2278 const int align_func
;
2281 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2283 {&i386_cost
, 4, 3, 4, 3, 4},
2284 {&i486_cost
, 16, 15, 16, 15, 16},
2285 {&pentium_cost
, 16, 7, 16, 7, 16},
2286 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2287 {&geode_cost
, 0, 0, 0, 0, 0},
2288 {&k6_cost
, 32, 7, 32, 7, 32},
2289 {&athlon_cost
, 16, 7, 16, 7, 16},
2290 {&pentium4_cost
, 0, 0, 0, 0, 0},
2291 {&k8_cost
, 16, 7, 16, 7, 16},
2292 {&nocona_cost
, 0, 0, 0, 0, 0},
2294 {&core_cost
, 16, 10, 16, 10, 16},
2296 {&core_cost
, 16, 10, 16, 10, 16},
2298 {&core_cost
, 16, 10, 16, 10, 16},
2299 {&generic_cost
, 16, 10, 16, 10, 16},
2300 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2301 {&bdver1_cost
, 16, 10, 16, 7, 11},
2302 {&bdver2_cost
, 16, 10, 16, 7, 11},
2303 {&bdver3_cost
, 16, 10, 16, 7, 11},
2304 {&btver1_cost
, 16, 10, 16, 7, 11},
2305 {&btver2_cost
, 16, 10, 16, 7, 11},
2306 {&atom_cost
, 16, 15, 16, 7, 16},
2307 {&slm_cost
, 16, 15, 16, 7, 16}
2310 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2345 gate_insert_vzeroupper (void)
2347 return TARGET_AVX
&& !TARGET_AVX512F
&& TARGET_VZEROUPPER
;
2351 rest_of_handle_insert_vzeroupper (void)
2355 /* vzeroupper instructions are inserted immediately after reload to
2356 account for possible spills from 256bit registers. The pass
2357 reuses mode switching infrastructure by re-running mode insertion
2358 pass, so disable entities that have already been processed. */
2359 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2360 ix86_optimize_mode_switching
[i
] = 0;
2362 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2364 /* Call optimize_mode_switching. */
2365 g
->get_passes ()->execute_pass_mode_switching ();
2371 const pass_data pass_data_insert_vzeroupper
=
2373 RTL_PASS
, /* type */
2374 "vzeroupper", /* name */
2375 OPTGROUP_NONE
, /* optinfo_flags */
2376 true, /* has_gate */
2377 true, /* has_execute */
2378 TV_NONE
, /* tv_id */
2379 0, /* properties_required */
2380 0, /* properties_provided */
2381 0, /* properties_destroyed */
2382 0, /* todo_flags_start */
2383 ( TODO_df_finish
| TODO_verify_rtl_sharing
| 0 ), /* todo_flags_finish */
2386 class pass_insert_vzeroupper
: public rtl_opt_pass
2389 pass_insert_vzeroupper(gcc::context
*ctxt
)
2390 : rtl_opt_pass(pass_data_insert_vzeroupper
, ctxt
)
2393 /* opt_pass methods: */
2394 bool gate () { return gate_insert_vzeroupper (); }
2395 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2397 }; // class pass_insert_vzeroupper
2402 make_pass_insert_vzeroupper (gcc::context
*ctxt
)
2404 return new pass_insert_vzeroupper (ctxt
);
2407 /* Return true if a red-zone is in use. */
2410 ix86_using_red_zone (void)
2412 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2415 /* Return a string that documents the current -m options. The caller is
2416 responsible for freeing the string. */
2419 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2420 const char *tune
, enum fpmath_unit fpmath
,
2423 struct ix86_target_opts
2425 const char *option
; /* option string */
2426 HOST_WIDE_INT mask
; /* isa mask options */
2429 /* This table is ordered so that options like -msse4.2 that imply
2430 preceding options while match those first. */
2431 static struct ix86_target_opts isa_opts
[] =
2433 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2434 { "-mfma", OPTION_MASK_ISA_FMA
},
2435 { "-mxop", OPTION_MASK_ISA_XOP
},
2436 { "-mlwp", OPTION_MASK_ISA_LWP
},
2437 { "-mavx512f", OPTION_MASK_ISA_AVX512F
},
2438 { "-mavx512er", OPTION_MASK_ISA_AVX512ER
},
2439 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD
},
2440 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF
},
2441 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2442 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2443 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2444 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2445 { "-msse3", OPTION_MASK_ISA_SSE3
},
2446 { "-msse2", OPTION_MASK_ISA_SSE2
},
2447 { "-msse", OPTION_MASK_ISA_SSE
},
2448 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2449 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2450 { "-mmmx", OPTION_MASK_ISA_MMX
},
2451 { "-mabm", OPTION_MASK_ISA_ABM
},
2452 { "-mbmi", OPTION_MASK_ISA_BMI
},
2453 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2454 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2455 { "-mhle", OPTION_MASK_ISA_HLE
},
2456 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2457 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2458 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2459 { "-madx", OPTION_MASK_ISA_ADX
},
2460 { "-mtbm", OPTION_MASK_ISA_TBM
},
2461 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2462 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2463 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2464 { "-maes", OPTION_MASK_ISA_AES
},
2465 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2466 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2467 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2468 { "-mf16c", OPTION_MASK_ISA_F16C
},
2469 { "-mrtm", OPTION_MASK_ISA_RTM
},
2470 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2471 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2475 static struct ix86_target_opts flag_opts
[] =
2477 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2478 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2479 { "-m80387", MASK_80387
},
2480 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2481 { "-malign-double", MASK_ALIGN_DOUBLE
},
2482 { "-mcld", MASK_CLD
},
2483 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2484 { "-mieee-fp", MASK_IEEE_FP
},
2485 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2486 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2487 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2488 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2489 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2490 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2491 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2492 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2493 { "-mrecip", MASK_RECIP
},
2494 { "-mrtd", MASK_RTD
},
2495 { "-msseregparm", MASK_SSEREGPARM
},
2496 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2497 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2498 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2499 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2500 { "-mvzeroupper", MASK_VZEROUPPER
},
2501 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2502 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2503 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2506 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2509 char target_other
[40];
2519 memset (opts
, '\0', sizeof (opts
));
2521 /* Add -march= option. */
2524 opts
[num
][0] = "-march=";
2525 opts
[num
++][1] = arch
;
2528 /* Add -mtune= option. */
2531 opts
[num
][0] = "-mtune=";
2532 opts
[num
++][1] = tune
;
2535 /* Add -m32/-m64/-mx32. */
2536 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2538 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2542 isa
&= ~ (OPTION_MASK_ISA_64BIT
2543 | OPTION_MASK_ABI_64
2544 | OPTION_MASK_ABI_X32
);
2548 opts
[num
++][0] = abi
;
2550 /* Pick out the options in isa options. */
2551 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2553 if ((isa
& isa_opts
[i
].mask
) != 0)
2555 opts
[num
++][0] = isa_opts
[i
].option
;
2556 isa
&= ~ isa_opts
[i
].mask
;
2560 if (isa
&& add_nl_p
)
2562 opts
[num
++][0] = isa_other
;
2563 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2567 /* Add flag options. */
2568 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2570 if ((flags
& flag_opts
[i
].mask
) != 0)
2572 opts
[num
++][0] = flag_opts
[i
].option
;
2573 flags
&= ~ flag_opts
[i
].mask
;
2577 if (flags
&& add_nl_p
)
2579 opts
[num
++][0] = target_other
;
2580 sprintf (target_other
, "(other flags: %#x)", flags
);
2583 /* Add -fpmath= option. */
2586 opts
[num
][0] = "-mfpmath=";
2587 switch ((int) fpmath
)
2590 opts
[num
++][1] = "387";
2594 opts
[num
++][1] = "sse";
2597 case FPMATH_387
| FPMATH_SSE
:
2598 opts
[num
++][1] = "sse+387";
2610 gcc_assert (num
< ARRAY_SIZE (opts
));
2612 /* Size the string. */
2614 sep_len
= (add_nl_p
) ? 3 : 1;
2615 for (i
= 0; i
< num
; i
++)
2618 for (j
= 0; j
< 2; j
++)
2620 len
+= strlen (opts
[i
][j
]);
2623 /* Build the string. */
2624 ret
= ptr
= (char *) xmalloc (len
);
2627 for (i
= 0; i
< num
; i
++)
2631 for (j
= 0; j
< 2; j
++)
2632 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2639 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2647 for (j
= 0; j
< 2; j
++)
2650 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2652 line_len
+= len2
[j
];
2657 gcc_assert (ret
+ len
>= ptr
);
2662 /* Return true, if profiling code should be emitted before
2663 prologue. Otherwise it returns false.
2664 Note: For x86 with "hotfix" it is sorried. */
2666 ix86_profile_before_prologue (void)
2668 return flag_fentry
!= 0;
2671 /* Function that is callable from the debugger to print the current
2673 void ATTRIBUTE_UNUSED
2674 ix86_debug_options (void)
2676 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2677 ix86_arch_string
, ix86_tune_string
,
2682 fprintf (stderr
, "%s\n\n", opts
);
2686 fputs ("<no options>\n\n", stderr
);
/* Human-readable names of the stringop algorithms, generated from
   stringop.def.  The index of each name matches the corresponding
   enum stringop_alg value (relied upon by the strategy parser below).  */
2691 static const char *stringop_alg_names
[] = {
2693 #define DEF_ALG(alg, name) #name,
2694 #include "stringop.def"
2699 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2700 The string is of the following form (or comma separated list of it):
2702 strategy_alg:max_size:[align|noalign]
2704 where the full size range for the strategy is either [0, max_size] or
2705 [min_size, max_size], in which min_size is the max_size + 1 of the
2706 preceding range. The last size range must have max_size == -1.
2711 -mmemcpy-strategy=libcall:-1:noalign
2713 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2717 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2719 This is to tell the compiler to use the following strategy for memset
2720 1) when the expected size is between [1, 16], use rep_8byte strategy;
2721 2) when the size is between [17, 2048], use vector_loop;
2722 3) when the size is > 2048, use libcall. */
2724 struct stringop_size_range
/* Parse STRATEGY_STR, the value of -mmemcpy-strategy= (IS_MEMSET false)
   or -mmemset-strategy= (IS_MEMSET true), and override the default
   stringop algorithm table accordingly.  The accepted syntax is
   described in the comment preceding this function.  */
2732 ix86_parse_stringop_strategy_string (char *strategy_str
, bool is_memset
)
2734 const struct stringop_algs
*default_algs
;
2735 stringop_size_range input_ranges
[MAX_STRINGOP_ALGS
];
2736 char *curr_range_str
, *next_range_str
;
/* Select which default cost table this option overrides: memset or
   memcpy, and within that the 32-bit or 64-bit variant.  */
2740 default_algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
2742 default_algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
2744 curr_range_str
= strategy_str
;
/* Walk the comma-separated list, parsing one
   alg_name:max_size:[align|noalign] triple per iteration.  */
2752 next_range_str
= strchr (curr_range_str
, ',');
2754 *next_range_str
++ = '\0';
2756 if (3 != sscanf (curr_range_str
, "%20[^:]:%d:%10s",
2757 alg_name
, &maxs
, align
))
2759 error ("wrong arg %s to option %s", curr_range_str
,
2760 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Each range's max must exceed the previous range's max, except the
   final sentinel value -1 which means "unbounded".  */
2764 if (n
> 0 && (maxs
< (input_ranges
[n
- 1].max
+ 1) && maxs
!= -1))
2766 error ("size ranges of option %s should be increasing",
2767 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Map the textual algorithm name onto its enum stringop_alg value via
   the generated stringop_alg_names table.  */
2771 for (i
= 0; i
< last_alg
; i
++)
2773 if (!strcmp (alg_name
, stringop_alg_names
[i
]))
2775 alg
= (stringop_alg
) i
;
2782 error ("wrong stringop strategy name %s specified for option %s",
2784 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Record the parsed range.  */
2788 input_ranges
[n
].max
= maxs
;
2789 input_ranges
[n
].alg
= alg
;
2790 if (!strcmp (align
, "align"))
2791 input_ranges
[n
].noalign
= false;
2792 else if (!strcmp (align
, "noalign"))
2793 input_ranges
[n
].noalign
= true;
2796 error ("unknown alignment %s specified for option %s",
2797 align
, is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2801 curr_range_str
= next_range_str
;
2803 while (curr_range_str
);
/* The last range must be the open-ended sentinel.  */
2805 if (input_ranges
[n
- 1].max
!= -1)
2807 error ("the max value for the last size range should be -1"
2809 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2813 if (n
> MAX_STRINGOP_ALGS
)
2815 error ("too many size ranges specified in option %s",
2816 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2820 /* Now override the default algs array.  The cost tables are declared
   const, so cast that away to install the user's choices.  */
2821 for (i
= 0; i
< n
; i
++)
2823 *const_cast<int *>(&default_algs
->size
[i
].max
) = input_ranges
[i
].max
;
2824 *const_cast<stringop_alg
*>(&default_algs
->size
[i
].alg
)
2825 = input_ranges
[i
].alg
;
2826 *const_cast<int *>(&default_algs
->size
[i
].noalign
)
2827 = input_ranges
[i
].noalign
;
2832 /* Parse the -mtune-ctrl= option.  When DUMP is true,
2833 print the features that are explicitly set or cleared.  */
2836 parse_mtune_ctrl_str (bool dump
)
2838 if (!ix86_tune_ctrl_string
)
/* Work on a writable copy of the comma-separated feature list;
   ORIG keeps the start of the allocation.  */
2841 char *next_feature_string
= NULL
;
2842 char *curr_feature_string
= xstrdup (ix86_tune_ctrl_string
);
2843 char *orig
= curr_feature_string
;
2849 next_feature_string
= strchr (curr_feature_string
, ',');
2850 if (next_feature_string
)
2851 *next_feature_string
++ = '\0';
/* A leading '^' requests clearing the named feature instead of
   setting it.  */
2852 if (*curr_feature_string
== '^')
2854 curr_feature_string
++;
2857 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
2859 if (!strcmp (curr_feature_string
, ix86_tune_feature_names
[i
]))
2861 ix86_tune_features
[i
] = !clear
;
2863 fprintf (stderr
, "Explicitly %s feature %s\n",
2864 clear
? "clear" : "set", ix86_tune_feature_names
[i
]);
/* No feature name matched; report the bad parameter, backing up over
   the stripped '^' so the diagnostic shows what the user wrote.  */
2868 if (i
== X86_TUNE_LAST
)
2869 error ("Unknown parameter to option -mtune-ctrl: %s",
2870 clear
? curr_feature_string
- 1 : curr_feature_string
);
2871 curr_feature_string
= next_feature_string
;
2873 while (curr_feature_string
);
2877 /* Helper function to set ix86_tune_features.  IX86_TUNE is the
   processor to tune for; when DUMP is true the resulting feature
   settings are printed to stderr.  */
2881 set_ix86_tune_features (enum processor_type ix86_tune
, bool dump
)
2883 unsigned int ix86_tune_mask
= 1u << ix86_tune
;
/* Initialize every tuning feature: all zero when default tuning is
   suppressed (ix86_tune_no_default), otherwise this processor's bit
   from the initial feature table.  */
2886 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2888 if (ix86_tune_no_default
)
2889 ix86_tune_features
[i
] = 0;
2891 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
2896 fprintf (stderr
, "List of x86 specific tuning parameter names:\n");
2897 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
2898 fprintf (stderr
, "%s : %s\n", ix86_tune_feature_names
[i
],
2899 ix86_tune_features
[i
] ? "on" : "off");
/* Apply any explicit -mtune-ctrl= overrides on top of the defaults.  */
2902 parse_mtune_ctrl_str (dump
);
2906 /* Override various settings based on options. If MAIN_ARGS_P, the
2907 options are from the command line, otherwise they are from
2911 ix86_option_override_internal (bool main_args_p
,
2912 struct gcc_options
*opts
,
2913 struct gcc_options
*opts_set
)
2916 unsigned int ix86_arch_mask
;
2917 const bool ix86_tune_specified
= (opts
->x_ix86_tune_string
!= NULL
);
2922 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2923 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2924 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2925 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2926 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2927 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2928 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2929 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2930 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2931 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2932 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2933 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2934 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2935 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2936 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2937 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2938 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2939 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2940 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2941 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2942 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2943 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2944 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2945 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2946 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2947 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2948 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2949 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2950 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2951 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2952 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2953 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2954 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2955 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2956 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2957 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2958 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2959 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2960 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2961 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2962 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
2963 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
2964 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
2965 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
2967 /* if this reaches 64, need to widen struct pta flags below */
2971 const char *const name
; /* processor name or nickname. */
2972 const enum processor_type processor
;
2973 const enum attr_cpu schedule
;
2974 const unsigned HOST_WIDE_INT flags
;
2976 const processor_alias_table
[] =
2978 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2979 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2980 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2981 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2982 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2983 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2984 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
2985 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
2986 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2987 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2988 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2989 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2990 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
2991 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2992 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2993 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2994 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2995 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2996 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2997 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2998 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2999 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3000 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3001 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3002 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3003 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3004 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3005 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3006 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
3007 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3008 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
3009 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
3010 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3011 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_FXSR
},
3012 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
3013 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3014 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3015 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
3016 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3017 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
3018 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3019 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3020 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3021 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3022 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
3023 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3024 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3025 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3026 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3027 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
3029 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3030 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3031 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
3032 {"slm", PROCESSOR_SLM
, CPU_SLM
,
3033 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3034 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_MOVBE
3036 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3037 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3038 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3039 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3040 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3041 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3042 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3043 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3044 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3045 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3046 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3047 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3048 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3049 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3050 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3051 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3052 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
| PTA_FXSR
},
3053 {"k8", PROCESSOR_K8
, CPU_K8
,
3054 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3055 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3056 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3057 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3058 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3059 {"opteron", PROCESSOR_K8
, CPU_K8
,
3060 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3061 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3062 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3063 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3064 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3065 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3066 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3067 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3068 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3069 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3070 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3071 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3072 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3073 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3074 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3075 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3076 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3077 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3078 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3079 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3080 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3081 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3082 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3083 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3084 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3085 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3086 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3087 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3088 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3089 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3090 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3091 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
3092 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3093 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3094 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3095 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3096 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
3097 | PTA_XSAVEOPT
| PTA_FSGSBASE
},
3098 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC
,
3099 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3100 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
3101 | PTA_FXSR
| PTA_XSAVE
},
3102 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
3103 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3104 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3105 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3106 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
3107 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3109 {"generic", PROCESSOR_GENERIC
, CPU_GENERIC
,
3111 | PTA_HLE
/* flags are only used for -march switch. */ },
3114 /* -mrecip options. */
3117 const char *string
; /* option name */
3118 unsigned int mask
; /* mask bits to set */
3120 const recip_options
[] =
3122 { "all", RECIP_MASK_ALL
},
3123 { "none", RECIP_MASK_NONE
},
3124 { "div", RECIP_MASK_DIV
},
3125 { "sqrt", RECIP_MASK_SQRT
},
3126 { "vec-div", RECIP_MASK_VEC_DIV
},
3127 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3130 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3132 /* Set up prefix/suffix so the error messages refer to either the command
3133 line argument, or the attribute(target). */
3142 prefix
= "option(\"";
3147 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3148 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3149 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3150 opts
->x_ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3151 #ifdef TARGET_BI_ARCH
3154 #if TARGET_BI_ARCH == 1
3155 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3156 is on and OPTION_MASK_ABI_X32 is off. We turn off
3157 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3159 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3160 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3162 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3163 on and OPTION_MASK_ABI_64 is off. We turn off
3164 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3166 if (TARGET_LP64_P (opts
->x_ix86_isa_flags
))
3167 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3172 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3174 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3175 OPTION_MASK_ABI_64 for TARGET_X32. */
3176 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3177 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3179 else if (TARGET_LP64_P (opts
->x_ix86_isa_flags
))
3181 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3182 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3183 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3184 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3187 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3188 SUBTARGET_OVERRIDE_OPTIONS
;
3191 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3192 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3195 /* -fPIC is the default for x86_64. */
3196 if (TARGET_MACHO
&& TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3197 opts
->x_flag_pic
= 2;
3199 /* Need to check -mtune=generic first. */
3200 if (opts
->x_ix86_tune_string
)
3202 if (!strcmp (opts
->x_ix86_tune_string
, "generic")
3203 || !strcmp (opts
->x_ix86_tune_string
, "i686")
3204 /* As special support for cross compilers we read -mtune=native
3205 as -mtune=generic. With native compilers we won't see the
3206 -mtune=native, as it was changed by the driver. */
3207 || !strcmp (opts
->x_ix86_tune_string
, "native"))
3209 opts
->x_ix86_tune_string
= "generic";
3211 /* If this call is for setting the option attribute, allow the
3212 generic that was previously set. */
3213 else if (!main_args_p
3214 && !strcmp (opts
->x_ix86_tune_string
, "generic"))
3216 else if (!strncmp (opts
->x_ix86_tune_string
, "generic", 7))
3217 error ("bad value (%s) for %stune=%s %s",
3218 opts
->x_ix86_tune_string
, prefix
, suffix
, sw
);
3219 else if (!strcmp (opts
->x_ix86_tune_string
, "x86-64"))
3220 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3221 "%stune=k8%s or %stune=generic%s instead as appropriate",
3222 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3226 if (opts
->x_ix86_arch_string
)
3227 opts
->x_ix86_tune_string
= opts
->x_ix86_arch_string
;
3228 if (!opts
->x_ix86_tune_string
)
3230 opts
->x_ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3231 ix86_tune_defaulted
= 1;
3234 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3235 or defaulted. We need to use a sensible tune option. */
3236 if (!strcmp (opts
->x_ix86_tune_string
, "generic")
3237 || !strcmp (opts
->x_ix86_tune_string
, "x86-64")
3238 || !strcmp (opts
->x_ix86_tune_string
, "i686"))
3240 opts
->x_ix86_tune_string
= "generic";
3244 if (opts
->x_ix86_stringop_alg
== rep_prefix_8_byte
3245 && !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3247 /* rep; movq isn't available in 32-bit code. */
3248 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3249 opts
->x_ix86_stringop_alg
= no_stringop
;
3252 if (!opts
->x_ix86_arch_string
)
3253 opts
->x_ix86_arch_string
3254 = TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3255 ? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3257 ix86_arch_specified
= 1;
3259 if (opts_set
->x_ix86_pmode
)
3261 if ((TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3262 && opts
->x_ix86_pmode
== PMODE_SI
)
3263 || (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3264 && opts
->x_ix86_pmode
== PMODE_DI
))
3265 error ("address mode %qs not supported in the %s bit mode",
3266 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "short" : "long",
3267 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "64" : "32");
3270 opts
->x_ix86_pmode
= TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3271 ? PMODE_DI
: PMODE_SI
;
3273 if (!opts_set
->x_ix86_abi
)
3274 opts
->x_ix86_abi
= DEFAULT_ABI
;
3276 /* For targets using ms ABI enable ms-extensions, if not
3277 explicit turned off. For non-ms ABI we turn off this
3279 if (!opts_set
->x_flag_ms_extensions
)
3280 opts
->x_flag_ms_extensions
= (MS_ABI
== DEFAULT_ABI
);
3282 if (opts_set
->x_ix86_cmodel
)
3284 switch (opts
->x_ix86_cmodel
)
3288 if (opts
->x_flag_pic
)
3289 opts
->x_ix86_cmodel
= CM_SMALL_PIC
;
3290 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3291 error ("code model %qs not supported in the %s bit mode",
3297 if (opts
->x_flag_pic
)
3298 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
;
3299 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3300 error ("code model %qs not supported in the %s bit mode",
3302 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3303 error ("code model %qs not supported in x32 mode",
3309 if (opts
->x_flag_pic
)
3310 opts
->x_ix86_cmodel
= CM_LARGE_PIC
;
3311 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3312 error ("code model %qs not supported in the %s bit mode",
3314 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3315 error ("code model %qs not supported in x32 mode",
3320 if (opts
->x_flag_pic
)
3321 error ("code model %s does not support PIC mode", "32");
3322 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3323 error ("code model %qs not supported in the %s bit mode",
3328 if (opts
->x_flag_pic
)
3330 error ("code model %s does not support PIC mode", "kernel");
3331 opts
->x_ix86_cmodel
= CM_32
;
3333 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3334 error ("code model %qs not supported in the %s bit mode",
3344 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3345 use of rip-relative addressing. This eliminates fixups that
3346 would otherwise be needed if this object is to be placed in a
3347 DLL, and is essentially just as efficient as direct addressing. */
3348 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3349 && (TARGET_RDOS
|| TARGET_PECOFF
))
3350 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
, opts
->x_flag_pic
= 1;
3351 else if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3352 opts
->x_ix86_cmodel
= opts
->x_flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3354 opts
->x_ix86_cmodel
= CM_32
;
3356 if (TARGET_MACHO
&& opts
->x_ix86_asm_dialect
== ASM_INTEL
)
3358 error ("-masm=intel not supported in this configuration");
3359 opts
->x_ix86_asm_dialect
= ASM_ATT
;
3361 if ((TARGET_64BIT_P (opts
->x_ix86_isa_flags
) != 0)
3362 != ((opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3363 sorry ("%i-bit mode not compiled in",
3364 (opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3366 for (i
= 0; i
< pta_size
; i
++)
3367 if (! strcmp (opts
->x_ix86_arch_string
, processor_alias_table
[i
].name
))
3369 ix86_schedule
= processor_alias_table
[i
].schedule
;
3370 ix86_arch
= processor_alias_table
[i
].processor
;
3371 /* Default cpu tuning to the architecture. */
3372 ix86_tune
= ix86_arch
;
3374 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3375 && !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3376 error ("CPU you selected does not support x86-64 "
3379 if (processor_alias_table
[i
].flags
& PTA_MMX
3380 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3381 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3382 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3383 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3384 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3385 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3386 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3387 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3388 if (processor_alias_table
[i
].flags
& PTA_SSE
3389 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3390 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3391 if (processor_alias_table
[i
].flags
& PTA_SSE2
3392 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3393 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3394 if (processor_alias_table
[i
].flags
& PTA_SSE3
3395 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3396 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3397 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3398 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3399 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3400 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3401 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3402 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3403 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3404 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3405 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3406 if (processor_alias_table
[i
].flags
& PTA_AVX
3407 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3408 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3409 if (processor_alias_table
[i
].flags
& PTA_AVX2
3410 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3411 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3412 if (processor_alias_table
[i
].flags
& PTA_FMA
3413 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3414 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3415 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3416 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3417 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3418 if (processor_alias_table
[i
].flags
& PTA_FMA4
3419 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3420 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3421 if (processor_alias_table
[i
].flags
& PTA_XOP
3422 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3423 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3424 if (processor_alias_table
[i
].flags
& PTA_LWP
3425 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3426 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3427 if (processor_alias_table
[i
].flags
& PTA_ABM
3428 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3429 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3430 if (processor_alias_table
[i
].flags
& PTA_BMI
3431 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3432 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3433 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3434 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3435 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3436 if (processor_alias_table
[i
].flags
& PTA_TBM
3437 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3438 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3439 if (processor_alias_table
[i
].flags
& PTA_BMI2
3440 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3441 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3442 if (processor_alias_table
[i
].flags
& PTA_CX16
3443 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3444 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3445 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3446 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3447 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3448 if (!(TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3449 && (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3450 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3451 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3452 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3453 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3454 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3455 if (processor_alias_table
[i
].flags
& PTA_AES
3456 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3457 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3458 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3459 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3460 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3461 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3462 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3463 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3464 if (processor_alias_table
[i
].flags
& PTA_RDRND
3465 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3466 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3467 if (processor_alias_table
[i
].flags
& PTA_F16C
3468 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3469 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3470 if (processor_alias_table
[i
].flags
& PTA_RTM
3471 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3472 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3473 if (processor_alias_table
[i
].flags
& PTA_HLE
3474 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3475 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3476 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3477 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3478 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3479 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3480 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3481 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3482 if (processor_alias_table
[i
].flags
& PTA_ADX
3483 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3484 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3485 if (processor_alias_table
[i
].flags
& PTA_FXSR
3486 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3487 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3488 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3489 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3490 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3491 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3492 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3493 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3494 if (processor_alias_table
[i
].flags
& PTA_AVX512F
3495 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512F
))
3496 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512F
;
3497 if (processor_alias_table
[i
].flags
& PTA_AVX512ER
3498 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512ER
))
3499 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512ER
;
3500 if (processor_alias_table
[i
].flags
& PTA_AVX512PF
3501 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512PF
))
3502 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512PF
;
3503 if (processor_alias_table
[i
].flags
& PTA_AVX512CD
3504 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512CD
))
3505 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512CD
;
3506 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3507 x86_prefetch_sse
= true;
3512 if (!strcmp (opts
->x_ix86_arch_string
, "generic"))
3513 error ("generic CPU can be used only for %stune=%s %s",
3514 prefix
, suffix
, sw
);
3515 else if (!strncmp (opts
->x_ix86_arch_string
, "generic", 7) || i
== pta_size
)
3516 error ("bad value (%s) for %sarch=%s %s",
3517 opts
->x_ix86_arch_string
, prefix
, suffix
, sw
);
3519 ix86_arch_mask
= 1u << ix86_arch
;
3520 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3521 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3523 for (i
= 0; i
< pta_size
; i
++)
3524 if (! strcmp (opts
->x_ix86_tune_string
, processor_alias_table
[i
].name
))
3526 ix86_schedule
= processor_alias_table
[i
].schedule
;
3527 ix86_tune
= processor_alias_table
[i
].processor
;
3528 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3530 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3532 if (ix86_tune_defaulted
)
3534 opts
->x_ix86_tune_string
= "x86-64";
3535 for (i
= 0; i
< pta_size
; i
++)
3536 if (! strcmp (opts
->x_ix86_tune_string
,
3537 processor_alias_table
[i
].name
))
3539 ix86_schedule
= processor_alias_table
[i
].schedule
;
3540 ix86_tune
= processor_alias_table
[i
].processor
;
3543 error ("CPU you selected does not support x86-64 "
3547 /* Intel CPUs have always interpreted SSE prefetch instructions as
3548 NOPs; so, we can enable SSE prefetch instructions even when
3549 -mtune (rather than -march) points us to a processor that has them.
3550 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3551 higher processors. */
3553 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3554 x86_prefetch_sse
= true;
3558 if (ix86_tune_specified
&& i
== pta_size
)
3559 error ("bad value (%s) for %stune=%s %s",
3560 opts
->x_ix86_tune_string
, prefix
, suffix
, sw
);
3562 set_ix86_tune_features (ix86_tune
, opts
->x_ix86_dump_tunes
);
3564 #ifndef USE_IX86_FRAME_POINTER
3565 #define USE_IX86_FRAME_POINTER 0
3568 #ifndef USE_X86_64_FRAME_POINTER
3569 #define USE_X86_64_FRAME_POINTER 0
3572 /* Set the default values for switches whose default depends on TARGET_64BIT
3573 in case they weren't overwritten by command line options. */
3574 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3576 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
3577 opts
->x_flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3578 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
3579 opts
->x_flag_unwind_tables
3580 = opts
->x_flag_asynchronous_unwind_tables
= 1;
3581 if (opts
->x_flag_pcc_struct_return
== 2)
3582 opts
->x_flag_pcc_struct_return
= 0;
3586 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
3587 opts
->x_flag_omit_frame_pointer
3588 = !(USE_IX86_FRAME_POINTER
|| opts
->x_optimize_size
);
3589 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
3590 opts
->x_flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3591 if (opts
->x_flag_pcc_struct_return
== 2)
3592 opts
->x_flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3595 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3596 if (opts
->x_optimize_size
)
3597 ix86_cost
= &ix86_size_cost
;
3599 ix86_cost
= ix86_tune_cost
;
3601 /* Arrange to set up i386_stack_locals for all functions. */
3602 init_machine_status
= ix86_init_machine_status
;
3604 /* Validate -mregparm= value. */
3605 if (opts_set
->x_ix86_regparm
)
3607 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3608 warning (0, "-mregparm is ignored in 64-bit mode");
3609 if (opts
->x_ix86_regparm
> REGPARM_MAX
)
3611 error ("-mregparm=%d is not between 0 and %d",
3612 opts
->x_ix86_regparm
, REGPARM_MAX
);
3613 opts
->x_ix86_regparm
= 0;
3616 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3617 opts
->x_ix86_regparm
= REGPARM_MAX
;
3619 /* Default align_* from the processor table. */
3620 if (opts
->x_align_loops
== 0)
3622 opts
->x_align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3623 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3625 if (opts
->x_align_jumps
== 0)
3627 opts
->x_align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3628 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3630 if (opts
->x_align_functions
== 0)
3632 opts
->x_align_functions
= processor_target_table
[ix86_tune
].align_func
;
3635 /* Provide default for -mbranch-cost= value. */
3636 if (!opts_set
->x_ix86_branch_cost
)
3637 opts
->x_ix86_branch_cost
= ix86_cost
->branch_cost
;
3639 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3641 opts
->x_target_flags
3642 |= TARGET_SUBTARGET64_DEFAULT
& ~opts_set
->x_target_flags
;
3644 /* Enable by default the SSE and MMX builtins. Do allow the user to
3645 explicitly disable any of these. In particular, disabling SSE and
3646 MMX for kernel code is extremely useful. */
3647 if (!ix86_arch_specified
)
3648 opts
->x_ix86_isa_flags
3649 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3650 | TARGET_SUBTARGET64_ISA_DEFAULT
)
3651 & ~opts
->x_ix86_isa_flags_explicit
);
3653 if (TARGET_RTD_P (opts
->x_target_flags
))
3654 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3658 opts
->x_target_flags
3659 |= TARGET_SUBTARGET32_DEFAULT
& ~opts_set
->x_target_flags
;
3661 if (!ix86_arch_specified
)
3662 opts
->x_ix86_isa_flags
3663 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~opts
->x_ix86_isa_flags_explicit
;
3665 /* i386 ABI does not specify red zone. It still makes sense to use it
3666 when programmer takes care to stack from being destroyed. */
3667 if (!(opts_set
->x_target_flags
& MASK_NO_RED_ZONE
))
3668 opts
->x_target_flags
|= MASK_NO_RED_ZONE
;
3671 /* Keep nonleaf frame pointers. */
3672 if (opts
->x_flag_omit_frame_pointer
)
3673 opts
->x_target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3674 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts
->x_target_flags
))
3675 opts
->x_flag_omit_frame_pointer
= 1;
3677 /* If we're doing fast math, we don't care about comparison order
3678 wrt NaNs. This lets us use a shorter comparison sequence. */
3679 if (opts
->x_flag_finite_math_only
)
3680 opts
->x_target_flags
&= ~MASK_IEEE_FP
;
3682 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3683 since the insns won't need emulation. */
3684 if (ix86_tune_features
[X86_TUNE_ALWAYS_FANCY_MATH_387
])
3685 opts
->x_target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3687 /* Likewise, if the target doesn't have a 387, or we've specified
3688 software floating point, don't use 387 inline intrinsics. */
3689 if (!TARGET_80387_P (opts
->x_target_flags
))
3690 opts
->x_target_flags
|= MASK_NO_FANCY_MATH_387
;
3692 /* Turn on MMX builtins for -msse. */
3693 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3694 opts
->x_ix86_isa_flags
3695 |= OPTION_MASK_ISA_MMX
& ~opts
->x_ix86_isa_flags_explicit
;
3697 /* Enable SSE prefetch. */
3698 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
)
3699 || (TARGET_PRFCHW
&& !TARGET_3DNOW_P (opts
->x_ix86_isa_flags
)))
3700 x86_prefetch_sse
= true;
3702 /* Enable prefetch{,w} instructions for -m3dnow. */
3703 if (TARGET_3DNOW_P (opts
->x_ix86_isa_flags
))
3704 opts
->x_ix86_isa_flags
3705 |= OPTION_MASK_ISA_PRFCHW
& ~opts
->x_ix86_isa_flags_explicit
;
3707 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3708 if (TARGET_SSE4_2_P (opts
->x_ix86_isa_flags
)
3709 || TARGET_ABM_P (opts
->x_ix86_isa_flags
))
3710 opts
->x_ix86_isa_flags
3711 |= OPTION_MASK_ISA_POPCNT
& ~opts
->x_ix86_isa_flags_explicit
;
3713 /* Enable lzcnt instruction for -mabm. */
3714 if (TARGET_ABM_P(opts
->x_ix86_isa_flags
))
3715 opts
->x_ix86_isa_flags
3716 |= OPTION_MASK_ISA_LZCNT
& ~opts
->x_ix86_isa_flags_explicit
;
3718 /* Validate -mpreferred-stack-boundary= value or default it to
3719 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3720 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3721 if (opts_set
->x_ix86_preferred_stack_boundary_arg
)
3723 int min
= (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3724 ? (TARGET_SSE_P (opts
->x_ix86_isa_flags
) ? 4 : 3) : 2);
3725 int max
= (TARGET_SEH
? 4 : 12);
3727 if (opts
->x_ix86_preferred_stack_boundary_arg
< min
3728 || opts
->x_ix86_preferred_stack_boundary_arg
> max
)
3731 error ("-mpreferred-stack-boundary is not supported "
3734 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3735 opts
->x_ix86_preferred_stack_boundary_arg
, min
, max
);
3738 ix86_preferred_stack_boundary
3739 = (1 << opts
->x_ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3742 /* Set the default value for -mstackrealign. */
3743 if (opts
->x_ix86_force_align_arg_pointer
== -1)
3744 opts
->x_ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3746 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3748 /* Validate -mincoming-stack-boundary= value or default it to
3749 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3750 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3751 if (opts_set
->x_ix86_incoming_stack_boundary_arg
)
3753 if (ix86_incoming_stack_boundary_arg
3754 < (TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? 4 : 2)
3755 || ix86_incoming_stack_boundary_arg
> 12)
3756 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3757 ix86_incoming_stack_boundary_arg
,
3758 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? 4 : 2);
3761 ix86_user_incoming_stack_boundary
3762 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3763 ix86_incoming_stack_boundary
3764 = ix86_user_incoming_stack_boundary
;
3768 /* Accept -msseregparm only if at least SSE support is enabled. */
3769 if (TARGET_SSEREGPARM_P (opts
->x_target_flags
)
3770 && ! TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3771 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3773 if (opts_set
->x_ix86_fpmath
)
3775 if (opts
->x_ix86_fpmath
& FPMATH_SSE
)
3777 if (!TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3779 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3780 opts
->x_ix86_fpmath
= FPMATH_387
;
3782 else if ((opts
->x_ix86_fpmath
& FPMATH_387
)
3783 && !TARGET_80387_P (opts
->x_target_flags
))
3785 warning (0, "387 instruction set disabled, using SSE arithmetics");
3786 opts
->x_ix86_fpmath
= FPMATH_SSE
;
3790 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3791 fpmath=387. The second is however default at many targets since the
3792 extra 80bit precision of temporaries is considered to be part of ABI.
3793 Overwrite the default at least for -ffast-math.
3794 TODO: -mfpmath=both seems to produce same performing code with bit
3795 smaller binaries. It is however not clear if register allocation is
3796 ready for this setting.
3797 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
3798 codegen. We may switch to 387 with -ffast-math for size optimized
3800 else if (fast_math_flags_set_p (&global_options
)
3802 ix86_fpmath
= FPMATH_SSE
;
3804 opts
->x_ix86_fpmath
= TARGET_FPMATH_DEFAULT_P (opts
->x_ix86_isa_flags
);
3806 /* If the i387 is disabled, then do not return values in it. */
3807 if (!TARGET_80387_P (opts
->x_target_flags
))
3808 opts
->x_target_flags
&= ~MASK_FLOAT_RETURNS
;
3810 /* Use external vectorized library in vectorizing intrinsics. */
3811 if (opts_set
->x_ix86_veclibabi_type
)
3812 switch (opts
->x_ix86_veclibabi_type
)
3814 case ix86_veclibabi_type_svml
:
3815 ix86_veclib_handler
= ix86_veclibabi_svml
;
3818 case ix86_veclibabi_type_acml
:
3819 ix86_veclib_handler
= ix86_veclibabi_acml
;
3826 if (ix86_tune_features
[X86_TUNE_ACCUMULATE_OUTGOING_ARGS
]
3827 && !(opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3828 && !opts
->x_optimize_size
)
3829 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3831 /* If stack probes are required, the space used for large function
3832 arguments on the stack must also be probed, so enable
3833 -maccumulate-outgoing-args so this happens in the prologue. */
3834 if (TARGET_STACK_PROBE_P (opts
->x_target_flags
)
3835 && !(opts
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3837 if (opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3838 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3839 "for correctness", prefix
, suffix
);
3840 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3843 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3846 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3847 p
= strchr (internal_label_prefix
, 'X');
3848 internal_label_prefix_len
= p
- internal_label_prefix
;
3852 /* When scheduling description is not available, disable scheduler pass
3853 so it won't slow down the compilation and make x87 code slower. */
3854 if (!TARGET_SCHEDULE
)
3855 opts
->x_flag_schedule_insns_after_reload
= opts
->x_flag_schedule_insns
= 0;
3857 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3858 ix86_tune_cost
->simultaneous_prefetches
,
3859 opts
->x_param_values
,
3860 opts_set
->x_param_values
);
3861 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3862 ix86_tune_cost
->prefetch_block
,
3863 opts
->x_param_values
,
3864 opts_set
->x_param_values
);
3865 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3866 ix86_tune_cost
->l1_cache_size
,
3867 opts
->x_param_values
,
3868 opts_set
->x_param_values
);
3869 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3870 ix86_tune_cost
->l2_cache_size
,
3871 opts
->x_param_values
,
3872 opts_set
->x_param_values
);
3874 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3875 if (opts
->x_flag_prefetch_loop_arrays
< 0
3877 && (opts
->x_optimize
>= 3 || opts
->x_flag_profile_use
)
3878 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3879 opts
->x_flag_prefetch_loop_arrays
= 1;
3881 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3882 can be opts->x_optimized to ap = __builtin_next_arg (0). */
3883 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && !opts
->x_flag_split_stack
)
3884 targetm
.expand_builtin_va_start
= NULL
;
3886 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3888 ix86_gen_leave
= gen_leave_rex64
;
3889 if (Pmode
== DImode
)
3891 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3892 ix86_gen_tls_local_dynamic_base_64
3893 = gen_tls_local_dynamic_base_64_di
;
3897 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3898 ix86_gen_tls_local_dynamic_base_64
3899 = gen_tls_local_dynamic_base_64_si
;
3903 ix86_gen_leave
= gen_leave
;
3905 if (Pmode
== DImode
)
3907 ix86_gen_add3
= gen_adddi3
;
3908 ix86_gen_sub3
= gen_subdi3
;
3909 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3910 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3911 ix86_gen_andsp
= gen_anddi3
;
3912 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3913 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3914 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3915 ix86_gen_monitor
= gen_sse3_monitor_di
;
3919 ix86_gen_add3
= gen_addsi3
;
3920 ix86_gen_sub3
= gen_subsi3
;
3921 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3922 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3923 ix86_gen_andsp
= gen_andsi3
;
3924 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3925 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3926 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3927 ix86_gen_monitor
= gen_sse3_monitor_si
;
3931 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3932 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3933 opts
->x_target_flags
|= MASK_CLD
& ~opts_set
->x_target_flags
;
3936 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && opts
->x_flag_pic
)
3938 if (opts
->x_flag_fentry
> 0)
3939 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3941 opts
->x_flag_fentry
= 0;
3943 else if (TARGET_SEH
)
3945 if (opts
->x_flag_fentry
== 0)
3946 sorry ("-mno-fentry isn%'t compatible with SEH");
3947 opts
->x_flag_fentry
= 1;
3949 else if (opts
->x_flag_fentry
< 0)
3951 #if defined(PROFILE_BEFORE_PROLOGUE)
3952 opts
->x_flag_fentry
= 1;
3954 opts
->x_flag_fentry
= 0;
3958 /* When not opts->x_optimize for size, enable vzeroupper optimization for
3959 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3960 AVX unaligned load/store. */
3961 if (!opts
->x_optimize_size
)
3963 if (flag_expensive_optimizations
3964 && !(opts_set
->x_target_flags
& MASK_VZEROUPPER
))
3965 opts
->x_target_flags
|= MASK_VZEROUPPER
;
3966 if (!ix86_tune_features
[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL
]
3967 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
3968 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
3969 if (!ix86_tune_features
[X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL
]
3970 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
3971 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
3972 /* Enable 128-bit AVX instruction generation
3973 for the auto-vectorizer. */
3974 if (TARGET_AVX128_OPTIMAL
3975 && !(opts_set
->x_target_flags
& MASK_PREFER_AVX128
))
3976 opts
->x_target_flags
|= MASK_PREFER_AVX128
;
3979 if (opts
->x_ix86_recip_name
)
3981 char *p
= ASTRDUP (opts
->x_ix86_recip_name
);
3983 unsigned int mask
, i
;
3986 while ((q
= strtok (p
, ",")) != NULL
)
3997 if (!strcmp (q
, "default"))
3998 mask
= RECIP_MASK_ALL
;
4001 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4002 if (!strcmp (q
, recip_options
[i
].string
))
4004 mask
= recip_options
[i
].mask
;
4008 if (i
== ARRAY_SIZE (recip_options
))
4010 error ("unknown option for -mrecip=%s", q
);
4012 mask
= RECIP_MASK_NONE
;
4016 opts
->x_recip_mask_explicit
|= mask
;
4018 opts
->x_recip_mask
&= ~mask
;
4020 opts
->x_recip_mask
|= mask
;
4024 if (TARGET_RECIP_P (opts
->x_target_flags
))
4025 opts
->x_recip_mask
|= RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
;
4026 else if (opts_set
->x_target_flags
& MASK_RECIP
)
4027 opts
->x_recip_mask
&= ~(RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
);
4029 /* Default long double to 64-bit for Bionic. */
4030 if (TARGET_HAS_BIONIC
4031 && !(opts_set
->x_target_flags
& MASK_LONG_DOUBLE_64
))
4032 opts
->x_target_flags
|= MASK_LONG_DOUBLE_64
;
4034 /* Save the initial options in case the user does function specific
4037 target_option_default_node
= target_option_current_node
4038 = build_target_option_node (opts
);
4040 /* Handle stack protector */
4041 if (!opts_set
->x_ix86_stack_protector_guard
)
4042 opts
->x_ix86_stack_protector_guard
4043 = TARGET_HAS_BIONIC
? SSP_GLOBAL
: SSP_TLS
;
4045 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4046 if (opts
->x_ix86_tune_memcpy_strategy
)
4048 char *str
= xstrdup (opts
->x_ix86_tune_memcpy_strategy
);
4049 ix86_parse_stringop_strategy_string (str
, false);
4053 if (opts
->x_ix86_tune_memset_strategy
)
4055 char *str
= xstrdup (opts
->x_ix86_tune_memset_strategy
);
4056 ix86_parse_stringop_strategy_string (str
, true);
4061 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4064 ix86_option_override (void)
4066 opt_pass
*pass_insert_vzeroupper
= make_pass_insert_vzeroupper (g
);
4067 static struct register_pass_info insert_vzeroupper_info
4068 = { pass_insert_vzeroupper
, "reload",
4069 1, PASS_POS_INSERT_AFTER
4072 ix86_option_override_internal (true, &global_options
, &global_options_set
);
4075 /* This needs to be done at start up. It's convenient to do it here. */
4076 register_pass (&insert_vzeroupper_info
);
4079 /* Update register usage after having seen the compiler flags. */
4082 ix86_conditional_register_usage (void)
4087 /* The PIC register, if it exists, is fixed. */
4088 j
= PIC_OFFSET_TABLE_REGNUM
;
4089 if (j
!= INVALID_REGNUM
)
4090 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4092 /* For 32-bit targets, squash the REX registers. */
4095 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4096 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4097 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4098 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4099 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
4100 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4103 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4104 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
4105 : TARGET_64BIT
? (1 << 2)
4108 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4110 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4112 /* Set/reset conditionally defined registers from
4113 CALL_USED_REGISTERS initializer. */
4114 if (call_used_regs
[i
] > 1)
4115 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
4117 /* Calculate registers of CLOBBERED_REGS register set
4118 as call used registers from GENERAL_REGS register set. */
4119 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4120 && call_used_regs
[i
])
4121 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4124 /* If MMX is disabled, squash the registers. */
4126 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4127 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4128 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4130 /* If SSE is disabled, squash the registers. */
4132 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4133 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4134 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4136 /* If the FPU is disabled, squash the registers. */
4137 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4138 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4139 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4140 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4142 /* If AVX512F is disabled, squash the registers. */
4143 if (! TARGET_AVX512F
)
4145 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
4146 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4148 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
4149 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4154 /* Save the current options */
4157 ix86_function_specific_save (struct cl_target_option
*ptr
,
4158 struct gcc_options
*opts
)
4160 ptr
->arch
= ix86_arch
;
4161 ptr
->schedule
= ix86_schedule
;
4162 ptr
->tune
= ix86_tune
;
4163 ptr
->branch_cost
= ix86_branch_cost
;
4164 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4165 ptr
->arch_specified
= ix86_arch_specified
;
4166 ptr
->x_ix86_isa_flags_explicit
= opts
->x_ix86_isa_flags_explicit
;
4167 ptr
->x_ix86_target_flags_explicit
= opts
->x_ix86_target_flags_explicit
;
4168 ptr
->x_recip_mask_explicit
= opts
->x_recip_mask_explicit
;
4170 /* The fields are char but the variables are not; make sure the
4171 values fit in the fields. */
4172 gcc_assert (ptr
->arch
== ix86_arch
);
4173 gcc_assert (ptr
->schedule
== ix86_schedule
);
4174 gcc_assert (ptr
->tune
== ix86_tune
);
4175 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4178 /* Restore the current options */
4181 ix86_function_specific_restore (struct gcc_options
*opts
,
4182 struct cl_target_option
*ptr
)
4184 enum processor_type old_tune
= ix86_tune
;
4185 enum processor_type old_arch
= ix86_arch
;
4186 unsigned int ix86_arch_mask
;
4189 ix86_arch
= (enum processor_type
) ptr
->arch
;
4190 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4191 ix86_tune
= (enum processor_type
) ptr
->tune
;
4192 opts
->x_ix86_branch_cost
= ptr
->branch_cost
;
4193 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4194 ix86_arch_specified
= ptr
->arch_specified
;
4195 opts
->x_ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4196 opts
->x_ix86_target_flags_explicit
= ptr
->x_ix86_target_flags_explicit
;
4197 opts
->x_recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4199 /* Recreate the arch feature tests if the arch changed */
4200 if (old_arch
!= ix86_arch
)
4202 ix86_arch_mask
= 1u << ix86_arch
;
4203 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4204 ix86_arch_features
[i
]
4205 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4208 /* Recreate the tune optimization tests */
4209 if (old_tune
!= ix86_tune
)
4210 set_ix86_tune_features (ix86_tune
, false);
4213 /* Print the current options */
4216 ix86_function_specific_print (FILE *file
, int indent
,
4217 struct cl_target_option
*ptr
)
4220 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4221 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4223 fprintf (file
, "%*sarch = %d (%s)\n",
4226 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4227 ? cpu_names
[ptr
->arch
]
4230 fprintf (file
, "%*stune = %d (%s)\n",
4233 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4234 ? cpu_names
[ptr
->tune
]
4237 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4241 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4242 free (target_string
);
4247 /* Inner function to process the attribute((target(...))), take an argument and
4248 set the current options from the argument. If we have a list, recursively go
4252 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4253 struct gcc_options
*opts
,
4254 struct gcc_options
*opts_set
,
4255 struct gcc_options
*enum_opts_set
)
4260 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4261 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4262 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4263 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4264 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4280 enum ix86_opt_type type
;
4285 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4286 IX86_ATTR_ISA ("abm", OPT_mabm
),
4287 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4288 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4289 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4290 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4291 IX86_ATTR_ISA ("aes", OPT_maes
),
4292 IX86_ATTR_ISA ("avx", OPT_mavx
),
4293 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4294 IX86_ATTR_ISA ("avx512f", OPT_mavx512f
),
4295 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf
),
4296 IX86_ATTR_ISA ("avx512er", OPT_mavx512er
),
4297 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd
),
4298 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4299 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4300 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4301 IX86_ATTR_ISA ("sse", OPT_msse
),
4302 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4303 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4304 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4305 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4306 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4307 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4308 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4309 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4310 IX86_ATTR_ISA ("fma", OPT_mfma
),
4311 IX86_ATTR_ISA ("xop", OPT_mxop
),
4312 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4313 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4314 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4315 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4316 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4317 IX86_ATTR_ISA ("hle", OPT_mhle
),
4318 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4319 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4320 IX86_ATTR_ISA ("adx", OPT_madx
),
4321 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4322 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4323 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4326 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4328 /* string options */
4329 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4330 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4333 IX86_ATTR_YES ("cld",
4337 IX86_ATTR_NO ("fancy-math-387",
4338 OPT_mfancy_math_387
,
4339 MASK_NO_FANCY_MATH_387
),
4341 IX86_ATTR_YES ("ieee-fp",
4345 IX86_ATTR_YES ("inline-all-stringops",
4346 OPT_minline_all_stringops
,
4347 MASK_INLINE_ALL_STRINGOPS
),
4349 IX86_ATTR_YES ("inline-stringops-dynamically",
4350 OPT_minline_stringops_dynamically
,
4351 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4353 IX86_ATTR_NO ("align-stringops",
4354 OPT_mno_align_stringops
,
4355 MASK_NO_ALIGN_STRINGOPS
),
4357 IX86_ATTR_YES ("recip",
4363 /* If this is a list, recurse to get the options. */
4364 if (TREE_CODE (args
) == TREE_LIST
)
4368 for (; args
; args
= TREE_CHAIN (args
))
4369 if (TREE_VALUE (args
)
4370 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4371 p_strings
, opts
, opts_set
,
4378 else if (TREE_CODE (args
) != STRING_CST
)
4380 error ("attribute %<target%> argument not a string");
4384 /* Handle multiple arguments separated by commas. */
4385 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4387 while (next_optstr
&& *next_optstr
!= '\0')
4389 char *p
= next_optstr
;
4391 char *comma
= strchr (next_optstr
, ',');
4392 const char *opt_string
;
4393 size_t len
, opt_len
;
4398 enum ix86_opt_type type
= ix86_opt_unknown
;
4404 len
= comma
- next_optstr
;
4405 next_optstr
= comma
+ 1;
4413 /* Recognize no-xxx. */
4414 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4423 /* Find the option. */
4426 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4428 type
= attrs
[i
].type
;
4429 opt_len
= attrs
[i
].len
;
4430 if (ch
== attrs
[i
].string
[0]
4431 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4434 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4437 mask
= attrs
[i
].mask
;
4438 opt_string
= attrs
[i
].string
;
4443 /* Process the option. */
4446 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4450 else if (type
== ix86_opt_isa
)
4452 struct cl_decoded_option decoded
;
4454 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4455 ix86_handle_option (opts
, opts_set
,
4456 &decoded
, input_location
);
4459 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4461 if (type
== ix86_opt_no
)
4462 opt_set_p
= !opt_set_p
;
4465 opts
->x_target_flags
|= mask
;
4467 opts
->x_target_flags
&= ~mask
;
4470 else if (type
== ix86_opt_str
)
4474 error ("option(\"%s\") was already specified", opt_string
);
4478 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4481 else if (type
== ix86_opt_enum
)
4486 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4488 set_option (opts
, enum_opts_set
, opt
, value
,
4489 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4493 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4505 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4508 ix86_valid_target_attribute_tree (tree args
,
4509 struct gcc_options
*opts
,
4510 struct gcc_options
*opts_set
)
4512 const char *orig_arch_string
= ix86_arch_string
;
4513 const char *orig_tune_string
= ix86_tune_string
;
4514 enum fpmath_unit orig_fpmath_set
= opts_set
->x_ix86_fpmath
;
4515 int orig_tune_defaulted
= ix86_tune_defaulted
;
4516 int orig_arch_specified
= ix86_arch_specified
;
4517 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4520 struct cl_target_option
*def
4521 = TREE_TARGET_OPTION (target_option_default_node
);
4522 struct gcc_options enum_opts_set
;
4524 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4526 /* Process each of the options on the chain. */
4527 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
, opts
,
4528 opts_set
, &enum_opts_set
))
4529 return error_mark_node
;
4531 /* If the changed options are different from the default, rerun
4532 ix86_option_override_internal, and then save the options away.
4533 The string options are are attribute options, and will be undone
4534 when we copy the save structure. */
4535 if (opts
->x_ix86_isa_flags
!= def
->x_ix86_isa_flags
4536 || opts
->x_target_flags
!= def
->x_target_flags
4537 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4538 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4539 || enum_opts_set
.x_ix86_fpmath
)
4541 /* If we are using the default tune= or arch=, undo the string assigned,
4542 and use the default. */
4543 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4544 opts
->x_ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4545 else if (!orig_arch_specified
)
4546 opts
->x_ix86_arch_string
= NULL
;
4548 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4549 opts
->x_ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4550 else if (orig_tune_defaulted
)
4551 opts
->x_ix86_tune_string
= NULL
;
4553 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4554 if (enum_opts_set
.x_ix86_fpmath
)
4555 opts_set
->x_ix86_fpmath
= (enum fpmath_unit
) 1;
4556 else if (!TARGET_64BIT
&& TARGET_SSE
)
4558 opts
->x_ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4559 opts_set
->x_ix86_fpmath
= (enum fpmath_unit
) 1;
4562 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4563 ix86_option_override_internal (false, opts
, opts_set
);
4565 /* Add any builtin functions with the new isa if any. */
4566 ix86_add_new_builtins (opts
->x_ix86_isa_flags
);
4568 /* Save the current options unless we are validating options for
4570 t
= build_target_option_node (opts
);
4572 opts
->x_ix86_arch_string
= orig_arch_string
;
4573 opts
->x_ix86_tune_string
= orig_tune_string
;
4574 opts_set
->x_ix86_fpmath
= orig_fpmath_set
;
4576 /* Free up memory allocated to hold the strings */
4577 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4578 free (option_strings
[i
]);
4584 /* Hook to validate attribute((target("string"))). */
4587 ix86_valid_target_attribute_p (tree fndecl
,
4588 tree
ARG_UNUSED (name
),
4590 int ARG_UNUSED (flags
))
4592 struct gcc_options func_options
;
4593 tree new_target
, new_optimize
;
4596 /* attribute((target("default"))) does nothing, beyond
4597 affecting multi-versioning. */
4598 if (TREE_VALUE (args
)
4599 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
4600 && TREE_CHAIN (args
) == NULL_TREE
4601 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
4604 tree old_optimize
= build_optimization_node (&global_options
);
4606 /* Get the optimization options of the current function. */
4607 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4610 func_optimize
= old_optimize
;
4612 /* Init func_options. */
4613 memset (&func_options
, 0, sizeof (func_options
));
4614 init_options_struct (&func_options
, NULL
);
4615 lang_hooks
.init_options_struct (&func_options
);
4617 cl_optimization_restore (&func_options
,
4618 TREE_OPTIMIZATION (func_optimize
));
4620 /* Initialize func_options to the default before its target options can
4622 cl_target_option_restore (&func_options
,
4623 TREE_TARGET_OPTION (target_option_default_node
));
4625 new_target
= ix86_valid_target_attribute_tree (args
, &func_options
,
4626 &global_options_set
);
4628 new_optimize
= build_optimization_node (&func_options
);
4630 if (new_target
== error_mark_node
)
4633 else if (fndecl
&& new_target
)
4635 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4637 if (old_optimize
!= new_optimize
)
4638 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4645 /* Hook to determine if one function can safely inline another. */
4648 ix86_can_inline_p (tree caller
, tree callee
)
4651 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4652 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4654 /* If callee has no option attributes, then it is ok to inline. */
4658 /* If caller has no option attributes, but callee does then it is not ok to
4660 else if (!caller_tree
)
4665 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4666 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4668 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4669 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4671 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4672 != callee_opts
->x_ix86_isa_flags
)
4675 /* See if we have the same non-isa options. */
4676 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4679 /* See if arch, tune, etc. are the same. */
4680 else if (caller_opts
->arch
!= callee_opts
->arch
)
4683 else if (caller_opts
->tune
!= callee_opts
->tune
)
4686 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4689 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4700 /* Remember the last target of ix86_set_current_function. */
4701 static GTY(()) tree ix86_previous_fndecl
;
4703 /* Invalidate ix86_previous_fndecl cache. */
4705 ix86_reset_previous_fndecl (void)
4707 ix86_previous_fndecl
= NULL_TREE
;
4710 /* Establish appropriate back-end context for processing the function
4711 FNDECL. The argument might be NULL to indicate processing at top
4712 level, outside of any function scope. */
4714 ix86_set_current_function (tree fndecl
)
4716 /* Only change the context if the function changes. This hook is called
4717 several times in the course of compiling a function, and we don't want to
4718 slow things down too much or call target_reinit when it isn't safe. */
4719 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4721 tree old_tree
= (ix86_previous_fndecl
4722 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4725 tree new_tree
= (fndecl
4726 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4729 ix86_previous_fndecl
= fndecl
;
4730 if (old_tree
== new_tree
)
4735 cl_target_option_restore (&global_options
,
4736 TREE_TARGET_OPTION (new_tree
));
4742 struct cl_target_option
*def
4743 = TREE_TARGET_OPTION (target_option_current_node
);
4745 cl_target_option_restore (&global_options
, def
);
4752 /* Return true if this goes in large data/bss. */
4755 ix86_in_large_data_p (tree exp
)
4757 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4760 /* Functions are never large data. */
4761 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4764 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4766 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4767 if (strcmp (section
, ".ldata") == 0
4768 || strcmp (section
, ".lbss") == 0)
4774 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4776 /* If this is an incomplete type with size 0, then we can't put it
4777 in data because it might be too big when completed. */
4778 if (!size
|| size
> ix86_section_threshold
)
4785 /* Switch to the appropriate section for output of DECL.
4786 DECL is either a `VAR_DECL' node or a constant of some sort.
4787 RELOC indicates whether forming the initial value of DECL requires
4788 link-time relocations. */
4790 ATTRIBUTE_UNUSED
static section
*
4791 x86_64_elf_select_section (tree decl
, int reloc
,
4792 unsigned HOST_WIDE_INT align
)
4794 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4795 && ix86_in_large_data_p (decl
))
4797 const char *sname
= NULL
;
4798 unsigned int flags
= SECTION_WRITE
;
4799 switch (categorize_decl_for_section (decl
, reloc
))
4804 case SECCAT_DATA_REL
:
4805 sname
= ".ldata.rel";
4807 case SECCAT_DATA_REL_LOCAL
:
4808 sname
= ".ldata.rel.local";
4810 case SECCAT_DATA_REL_RO
:
4811 sname
= ".ldata.rel.ro";
4813 case SECCAT_DATA_REL_RO_LOCAL
:
4814 sname
= ".ldata.rel.ro.local";
4818 flags
|= SECTION_BSS
;
4821 case SECCAT_RODATA_MERGE_STR
:
4822 case SECCAT_RODATA_MERGE_STR_INIT
:
4823 case SECCAT_RODATA_MERGE_CONST
:
4827 case SECCAT_SRODATA
:
4834 /* We don't split these for medium model. Place them into
4835 default sections and hope for best. */
4840 /* We might get called with string constants, but get_named_section
4841 doesn't like them as they are not DECLs. Also, we need to set
4842 flags in that case. */
4844 return get_section (sname
, flags
, NULL
);
4845 return get_named_section (decl
, sname
, reloc
);
4848 return default_elf_select_section (decl
, reloc
, align
);
4851 /* Select a set of attributes for section NAME based on the properties
4852 of DECL and whether or not RELOC indicates that DECL's initializer
4853 might contain runtime relocations. */
4855 static unsigned int ATTRIBUTE_UNUSED
4856 x86_64_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
4858 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
4860 if (decl
== NULL_TREE
4861 && (strcmp (name
, ".ldata.rel.ro") == 0
4862 || strcmp (name
, ".ldata.rel.ro.local") == 0))
4863 flags
|= SECTION_RELRO
;
4865 if (strcmp (name
, ".lbss") == 0
4866 || strncmp (name
, ".lbss.", 5) == 0
4867 || strncmp (name
, ".gnu.linkonce.lb.", 16) == 0)
4868 flags
|= SECTION_BSS
;
4873 /* Build up a unique section name, expressed as a
4874 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4875 RELOC indicates whether the initial value of EXP requires
4876 link-time relocations. */
4878 static void ATTRIBUTE_UNUSED
4879 x86_64_elf_unique_section (tree decl
, int reloc
)
4881 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4882 && ix86_in_large_data_p (decl
))
4884 const char *prefix
= NULL
;
4885 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4886 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4888 switch (categorize_decl_for_section (decl
, reloc
))
4891 case SECCAT_DATA_REL
:
4892 case SECCAT_DATA_REL_LOCAL
:
4893 case SECCAT_DATA_REL_RO
:
4894 case SECCAT_DATA_REL_RO_LOCAL
:
4895 prefix
= one_only
? ".ld" : ".ldata";
4898 prefix
= one_only
? ".lb" : ".lbss";
4901 case SECCAT_RODATA_MERGE_STR
:
4902 case SECCAT_RODATA_MERGE_STR_INIT
:
4903 case SECCAT_RODATA_MERGE_CONST
:
4904 prefix
= one_only
? ".lr" : ".lrodata";
4906 case SECCAT_SRODATA
:
4913 /* We don't split these for medium model. Place them into
4914 default sections and hope for best. */
4919 const char *name
, *linkonce
;
4922 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4923 name
= targetm
.strip_name_encoding (name
);
4925 /* If we're using one_only, then there needs to be a .gnu.linkonce
4926 prefix to the section name. */
4927 linkonce
= one_only
? ".gnu.linkonce" : "";
4929 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4931 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4935 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
4960 /* Utility function for targets to use in implementing
4961 ASM_OUTPUT_ALIGNED_BSS. */
4964 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4965 const char *name
, unsigned HOST_WIDE_INT size
,
4968 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4969 && size
> (unsigned int)ix86_section_threshold
)
4970 switch_to_section (get_named_section (decl
, ".lbss", 0));
4972 switch_to_section (bss_section
);
4973 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4974 #ifdef ASM_DECLARE_OBJECT_NAME
4975 last_assemble_variable_decl
= decl
;
4976 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4978 /* Standard thing is just output label for the object. */
4979 ASM_OUTPUT_LABEL (file
, name
);
4980 #endif /* ASM_DECLARE_OBJECT_NAME */
4981 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4984 /* Decide whether we must probe the stack before any space allocation
4985 on this target. It's essentially TARGET_STACK_PROBE except when
4986 -fstack-check causes the stack to be already probed differently. */
4989 ix86_target_stack_probe (void)
4991 /* Do not probe the stack twice if static stack checking is enabled. */
4992 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4995 return TARGET_STACK_PROBE
;
4998 /* Decide whether we can make a sibling call to a function. DECL is the
4999 declaration of the function being targeted by the call and EXP is the
5000 CALL_EXPR representing the call. */
5003 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
5005 tree type
, decl_or_type
;
5008 /* If we are generating position-independent code, we cannot sibcall
5009 optimize any indirect call, or a direct call to a global function,
5010 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5014 && (!decl
|| !targetm
.binds_local_p (decl
)))
5017 /* If we need to align the outgoing stack, then sibcalling would
5018 unalign the stack, which may break the called function. */
5019 if (ix86_minimum_incoming_stack_boundary (true)
5020 < PREFERRED_STACK_BOUNDARY
)
5025 decl_or_type
= decl
;
5026 type
= TREE_TYPE (decl
);
5030 /* We're looking at the CALL_EXPR, we need the type of the function. */
5031 type
= CALL_EXPR_FN (exp
); /* pointer expression */
5032 type
= TREE_TYPE (type
); /* pointer type */
5033 type
= TREE_TYPE (type
); /* function type */
5034 decl_or_type
= type
;
5037 /* Check that the return value locations are the same. Like
5038 if we are returning floats on the 80387 register stack, we cannot
5039 make a sibcall from a function that doesn't return a float to a
5040 function that does or, conversely, from a function that does return
5041 a float to a function that doesn't; the necessary stack adjustment
5042 would not be executed. This is also the place we notice
5043 differences in the return value ABI. Note that it is ok for one
5044 of the functions to have void return type as long as the return
5045 value of the other is passed in a register. */
5046 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
5047 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5049 if (STACK_REG_P (a
) || STACK_REG_P (b
))
5051 if (!rtx_equal_p (a
, b
))
5054 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5056 else if (!rtx_equal_p (a
, b
))
5061 /* The SYSV ABI has more call-clobbered registers;
5062 disallow sibcalls from MS to SYSV. */
5063 if (cfun
->machine
->call_abi
== MS_ABI
5064 && ix86_function_type_abi (type
) == SYSV_ABI
)
5069 /* If this call is indirect, we'll need to be able to use a
5070 call-clobbered register for the address of the target function.
5071 Make sure that all such registers are not used for passing
5072 parameters. Note that DLLIMPORT functions are indirect. */
5074 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
5076 if (ix86_function_regparm (type
, NULL
) >= 3)
5078 /* ??? Need to count the actual number of registers to be used,
5079 not the possible number of registers. Fix later. */
5085 /* Otherwise okay. That also includes certain types of indirect calls. */
5089 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5090 and "sseregparm" calling convention attributes;
5091 arguments as in struct attribute_spec.handler. */
5094 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5096 int flags ATTRIBUTE_UNUSED
,
5099 if (TREE_CODE (*node
) != FUNCTION_TYPE
5100 && TREE_CODE (*node
) != METHOD_TYPE
5101 && TREE_CODE (*node
) != FIELD_DECL
5102 && TREE_CODE (*node
) != TYPE_DECL
)
5104 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5106 *no_add_attrs
= true;
5110 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5111 if (is_attribute_p ("regparm", name
))
5115 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5117 error ("fastcall and regparm attributes are not compatible");
5120 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5122 error ("regparam and thiscall attributes are not compatible");
5125 cst
= TREE_VALUE (args
);
5126 if (TREE_CODE (cst
) != INTEGER_CST
)
5128 warning (OPT_Wattributes
,
5129 "%qE attribute requires an integer constant argument",
5131 *no_add_attrs
= true;
5133 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5135 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5137 *no_add_attrs
= true;
5145 /* Do not warn when emulating the MS ABI. */
5146 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5147 && TREE_CODE (*node
) != METHOD_TYPE
)
5148 || ix86_function_type_abi (*node
) != MS_ABI
)
5149 warning (OPT_Wattributes
, "%qE attribute ignored",
5151 *no_add_attrs
= true;
5155 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5156 if (is_attribute_p ("fastcall", name
))
5158 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5160 error ("fastcall and cdecl attributes are not compatible");
5162 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5164 error ("fastcall and stdcall attributes are not compatible");
5166 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5168 error ("fastcall and regparm attributes are not compatible");
5170 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5172 error ("fastcall and thiscall attributes are not compatible");
5176 /* Can combine stdcall with fastcall (redundant), regparm and
5178 else if (is_attribute_p ("stdcall", name
))
5180 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5182 error ("stdcall and cdecl attributes are not compatible");
5184 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5186 error ("stdcall and fastcall attributes are not compatible");
5188 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5190 error ("stdcall and thiscall attributes are not compatible");
5194 /* Can combine cdecl with regparm and sseregparm. */
5195 else if (is_attribute_p ("cdecl", name
))
5197 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5199 error ("stdcall and cdecl attributes are not compatible");
5201 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5203 error ("fastcall and cdecl attributes are not compatible");
5205 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5207 error ("cdecl and thiscall attributes are not compatible");
5210 else if (is_attribute_p ("thiscall", name
))
5212 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5213 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5215 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5217 error ("stdcall and thiscall attributes are not compatible");
5219 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5221 error ("fastcall and thiscall attributes are not compatible");
5223 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5225 error ("cdecl and thiscall attributes are not compatible");
5229 /* Can combine sseregparm with all attributes. */
5234 /* The transactional memory builtins are implicitly regparm or fastcall
5235 depending on the ABI. Override the generic do-nothing attribute that
5236 these builtins were declared with, and replace it with one of the two
5237 attributes that we expect elsewhere. */
5240 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5241 tree args ATTRIBUTE_UNUSED
,
5242 int flags
, bool *no_add_attrs
)
5246 /* In no case do we want to add the placeholder attribute. */
5247 *no_add_attrs
= true;
5249 /* The 64-bit ABI is unchanged for transactional memory. */
5253 /* ??? Is there a better way to validate 32-bit windows? We have
5254 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5255 if (CHECK_STACK_LIMIT
> 0)
5256 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5259 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5260 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5262 decl_attributes (node
, alt
, flags
);
5267 /* This function determines from TYPE the calling-convention. */
5270 ix86_get_callcvt (const_tree type
)
5272 unsigned int ret
= 0;
5277 return IX86_CALLCVT_CDECL
;
5279 attrs
= TYPE_ATTRIBUTES (type
);
5280 if (attrs
!= NULL_TREE
)
5282 if (lookup_attribute ("cdecl", attrs
))
5283 ret
|= IX86_CALLCVT_CDECL
;
5284 else if (lookup_attribute ("stdcall", attrs
))
5285 ret
|= IX86_CALLCVT_STDCALL
;
5286 else if (lookup_attribute ("fastcall", attrs
))
5287 ret
|= IX86_CALLCVT_FASTCALL
;
5288 else if (lookup_attribute ("thiscall", attrs
))
5289 ret
|= IX86_CALLCVT_THISCALL
;
5291 /* Regparam isn't allowed for thiscall and fastcall. */
5292 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5294 if (lookup_attribute ("regparm", attrs
))
5295 ret
|= IX86_CALLCVT_REGPARM
;
5296 if (lookup_attribute ("sseregparm", attrs
))
5297 ret
|= IX86_CALLCVT_SSEREGPARM
;
5300 if (IX86_BASE_CALLCVT(ret
) != 0)
5304 is_stdarg
= stdarg_p (type
);
5305 if (TARGET_RTD
&& !is_stdarg
)
5306 return IX86_CALLCVT_STDCALL
| ret
;
5310 || TREE_CODE (type
) != METHOD_TYPE
5311 || ix86_function_type_abi (type
) != MS_ABI
)
5312 return IX86_CALLCVT_CDECL
| ret
;
5314 return IX86_CALLCVT_THISCALL
;
5317 /* Return 0 if the attributes for two types are incompatible, 1 if they
5318 are compatible, and 2 if they are nearly compatible (which causes a
5319 warning to be generated). */
5322 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5324 unsigned int ccvt1
, ccvt2
;
5326 if (TREE_CODE (type1
) != FUNCTION_TYPE
5327 && TREE_CODE (type1
) != METHOD_TYPE
)
5330 ccvt1
= ix86_get_callcvt (type1
);
5331 ccvt2
= ix86_get_callcvt (type2
);
5334 if (ix86_function_regparm (type1
, NULL
)
5335 != ix86_function_regparm (type2
, NULL
))
5341 /* Return the regparm value for a function with the indicated TYPE and DECL.
5342 DECL may be NULL when calling function indirectly
5343 or considering a libcall. */
5346 ix86_function_regparm (const_tree type
, const_tree decl
)
5353 return (ix86_function_type_abi (type
) == SYSV_ABI
5354 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5355 ccvt
= ix86_get_callcvt (type
);
5356 regparm
= ix86_regparm
;
5358 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5360 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5363 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5367 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5369 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5372 /* Use register calling convention for local functions when possible. */
5374 && TREE_CODE (decl
) == FUNCTION_DECL
5376 && !(profile_flag
&& !flag_fentry
))
5378 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5379 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5380 if (i
&& i
->local
&& i
->can_change_signature
)
5382 int local_regparm
, globals
= 0, regno
;
5384 /* Make sure no regparm register is taken by a
5385 fixed register variable. */
5386 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5387 if (fixed_regs
[local_regparm
])
5390 /* We don't want to use regparm(3) for nested functions as
5391 these use a static chain pointer in the third argument. */
5392 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5395 /* In 32-bit mode save a register for the split stack. */
5396 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5399 /* Each fixed register usage increases register pressure,
5400 so less registers should be used for argument passing.
5401 This functionality can be overriden by an explicit
5403 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5404 if (fixed_regs
[regno
])
5408 = globals
< local_regparm
? local_regparm
- globals
: 0;
5410 if (local_regparm
> regparm
)
5411 regparm
= local_regparm
;
5418 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5419 DFmode (2) arguments in SSE registers for a function with the
5420 indicated TYPE and DECL. DECL may be NULL when calling function
5421 indirectly or considering a libcall. Otherwise return 0. */
5424 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5426 gcc_assert (!TARGET_64BIT
);
5428 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5429 by the sseregparm attribute. */
5430 if (TARGET_SSEREGPARM
5431 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5438 error ("calling %qD with attribute sseregparm without "
5439 "SSE/SSE2 enabled", decl
);
5441 error ("calling %qT with attribute sseregparm without "
5442 "SSE/SSE2 enabled", type
);
5450 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5451 (and DFmode for SSE2) arguments in SSE registers. */
5452 if (decl
&& TARGET_SSE_MATH
&& optimize
5453 && !(profile_flag
&& !flag_fentry
))
5455 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5456 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5457 if (i
&& i
->local
&& i
->can_change_signature
)
5458 return TARGET_SSE2
? 2 : 1;
5464 /* Return true if EAX is live at the start of the function. Used by
5465 ix86_expand_prologue to determine if we need special help before
5466 calling allocate_stack_worker. */
5469 ix86_eax_live_at_start_p (void)
5471 /* Cheat. Don't bother working forward from ix86_function_regparm
5472 to the function type to whether an actual argument is located in
5473 eax. Instead just look at cfg info, which is still close enough
5474 to correct at this point. This gives false positives for broken
5475 functions that might use uninitialized data that happens to be
5476 allocated in eax, but who cares? */
5477 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5481 ix86_keep_aggregate_return_pointer (tree fntype
)
5487 attr
= lookup_attribute ("callee_pop_aggregate_return",
5488 TYPE_ATTRIBUTES (fntype
));
5490 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5492 /* For 32-bit MS-ABI the default is to keep aggregate
5494 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5497 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5500 /* Value is the number of bytes of arguments automatically
5501 popped when returning from a subroutine call.
5502 FUNDECL is the declaration node of the function (as a tree),
5503 FUNTYPE is the data type of the function (as a tree),
5504 or for a library call it is an identifier node for the subroutine name.
5505 SIZE is the number of bytes of arguments passed on the stack.
5507 On the 80386, the RTD insn may be used to pop them if the number
5508 of args is fixed, but if the number is variable then the caller
5509 must pop them all. RTD can't be used for library calls now
5510 because the library is compiled with the Unix compiler.
5511 Use of RTD is a selectable option, since it is incompatible with
5512 standard Unix calling sequences. If the option is not selected,
5513 the caller must always pop the args.
5515 The attribute stdcall is equivalent to RTD on a per module basis. */
5518 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5522 /* None of the 64-bit ABIs pop arguments. */
5526 ccvt
= ix86_get_callcvt (funtype
);
5528 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5529 | IX86_CALLCVT_THISCALL
)) != 0
5530 && ! stdarg_p (funtype
))
5533 /* Lose any fake structure return argument if it is passed on the stack. */
5534 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5535 && !ix86_keep_aggregate_return_pointer (funtype
))
5537 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5539 return GET_MODE_SIZE (Pmode
);
5545 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5548 ix86_legitimate_combined_insn (rtx insn
)
5550 /* Check operand constraints in case hard registers were propagated
5551 into insn pattern. This check prevents combine pass from
5552 generating insn patterns with invalid hard register operands.
5553 These invalid insns can eventually confuse reload to error out
5554 with a spill failure. See also PRs 46829 and 46843. */
5555 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5559 extract_insn (insn
);
5560 preprocess_constraints ();
5562 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5564 rtx op
= recog_data
.operand
[i
];
5565 enum machine_mode mode
= GET_MODE (op
);
5566 struct operand_alternative
*op_alt
;
5571 /* A unary operator may be accepted by the predicate, but it
5572 is irrelevant for matching constraints. */
5576 if (GET_CODE (op
) == SUBREG
)
5578 if (REG_P (SUBREG_REG (op
))
5579 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5580 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5581 GET_MODE (SUBREG_REG (op
)),
5584 op
= SUBREG_REG (op
);
5587 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5590 op_alt
= recog_op_alt
[i
];
5592 /* Operand has no constraints, anything is OK. */
5593 win
= !recog_data
.n_alternatives
;
5595 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5597 if (op_alt
[j
].anything_ok
5598 || (op_alt
[j
].matches
!= -1
5600 (recog_data
.operand
[i
],
5601 recog_data
.operand
[op_alt
[j
].matches
]))
5602 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5617 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5619 static unsigned HOST_WIDE_INT
5620 ix86_asan_shadow_offset (void)
5622 return TARGET_LP64
? (TARGET_MACHO
? (HOST_WIDE_INT_1
<< 44)
5623 : HOST_WIDE_INT_C (0x7fff8000))
5624 : (HOST_WIDE_INT_1
<< 29);
5627 /* Argument support functions. */
5629 /* Return true when register may be used to pass function parameters. */
5631 ix86_function_arg_regno_p (int regno
)
5634 const int *parm_regs
;
5639 return (regno
< REGPARM_MAX
5640 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5642 return (regno
< REGPARM_MAX
5643 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5644 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5645 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5646 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5649 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5650 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5653 /* TODO: The function should depend on current function ABI but
5654 builtins.c would need updating then. Therefore we use the
5657 /* RAX is used as hidden argument to va_arg functions. */
5658 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5661 if (ix86_abi
== MS_ABI
)
5662 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5664 parm_regs
= x86_64_int_parameter_registers
;
5665 for (i
= 0; i
< (ix86_abi
== MS_ABI
5666 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5667 if (regno
== parm_regs
[i
])
5672 /* Return if we do not know how to pass TYPE solely in registers. */
5675 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5677 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5680 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5681 The layout_type routine is crafty and tries to trick us into passing
5682 currently unsupported vector types on the stack by using TImode. */
5683 return (!TARGET_64BIT
&& mode
== TImode
5684 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5687 /* It returns the size, in bytes, of the area reserved for arguments passed
5688 in registers for the function represented by fndecl dependent to the used
5691 ix86_reg_parm_stack_space (const_tree fndecl
)
5693 enum calling_abi call_abi
= SYSV_ABI
;
5694 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5695 call_abi
= ix86_function_abi (fndecl
);
5697 call_abi
= ix86_function_type_abi (fndecl
);
5698 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5703 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5706 ix86_function_type_abi (const_tree fntype
)
5708 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5710 enum calling_abi abi
= ix86_abi
;
5711 if (abi
== SYSV_ABI
)
5713 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5716 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5723 /* We add this as a workaround in order to use libc_has_function
5726 ix86_libc_has_function (enum function_class fn_class
)
5728 return targetm
.libc_has_function (fn_class
);
5732 ix86_function_ms_hook_prologue (const_tree fn
)
5734 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5736 if (decl_function_context (fn
) != NULL_TREE
)
5737 error_at (DECL_SOURCE_LOCATION (fn
),
5738 "ms_hook_prologue is not compatible with nested function");
5745 static enum calling_abi
5746 ix86_function_abi (const_tree fndecl
)
5750 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5753 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5756 ix86_cfun_abi (void)
5760 return cfun
->machine
->call_abi
;
5763 /* Write the extra assembler code needed to declare a function properly. */
5766 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5769 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5773 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5774 unsigned int filler_cc
= 0xcccccccc;
5776 for (i
= 0; i
< filler_count
; i
+= 4)
5777 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5780 #ifdef SUBTARGET_ASM_UNWIND_INIT
5781 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5784 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5786 /* Output magic byte marker, if hot-patch attribute is set. */
5791 /* leaq [%rsp + 0], %rsp */
5792 asm_fprintf (asm_out_file
, ASM_BYTE
5793 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5797 /* movl.s %edi, %edi
5799 movl.s %esp, %ebp */
5800 asm_fprintf (asm_out_file
, ASM_BYTE
5801 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5807 extern void init_regs (void);
5809 /* Implementation of call abi switching target hook. Specific to FNDECL
5810 the specific call register sets are set. See also
5811 ix86_conditional_register_usage for more details. */
5813 ix86_call_abi_override (const_tree fndecl
)
5815 if (fndecl
== NULL_TREE
)
5816 cfun
->machine
->call_abi
= ix86_abi
;
5818 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5821 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5822 expensive re-initialization of init_regs each time we switch function context
5823 since this is needed only during RTL expansion. */
5825 ix86_maybe_switch_abi (void)
5828 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5832 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5833 for a call to a function whose data type is FNTYPE.
5834 For a library call, FNTYPE is 0. */
5837 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5838 tree fntype
, /* tree ptr for function decl */
5839 rtx libname
, /* SYMBOL_REF of library name or 0 */
5843 struct cgraph_local_info
*i
;
5845 memset (cum
, 0, sizeof (*cum
));
5849 i
= cgraph_local_info (fndecl
);
5850 cum
->call_abi
= ix86_function_abi (fndecl
);
5855 cum
->call_abi
= ix86_function_type_abi (fntype
);
5858 cum
->caller
= caller
;
5860 /* Set up the number of registers to use for passing arguments. */
5862 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5863 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5864 "or subtarget optimization implying it");
5865 cum
->nregs
= ix86_regparm
;
5868 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5869 ? X86_64_REGPARM_MAX
5870 : X86_64_MS_REGPARM_MAX
);
5874 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5877 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5878 ? X86_64_SSE_REGPARM_MAX
5879 : X86_64_MS_SSE_REGPARM_MAX
);
5883 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5884 cum
->warn_avx
= true;
5885 cum
->warn_sse
= true;
5886 cum
->warn_mmx
= true;
5888 /* Because type might mismatch in between caller and callee, we need to
5889 use actual type of function for local calls.
5890 FIXME: cgraph_analyze can be told to actually record if function uses
5891 va_start so for local functions maybe_vaarg can be made aggressive
5893 FIXME: once typesytem is fixed, we won't need this code anymore. */
5894 if (i
&& i
->local
&& i
->can_change_signature
)
5895 fntype
= TREE_TYPE (fndecl
);
5896 cum
->maybe_vaarg
= (fntype
5897 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5902 /* If there are variable arguments, then we won't pass anything
5903 in registers in 32-bit mode. */
5904 if (stdarg_p (fntype
))
5915 /* Use ecx and edx registers if function has fastcall attribute,
5916 else look for regparm information. */
5919 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5920 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5923 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5925 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5931 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5934 /* Set up the number of SSE registers used for passing SFmode
5935 and DFmode arguments. Warn for mismatching ABI. */
5936 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5940 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5941 But in the case of vector types, it is some vector mode.
5943 When we have only some of our vector isa extensions enabled, then there
5944 are some modes for which vector_mode_supported_p is false. For these
5945 modes, the generic vector support in gcc will choose some non-vector mode
5946 in order to implement the type. By computing the natural mode, we'll
5947 select the proper ABI location for the operand and not depend on whatever
5948 the middle-end decides to do with these vector types.
5950 The midde-end can't deal with the vector types > 16 bytes. In this
5951 case, we return the original mode and warn ABI change if CUM isn't
5954 static enum machine_mode
5955 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5957 enum machine_mode mode
= TYPE_MODE (type
);
5959 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5961 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5962 if ((size
== 8 || size
== 16 || size
== 32)
5963 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5964 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5966 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5968 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5969 mode
= MIN_MODE_VECTOR_FLOAT
;
5971 mode
= MIN_MODE_VECTOR_INT
;
5973 /* Get the mode which has this inner mode and number of units. */
5974 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5975 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5976 && GET_MODE_INNER (mode
) == innermode
)
5978 if (size
== 32 && !TARGET_AVX
)
5980 static bool warnedavx
;
5987 warning (0, "AVX vector argument without AVX "
5988 "enabled changes the ABI");
5990 return TYPE_MODE (type
);
5992 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
5994 static bool warnedsse
;
6001 warning (0, "SSE vector argument without SSE "
6002 "enabled changes the ABI");
6017 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6018 this may not agree with the mode that the type system has chosen for the
6019 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6020 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6023 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
6028 if (orig_mode
!= BLKmode
)
6029 tmp
= gen_rtx_REG (orig_mode
, regno
);
6032 tmp
= gen_rtx_REG (mode
, regno
);
6033 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
6034 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
6040 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6041 of this code is to classify each 8bytes of incoming argument by the register
6042 class and assign registers accordingly. */
6044 /* Return the union class of CLASS1 and CLASS2.
6045 See the x86-64 PS ABI for details. */
6047 static enum x86_64_reg_class
6048 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
6050 /* Rule #1: If both classes are equal, this is the resulting class. */
6051 if (class1
== class2
)
6054 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6056 if (class1
== X86_64_NO_CLASS
)
6058 if (class2
== X86_64_NO_CLASS
)
6061 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6062 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
6063 return X86_64_MEMORY_CLASS
;
6065 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6066 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
6067 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
6068 return X86_64_INTEGERSI_CLASS
;
6069 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
6070 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
6071 return X86_64_INTEGER_CLASS
;
6073 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6075 if (class1
== X86_64_X87_CLASS
6076 || class1
== X86_64_X87UP_CLASS
6077 || class1
== X86_64_COMPLEX_X87_CLASS
6078 || class2
== X86_64_X87_CLASS
6079 || class2
== X86_64_X87UP_CLASS
6080 || class2
== X86_64_COMPLEX_X87_CLASS
)
6081 return X86_64_MEMORY_CLASS
;
6083 /* Rule #6: Otherwise class SSE is used. */
6084 return X86_64_SSE_CLASS
;
6087 /* Classify the argument of type TYPE and mode MODE.
6088 CLASSES will be filled by the register class used to pass each word
6089 of the operand. The number of words is returned. In case the parameter
6090 should be passed in memory, 0 is returned. As a special case for zero
6091 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6093 BIT_OFFSET is used internally for handling records and specifies offset
6094 of the offset in bits modulo 256 to avoid overflow cases.
6096 See the x86-64 PS ABI for details.
6100 classify_argument (enum machine_mode mode
, const_tree type
,
6101 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6103 HOST_WIDE_INT bytes
=
6104 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6106 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6108 /* Variable sized entities are always passed/returned in memory. */
6112 if (mode
!= VOIDmode
6113 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6116 if (type
&& AGGREGATE_TYPE_P (type
))
6120 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6122 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6126 for (i
= 0; i
< words
; i
++)
6127 classes
[i
] = X86_64_NO_CLASS
;
6129 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6130 signalize memory class, so handle it as special case. */
6133 classes
[0] = X86_64_NO_CLASS
;
6137 /* Classify each field of record and merge classes. */
6138 switch (TREE_CODE (type
))
6141 /* And now merge the fields of structure. */
6142 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6144 if (TREE_CODE (field
) == FIELD_DECL
)
6148 if (TREE_TYPE (field
) == error_mark_node
)
6151 /* Bitfields are always classified as integer. Handle them
6152 early, since later code would consider them to be
6153 misaligned integers. */
6154 if (DECL_BIT_FIELD (field
))
6156 for (i
= (int_bit_position (field
)
6157 + (bit_offset
% 64)) / 8 / 8;
6158 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6159 + tree_low_cst (DECL_SIZE (field
), 0)
6162 merge_classes (X86_64_INTEGER_CLASS
,
6169 type
= TREE_TYPE (field
);
6171 /* Flexible array member is ignored. */
6172 if (TYPE_MODE (type
) == BLKmode
6173 && TREE_CODE (type
) == ARRAY_TYPE
6174 && TYPE_SIZE (type
) == NULL_TREE
6175 && TYPE_DOMAIN (type
) != NULL_TREE
6176 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6181 if (!warned
&& warn_psabi
)
6184 inform (input_location
,
6185 "the ABI of passing struct with"
6186 " a flexible array member has"
6187 " changed in GCC 4.4");
6191 num
= classify_argument (TYPE_MODE (type
), type
,
6193 (int_bit_position (field
)
6194 + bit_offset
) % 256);
6197 pos
= (int_bit_position (field
)
6198 + (bit_offset
% 64)) / 8 / 8;
6199 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6201 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6208 /* Arrays are handled as small records. */
6211 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6212 TREE_TYPE (type
), subclasses
, bit_offset
);
6216 /* The partial classes are now full classes. */
6217 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6218 subclasses
[0] = X86_64_SSE_CLASS
;
6219 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6220 && !((bit_offset
% 64) == 0 && bytes
== 4))
6221 subclasses
[0] = X86_64_INTEGER_CLASS
;
6223 for (i
= 0; i
< words
; i
++)
6224 classes
[i
] = subclasses
[i
% num
];
6229 case QUAL_UNION_TYPE
:
6230 /* Unions are similar to RECORD_TYPE but offset is always 0.
6232 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6234 if (TREE_CODE (field
) == FIELD_DECL
)
6238 if (TREE_TYPE (field
) == error_mark_node
)
6241 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6242 TREE_TYPE (field
), subclasses
,
6246 for (i
= 0; i
< num
; i
++)
6247 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6258 /* When size > 16 bytes, if the first one isn't
6259 X86_64_SSE_CLASS or any other ones aren't
6260 X86_64_SSEUP_CLASS, everything should be passed in
6262 if (classes
[0] != X86_64_SSE_CLASS
)
6265 for (i
= 1; i
< words
; i
++)
6266 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6270 /* Final merger cleanup. */
6271 for (i
= 0; i
< words
; i
++)
6273 /* If one class is MEMORY, everything should be passed in
6275 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6278 /* The X86_64_SSEUP_CLASS should be always preceded by
6279 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6280 if (classes
[i
] == X86_64_SSEUP_CLASS
6281 && classes
[i
- 1] != X86_64_SSE_CLASS
6282 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6284 /* The first one should never be X86_64_SSEUP_CLASS. */
6285 gcc_assert (i
!= 0);
6286 classes
[i
] = X86_64_SSE_CLASS
;
6289 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6290 everything should be passed in memory. */
6291 if (classes
[i
] == X86_64_X87UP_CLASS
6292 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6296 /* The first one should never be X86_64_X87UP_CLASS. */
6297 gcc_assert (i
!= 0);
6298 if (!warned
&& warn_psabi
)
6301 inform (input_location
,
6302 "the ABI of passing union with long double"
6303 " has changed in GCC 4.4");
6311 /* Compute alignment needed. We align all types to natural boundaries with
6312 exception of XFmode that is aligned to 64bits. */
6313 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6315 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6318 mode_alignment
= 128;
6319 else if (mode
== XCmode
)
6320 mode_alignment
= 256;
6321 if (COMPLEX_MODE_P (mode
))
6322 mode_alignment
/= 2;
6323 /* Misaligned fields are always returned in memory. */
6324 if (bit_offset
% mode_alignment
)
6328 /* for V1xx modes, just use the base mode */
6329 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6330 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6331 mode
= GET_MODE_INNER (mode
);
6333 /* Classification of atomic types. */
6338 classes
[0] = X86_64_SSE_CLASS
;
6341 classes
[0] = X86_64_SSE_CLASS
;
6342 classes
[1] = X86_64_SSEUP_CLASS
;
6352 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6356 classes
[0] = X86_64_INTEGERSI_CLASS
;
6359 else if (size
<= 64)
6361 classes
[0] = X86_64_INTEGER_CLASS
;
6364 else if (size
<= 64+32)
6366 classes
[0] = X86_64_INTEGER_CLASS
;
6367 classes
[1] = X86_64_INTEGERSI_CLASS
;
6370 else if (size
<= 64+64)
6372 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6380 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6384 /* OImode shouldn't be used directly. */
6389 if (!(bit_offset
% 64))
6390 classes
[0] = X86_64_SSESF_CLASS
;
6392 classes
[0] = X86_64_SSE_CLASS
;
6395 classes
[0] = X86_64_SSEDF_CLASS
;
6398 classes
[0] = X86_64_X87_CLASS
;
6399 classes
[1] = X86_64_X87UP_CLASS
;
6402 classes
[0] = X86_64_SSE_CLASS
;
6403 classes
[1] = X86_64_SSEUP_CLASS
;
6406 classes
[0] = X86_64_SSE_CLASS
;
6407 if (!(bit_offset
% 64))
6413 if (!warned
&& warn_psabi
)
6416 inform (input_location
,
6417 "the ABI of passing structure with complex float"
6418 " member has changed in GCC 4.4");
6420 classes
[1] = X86_64_SSESF_CLASS
;
6424 classes
[0] = X86_64_SSEDF_CLASS
;
6425 classes
[1] = X86_64_SSEDF_CLASS
;
6428 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6431 /* This modes is larger than 16 bytes. */
6439 classes
[0] = X86_64_SSE_CLASS
;
6440 classes
[1] = X86_64_SSEUP_CLASS
;
6441 classes
[2] = X86_64_SSEUP_CLASS
;
6442 classes
[3] = X86_64_SSEUP_CLASS
;
6450 classes
[0] = X86_64_SSE_CLASS
;
6451 classes
[1] = X86_64_SSEUP_CLASS
;
6459 classes
[0] = X86_64_SSE_CLASS
;
6465 gcc_assert (VECTOR_MODE_P (mode
));
6470 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6472 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6473 classes
[0] = X86_64_INTEGERSI_CLASS
;
6475 classes
[0] = X86_64_INTEGER_CLASS
;
6476 classes
[1] = X86_64_INTEGER_CLASS
;
6477 return 1 + (bytes
> 8);
6481 /* Examine the argument and return set number of register required in each
6482 class. Return 0 iff parameter should be passed in memory. */
6484 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6485 int *int_nregs
, int *sse_nregs
)
6487 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6488 int n
= classify_argument (mode
, type
, regclass
, 0);
6494 for (n
--; n
>= 0; n
--)
6495 switch (regclass
[n
])
6497 case X86_64_INTEGER_CLASS
:
6498 case X86_64_INTEGERSI_CLASS
:
6501 case X86_64_SSE_CLASS
:
6502 case X86_64_SSESF_CLASS
:
6503 case X86_64_SSEDF_CLASS
:
6506 case X86_64_NO_CLASS
:
6507 case X86_64_SSEUP_CLASS
:
6509 case X86_64_X87_CLASS
:
6510 case X86_64_X87UP_CLASS
:
6514 case X86_64_COMPLEX_X87_CLASS
:
6515 return in_return
? 2 : 0;
6516 case X86_64_MEMORY_CLASS
:
6522 /* Construct container for the argument used by GCC interface. See
6523 FUNCTION_ARG for the detailed description. */
6526 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6527 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6528 const int *intreg
, int sse_regno
)
6530 /* The following variables hold the static issued_error state. */
6531 static bool issued_sse_arg_error
;
6532 static bool issued_sse_ret_error
;
6533 static bool issued_x87_ret_error
;
6535 enum machine_mode tmpmode
;
6537 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6538 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6542 int needed_sseregs
, needed_intregs
;
6543 rtx exp
[MAX_CLASSES
];
6546 n
= classify_argument (mode
, type
, regclass
, 0);
6549 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6552 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6555 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6556 some less clueful developer tries to use floating-point anyway. */
6557 if (needed_sseregs
&& !TARGET_SSE
)
6561 if (!issued_sse_ret_error
)
6563 error ("SSE register return with SSE disabled");
6564 issued_sse_ret_error
= true;
6567 else if (!issued_sse_arg_error
)
6569 error ("SSE register argument with SSE disabled");
6570 issued_sse_arg_error
= true;
6575 /* Likewise, error if the ABI requires us to return values in the
6576 x87 registers and the user specified -mno-80387. */
6577 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
6578 for (i
= 0; i
< n
; i
++)
6579 if (regclass
[i
] == X86_64_X87_CLASS
6580 || regclass
[i
] == X86_64_X87UP_CLASS
6581 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6583 if (!issued_x87_ret_error
)
6585 error ("x87 register return with x87 disabled");
6586 issued_x87_ret_error
= true;
6591 /* First construct simple cases. Avoid SCmode, since we want to use
6592 single register to pass this type. */
6593 if (n
== 1 && mode
!= SCmode
)
6594 switch (regclass
[0])
6596 case X86_64_INTEGER_CLASS
:
6597 case X86_64_INTEGERSI_CLASS
:
6598 return gen_rtx_REG (mode
, intreg
[0]);
6599 case X86_64_SSE_CLASS
:
6600 case X86_64_SSESF_CLASS
:
6601 case X86_64_SSEDF_CLASS
:
6602 if (mode
!= BLKmode
)
6603 return gen_reg_or_parallel (mode
, orig_mode
,
6604 SSE_REGNO (sse_regno
));
6606 case X86_64_X87_CLASS
:
6607 case X86_64_COMPLEX_X87_CLASS
:
6608 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6609 case X86_64_NO_CLASS
:
6610 /* Zero sized array, struct or class. */
6616 && regclass
[0] == X86_64_SSE_CLASS
6617 && regclass
[1] == X86_64_SSEUP_CLASS
6619 return gen_reg_or_parallel (mode
, orig_mode
,
6620 SSE_REGNO (sse_regno
));
6622 && regclass
[0] == X86_64_SSE_CLASS
6623 && regclass
[1] == X86_64_SSEUP_CLASS
6624 && regclass
[2] == X86_64_SSEUP_CLASS
6625 && regclass
[3] == X86_64_SSEUP_CLASS
6627 return gen_reg_or_parallel (mode
, orig_mode
,
6628 SSE_REGNO (sse_regno
));
6630 && regclass
[0] == X86_64_X87_CLASS
6631 && regclass
[1] == X86_64_X87UP_CLASS
)
6632 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6635 && regclass
[0] == X86_64_INTEGER_CLASS
6636 && regclass
[1] == X86_64_INTEGER_CLASS
6637 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6638 && intreg
[0] + 1 == intreg
[1])
6639 return gen_rtx_REG (mode
, intreg
[0]);
6641 /* Otherwise figure out the entries of the PARALLEL. */
6642 for (i
= 0; i
< n
; i
++)
6646 switch (regclass
[i
])
6648 case X86_64_NO_CLASS
:
6650 case X86_64_INTEGER_CLASS
:
6651 case X86_64_INTEGERSI_CLASS
:
6652 /* Merge TImodes on aligned occasions here too. */
6653 if (i
* 8 + 8 > bytes
)
6655 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6656 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6660 /* We've requested 24 bytes we
6661 don't have mode for. Use DImode. */
6662 if (tmpmode
== BLKmode
)
6665 = gen_rtx_EXPR_LIST (VOIDmode
,
6666 gen_rtx_REG (tmpmode
, *intreg
),
6670 case X86_64_SSESF_CLASS
:
6672 = gen_rtx_EXPR_LIST (VOIDmode
,
6673 gen_rtx_REG (SFmode
,
6674 SSE_REGNO (sse_regno
)),
6678 case X86_64_SSEDF_CLASS
:
6680 = gen_rtx_EXPR_LIST (VOIDmode
,
6681 gen_rtx_REG (DFmode
,
6682 SSE_REGNO (sse_regno
)),
6686 case X86_64_SSE_CLASS
:
6694 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6704 && regclass
[1] == X86_64_SSEUP_CLASS
6705 && regclass
[2] == X86_64_SSEUP_CLASS
6706 && regclass
[3] == X86_64_SSEUP_CLASS
);
6714 = gen_rtx_EXPR_LIST (VOIDmode
,
6715 gen_rtx_REG (tmpmode
,
6716 SSE_REGNO (sse_regno
)),
6725 /* Empty aligned struct, union or class. */
6729 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6730 for (i
= 0; i
< nexps
; i
++)
6731 XVECEXP (ret
, 0, i
) = exp
[i
];
6735 /* Update the data in CUM to advance over an argument of mode MODE
6736 and data type TYPE. (TYPE is null for libcalls where that information
6737 may not be available.) */
6740 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6741 const_tree type
, HOST_WIDE_INT bytes
,
6742 HOST_WIDE_INT words
)
6758 cum
->words
+= words
;
6759 cum
->nregs
-= words
;
6760 cum
->regno
+= words
;
6762 if (cum
->nregs
<= 0)
6770 /* OImode shouldn't be used directly. */
6774 if (cum
->float_in_sse
< 2)
6777 if (cum
->float_in_sse
< 1)
6794 if (!type
|| !AGGREGATE_TYPE_P (type
))
6796 cum
->sse_words
+= words
;
6797 cum
->sse_nregs
-= 1;
6798 cum
->sse_regno
+= 1;
6799 if (cum
->sse_nregs
<= 0)
6813 if (!type
|| !AGGREGATE_TYPE_P (type
))
6815 cum
->mmx_words
+= words
;
6816 cum
->mmx_nregs
-= 1;
6817 cum
->mmx_regno
+= 1;
6818 if (cum
->mmx_nregs
<= 0)
6829 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6830 const_tree type
, HOST_WIDE_INT words
, bool named
)
6832 int int_nregs
, sse_nregs
;
6834 /* Unnamed 256bit vector mode parameters are passed on stack. */
6835 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6838 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6839 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6841 cum
->nregs
-= int_nregs
;
6842 cum
->sse_nregs
-= sse_nregs
;
6843 cum
->regno
+= int_nregs
;
6844 cum
->sse_regno
+= sse_nregs
;
6848 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6849 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6850 cum
->words
+= words
;
6855 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6856 HOST_WIDE_INT words
)
6858 /* Otherwise, this should be passed indirect. */
6859 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6861 cum
->words
+= words
;
6869 /* Update the data in CUM to advance over an argument of mode MODE and
6870 data type TYPE. (TYPE is null for libcalls where that information
6871 may not be available.) */
6874 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6875 const_tree type
, bool named
)
6877 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6878 HOST_WIDE_INT bytes
, words
;
6880 if (mode
== BLKmode
)
6881 bytes
= int_size_in_bytes (type
);
6883 bytes
= GET_MODE_SIZE (mode
);
6884 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6887 mode
= type_natural_mode (type
, NULL
);
6889 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6890 function_arg_advance_ms_64 (cum
, bytes
, words
);
6891 else if (TARGET_64BIT
)
6892 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6894 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6897 /* Define where to put the arguments to a function.
6898 Value is zero to push the argument on the stack,
6899 or a hard register in which to store the argument.
6901 MODE is the argument's machine mode.
6902 TYPE is the data type of the argument (as a tree).
6903 This is null for libcalls where that information may
6905 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6906 the preceding args and about the function being called.
6907 NAMED is nonzero if this argument is a named parameter
6908 (otherwise it is an extra parameter matching an ellipsis). */
6911 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6912 enum machine_mode orig_mode
, const_tree type
,
6913 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6915 static bool warnedsse
, warnedmmx
;
6917 /* Avoid the AL settings for the Unix64 ABI. */
6918 if (mode
== VOIDmode
)
6934 if (words
<= cum
->nregs
)
6936 int regno
= cum
->regno
;
6938 /* Fastcall allocates the first two DWORD (SImode) or
6939 smaller arguments to ECX and EDX if it isn't an
6945 || (type
&& AGGREGATE_TYPE_P (type
)))
6948 /* ECX not EAX is the first allocated register. */
6949 if (regno
== AX_REG
)
6952 return gen_rtx_REG (mode
, regno
);
6957 if (cum
->float_in_sse
< 2)
6960 if (cum
->float_in_sse
< 1)
6964 /* In 32bit, we pass TImode in xmm registers. */
6971 if (!type
|| !AGGREGATE_TYPE_P (type
))
6973 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6976 warning (0, "SSE vector argument without SSE enabled "
6980 return gen_reg_or_parallel (mode
, orig_mode
,
6981 cum
->sse_regno
+ FIRST_SSE_REG
);
6986 /* OImode shouldn't be used directly. */
6995 if (!type
|| !AGGREGATE_TYPE_P (type
))
6998 return gen_reg_or_parallel (mode
, orig_mode
,
6999 cum
->sse_regno
+ FIRST_SSE_REG
);
7009 if (!type
|| !AGGREGATE_TYPE_P (type
))
7011 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
7014 warning (0, "MMX vector argument without MMX enabled "
7018 return gen_reg_or_parallel (mode
, orig_mode
,
7019 cum
->mmx_regno
+ FIRST_MMX_REG
);
7028 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7029 enum machine_mode orig_mode
, const_tree type
, bool named
)
7031 /* Handle a hidden AL argument containing number of registers
7032 for varargs x86-64 functions. */
7033 if (mode
== VOIDmode
)
7034 return GEN_INT (cum
->maybe_vaarg
7035 ? (cum
->sse_nregs
< 0
7036 ? X86_64_SSE_REGPARM_MAX
7051 /* Unnamed 256bit vector mode parameters are passed on stack. */
7057 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
7059 &x86_64_int_parameter_registers
[cum
->regno
],
7064 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7065 enum machine_mode orig_mode
, bool named
,
7066 HOST_WIDE_INT bytes
)
7070 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7071 We use value of -2 to specify that current function call is MSABI. */
7072 if (mode
== VOIDmode
)
7073 return GEN_INT (-2);
7075 /* If we've run out of registers, it goes on the stack. */
7076 if (cum
->nregs
== 0)
7079 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
7081 /* Only floating point modes are passed in anything but integer regs. */
7082 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
7085 regno
= cum
->regno
+ FIRST_SSE_REG
;
7090 /* Unnamed floating parameters are passed in both the
7091 SSE and integer registers. */
7092 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
7093 t2
= gen_rtx_REG (mode
, regno
);
7094 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
7095 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
7096 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
7099 /* Handle aggregated types passed in register. */
7100 if (orig_mode
== BLKmode
)
7102 if (bytes
> 0 && bytes
<= 8)
7103 mode
= (bytes
> 4 ? DImode
: SImode
);
7104 if (mode
== BLKmode
)
7108 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7111 /* Return where to put the arguments to a function.
7112 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7114 MODE is the argument's machine mode. TYPE is the data type of the
7115 argument. It is null for libcalls where that information may not be
7116 available. CUM gives information about the preceding args and about
7117 the function being called. NAMED is nonzero if this argument is a
7118 named parameter (otherwise it is an extra parameter matching an
7122 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7123 const_tree type
, bool named
)
7125 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7126 enum machine_mode mode
= omode
;
7127 HOST_WIDE_INT bytes
, words
;
7130 if (mode
== BLKmode
)
7131 bytes
= int_size_in_bytes (type
);
7133 bytes
= GET_MODE_SIZE (mode
);
7134 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7136 /* To simplify the code below, represent vector types with a vector mode
7137 even if MMX/SSE are not active. */
7138 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7139 mode
= type_natural_mode (type
, cum
);
7141 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7142 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7143 else if (TARGET_64BIT
)
7144 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7146 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7151 /* A C expression that indicates when an argument must be passed by
7152 reference. If nonzero for an argument, a copy of that argument is
7153 made in memory and a pointer to the argument is passed instead of
7154 the argument itself. The pointer is passed in whatever way is
7155 appropriate for passing a pointer to that type. */
7158 ix86_pass_by_reference (cumulative_args_t cum_v
, enum machine_mode mode
,
7159 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7161 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7163 /* See Windows x64 Software Convention. */
7164 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7166 int msize
= (int) GET_MODE_SIZE (mode
);
7169 /* Arrays are passed by reference. */
7170 if (TREE_CODE (type
) == ARRAY_TYPE
)
7173 if (AGGREGATE_TYPE_P (type
))
7175 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7176 are passed by reference. */
7177 msize
= int_size_in_bytes (type
);
7181 /* __m128 is passed by reference. */
7183 case 1: case 2: case 4: case 8:
7189 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7195 /* Return true when TYPE should be 128bit aligned for 32bit argument
7196 passing ABI. XXX: This function is obsolete and is only used for
7197 checking psABI compatibility with previous versions of GCC. */
7200 ix86_compat_aligned_value_p (const_tree type
)
7202 enum machine_mode mode
= TYPE_MODE (type
);
7203 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7207 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7209 if (TYPE_ALIGN (type
) < 128)
7212 if (AGGREGATE_TYPE_P (type
))
7214 /* Walk the aggregates recursively. */
7215 switch (TREE_CODE (type
))
7219 case QUAL_UNION_TYPE
:
7223 /* Walk all the structure fields. */
7224 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7226 if (TREE_CODE (field
) == FIELD_DECL
7227 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7234 /* Just for use if some languages passes arrays by value. */
7235 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7246 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7247 XXX: This function is obsolete and is only used for checking psABI
7248 compatibility with previous versions of GCC. */
7251 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7252 const_tree type
, unsigned int align
)
7254 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7255 natural boundaries. */
7256 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7258 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7259 make an exception for SSE modes since these require 128bit
7262 The handling here differs from field_alignment. ICC aligns MMX
7263 arguments to 4 byte boundaries, while structure fields are aligned
7264 to 8 byte boundaries. */
7267 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7268 align
= PARM_BOUNDARY
;
7272 if (!ix86_compat_aligned_value_p (type
))
7273 align
= PARM_BOUNDARY
;
7276 if (align
> BIGGEST_ALIGNMENT
)
7277 align
= BIGGEST_ALIGNMENT
;
7281 /* Return true when TYPE should be 128bit aligned for 32bit argument
7285 ix86_contains_aligned_value_p (const_tree type
)
7287 enum machine_mode mode
= TYPE_MODE (type
);
7289 if (mode
== XFmode
|| mode
== XCmode
)
7292 if (TYPE_ALIGN (type
) < 128)
7295 if (AGGREGATE_TYPE_P (type
))
7297 /* Walk the aggregates recursively. */
7298 switch (TREE_CODE (type
))
7302 case QUAL_UNION_TYPE
:
7306 /* Walk all the structure fields. */
7307 for (field
= TYPE_FIELDS (type
);
7309 field
= DECL_CHAIN (field
))
7311 if (TREE_CODE (field
) == FIELD_DECL
7312 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7319 /* Just for use if some languages passes arrays by value. */
7320 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7329 return TYPE_ALIGN (type
) >= 128;
7334 /* Gives the alignment boundary, in bits, of an argument with the
7335 specified mode and type. */
7338 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7343 /* Since the main variant type is used for call, we convert it to
7344 the main variant type. */
7345 type
= TYPE_MAIN_VARIANT (type
);
7346 align
= TYPE_ALIGN (type
);
7349 align
= GET_MODE_ALIGNMENT (mode
);
7350 if (align
< PARM_BOUNDARY
)
7351 align
= PARM_BOUNDARY
;
7355 unsigned int saved_align
= align
;
7359 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7362 if (mode
== XFmode
|| mode
== XCmode
)
7363 align
= PARM_BOUNDARY
;
7365 else if (!ix86_contains_aligned_value_p (type
))
7366 align
= PARM_BOUNDARY
;
7369 align
= PARM_BOUNDARY
;
7374 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7378 inform (input_location
,
7379 "The ABI for passing parameters with %d-byte"
7380 " alignment has changed in GCC 4.6",
7381 align
/ BITS_PER_UNIT
);
7388 /* Return true if N is a possible register number of function value. */
7391 ix86_function_value_regno_p (const unsigned int regno
)
7398 case FIRST_FLOAT_REG
:
7399 /* TODO: The function should depend on current function ABI but
7400 builtins.c would need updating then. Therefore we use the
7402 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7404 return TARGET_FLOAT_RETURNS_IN_80387
;
7410 if (TARGET_MACHO
|| TARGET_64BIT
)
7418 /* Define how to find the value returned by a function.
7419 VALTYPE is the data type of the value (as a tree).
7420 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7421 otherwise, FUNC is 0. */
7424 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7425 const_tree fntype
, const_tree fn
)
7429 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7430 we normally prevent this case when mmx is not available. However
7431 some ABIs may require the result to be returned like DImode. */
7432 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7433 regno
= FIRST_MMX_REG
;
7435 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7436 we prevent this case when sse is not available. However some ABIs
7437 may require the result to be returned like integer TImode. */
7438 else if (mode
== TImode
7439 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7440 regno
= FIRST_SSE_REG
;
7442 /* 32-byte vector modes in %ymm0. */
7443 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7444 regno
= FIRST_SSE_REG
;
7446 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7447 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7448 regno
= FIRST_FLOAT_REG
;
7450 /* Most things go in %eax. */
7453 /* Override FP return register with %xmm0 for local functions when
7454 SSE math is enabled or for functions with sseregparm attribute. */
7455 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7457 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7458 if ((sse_level
>= 1 && mode
== SFmode
)
7459 || (sse_level
== 2 && mode
== DFmode
))
7460 regno
= FIRST_SSE_REG
;
7463 /* OImode shouldn't be used directly. */
7464 gcc_assert (mode
!= OImode
);
7466 return gen_rtx_REG (orig_mode
, regno
);
7470 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7475 /* Handle libcalls, which don't provide a type node. */
7476 if (valtype
== NULL
)
7490 regno
= FIRST_SSE_REG
;
7494 regno
= FIRST_FLOAT_REG
;
7502 return gen_rtx_REG (mode
, regno
);
7504 else if (POINTER_TYPE_P (valtype
))
7506 /* Pointers are always returned in word_mode. */
7510 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7511 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7512 x86_64_int_return_registers
, 0);
7514 /* For zero sized structures, construct_container returns NULL, but we
7515 need to keep rest of compiler happy by returning meaningful value. */
7517 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7523 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7526 unsigned int regno
= AX_REG
;
7530 switch (GET_MODE_SIZE (mode
))
7533 if (valtype
!= NULL_TREE
7534 && !VECTOR_INTEGER_TYPE_P (valtype
)
7535 && !VECTOR_INTEGER_TYPE_P (valtype
)
7536 && !INTEGRAL_TYPE_P (valtype
)
7537 && !VECTOR_FLOAT_TYPE_P (valtype
))
7539 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7540 && !COMPLEX_MODE_P (mode
))
7541 regno
= FIRST_SSE_REG
;
7545 if (mode
== SFmode
|| mode
== DFmode
)
7546 regno
= FIRST_SSE_REG
;
7552 return gen_rtx_REG (orig_mode
, regno
);
7556 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7557 enum machine_mode orig_mode
, enum machine_mode mode
)
7559 const_tree fn
, fntype
;
7562 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7563 fn
= fntype_or_decl
;
7564 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7566 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7567 return function_value_ms_64 (orig_mode
, mode
, valtype
);
7568 else if (TARGET_64BIT
)
7569 return function_value_64 (orig_mode
, mode
, valtype
);
7571 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7575 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7576 bool outgoing ATTRIBUTE_UNUSED
)
7578 enum machine_mode mode
, orig_mode
;
7580 orig_mode
= TYPE_MODE (valtype
);
7581 mode
= type_natural_mode (valtype
, NULL
);
7582 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7585 /* Pointer function arguments and return values are promoted to
7588 static enum machine_mode
7589 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7590 int *punsignedp
, const_tree fntype
,
7593 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7595 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7598 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7602 /* Return true if a structure, union or array with MODE containing FIELD
7603 should be accessed using BLKmode. */
7606 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7608 /* Union with XFmode must be in BLKmode. */
7609 return (mode
== XFmode
7610 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7611 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7615 ix86_libcall_value (enum machine_mode mode
)
7617 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7620 /* Return true iff type is returned in memory. */
7622 static bool ATTRIBUTE_UNUSED
7623 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7627 if (mode
== BLKmode
)
7630 size
= int_size_in_bytes (type
);
7632 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7635 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7637 /* User-created vectors small enough to fit in EAX. */
7641 /* MMX/3dNow values are returned in MM0,
7642 except when it doesn't exits or the ABI prescribes otherwise. */
7644 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7646 /* SSE values are returned in XMM0, except when it doesn't exist. */
7650 /* AVX values are returned in YMM0, except when it doesn't exist. */
7661 /* OImode shouldn't be used directly. */
7662 gcc_assert (mode
!= OImode
);
7667 static bool ATTRIBUTE_UNUSED
7668 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7670 int needed_intregs
, needed_sseregs
;
7671 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7674 static bool ATTRIBUTE_UNUSED
7675 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7677 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7679 /* __m128 is returned in xmm0. */
7680 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)
7681 || VECTOR_FLOAT_TYPE_P (type
))
7682 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7683 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7686 /* Otherwise, the size must be exactly in [1248]. */
7687 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7691 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7693 #ifdef SUBTARGET_RETURN_IN_MEMORY
7694 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7696 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7700 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7701 return return_in_memory_ms_64 (type
, mode
);
7703 return return_in_memory_64 (type
, mode
);
7706 return return_in_memory_32 (type
, mode
);
7710 /* When returning SSE vector types, we have a choice of either
7711 (1) being abi incompatible with a -march switch, or
7712 (2) generating an error.
7713 Given no good solution, I think the safest thing is one warning.
7714 The user won't be able to use -Werror, but....
7716 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7717 called in response to actually generating a caller or callee that
7718 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7719 via aggregate_value_p for general type probing from tree-ssa. */
7722 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7724 static bool warnedsse
, warnedmmx
;
7726 if (!TARGET_64BIT
&& type
)
7728 /* Look at the return type of the function, not the function type. */
7729 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7731 if (!TARGET_SSE
&& !warnedsse
)
7734 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7737 warning (0, "SSE vector return without SSE enabled "
7742 if (!TARGET_MMX
&& !warnedmmx
)
7744 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7747 warning (0, "MMX vector return without MMX enabled "
7757 /* Create the va_list data type. */
7759 /* Returns the calling convention specific va_list date type.
7760 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7763 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7765 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7767 /* For i386 we use plain pointer to argument area. */
7768 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7769 return build_pointer_type (char_type_node
);
7771 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7772 type_decl
= build_decl (BUILTINS_LOCATION
,
7773 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7775 f_gpr
= build_decl (BUILTINS_LOCATION
,
7776 FIELD_DECL
, get_identifier ("gp_offset"),
7777 unsigned_type_node
);
7778 f_fpr
= build_decl (BUILTINS_LOCATION
,
7779 FIELD_DECL
, get_identifier ("fp_offset"),
7780 unsigned_type_node
);
7781 f_ovf
= build_decl (BUILTINS_LOCATION
,
7782 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7784 f_sav
= build_decl (BUILTINS_LOCATION
,
7785 FIELD_DECL
, get_identifier ("reg_save_area"),
7788 va_list_gpr_counter_field
= f_gpr
;
7789 va_list_fpr_counter_field
= f_fpr
;
7791 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7792 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7793 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7794 DECL_FIELD_CONTEXT (f_sav
) = record
;
7796 TYPE_STUB_DECL (record
) = type_decl
;
7797 TYPE_NAME (record
) = type_decl
;
7798 TYPE_FIELDS (record
) = f_gpr
;
7799 DECL_CHAIN (f_gpr
) = f_fpr
;
7800 DECL_CHAIN (f_fpr
) = f_ovf
;
7801 DECL_CHAIN (f_ovf
) = f_sav
;
7803 layout_type (record
);
7805 /* The correct type is an array type of one element. */
7806 return build_array_type (record
, build_index_type (size_zero_node
));
7809 /* Setup the builtin va_list data type and for 64-bit the additional
7810 calling convention specific va_list data types. */
7813 ix86_build_builtin_va_list (void)
7815 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7817 /* Initialize abi specific va_list builtin types. */
7821 if (ix86_abi
== MS_ABI
)
7823 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7824 if (TREE_CODE (t
) != RECORD_TYPE
)
7825 t
= build_variant_type_copy (t
);
7826 sysv_va_list_type_node
= t
;
7831 if (TREE_CODE (t
) != RECORD_TYPE
)
7832 t
= build_variant_type_copy (t
);
7833 sysv_va_list_type_node
= t
;
7835 if (ix86_abi
!= MS_ABI
)
7837 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7838 if (TREE_CODE (t
) != RECORD_TYPE
)
7839 t
= build_variant_type_copy (t
);
7840 ms_va_list_type_node
= t
;
7845 if (TREE_CODE (t
) != RECORD_TYPE
)
7846 t
= build_variant_type_copy (t
);
7847 ms_va_list_type_node
= t
;
7854 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7857 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7863 /* GPR size of varargs save area. */
7864 if (cfun
->va_list_gpr_size
)
7865 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7867 ix86_varargs_gpr_size
= 0;
7869 /* FPR size of varargs save area. We don't need it if we don't pass
7870 anything in SSE registers. */
7871 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7872 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7874 ix86_varargs_fpr_size
= 0;
7876 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7879 save_area
= frame_pointer_rtx
;
7880 set
= get_varargs_alias_set ();
7882 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7883 if (max
> X86_64_REGPARM_MAX
)
7884 max
= X86_64_REGPARM_MAX
;
7886 for (i
= cum
->regno
; i
< max
; i
++)
7888 mem
= gen_rtx_MEM (word_mode
,
7889 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7890 MEM_NOTRAP_P (mem
) = 1;
7891 set_mem_alias_set (mem
, set
);
7892 emit_move_insn (mem
,
7893 gen_rtx_REG (word_mode
,
7894 x86_64_int_parameter_registers
[i
]));
7897 if (ix86_varargs_fpr_size
)
7899 enum machine_mode smode
;
7902 /* Now emit code to save SSE registers. The AX parameter contains number
7903 of SSE parameter registers used to call this function, though all we
7904 actually check here is the zero/non-zero status. */
7906 label
= gen_label_rtx ();
7907 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7908 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7911 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7912 we used movdqa (i.e. TImode) instead? Perhaps even better would
7913 be if we could determine the real mode of the data, via a hook
7914 into pass_stdarg. Ignore all that for now. */
7916 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7917 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7919 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7920 if (max
> X86_64_SSE_REGPARM_MAX
)
7921 max
= X86_64_SSE_REGPARM_MAX
;
7923 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7925 mem
= plus_constant (Pmode
, save_area
,
7926 i
* 16 + ix86_varargs_gpr_size
);
7927 mem
= gen_rtx_MEM (smode
, mem
);
7928 MEM_NOTRAP_P (mem
) = 1;
7929 set_mem_alias_set (mem
, set
);
7930 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7932 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7940 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7942 alias_set_type set
= get_varargs_alias_set ();
7945 /* Reset to zero, as there might be a sysv vaarg used
7947 ix86_varargs_gpr_size
= 0;
7948 ix86_varargs_fpr_size
= 0;
7950 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7954 mem
= gen_rtx_MEM (Pmode
,
7955 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7956 i
* UNITS_PER_WORD
));
7957 MEM_NOTRAP_P (mem
) = 1;
7958 set_mem_alias_set (mem
, set
);
7960 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7961 emit_move_insn (mem
, reg
);
7966 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7967 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7970 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7971 CUMULATIVE_ARGS next_cum
;
7974 /* This argument doesn't appear to be used anymore. Which is good,
7975 because the old code here didn't suppress rtl generation. */
7976 gcc_assert (!no_rtl
);
7981 fntype
= TREE_TYPE (current_function_decl
);
7983 /* For varargs, we do not want to skip the dummy va_dcl argument.
7984 For stdargs, we do want to skip the last named argument. */
7986 if (stdarg_p (fntype
))
7987 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7990 if (cum
->call_abi
== MS_ABI
)
7991 setup_incoming_varargs_ms_64 (&next_cum
);
7993 setup_incoming_varargs_64 (&next_cum
);
7996 /* Checks if TYPE is of kind va_list char *. */
7999 is_va_list_char_pointer (tree type
)
8003 /* For 32-bit it is always true. */
8006 canonic
= ix86_canonical_va_list_type (type
);
8007 return (canonic
== ms_va_list_type_node
8008 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
8011 /* Implement va_start. */
8014 ix86_va_start (tree valist
, rtx nextarg
)
8016 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
8017 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8018 tree gpr
, fpr
, ovf
, sav
, t
;
8022 if (flag_split_stack
8023 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8025 unsigned int scratch_regno
;
8027 /* When we are splitting the stack, we can't refer to the stack
8028 arguments using internal_arg_pointer, because they may be on
8029 the old stack. The split stack prologue will arrange to
8030 leave a pointer to the old stack arguments in a scratch
8031 register, which we here copy to a pseudo-register. The split
8032 stack prologue can't set the pseudo-register directly because
8033 it (the prologue) runs before any registers have been saved. */
8035 scratch_regno
= split_stack_prologue_scratch_regno ();
8036 if (scratch_regno
!= INVALID_REGNUM
)
8040 reg
= gen_reg_rtx (Pmode
);
8041 cfun
->machine
->split_stack_varargs_pointer
= reg
;
8044 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
8048 push_topmost_sequence ();
8049 emit_insn_after (seq
, entry_of_function ());
8050 pop_topmost_sequence ();
8054 /* Only 64bit target needs something special. */
8055 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8057 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8058 std_expand_builtin_va_start (valist
, nextarg
);
8063 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
8064 next
= expand_binop (ptr_mode
, add_optab
,
8065 cfun
->machine
->split_stack_varargs_pointer
,
8066 crtl
->args
.arg_offset_rtx
,
8067 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
8068 convert_move (va_r
, next
, 0);
8073 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8074 f_fpr
= DECL_CHAIN (f_gpr
);
8075 f_ovf
= DECL_CHAIN (f_fpr
);
8076 f_sav
= DECL_CHAIN (f_ovf
);
8078 valist
= build_simple_mem_ref (valist
);
8079 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
8080 /* The following should be folded into the MEM_REF offset. */
8081 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
8083 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
8085 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
8087 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
8090 /* Count number of gp and fp argument registers used. */
8091 words
= crtl
->args
.info
.words
;
8092 n_gpr
= crtl
->args
.info
.regno
;
8093 n_fpr
= crtl
->args
.info
.sse_regno
;
8095 if (cfun
->va_list_gpr_size
)
8097 type
= TREE_TYPE (gpr
);
8098 t
= build2 (MODIFY_EXPR
, type
,
8099 gpr
, build_int_cst (type
, n_gpr
* 8));
8100 TREE_SIDE_EFFECTS (t
) = 1;
8101 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8104 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8106 type
= TREE_TYPE (fpr
);
8107 t
= build2 (MODIFY_EXPR
, type
, fpr
,
8108 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
8109 TREE_SIDE_EFFECTS (t
) = 1;
8110 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8113 /* Find the overflow area. */
8114 type
= TREE_TYPE (ovf
);
8115 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8116 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
8118 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
8119 t
= make_tree (type
, ovf_rtx
);
8121 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
8122 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
8123 TREE_SIDE_EFFECTS (t
) = 1;
8124 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8126 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
8128 /* Find the register save area.
8129 Prologue of the function save it right above stack frame. */
8130 type
= TREE_TYPE (sav
);
8131 t
= make_tree (type
, frame_pointer_rtx
);
8132 if (!ix86_varargs_gpr_size
)
8133 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
8134 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
8135 TREE_SIDE_EFFECTS (t
) = 1;
8136 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8140 /* Implement va_arg. */
8143 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
8146 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
8147 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8148 tree gpr
, fpr
, ovf
, sav
, t
;
8150 tree lab_false
, lab_over
= NULL_TREE
;
8155 enum machine_mode nat_mode
;
8156 unsigned int arg_boundary
;
8158 /* Only 64bit target needs something special. */
8159 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8160 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
8162 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8163 f_fpr
= DECL_CHAIN (f_gpr
);
8164 f_ovf
= DECL_CHAIN (f_fpr
);
8165 f_sav
= DECL_CHAIN (f_ovf
);
8167 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8168 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8169 valist
= build_va_arg_indirect_ref (valist
);
8170 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8171 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8172 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8174 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8176 type
= build_pointer_type (type
);
8177 size
= int_size_in_bytes (type
);
8178 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8180 nat_mode
= type_natural_mode (type
, NULL
);
8189 /* Unnamed 256bit vector mode parameters are passed on stack. */
8190 if (!TARGET_64BIT_MS_ABI
)
8197 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8198 type
, 0, X86_64_REGPARM_MAX
,
8199 X86_64_SSE_REGPARM_MAX
, intreg
,
8204 /* Pull the value out of the saved registers. */
8206 addr
= create_tmp_var (ptr_type_node
, "addr");
8210 int needed_intregs
, needed_sseregs
;
8212 tree int_addr
, sse_addr
;
8214 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8215 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8217 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8219 need_temp
= (!REG_P (container
)
8220 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8221 || TYPE_ALIGN (type
) > 128));
8223 /* In case we are passing structure, verify that it is consecutive block
8224 on the register save area. If not we need to do moves. */
8225 if (!need_temp
&& !REG_P (container
))
8227 /* Verify that all registers are strictly consecutive */
8228 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8232 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8234 rtx slot
= XVECEXP (container
, 0, i
);
8235 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8236 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8244 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8246 rtx slot
= XVECEXP (container
, 0, i
);
8247 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8248 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8260 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8261 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8264 /* First ensure that we fit completely in registers. */
8267 t
= build_int_cst (TREE_TYPE (gpr
),
8268 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8269 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8270 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8271 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8272 gimplify_and_add (t
, pre_p
);
8276 t
= build_int_cst (TREE_TYPE (fpr
),
8277 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8278 + X86_64_REGPARM_MAX
* 8);
8279 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8280 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8281 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8282 gimplify_and_add (t
, pre_p
);
8285 /* Compute index to start of area used for integer regs. */
8288 /* int_addr = gpr + sav; */
8289 t
= fold_build_pointer_plus (sav
, gpr
);
8290 gimplify_assign (int_addr
, t
, pre_p
);
8294 /* sse_addr = fpr + sav; */
8295 t
= fold_build_pointer_plus (sav
, fpr
);
8296 gimplify_assign (sse_addr
, t
, pre_p
);
8300 int i
, prev_size
= 0;
8301 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8304 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8305 gimplify_assign (addr
, t
, pre_p
);
8307 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8309 rtx slot
= XVECEXP (container
, 0, i
);
8310 rtx reg
= XEXP (slot
, 0);
8311 enum machine_mode mode
= GET_MODE (reg
);
8317 tree dest_addr
, dest
;
8318 int cur_size
= GET_MODE_SIZE (mode
);
8320 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8321 prev_size
= INTVAL (XEXP (slot
, 1));
8322 if (prev_size
+ cur_size
> size
)
8324 cur_size
= size
- prev_size
;
8325 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8326 if (mode
== BLKmode
)
8329 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8330 if (mode
== GET_MODE (reg
))
8331 addr_type
= build_pointer_type (piece_type
);
8333 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8335 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8338 if (SSE_REGNO_P (REGNO (reg
)))
8340 src_addr
= sse_addr
;
8341 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8345 src_addr
= int_addr
;
8346 src_offset
= REGNO (reg
) * 8;
8348 src_addr
= fold_convert (addr_type
, src_addr
);
8349 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8351 dest_addr
= fold_convert (daddr_type
, addr
);
8352 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8353 if (cur_size
== GET_MODE_SIZE (mode
))
8355 src
= build_va_arg_indirect_ref (src_addr
);
8356 dest
= build_va_arg_indirect_ref (dest_addr
);
8358 gimplify_assign (dest
, src
, pre_p
);
8363 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8364 3, dest_addr
, src_addr
,
8365 size_int (cur_size
));
8366 gimplify_and_add (copy
, pre_p
);
8368 prev_size
+= cur_size
;
8374 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8375 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8376 gimplify_assign (gpr
, t
, pre_p
);
8381 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8382 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8383 gimplify_assign (fpr
, t
, pre_p
);
8386 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8388 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8391 /* ... otherwise out of the overflow area. */
8393 /* When we align parameter on stack for caller, if the parameter
8394 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8395 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8396 here with caller. */
8397 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8398 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8399 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8401 /* Care for on-stack alignment if needed. */
8402 if (arg_boundary
<= 64 || size
== 0)
8406 HOST_WIDE_INT align
= arg_boundary
/ 8;
8407 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8408 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8409 build_int_cst (TREE_TYPE (t
), -align
));
8412 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8413 gimplify_assign (addr
, t
, pre_p
);
8415 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8416 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8419 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8421 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8422 addr
= fold_convert (ptrtype
, addr
);
8425 addr
= build_va_arg_indirect_ref (addr
);
8426 return build_va_arg_indirect_ref (addr
);
8429 /* Return true if OPNUM's MEM should be matched
8430 in movabs* patterns. */
8433 ix86_check_movabs (rtx insn
, int opnum
)
8437 set
= PATTERN (insn
);
8438 if (GET_CODE (set
) == PARALLEL
)
8439 set
= XVECEXP (set
, 0, 0);
8440 gcc_assert (GET_CODE (set
) == SET
);
8441 mem
= XEXP (set
, opnum
);
8442 while (GET_CODE (mem
) == SUBREG
)
8443 mem
= SUBREG_REG (mem
);
8444 gcc_assert (MEM_P (mem
));
8445 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8448 /* Initialize the table of extra 80387 mathematical constants. */
8451 init_ext_80387_constants (void)
8453 static const char * cst
[5] =
8455 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8456 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8457 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8458 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8459 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8463 for (i
= 0; i
< 5; i
++)
8465 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8466 /* Ensure each constant is rounded to XFmode precision. */
8467 real_convert (&ext_80387_constants_table
[i
],
8468 XFmode
, &ext_80387_constants_table
[i
]);
8471 ext_80387_constants_init
= 1;
8474 /* Return non-zero if the constant is something that
8475 can be loaded with a special instruction. */
8478 standard_80387_constant_p (rtx x
)
8480 enum machine_mode mode
= GET_MODE (x
);
8484 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8487 if (x
== CONST0_RTX (mode
))
8489 if (x
== CONST1_RTX (mode
))
8492 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8494 /* For XFmode constants, try to find a special 80387 instruction when
8495 optimizing for size or on those CPUs that benefit from them. */
8497 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8501 if (! ext_80387_constants_init
)
8502 init_ext_80387_constants ();
8504 for (i
= 0; i
< 5; i
++)
8505 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8509 /* Load of the constant -0.0 or -1.0 will be split as
8510 fldz;fchs or fld1;fchs sequence. */
8511 if (real_isnegzero (&r
))
8513 if (real_identical (&r
, &dconstm1
))
8519 /* Return the opcode of the special instruction to be used to load
8523 standard_80387_constant_opcode (rtx x
)
8525 switch (standard_80387_constant_p (x
))
8549 /* Return the CONST_DOUBLE representing the 80387 constant that is
8550 loaded by the specified special instruction. The argument IDX
8551 matches the return value from standard_80387_constant_p. */
8554 standard_80387_constant_rtx (int idx
)
8558 if (! ext_80387_constants_init
)
8559 init_ext_80387_constants ();
8575 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8579 /* Return 1 if X is all 0s and 2 if x is all 1s
8580 in supported SSE/AVX vector mode. */
8583 standard_sse_constant_p (rtx x
)
8585 enum machine_mode mode
= GET_MODE (x
);
8587 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8589 if (vector_all_ones_operand (x
, mode
))
8611 /* Return the opcode of the special instruction to be used to load
8615 standard_sse_constant_opcode (rtx insn
, rtx x
)
8617 switch (standard_sse_constant_p (x
))
8620 switch (get_attr_mode (insn
))
8623 return "%vpxor\t%0, %d0";
8625 return "%vxorpd\t%0, %d0";
8627 return "%vxorps\t%0, %d0";
8630 return "vpxor\t%x0, %x0, %x0";
8632 return "vxorpd\t%x0, %x0, %x0";
8634 return "vxorps\t%x0, %x0, %x0";
8641 if (get_attr_mode (insn
) == MODE_XI
8642 || get_attr_mode (insn
) == MODE_V8DF
8643 || get_attr_mode (insn
) == MODE_V16SF
)
8644 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8646 return "vpcmpeqd\t%0, %0, %0";
8648 return "pcmpeqd\t%0, %0";
8656 /* Returns true if OP contains a symbol reference */
8659 symbolic_reference_mentioned_p (rtx op
)
8664 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8667 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8668 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8674 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8675 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8679 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8686 /* Return true if it is appropriate to emit `ret' instructions in the
8687 body of a function. Do this only if the epilogue is simple, needing a
8688 couple of insns. Prior to reloading, we can't tell how many registers
8689 must be saved, so return false then. Return false if there is no frame
8690 marker to de-allocate. */
8693 ix86_can_use_return_insn_p (void)
8695 struct ix86_frame frame
;
8697 if (! reload_completed
|| frame_pointer_needed
)
8700 /* Don't allow more than 32k pop, since that's all we can do
8701 with one instruction. */
8702 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8705 ix86_compute_frame_layout (&frame
);
8706 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8707 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8710 /* Value should be nonzero if functions must have frame pointers.
8711 Zero means the frame pointer need not be set up (and parms may
8712 be accessed via the stack pointer) in functions that seem suitable. */
8715 ix86_frame_pointer_required (void)
8717 /* If we accessed previous frames, then the generated code expects
8718 to be able to access the saved ebp value in our frame. */
8719 if (cfun
->machine
->accesses_prev_frame
)
8722 /* Several x86 os'es need a frame pointer for other reasons,
8723 usually pertaining to setjmp. */
8724 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8727 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8728 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8731 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8732 allocation is 4GB. */
8733 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8736 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8737 turns off the frame pointer by default. Turn it back on now if
8738 we've not got a leaf function. */
8739 if (TARGET_OMIT_LEAF_FRAME_POINTER
8741 || ix86_current_function_calls_tls_descriptor
))
8744 if (crtl
->profile
&& !flag_fentry
)
8750 /* Record that the current function accesses previous call frames. */
8753 ix86_setup_frame_addresses (void)
8755 cfun
->machine
->accesses_prev_frame
= 1;
8758 #ifndef USE_HIDDEN_LINKONCE
8759 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8760 # define USE_HIDDEN_LINKONCE 1
8762 # define USE_HIDDEN_LINKONCE 0
8766 static int pic_labels_used
;
8768 /* Fills in the label name that should be used for a pc thunk for
8769 the given register. */
8772 get_pc_thunk_name (char name
[32], unsigned int regno
)
8774 gcc_assert (!TARGET_64BIT
);
8776 if (USE_HIDDEN_LINKONCE
)
8777 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8779 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8783 /* This function generates code for -fpic that loads %ebx with
8784 the return address of the caller and then returns. */
8787 ix86_code_end (void)
8792 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8797 if (!(pic_labels_used
& (1 << regno
)))
8800 get_pc_thunk_name (name
, regno
);
8802 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8803 get_identifier (name
),
8804 build_function_type_list (void_type_node
, NULL_TREE
));
8805 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8806 NULL_TREE
, void_type_node
);
8807 TREE_PUBLIC (decl
) = 1;
8808 TREE_STATIC (decl
) = 1;
8809 DECL_IGNORED_P (decl
) = 1;
8814 switch_to_section (darwin_sections
[text_coal_section
]);
8815 fputs ("\t.weak_definition\t", asm_out_file
);
8816 assemble_name (asm_out_file
, name
);
8817 fputs ("\n\t.private_extern\t", asm_out_file
);
8818 assemble_name (asm_out_file
, name
);
8819 putc ('\n', asm_out_file
);
8820 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8821 DECL_WEAK (decl
) = 1;
8825 if (USE_HIDDEN_LINKONCE
)
8827 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8829 targetm
.asm_out
.unique_section (decl
, 0);
8830 switch_to_section (get_named_section (decl
, NULL
, 0));
8832 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8833 fputs ("\t.hidden\t", asm_out_file
);
8834 assemble_name (asm_out_file
, name
);
8835 putc ('\n', asm_out_file
);
8836 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8840 switch_to_section (text_section
);
8841 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8844 DECL_INITIAL (decl
) = make_node (BLOCK
);
8845 current_function_decl
= decl
;
8846 init_function_start (decl
);
8847 first_function_block_is_cold
= false;
8848 /* Make sure unwind info is emitted for the thunk if needed. */
8849 final_start_function (emit_barrier (), asm_out_file
, 1);
8851 /* Pad stack IP move with 4 instructions (two NOPs count
8852 as one instruction). */
8853 if (TARGET_PAD_SHORT_FUNCTION
)
8858 fputs ("\tnop\n", asm_out_file
);
8861 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8862 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8863 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8864 fputs ("\tret\n", asm_out_file
);
8865 final_end_function ();
8866 init_insn_lengths ();
8867 free_after_compilation (cfun
);
8869 current_function_decl
= NULL
;
8872 if (flag_split_stack
)
8873 file_end_indicate_split_stack ();
8876 /* Emit code for the SET_GOT patterns. */
8879 output_set_got (rtx dest
, rtx label
)
8885 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8887 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8888 xops
[2] = gen_rtx_MEM (Pmode
,
8889 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8890 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8892 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8893 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8894 an unadorned address. */
8895 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8896 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8897 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8901 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8906 /* We don't need a pic base, we're not producing pic. */
8909 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8910 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8911 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8912 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8917 get_pc_thunk_name (name
, REGNO (dest
));
8918 pic_labels_used
|= 1 << REGNO (dest
);
8920 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8921 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8922 output_asm_insn ("call\t%X2", xops
);
8925 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
8926 This is what will be referenced by the Mach-O PIC subsystem. */
8927 if (machopic_should_output_picbase_label () || !label
)
8928 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8930 /* When we are restoring the pic base at the site of a nonlocal label,
8931 and we decided to emit the pic base above, we will still output a
8932 local label used for calculating the correction offset (even though
8933 the offset will be 0 in that case). */
8935 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8936 CODE_LABEL_NUMBER (label
));
8941 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8946 /* Generate an "push" pattern for input ARG. */
8951 struct machine_function
*m
= cfun
->machine
;
8953 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8954 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8955 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8957 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8958 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8960 return gen_rtx_SET (VOIDmode
,
8961 gen_rtx_MEM (word_mode
,
8962 gen_rtx_PRE_DEC (Pmode
,
8963 stack_pointer_rtx
)),
8967 /* Generate an "pop" pattern for input ARG. */
8972 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8973 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8975 return gen_rtx_SET (VOIDmode
,
8977 gen_rtx_MEM (word_mode
,
8978 gen_rtx_POST_INC (Pmode
,
8979 stack_pointer_rtx
)));
8982 /* Return >= 0 if there is an unused call-clobbered register available
8983 for the entire function. */
8986 ix86_select_alt_pic_regnum (void)
8990 && !ix86_current_function_calls_tls_descriptor
)
8993 /* Can't use the same register for both PIC and DRAP. */
8995 drap
= REGNO (crtl
->drap_reg
);
8998 for (i
= 2; i
>= 0; --i
)
8999 if (i
!= drap
&& !df_regs_ever_live_p (i
))
9003 return INVALID_REGNUM
;
9006 /* Return TRUE if we need to save REGNO. */
9009 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
9011 if (pic_offset_table_rtx
9012 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
9013 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
9015 || crtl
->calls_eh_return
9016 || crtl
->uses_const_pool
9017 || cfun
->has_nonlocal_label
))
9018 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
9020 if (crtl
->calls_eh_return
&& maybe_eh_return
)
9025 unsigned test
= EH_RETURN_DATA_REGNO (i
);
9026 if (test
== INVALID_REGNUM
)
9033 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
9036 return (df_regs_ever_live_p (regno
)
9037 && !call_used_regs
[regno
]
9038 && !fixed_regs
[regno
]
9039 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
9042 /* Return number of saved general prupose registers. */
9045 ix86_nsaved_regs (void)
9050 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9051 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9056 /* Return number of saved SSE registrers. */
9059 ix86_nsaved_sseregs (void)
9064 if (!TARGET_64BIT_MS_ABI
)
9066 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9067 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9072 /* Given FROM and TO register numbers, say whether this elimination is
9073 allowed. If stack alignment is needed, we can only replace argument
9074 pointer with hard frame pointer, or replace frame pointer with stack
9075 pointer. Otherwise, frame pointer elimination is automatically
9076 handled and all other eliminations are valid. */
9079 ix86_can_eliminate (const int from
, const int to
)
9081 if (stack_realign_fp
)
9082 return ((from
== ARG_POINTER_REGNUM
9083 && to
== HARD_FRAME_POINTER_REGNUM
)
9084 || (from
== FRAME_POINTER_REGNUM
9085 && to
== STACK_POINTER_REGNUM
));
9087 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
9090 /* Return the offset between two registers, one to be eliminated, and the other
9091 its replacement, at the start of a routine. */
9094 ix86_initial_elimination_offset (int from
, int to
)
9096 struct ix86_frame frame
;
9097 ix86_compute_frame_layout (&frame
);
9099 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
9100 return frame
.hard_frame_pointer_offset
;
9101 else if (from
== FRAME_POINTER_REGNUM
9102 && to
== HARD_FRAME_POINTER_REGNUM
)
9103 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
9106 gcc_assert (to
== STACK_POINTER_REGNUM
);
9108 if (from
== ARG_POINTER_REGNUM
)
9109 return frame
.stack_pointer_offset
;
9111 gcc_assert (from
== FRAME_POINTER_REGNUM
);
9112 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
9116 /* In a dynamically-aligned function, we can't know the offset from
9117 stack pointer to frame pointer, so we must ensure that setjmp
9118 eliminates fp against the hard fp (%ebp) rather than trying to
9119 index from %esp up to the top of the frame across a gap that is
9120 of unknown (at compile-time) size. */
9122 ix86_builtin_setjmp_frame_value (void)
9124 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
9127 /* When using -fsplit-stack, the allocation routines set a field in
9128 the TCB to the bottom of the stack plus this much space, measured
9131 #define SPLIT_STACK_AVAILABLE 256
9133 /* Fill structure ix86_frame about frame of currently computed function. */
9136 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9138 unsigned HOST_WIDE_INT stack_alignment_needed
;
9139 HOST_WIDE_INT offset
;
9140 unsigned HOST_WIDE_INT preferred_alignment
;
9141 HOST_WIDE_INT size
= get_frame_size ();
9142 HOST_WIDE_INT to_allocate
;
9144 frame
->nregs
= ix86_nsaved_regs ();
9145 frame
->nsseregs
= ix86_nsaved_sseregs ();
9147 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9148 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9150 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9151 function prologues and leaf. */
9152 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9153 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
9154 || ix86_current_function_calls_tls_descriptor
))
9156 preferred_alignment
= 16;
9157 stack_alignment_needed
= 16;
9158 crtl
->preferred_stack_boundary
= 128;
9159 crtl
->stack_alignment_needed
= 128;
9162 gcc_assert (!size
|| stack_alignment_needed
);
9163 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9164 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9166 /* For SEH we have to limit the amount of code movement into the prologue.
9167 At present we do this via a BLOCKAGE, at which point there's very little
9168 scheduling that can be done, which means that there's very little point
9169 in doing anything except PUSHs. */
9171 cfun
->machine
->use_fast_prologue_epilogue
= false;
9173 /* During reload iteration the amount of registers saved can change.
9174 Recompute the value as needed. Do not recompute when amount of registers
9175 didn't change as reload does multiple calls to the function and does not
9176 expect the decision to change within single iteration. */
9177 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR
)
9178 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9180 int count
= frame
->nregs
;
9181 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9183 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9185 /* The fast prologue uses move instead of push to save registers. This
9186 is significantly longer, but also executes faster as modern hardware
9187 can execute the moves in parallel, but can't do that for push/pop.
9189 Be careful about choosing what prologue to emit: When function takes
9190 many instructions to execute we may use slow version as well as in
9191 case function is known to be outside hot spot (this is known with
9192 feedback only). Weight the size of function by number of registers
9193 to save as it is cheap to use one or two push instructions but very
9194 slow to use many of them. */
9196 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9197 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9198 || (flag_branch_probabilities
9199 && node
->frequency
< NODE_FREQUENCY_HOT
))
9200 cfun
->machine
->use_fast_prologue_epilogue
= false;
9202 cfun
->machine
->use_fast_prologue_epilogue
9203 = !expensive_function_p (count
);
9206 frame
->save_regs_using_mov
9207 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9208 /* If static stack checking is enabled and done with probes,
9209 the registers need to be saved before allocating the frame. */
9210 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9212 /* Skip return address. */
9213 offset
= UNITS_PER_WORD
;
9215 /* Skip pushed static chain. */
9216 if (ix86_static_chain_on_stack
)
9217 offset
+= UNITS_PER_WORD
;
9219 /* Skip saved base pointer. */
9220 if (frame_pointer_needed
)
9221 offset
+= UNITS_PER_WORD
;
9222 frame
->hfp_save_offset
= offset
;
9224 /* The traditional frame pointer location is at the top of the frame. */
9225 frame
->hard_frame_pointer_offset
= offset
;
9227 /* Register save area */
9228 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9229 frame
->reg_save_offset
= offset
;
9231 /* On SEH target, registers are pushed just before the frame pointer
9234 frame
->hard_frame_pointer_offset
= offset
;
9236 /* Align and set SSE register save area. */
9237 if (frame
->nsseregs
)
9239 /* The only ABI that has saved SSE registers (Win64) also has a
9240 16-byte aligned default stack, and thus we don't need to be
9241 within the re-aligned local stack frame to save them. */
9242 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9243 offset
= (offset
+ 16 - 1) & -16;
9244 offset
+= frame
->nsseregs
* 16;
9246 frame
->sse_reg_save_offset
= offset
;
9248 /* The re-aligned stack starts here. Values before this point are not
9249 directly comparable with values below this point. In order to make
9250 sure that no value happens to be the same before and after, force
9251 the alignment computation below to add a non-zero value. */
9252 if (stack_realign_fp
)
9253 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9256 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9257 offset
+= frame
->va_arg_size
;
9259 /* Align start of frame for local function. */
9260 if (stack_realign_fp
9261 || offset
!= frame
->sse_reg_save_offset
9264 || cfun
->calls_alloca
9265 || ix86_current_function_calls_tls_descriptor
)
9266 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9268 /* Frame pointer points here. */
9269 frame
->frame_pointer_offset
= offset
;
9273 /* Add outgoing arguments area. Can be skipped if we eliminated
9274 all the function calls as dead code.
9275 Skipping is however impossible when function calls alloca. Alloca
9276 expander assumes that last crtl->outgoing_args_size
9277 of stack frame are unused. */
9278 if (ACCUMULATE_OUTGOING_ARGS
9279 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9280 || ix86_current_function_calls_tls_descriptor
))
9282 offset
+= crtl
->outgoing_args_size
;
9283 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9286 frame
->outgoing_arguments_size
= 0;
9288 /* Align stack boundary. Only needed if we're calling another function
9290 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9291 || ix86_current_function_calls_tls_descriptor
)
9292 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9294 /* We've reached end of stack frame. */
9295 frame
->stack_pointer_offset
= offset
;
9297 /* Size prologue needs to allocate. */
9298 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9300 if ((!to_allocate
&& frame
->nregs
<= 1)
9301 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9302 frame
->save_regs_using_mov
= false;
9304 if (ix86_using_red_zone ()
9305 && crtl
->sp_is_unchanging
9307 && !ix86_current_function_calls_tls_descriptor
)
9309 frame
->red_zone_size
= to_allocate
;
9310 if (frame
->save_regs_using_mov
)
9311 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9312 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9313 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9316 frame
->red_zone_size
= 0;
9317 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9319 /* The SEH frame pointer location is near the bottom of the frame.
9320 This is enforced by the fact that the difference between the
9321 stack pointer and the frame pointer is limited to 240 bytes in
9322 the unwind data structure. */
9327 /* If we can leave the frame pointer where it is, do so. Also, returns
9328 the establisher frame for __builtin_frame_address (0). */
9329 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9330 if (diff
<= SEH_MAX_FRAME_SIZE
9331 && (diff
> 240 || (diff
& 15) != 0)
9332 && !crtl
->accesses_prior_frames
)
9334 /* Ideally we'd determine what portion of the local stack frame
9335 (within the constraint of the lowest 240) is most heavily used.
9336 But without that complication, simply bias the frame pointer
9337 by 128 bytes so as to maximize the amount of the local stack
9338 frame that is addressable with 8-bit offsets. */
9339 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9344 /* This is semi-inlined memory_address_length, but simplified
9345 since we know that we're always dealing with reg+offset, and
9346 to avoid having to create and discard all that rtl. */
9349 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9355 /* EBP and R13 cannot be encoded without an offset. */
9356 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9358 else if (IN_RANGE (offset
, -128, 127))
9361 /* ESP and R12 must be encoded with a SIB byte. */
9362 if (regno
== SP_REG
|| regno
== R12_REG
)
9368 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9369 The valid base registers are taken from CFUN->MACHINE->FS. */
9372 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9374 const struct machine_function
*m
= cfun
->machine
;
9375 rtx base_reg
= NULL
;
9376 HOST_WIDE_INT base_offset
= 0;
9378 if (m
->use_fast_prologue_epilogue
)
9380 /* Choose the base register most likely to allow the most scheduling
9381 opportunities. Generally FP is valid throughout the function,
9382 while DRAP must be reloaded within the epilogue. But choose either
9383 over the SP due to increased encoding size. */
9387 base_reg
= hard_frame_pointer_rtx
;
9388 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9390 else if (m
->fs
.drap_valid
)
9392 base_reg
= crtl
->drap_reg
;
9393 base_offset
= 0 - cfa_offset
;
9395 else if (m
->fs
.sp_valid
)
9397 base_reg
= stack_pointer_rtx
;
9398 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9403 HOST_WIDE_INT toffset
;
9406 /* Choose the base register with the smallest address encoding.
9407 With a tie, choose FP > DRAP > SP. */
9410 base_reg
= stack_pointer_rtx
;
9411 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9412 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9414 if (m
->fs
.drap_valid
)
9416 toffset
= 0 - cfa_offset
;
9417 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9420 base_reg
= crtl
->drap_reg
;
9421 base_offset
= toffset
;
9427 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9428 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9431 base_reg
= hard_frame_pointer_rtx
;
9432 base_offset
= toffset
;
9437 gcc_assert (base_reg
!= NULL
);
9439 return plus_constant (Pmode
, base_reg
, base_offset
);
9442 /* Emit code to save registers in the prologue. */
9445 ix86_emit_save_regs (void)
9450 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9451 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9453 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9454 RTX_FRAME_RELATED_P (insn
) = 1;
9458 /* Emit a single register save at CFA - CFA_OFFSET. */
9461 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9462 HOST_WIDE_INT cfa_offset
)
9464 struct machine_function
*m
= cfun
->machine
;
9465 rtx reg
= gen_rtx_REG (mode
, regno
);
9466 rtx mem
, addr
, base
, insn
;
9468 addr
= choose_baseaddr (cfa_offset
);
9469 mem
= gen_frame_mem (mode
, addr
);
9471 /* For SSE saves, we need to indicate the 128-bit alignment. */
9472 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9474 insn
= emit_move_insn (mem
, reg
);
9475 RTX_FRAME_RELATED_P (insn
) = 1;
9478 if (GET_CODE (base
) == PLUS
)
9479 base
= XEXP (base
, 0);
9480 gcc_checking_assert (REG_P (base
));
9482 /* When saving registers into a re-aligned local stack frame, avoid
9483 any tricky guessing by dwarf2out. */
9484 if (m
->fs
.realigned
)
9486 gcc_checking_assert (stack_realign_drap
);
9488 if (regno
== REGNO (crtl
->drap_reg
))
9490 /* A bit of a hack. We force the DRAP register to be saved in
9491 the re-aligned stack frame, which provides us with a copy
9492 of the CFA that will last past the prologue. Install it. */
9493 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9494 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9495 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9496 mem
= gen_rtx_MEM (mode
, addr
);
9497 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9501 /* The frame pointer is a stable reference within the
9502 aligned frame. Use it. */
9503 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9504 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9505 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9506 mem
= gen_rtx_MEM (mode
, addr
);
9507 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9508 gen_rtx_SET (VOIDmode
, mem
, reg
));
9512 /* The memory may not be relative to the current CFA register,
9513 which means that we may need to generate a new pattern for
9514 use by the unwind info. */
9515 else if (base
!= m
->fs
.cfa_reg
)
9517 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9518 m
->fs
.cfa_offset
- cfa_offset
);
9519 mem
= gen_rtx_MEM (mode
, addr
);
9520 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9524 /* Emit code to save registers using MOV insns.
9525 First register is stored at CFA - CFA_OFFSET. */
9527 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9531 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9532 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9534 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9535 cfa_offset
-= UNITS_PER_WORD
;
9539 /* Emit code to save SSE registers using MOV insns.
9540 First register is stored at CFA - CFA_OFFSET. */
9542 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9546 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9547 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9549 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9554 static GTY(()) rtx queued_cfa_restores
;
9556 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9557 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9558 Don't add the note if the previously saved value will be left untouched
9559 within stack red-zone till return, as unwinders can find the same value
9560 in the register and on the stack. */
9563 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9565 if (!crtl
->shrink_wrapped
9566 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9571 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9572 RTX_FRAME_RELATED_P (insn
) = 1;
9576 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9579 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9582 ix86_add_queued_cfa_restore_notes (rtx insn
)
9585 if (!queued_cfa_restores
)
9587 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9589 XEXP (last
, 1) = REG_NOTES (insn
);
9590 REG_NOTES (insn
) = queued_cfa_restores
;
9591 queued_cfa_restores
= NULL_RTX
;
9592 RTX_FRAME_RELATED_P (insn
) = 1;
9595 /* Expand prologue or epilogue stack adjustment.
9596 The pattern exist to put a dependency on all ebp-based memory accesses.
9597 STYLE should be negative if instructions should be marked as frame related,
9598 zero if %r11 register is live and cannot be freely used and positive
9602 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9603 int style
, bool set_cfa
)
9605 struct machine_function
*m
= cfun
->machine
;
9607 bool add_frame_related_expr
= false;
9609 if (Pmode
== SImode
)
9610 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9611 else if (x86_64_immediate_operand (offset
, DImode
))
9612 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9616 /* r11 is used by indirect sibcall return as well, set before the
9617 epilogue and used after the epilogue. */
9619 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9622 gcc_assert (src
!= hard_frame_pointer_rtx
9623 && dest
!= hard_frame_pointer_rtx
);
9624 tmp
= hard_frame_pointer_rtx
;
9626 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9628 add_frame_related_expr
= true;
9630 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9633 insn
= emit_insn (insn
);
9635 ix86_add_queued_cfa_restore_notes (insn
);
9641 gcc_assert (m
->fs
.cfa_reg
== src
);
9642 m
->fs
.cfa_offset
+= INTVAL (offset
);
9643 m
->fs
.cfa_reg
= dest
;
9645 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9646 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9647 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9648 RTX_FRAME_RELATED_P (insn
) = 1;
9652 RTX_FRAME_RELATED_P (insn
) = 1;
9653 if (add_frame_related_expr
)
9655 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9656 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9657 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9661 if (dest
== stack_pointer_rtx
)
9663 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9664 bool valid
= m
->fs
.sp_valid
;
9666 if (src
== hard_frame_pointer_rtx
)
9668 valid
= m
->fs
.fp_valid
;
9669 ooffset
= m
->fs
.fp_offset
;
9671 else if (src
== crtl
->drap_reg
)
9673 valid
= m
->fs
.drap_valid
;
9678 /* Else there are two possibilities: SP itself, which we set
9679 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9680 taken care of this by hand along the eh_return path. */
9681 gcc_checking_assert (src
== stack_pointer_rtx
9682 || offset
== const0_rtx
);
9685 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9686 m
->fs
.sp_valid
= valid
;
9690 /* Find an available register to be used as dynamic realign argument
9691 pointer regsiter. Such a register will be written in prologue and
9692 used in begin of body, so it must not be
9693 1. parameter passing register.
9695 We reuse static-chain register if it is available. Otherwise, we
9696 use DI for i386 and R13 for x86-64. We chose R13 since it has
9699 Return: the regno of chosen register. */
9702 find_drap_reg (void)
9704 tree decl
= cfun
->decl
;
9708 /* Use R13 for nested function or function need static chain.
9709 Since function with tail call may use any caller-saved
9710 registers in epilogue, DRAP must not use caller-saved
9711 register in such case. */
9712 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9719 /* Use DI for nested function or function need static chain.
9720 Since function with tail call may use any caller-saved
9721 registers in epilogue, DRAP must not use caller-saved
9722 register in such case. */
9723 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9726 /* Reuse static chain register if it isn't used for parameter
9728 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9730 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9731 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9738 /* Return minimum incoming stack alignment. */
9741 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9743 unsigned int incoming_stack_boundary
;
9745 /* Prefer the one specified at command line. */
9746 if (ix86_user_incoming_stack_boundary
)
9747 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9748 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9749 if -mstackrealign is used, it isn't used for sibcall check and
9750 estimated stack alignment is 128bit. */
9753 && ix86_force_align_arg_pointer
9754 && crtl
->stack_alignment_estimated
== 128)
9755 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9757 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9759 /* Incoming stack alignment can be changed on individual functions
9760 via force_align_arg_pointer attribute. We use the smallest
9761 incoming stack boundary. */
9762 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9763 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9764 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9765 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9767 /* The incoming stack frame has to be aligned at least at
9768 parm_stack_boundary. */
9769 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9770 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9772 /* Stack at entrance of main is aligned by runtime. We use the
9773 smallest incoming stack boundary. */
9774 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9775 && DECL_NAME (current_function_decl
)
9776 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9777 && DECL_FILE_SCOPE_P (current_function_decl
))
9778 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9780 return incoming_stack_boundary
;
9783 /* Update incoming stack boundary and estimated stack alignment. */
9786 ix86_update_stack_boundary (void)
9788 ix86_incoming_stack_boundary
9789 = ix86_minimum_incoming_stack_boundary (false);
9791 /* x86_64 vararg needs 16byte stack alignment for register save
9795 && crtl
->stack_alignment_estimated
< 128)
9796 crtl
->stack_alignment_estimated
= 128;
9799 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9800 needed or an rtx for DRAP otherwise. */
9803 ix86_get_drap_rtx (void)
9805 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9806 crtl
->need_drap
= true;
9808 if (stack_realign_drap
)
9810 /* Assign DRAP to vDRAP and returns vDRAP */
9811 unsigned int regno
= find_drap_reg ();
9816 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9817 crtl
->drap_reg
= arg_ptr
;
9820 drap_vreg
= copy_to_reg (arg_ptr
);
9824 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9827 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9828 RTX_FRAME_RELATED_P (insn
) = 1;
9836 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9839 ix86_internal_arg_pointer (void)
9841 return virtual_incoming_args_rtx
;
9844 struct scratch_reg
{
9849 /* Return a short-lived scratch register for use on function entry.
9850 In 32-bit mode, it is valid only after the registers are saved
9851 in the prologue. This register must be released by means of
9852 release_scratch_register_on_entry once it is dead. */
9855 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9863 /* We always use R11 in 64-bit mode. */
9868 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9870 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9872 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9873 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9874 int regparm
= ix86_function_regparm (fntype
, decl
);
9876 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9878 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9879 for the static chain register. */
9880 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9881 && drap_regno
!= AX_REG
)
9883 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
9884 for the static chain register. */
9885 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
9887 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
9889 /* ecx is the static chain register. */
9890 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
9892 && drap_regno
!= CX_REG
)
9894 else if (ix86_save_reg (BX_REG
, true))
9896 /* esi is the static chain register. */
9897 else if (!(regparm
== 3 && static_chain_p
)
9898 && ix86_save_reg (SI_REG
, true))
9900 else if (ix86_save_reg (DI_REG
, true))
9904 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9909 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9912 rtx insn
= emit_insn (gen_push (sr
->reg
));
9913 RTX_FRAME_RELATED_P (insn
) = 1;
9917 /* Release a scratch register obtained from the preceding function. */
9920 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9924 struct machine_function
*m
= cfun
->machine
;
9925 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9927 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9928 RTX_FRAME_RELATED_P (insn
) = 1;
9929 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9930 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9931 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9932 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9936 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9938 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9941 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9943 /* We skip the probe for the first interval + a small dope of 4 words and
9944 probe that many bytes past the specified size to maintain a protection
9945 area at the bottom of the stack. */
9946 const int dope
= 4 * UNITS_PER_WORD
;
9947 rtx size_rtx
= GEN_INT (size
), last
;
9949 /* See if we have a constant small number of probes to generate. If so,
9950 that's the easy case. The run-time loop is made up of 11 insns in the
9951 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9952 for n # of intervals. */
9953 if (size
<= 5 * PROBE_INTERVAL
)
9955 HOST_WIDE_INT i
, adjust
;
9956 bool first_probe
= true;
9958 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9959 values of N from 1 until it exceeds SIZE. If only one probe is
9960 needed, this will not generate any code. Then adjust and probe
9961 to PROBE_INTERVAL + SIZE. */
9962 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9966 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9967 first_probe
= false;
9970 adjust
= PROBE_INTERVAL
;
9972 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9973 plus_constant (Pmode
, stack_pointer_rtx
,
9975 emit_stack_probe (stack_pointer_rtx
);
9979 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9981 adjust
= size
+ PROBE_INTERVAL
- i
;
9983 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9984 plus_constant (Pmode
, stack_pointer_rtx
,
9986 emit_stack_probe (stack_pointer_rtx
);
9988 /* Adjust back to account for the additional first interval. */
9989 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9990 plus_constant (Pmode
, stack_pointer_rtx
,
9991 PROBE_INTERVAL
+ dope
)));
9994 /* Otherwise, do the same as above, but in a loop. Note that we must be
9995 extra careful with variables wrapping around because we might be at
9996 the very top (or the very bottom) of the address space and we have
9997 to be able to handle this case properly; in particular, we use an
9998 equality test for the loop condition. */
10001 HOST_WIDE_INT rounded_size
;
10002 struct scratch_reg sr
;
10004 get_scratch_register_on_entry (&sr
);
10007 /* Step 1: round SIZE to the previous multiple of the interval. */
10009 rounded_size
= size
& -PROBE_INTERVAL
;
10012 /* Step 2: compute initial and final value of the loop counter. */
10014 /* SP = SP_0 + PROBE_INTERVAL. */
10015 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10016 plus_constant (Pmode
, stack_pointer_rtx
,
10017 - (PROBE_INTERVAL
+ dope
))));
10019 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10020 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
10021 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
10022 gen_rtx_PLUS (Pmode
, sr
.reg
,
10023 stack_pointer_rtx
)));
10026 /* Step 3: the loop
10028 while (SP != LAST_ADDR)
10030 SP = SP + PROBE_INTERVAL
10034 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10035 values of N from 1 until it is equal to ROUNDED_SIZE. */
10037 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
10040 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10041 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10043 if (size
!= rounded_size
)
10045 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10046 plus_constant (Pmode
, stack_pointer_rtx
,
10047 rounded_size
- size
)));
10048 emit_stack_probe (stack_pointer_rtx
);
10051 /* Adjust back to account for the additional first interval. */
10052 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10053 plus_constant (Pmode
, stack_pointer_rtx
,
10054 PROBE_INTERVAL
+ dope
)));
10056 release_scratch_register_on_entry (&sr
);
10059 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
10061 /* Even if the stack pointer isn't the CFA register, we need to correctly
10062 describe the adjustments made to it, in particular differentiate the
10063 frame-related ones from the frame-unrelated ones. */
10066 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
10067 XVECEXP (expr
, 0, 0)
10068 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10069 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
10070 XVECEXP (expr
, 0, 1)
10071 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10072 plus_constant (Pmode
, stack_pointer_rtx
,
10073 PROBE_INTERVAL
+ dope
+ size
));
10074 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
10075 RTX_FRAME_RELATED_P (last
) = 1;
10077 cfun
->machine
->fs
.sp_offset
+= size
;
10080 /* Make sure nothing is scheduled before we are done. */
10081 emit_insn (gen_blockage ());
10084 /* Adjust the stack pointer up to REG while probing it. */
10087 output_adjust_stack_and_probe (rtx reg
)
10089 static int labelno
= 0;
10090 char loop_lab
[32], end_lab
[32];
10093 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10094 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10096 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10098 /* Jump to END_LAB if SP == LAST_ADDR. */
10099 xops
[0] = stack_pointer_rtx
;
10101 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10102 fputs ("\tje\t", asm_out_file
);
10103 assemble_name_raw (asm_out_file
, end_lab
);
10104 fputc ('\n', asm_out_file
);
10106 /* SP = SP + PROBE_INTERVAL. */
10107 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10108 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10111 xops
[1] = const0_rtx
;
10112 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
10114 fprintf (asm_out_file
, "\tjmp\t");
10115 assemble_name_raw (asm_out_file
, loop_lab
);
10116 fputc ('\n', asm_out_file
);
10118 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10123 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10124 inclusive. These are offsets from the current stack pointer. */
10127 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
10129 /* See if we have a constant small number of probes to generate. If so,
10130 that's the easy case. The run-time loop is made up of 7 insns in the
10131 generic case while the compile-time loop is made up of n insns for n #
10133 if (size
<= 7 * PROBE_INTERVAL
)
10137 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10138 it exceeds SIZE. If only one probe is needed, this will not
10139 generate any code. Then probe at FIRST + SIZE. */
10140 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10141 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10144 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10148 /* Otherwise, do the same as above, but in a loop. Note that we must be
10149 extra careful with variables wrapping around because we might be at
10150 the very top (or the very bottom) of the address space and we have
10151 to be able to handle this case properly; in particular, we use an
10152 equality test for the loop condition. */
10155 HOST_WIDE_INT rounded_size
, last
;
10156 struct scratch_reg sr
;
10158 get_scratch_register_on_entry (&sr
);
10161 /* Step 1: round SIZE to the previous multiple of the interval. */
10163 rounded_size
= size
& -PROBE_INTERVAL
;
10166 /* Step 2: compute initial and final value of the loop counter. */
10168 /* TEST_OFFSET = FIRST. */
10169 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10171 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10172 last
= first
+ rounded_size
;
10175 /* Step 3: the loop
10177 while (TEST_ADDR != LAST_ADDR)
10179 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10183 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10184 until it is equal to ROUNDED_SIZE. */
10186 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10189 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10190 that SIZE is equal to ROUNDED_SIZE. */
10192 if (size
!= rounded_size
)
10193 emit_stack_probe (plus_constant (Pmode
,
10194 gen_rtx_PLUS (Pmode
,
10197 rounded_size
- size
));
10199 release_scratch_register_on_entry (&sr
);
10202 /* Make sure nothing is scheduled before we are done. */
10203 emit_insn (gen_blockage ());
10206 /* Probe a range of stack addresses from REG to END, inclusive. These are
10207 offsets from the current stack pointer. */
10210 output_probe_stack_range (rtx reg
, rtx end
)
10212 static int labelno
= 0;
10213 char loop_lab
[32], end_lab
[32];
10216 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10217 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10219 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10221 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10224 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10225 fputs ("\tje\t", asm_out_file
);
10226 assemble_name_raw (asm_out_file
, end_lab
);
10227 fputc ('\n', asm_out_file
);
10229 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10230 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10231 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10233 /* Probe at TEST_ADDR. */
10234 xops
[0] = stack_pointer_rtx
;
10236 xops
[2] = const0_rtx
;
10237 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10239 fprintf (asm_out_file
, "\tjmp\t");
10240 assemble_name_raw (asm_out_file
, loop_lab
);
10241 fputc ('\n', asm_out_file
);
10243 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10248 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10249 to be generated in correct form. */
10251 ix86_finalize_stack_realign_flags (void)
10253 /* Check if stack realign is really needed after reload, and
10254 stores result in cfun */
10255 unsigned int incoming_stack_boundary
10256 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10257 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10258 unsigned int stack_realign
= (incoming_stack_boundary
10260 ? crtl
->max_used_stack_slot_alignment
10261 : crtl
->stack_alignment_needed
));
10263 if (crtl
->stack_realign_finalized
)
10265 /* After stack_realign_needed is finalized, we can no longer
10267 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10271 /* If the only reason for frame_pointer_needed is that we conservatively
10272 assumed stack realignment might be needed, but in the end nothing that
10273 needed the stack alignment had been spilled, clear frame_pointer_needed
10274 and say we don't need stack realignment. */
10276 && !crtl
->need_drap
10277 && frame_pointer_needed
10279 && flag_omit_frame_pointer
10280 && crtl
->sp_is_unchanging
10281 && !ix86_current_function_calls_tls_descriptor
10282 && !crtl
->accesses_prior_frames
10283 && !cfun
->calls_alloca
10284 && !crtl
->calls_eh_return
10285 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10286 && !ix86_frame_pointer_required ()
10287 && get_frame_size () == 0
10288 && ix86_nsaved_sseregs () == 0
10289 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10291 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10294 CLEAR_HARD_REG_SET (prologue_used
);
10295 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10296 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10297 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10298 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10299 HARD_FRAME_POINTER_REGNUM
);
10303 FOR_BB_INSNS (bb
, insn
)
10304 if (NONDEBUG_INSN_P (insn
)
10305 && requires_stack_frame_p (insn
, prologue_used
,
10306 set_up_by_prologue
))
10308 crtl
->stack_realign_needed
= stack_realign
;
10309 crtl
->stack_realign_finalized
= true;
10314 frame_pointer_needed
= false;
10315 stack_realign
= false;
10316 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10317 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10318 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10319 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10320 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10321 df_finish_pass (true);
10322 df_scan_alloc (NULL
);
10324 df_compute_regs_ever_live (true);
10328 crtl
->stack_realign_needed
= stack_realign
;
10329 crtl
->stack_realign_finalized
= true;
10332 /* Expand the prologue into a bunch of separate insns. */
10335 ix86_expand_prologue (void)
10337 struct machine_function
*m
= cfun
->machine
;
10340 struct ix86_frame frame
;
10341 HOST_WIDE_INT allocate
;
10342 bool int_registers_saved
;
10343 bool sse_registers_saved
;
10345 ix86_finalize_stack_realign_flags ();
10347 /* DRAP should not coexist with stack_realign_fp */
10348 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10350 memset (&m
->fs
, 0, sizeof (m
->fs
));
10352 /* Initialize CFA state for before the prologue. */
10353 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10354 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10356 /* Track SP offset to the CFA. We continue tracking this after we've
10357 swapped the CFA register away from SP. In the case of re-alignment
10358 this is fudged; we're interested to offsets within the local frame. */
10359 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10360 m
->fs
.sp_valid
= true;
10362 ix86_compute_frame_layout (&frame
);
10364 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10366 /* We should have already generated an error for any use of
10367 ms_hook on a nested function. */
10368 gcc_checking_assert (!ix86_static_chain_on_stack
);
10370 /* Check if profiling is active and we shall use profiling before
10371 prologue variant. If so sorry. */
10372 if (crtl
->profile
&& flag_fentry
!= 0)
10373 sorry ("ms_hook_prologue attribute isn%'t compatible "
10374 "with -mfentry for 32-bit");
10376 /* In ix86_asm_output_function_label we emitted:
10377 8b ff movl.s %edi,%edi
10379 8b ec movl.s %esp,%ebp
10381 This matches the hookable function prologue in Win32 API
10382 functions in Microsoft Windows XP Service Pack 2 and newer.
10383 Wine uses this to enable Windows apps to hook the Win32 API
10384 functions provided by Wine.
10386 What that means is that we've already set up the frame pointer. */
10388 if (frame_pointer_needed
10389 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10393 /* We've decided to use the frame pointer already set up.
10394 Describe this to the unwinder by pretending that both
10395 push and mov insns happen right here.
10397 Putting the unwind info here at the end of the ms_hook
10398 is done so that we can make absolutely certain we get
10399 the required byte sequence at the start of the function,
10400 rather than relying on an assembler that can produce
10401 the exact encoding required.
10403 However it does mean (in the unpatched case) that we have
10404 a 1 insn window where the asynchronous unwind info is
10405 incorrect. However, if we placed the unwind info at
10406 its correct location we would have incorrect unwind info
10407 in the patched case. Which is probably all moot since
10408 I don't expect Wine generates dwarf2 unwind info for the
10409 system libraries that use this feature. */
10411 insn
= emit_insn (gen_blockage ());
10413 push
= gen_push (hard_frame_pointer_rtx
);
10414 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10415 stack_pointer_rtx
);
10416 RTX_FRAME_RELATED_P (push
) = 1;
10417 RTX_FRAME_RELATED_P (mov
) = 1;
10419 RTX_FRAME_RELATED_P (insn
) = 1;
10420 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10421 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10423 /* Note that gen_push incremented m->fs.cfa_offset, even
10424 though we didn't emit the push insn here. */
10425 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10426 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10427 m
->fs
.fp_valid
= true;
10431 /* The frame pointer is not needed so pop %ebp again.
10432 This leaves us with a pristine state. */
10433 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10437 /* The first insn of a function that accepts its static chain on the
10438 stack is to push the register that would be filled in by a direct
10439 call. This insn will be skipped by the trampoline. */
10440 else if (ix86_static_chain_on_stack
)
10442 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10443 emit_insn (gen_blockage ());
10445 /* We don't want to interpret this push insn as a register save,
10446 only as a stack adjustment. The real copy of the register as
10447 a save will be done later, if needed. */
10448 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10449 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10450 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10451 RTX_FRAME_RELATED_P (insn
) = 1;
10454 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10455 of DRAP is needed and stack realignment is really needed after reload */
10456 if (stack_realign_drap
)
10458 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10460 /* Only need to push parameter pointer reg if it is caller saved. */
10461 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10463 /* Push arg pointer reg */
10464 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10465 RTX_FRAME_RELATED_P (insn
) = 1;
10468 /* Grab the argument pointer. */
10469 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10470 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10471 RTX_FRAME_RELATED_P (insn
) = 1;
10472 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10473 m
->fs
.cfa_offset
= 0;
10475 /* Align the stack. */
10476 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10478 GEN_INT (-align_bytes
)));
10479 RTX_FRAME_RELATED_P (insn
) = 1;
10481 /* Replicate the return address on the stack so that return
10482 address can be reached via (argp - 1) slot. This is needed
10483 to implement macro RETURN_ADDR_RTX and intrinsic function
10484 expand_builtin_return_addr etc. */
10485 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10486 t
= gen_frame_mem (word_mode
, t
);
10487 insn
= emit_insn (gen_push (t
));
10488 RTX_FRAME_RELATED_P (insn
) = 1;
10490 /* For the purposes of frame and register save area addressing,
10491 we've started over with a new frame. */
10492 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10493 m
->fs
.realigned
= true;
10496 int_registers_saved
= (frame
.nregs
== 0);
10497 sse_registers_saved
= (frame
.nsseregs
== 0);
10499 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10501 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10502 slower on all targets. Also sdb doesn't like it. */
10503 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10504 RTX_FRAME_RELATED_P (insn
) = 1;
10506 /* Push registers now, before setting the frame pointer
10508 if (!int_registers_saved
10510 && !frame
.save_regs_using_mov
)
10512 ix86_emit_save_regs ();
10513 int_registers_saved
= true;
10514 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10517 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10519 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10520 RTX_FRAME_RELATED_P (insn
) = 1;
10522 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10523 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10524 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10525 m
->fs
.fp_valid
= true;
10529 if (!int_registers_saved
)
10531 /* If saving registers via PUSH, do so now. */
10532 if (!frame
.save_regs_using_mov
)
10534 ix86_emit_save_regs ();
10535 int_registers_saved
= true;
10536 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10539 /* When using red zone we may start register saving before allocating
10540 the stack frame saving one cycle of the prologue. However, avoid
10541 doing this if we have to probe the stack; at least on x86_64 the
10542 stack probe can turn into a call that clobbers a red zone location. */
10543 else if (ix86_using_red_zone ()
10544 && (! TARGET_STACK_PROBE
10545 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10547 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10548 int_registers_saved
= true;
10552 if (stack_realign_fp
)
10554 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10555 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10557 /* The computation of the size of the re-aligned stack frame means
10558 that we must allocate the size of the register save area before
10559 performing the actual alignment. Otherwise we cannot guarantee
10560 that there's enough storage above the realignment point. */
10561 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10562 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10563 GEN_INT (m
->fs
.sp_offset
10564 - frame
.sse_reg_save_offset
),
10567 /* Align the stack. */
10568 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10570 GEN_INT (-align_bytes
)));
10572 /* For the purposes of register save area addressing, the stack
10573 pointer is no longer valid. As for the value of sp_offset,
10574 see ix86_compute_frame_layout, which we need to match in order
10575 to pass verification of stack_pointer_offset at the end. */
10576 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10577 m
->fs
.sp_valid
= false;
10580 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10582 if (flag_stack_usage_info
)
10584 /* We start to count from ARG_POINTER. */
10585 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10587 /* If it was realigned, take into account the fake frame. */
10588 if (stack_realign_drap
)
10590 if (ix86_static_chain_on_stack
)
10591 stack_size
+= UNITS_PER_WORD
;
10593 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10594 stack_size
+= UNITS_PER_WORD
;
10596 /* This over-estimates by 1 minimal-stack-alignment-unit but
10597 mitigates that by counting in the new return address slot. */
10598 current_function_dynamic_stack_size
10599 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10602 current_function_static_stack_size
= stack_size
;
10605 /* On SEH target with very large frame size, allocate an area to save
10606 SSE registers (as the very large allocation won't be described). */
10608 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10609 && !sse_registers_saved
)
10611 HOST_WIDE_INT sse_size
=
10612 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10614 gcc_assert (int_registers_saved
);
10616 /* No need to do stack checking as the area will be immediately
10618 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10619 GEN_INT (-sse_size
), -1,
10620 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10621 allocate
-= sse_size
;
10622 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10623 sse_registers_saved
= true;
10626 /* The stack has already been decremented by the instruction calling us
10627 so probe if the size is non-negative to preserve the protection area. */
10628 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10630 /* We expect the registers to be saved when probes are used. */
10631 gcc_assert (int_registers_saved
);
10633 if (STACK_CHECK_MOVING_SP
)
10635 ix86_adjust_stack_and_probe (allocate
);
10640 HOST_WIDE_INT size
= allocate
;
10642 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10643 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10645 if (TARGET_STACK_PROBE
)
10646 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10648 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10654 else if (!ix86_target_stack_probe ()
10655 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10657 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10658 GEN_INT (-allocate
), -1,
10659 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10663 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10665 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10666 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10667 bool eax_live
= false;
10668 bool r10_live
= false;
10671 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10672 if (!TARGET_64BIT_MS_ABI
)
10673 eax_live
= ix86_eax_live_at_start_p ();
10675 /* Note that SEH directives need to continue tracking the stack
10676 pointer even after the frame pointer has been set up. */
10679 insn
= emit_insn (gen_push (eax
));
10680 allocate
-= UNITS_PER_WORD
;
10681 if (sp_is_cfa_reg
|| TARGET_SEH
)
10684 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10685 RTX_FRAME_RELATED_P (insn
) = 1;
10691 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10692 insn
= emit_insn (gen_push (r10
));
10693 allocate
-= UNITS_PER_WORD
;
10694 if (sp_is_cfa_reg
|| TARGET_SEH
)
10697 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10698 RTX_FRAME_RELATED_P (insn
) = 1;
10702 emit_move_insn (eax
, GEN_INT (allocate
));
10703 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10705 /* Use the fact that AX still contains ALLOCATE. */
10706 adjust_stack_insn
= (Pmode
== DImode
10707 ? gen_pro_epilogue_adjust_stack_di_sub
10708 : gen_pro_epilogue_adjust_stack_si_sub
);
10710 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10711 stack_pointer_rtx
, eax
));
10713 if (sp_is_cfa_reg
|| TARGET_SEH
)
10716 m
->fs
.cfa_offset
+= allocate
;
10717 RTX_FRAME_RELATED_P (insn
) = 1;
10718 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10719 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10720 plus_constant (Pmode
, stack_pointer_rtx
,
10723 m
->fs
.sp_offset
+= allocate
;
10725 if (r10_live
&& eax_live
)
10727 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10728 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10729 gen_frame_mem (word_mode
, t
));
10730 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10731 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10732 gen_frame_mem (word_mode
, t
));
10734 else if (eax_live
|| r10_live
)
10736 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10737 emit_move_insn (gen_rtx_REG (word_mode
,
10738 (eax_live
? AX_REG
: R10_REG
)),
10739 gen_frame_mem (word_mode
, t
));
10742 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10744 /* If we haven't already set up the frame pointer, do so now. */
10745 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10747 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10748 GEN_INT (frame
.stack_pointer_offset
10749 - frame
.hard_frame_pointer_offset
));
10750 insn
= emit_insn (insn
);
10751 RTX_FRAME_RELATED_P (insn
) = 1;
10752 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10754 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10755 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10756 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10757 m
->fs
.fp_valid
= true;
10760 if (!int_registers_saved
)
10761 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10762 if (!sse_registers_saved
)
10763 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10765 pic_reg_used
= false;
10766 /* We don't use pic-register for pe-coff target. */
10767 if (pic_offset_table_rtx
10769 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10772 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10774 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10775 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10777 pic_reg_used
= true;
10784 if (ix86_cmodel
== CM_LARGE_PIC
)
10786 rtx label
, tmp_reg
;
10788 gcc_assert (Pmode
== DImode
);
10789 label
= gen_label_rtx ();
10790 emit_label (label
);
10791 LABEL_PRESERVE_P (label
) = 1;
10792 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10793 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10794 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10796 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10797 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10798 pic_offset_table_rtx
, tmp_reg
));
10801 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10805 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10806 RTX_FRAME_RELATED_P (insn
) = 1;
10807 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10811 /* In the pic_reg_used case, make sure that the got load isn't deleted
10812 when mcount needs it. Blockage to avoid call movement across mcount
10813 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10815 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10816 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10818 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10820 /* vDRAP is setup but after reload it turns out stack realign
10821 isn't necessary, here we will emit prologue to setup DRAP
10822 without stack realign adjustment */
10823 t
= choose_baseaddr (0);
10824 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10827 /* Prevent instructions from being scheduled into register save push
10828 sequence when access to the redzone area is done through frame pointer.
10829 The offset between the frame pointer and the stack pointer is calculated
10830 relative to the value of the stack pointer at the end of the function
10831 prologue, and moving instructions that access redzone area via frame
10832 pointer inside push sequence violates this assumption. */
10833 if (frame_pointer_needed
&& frame
.red_zone_size
)
10834 emit_insn (gen_memory_blockage ());
10836 /* Emit cld instruction if stringops are used in the function. */
10837 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10838 emit_insn (gen_cld ());
10840 /* SEH requires that the prologue end within 256 bytes of the start of
10841 the function. Prevent instruction schedules that would extend that.
10842 Further, prevent alloca modifications to the stack pointer from being
10843 combined with prologue modifications. */
10845 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10848 /* Emit code to restore REG using a POP insn. */
/* NOTE(review): this chunk was extracted with statements split across
   physical lines and some original lines (return type, braces) dropped;
   the code text below is preserved byte-identical, only comments added.
   This helper pops REG from the stack and keeps the machine_function
   frame-state (m->fs) and the CFA/unwind notes consistent with the pop.  */
10851 ix86_emit_restore_reg_using_pop (rtx reg
)
10853 struct machine_function
*m
= cfun
->machine
;
10854 rtx insn
= emit_insn (gen_pop (reg
));
/* Record that REG's saved value has been restored at the current
   stack offset, then account for the pop shrinking the frame.  */
10856 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10857 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
/* Case 1: we just popped the DRAP register itself.  */
10859 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10860 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10862 /* Previously we'd represented the CFA as an expression
10863 like *(%ebp - 8). We've just popped that value from
10864 the stack, which means we need to reset the CFA to
10865 the drap register. This will remain until we restore
10866 the stack pointer. */
10867 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10868 RTX_FRAME_RELATED_P (insn
) = 1;
10870 /* This means that the DRAP register is valid for addressing too. */
10871 m
->fs
.drap_valid
= true;
/* Case 2: the CFA is the stack pointer; the pop moved it, so emit an
   explicit CFA adjustment note and track the new offset.  */
10875 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10877 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10878 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10879 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10880 RTX_FRAME_RELATED_P (insn
) = 1;
10882 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10885 /* When the frame pointer is the CFA, and we pop it, we are
10886 swapping back to the stack pointer as the CFA. This happens
10887 for stack frames that don't allocate other data, so we assume
10888 the stack pointer is now pointing at the return address, i.e.
10889 the function entry state, which makes the offset be 1 word. */
10890 if (reg
== hard_frame_pointer_rtx
)
10892 m
->fs
.fp_valid
= false;
10893 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10895 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10896 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
/* Redefine the CFA as sp + cfa_offset now that %ebp/%rbp is gone.  */
10898 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10899 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10900 GEN_INT (m
->fs
.cfa_offset
)));
10901 RTX_FRAME_RELATED_P (insn
) = 1;
10906 /* Emit code to restore saved registers using POP insns. */
/* Walks every hard register and pops the call-saved integer registers
   (SSE registers cannot be popped; they are restored via moves).
   The `false' argument means the normal (non-eh_return) save set.  */
10909 ix86_emit_restore_regs_using_pop (void)
10911 unsigned int regno
;
10913 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10914 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10915 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10918 /* Emit code and notes for the LEAVE instruction. */
/* LEAVE restores %esp from %ebp and pops %ebp in one insn; afterwards
   the stack pointer is valid again and the frame pointer is not.
   The frame-state bookkeeping below mirrors that semantics.  */
10921 ix86_emit_leave (void)
10923 struct machine_function
*m
= cfun
->machine
;
10924 rtx insn
= emit_insn (ix86_gen_leave ());
/* Attach any CFA-restore notes queued for earlier restores to this insn.  */
10926 ix86_add_queued_cfa_restore_notes (insn
);
/* LEAVE is only meaningful while the frame pointer is live.  */
10928 gcc_assert (m
->fs
.fp_valid
);
10929 m
->fs
.sp_valid
= true;
/* sp now points one word above where the saved %ebp was stored.  */
10930 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10931 m
->fs
.fp_valid
= false;
/* If the CFA was based on the frame pointer, rebase it on sp.  */
10933 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10935 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10936 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10938 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10939 plus_constant (Pmode
, stack_pointer_rtx
,
10941 RTX_FRAME_RELATED_P (insn
) = 1;
10943 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10947 /* Emit code to restore saved registers using MOV insns.
10948 First register is restored from CFA - CFA_OFFSET. */
/* MAYBE_EH_RETURN selects the eh_return save set (see ix86_save_reg).
   Each iteration loads one integer register from its save slot and
   queues a CFA-restore note for it; CFA_OFFSET walks down one word
   per restored register.  */
10950 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10951 bool maybe_eh_return
)
10953 struct machine_function
*m
= cfun
->machine
;
10954 unsigned int regno
;
10956 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10957 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10959 rtx reg
= gen_rtx_REG (word_mode
, regno
);
/* choose_baseaddr picks whichever of sp/fp/DRAP currently reaches
   this CFA offset with the cheapest addressing.  */
10962 mem
= choose_baseaddr (cfa_offset
);
10963 mem
= gen_frame_mem (word_mode
, mem
);
10964 insn
= emit_move_insn (reg
, mem
);
10966 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10968 /* Previously we'd represented the CFA as an expression
10969 like *(%ebp - 8). We've just popped that value from
10970 the stack, which means we need to reset the CFA to
10971 the drap register. This will remain until we restore
10972 the stack pointer. */
10973 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10974 RTX_FRAME_RELATED_P (insn
) = 1;
10976 /* This means that the DRAP register is valid for addressing. */
10977 m
->fs
.drap_valid
= true;
/* NULL_RTX: queue the restore note to be attached to a later insn.  */
10980 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10982 cfa_offset
-= UNITS_PER_WORD
;
10986 /* Emit code to restore saved registers using MOV insns.
10987 First register is restored from CFA - CFA_OFFSET. */
/* NOTE(review): header comment was copied from the integer variant; this
   function restores the call-saved SSE registers (V4SFmode, 16-byte slots,
   128-bit-aligned loads) rather than word-mode registers.  */
10989 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10990 bool maybe_eh_return
)
10992 unsigned int regno
;
10994 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10995 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10997 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
11000 mem
= choose_baseaddr (cfa_offset
);
11001 mem
= gen_rtx_MEM (V4SFmode
, mem
);
/* The SSE save area is kept 16-byte aligned, so an aligned load is safe.  */
11002 set_mem_align (mem
, 128);
11003 emit_move_insn (reg
, mem
);
11005 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
11011 /* Restore function stack, frame, and registers. */
/* STYLE: 0 = sibcall epilogue, 1 = normal return, 2 = eh_return path
   (per callers elsewhere in this file -- TODO confirm against
   ix86_expand_epilogue call sites; only `style != 2' and `style == 2'
   are tested below).  The function chooses between pop-based and
   mov-based register restores, handles DRAP/stack-realignment, SEH,
   eh_return, and -mpreferred-stack-boundary bookkeeping, keeping
   m->fs (the frame state) and the CFA notes consistent throughout.
   NOTE(review): extraction dropped some interior lines; code text is
   preserved byte-identical, only comments added.  */
11014 ix86_expand_epilogue (int style
)
11016 struct machine_function
*m
= cfun
->machine
;
/* Saved so the state can be reset for a subsequent epilogue expansion.  */
11017 struct machine_frame_state frame_state_save
= m
->fs
;
11018 struct ix86_frame frame
;
11019 bool restore_regs_via_mov
;
11022 ix86_finalize_stack_realign_flags ();
11023 ix86_compute_frame_layout (&frame
);
/* Establish which of sp/fp are usable for addressing at epilogue entry,
   and sanity-check the tracked offsets against the computed layout.  */
11025 m
->fs
.sp_valid
= (!frame_pointer_needed
11026 || (crtl
->sp_is_unchanging
11027 && !stack_realign_fp
));
11028 gcc_assert (!m
->fs
.sp_valid
11029 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
11031 /* The FP must be valid if the frame pointer is present. */
11032 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
11033 gcc_assert (!m
->fs
.fp_valid
11034 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
11036 /* We must have *some* valid pointer to the stack frame. */
11037 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
11039 /* The DRAP is never valid at this point. */
11040 gcc_assert (!m
->fs
.drap_valid
);
11042 /* See the comment about red zone and frame
11043 pointer usage in ix86_expand_prologue. */
11044 if (frame_pointer_needed
&& frame
.red_zone_size
)
11045 emit_insn (gen_memory_blockage ());
11047 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
11048 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
11050 /* Determine the CFA offset of the end of the red-zone. */
11051 m
->fs
.red_zone_offset
= 0;
11052 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
11054 /* The red-zone begins below the return address. */
11055 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
11057 /* When the register save area is in the aligned portion of
11058 the stack, determine the maximum runtime displacement that
11059 matches up with the aligned frame. */
11060 if (stack_realign_drap
)
11061 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
11065 /* Special care must be taken for the normal return case of a function
11066 using eh_return: the eax and edx registers are marked as saved, but
11067 not restored along this path. Adjust the save location to match. */
11068 if (crtl
->calls_eh_return
&& style
!= 2)
11069 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
/* Decide pop-based vs mov-based restore.  The chain below encodes,
   in priority order: eh_return forces movs; SEH forces pops; then
   several heuristics (sp invalid, -mepilogue-using-move tuning,
   frame-pointer offset mismatches, single-reg LEAVE case).  */
11071 /* EH_RETURN requires the use of moves to function properly. */
11072 if (crtl
->calls_eh_return
)
11073 restore_regs_via_mov
= true;
11074 /* SEH requires the use of pops to identify the epilogue. */
11075 else if (TARGET_SEH
)
11076 restore_regs_via_mov
= false;
11077 /* If we're only restoring one register and sp is not valid then
11078 using a move instruction to restore the register since it's
11079 less work than reloading sp and popping the register. */
11080 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
11081 restore_regs_via_mov
= true;
11082 else if (TARGET_EPILOGUE_USING_MOVE
11083 && cfun
->machine
->use_fast_prologue_epilogue
11084 && (frame
.nregs
> 1
11085 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
11086 restore_regs_via_mov
= true;
11087 else if (frame_pointer_needed
11089 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11090 restore_regs_via_mov
= true;
11091 else if (frame_pointer_needed
11092 && TARGET_USE_LEAVE
11093 && cfun
->machine
->use_fast_prologue_epilogue
11094 && frame
.nregs
== 1)
11095 restore_regs_via_mov
= true;
11097 restore_regs_via_mov
= false;
11099 if (restore_regs_via_mov
|| frame
.nsseregs
)
11101 /* Ensure that the entire register save area is addressable via
11102 the stack pointer, if we will restore via sp. */
11104 && m
->fs
.sp_offset
> 0x7fffffff
11105 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
11106 && (frame
.nsseregs
+ frame
.nregs
) != 0)
11108 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11109 GEN_INT (m
->fs
.sp_offset
11110 - frame
.sse_reg_save_offset
),
11112 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11116 /* If there are any SSE registers to restore, then we have to do it
11117 via moves, since there's obviously no pop for SSE regs. */
11118 if (frame
.nsseregs
)
11119 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
11122 if (restore_regs_via_mov
)
11127 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
11129 /* eh_return epilogues need %ecx added to the stack pointer. */
11132 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
11134 /* Stack align doesn't work with eh_return. */
11135 gcc_assert (!stack_realign_drap
);
11136 /* Neither does regparm nested functions. */
11137 gcc_assert (!ix86_static_chain_on_stack
);
11139 if (frame_pointer_needed
)
/* Compute the eh_return landing sp from fp + stackadj, then restore
   fp itself from memory before redefining the CFA in terms of SA.  */
11141 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
11142 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
11143 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
11145 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
11146 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
11148 /* Note that we use SA as a temporary CFA, as the return
11149 address is at the proper place relative to it. We
11150 pretend this happens at the FP restore insn because
11151 prior to this insn the FP would be stored at the wrong
11152 offset relative to SA, and after this insn we have no
11153 other reasonable register to use for the CFA. We don't
11154 bother resetting the CFA to the SP for the duration of
11155 the return insn. */
11156 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11157 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
11158 ix86_add_queued_cfa_restore_notes (insn
);
11159 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
11160 RTX_FRAME_RELATED_P (insn
) = 1;
11162 m
->fs
.cfa_reg
= sa
;
11163 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11164 m
->fs
.fp_valid
= false;
11166 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
11167 const0_rtx
, style
, false);
/* No-frame-pointer eh_return path: sp = sp + stackadj + offset.  */
11171 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
11172 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
11173 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
11174 ix86_add_queued_cfa_restore_notes (insn
);
11176 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
11177 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
11179 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11180 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11181 plus_constant (Pmode
, stack_pointer_rtx
,
11183 RTX_FRAME_RELATED_P (insn
) = 1;
11186 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11187 m
->fs
.sp_valid
= true;
11192 /* SEH requires that the function end with (1) a stack adjustment
11193 if necessary, (2) a sequence of pops, and (3) a return or
11194 jump instruction. Prevent insns from the function body from
11195 being scheduled into this sequence. */
11198 /* Prevent a catch region from being adjacent to the standard
11199 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11200 several other flags that would be interesting to test are
11202 if (flag_non_call_exceptions
)
11203 emit_insn (gen_nops (const1_rtx
));
11205 emit_insn (gen_blockage ());
11208 /* First step is to deallocate the stack frame so that we can
11209 pop the registers. Also do it on SEH target for very large
11210 frame as the emitted instructions aren't allowed by the ABI in
11212 if (!m
->fs
.sp_valid
11214 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11215 >= SEH_MAX_FRAME_SIZE
)))
/* sp unusable (or SEH huge frame): rebase sp from the frame pointer.  */
11217 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11218 GEN_INT (m
->fs
.fp_offset
11219 - frame
.reg_save_offset
),
11222 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11224 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11225 GEN_INT (m
->fs
.sp_offset
11226 - frame
.reg_save_offset
),
11228 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11231 ix86_emit_restore_regs_using_pop ();
11234 /* If we used a stack pointer and haven't already got rid of it,
11236 if (m
->fs
.fp_valid
)
11238 /* If the stack pointer is valid and pointing at the frame
11239 pointer store address, then we only need a pop. */
11240 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11241 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11242 /* Leave results in shorter dependency chains on CPUs that are
11243 able to grok it fast. */
11244 else if (TARGET_USE_LEAVE
11245 || optimize_bb_for_size_p (EXIT_BLOCK_PTR
)
11246 || !cfun
->machine
->use_fast_prologue_epilogue
)
11247 ix86_emit_leave ();
11250 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11251 hard_frame_pointer_rtx
,
11252 const0_rtx
, style
, !using_drap
);
11253 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
/* DRAP epilogue: recover sp from the DRAP register.  The offset skips
   the return address, plus the static chain and the saved DRAP slot
   when those were pushed by the prologue.  */
11259 int param_ptr_offset
= UNITS_PER_WORD
;
11262 gcc_assert (stack_realign_drap
);
11264 if (ix86_static_chain_on_stack
)
11265 param_ptr_offset
+= UNITS_PER_WORD
;
11266 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11267 param_ptr_offset
+= UNITS_PER_WORD
;
11269 insn
= emit_insn (gen_rtx_SET
11270 (VOIDmode
, stack_pointer_rtx
,
11271 gen_rtx_PLUS (Pmode
,
11273 GEN_INT (-param_ptr_offset
))));
11274 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11275 m
->fs
.cfa_offset
= param_ptr_offset
;
11276 m
->fs
.sp_offset
= param_ptr_offset
;
11277 m
->fs
.realigned
= false;
11279 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11280 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11281 GEN_INT (param_ptr_offset
)));
11282 RTX_FRAME_RELATED_P (insn
) = 1;
11284 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11285 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11288 /* At this point the stack pointer must be valid, and we must have
11289 restored all of the registers. We may not have deallocated the
11290 entire stack frame. We've delayed this until now because it may
11291 be possible to merge the local stack deallocation with the
11292 deallocation forced by ix86_static_chain_on_stack. */
11293 gcc_assert (m
->fs
.sp_valid
);
11294 gcc_assert (!m
->fs
.fp_valid
);
11295 gcc_assert (!m
->fs
.realigned
);
11296 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11298 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11299 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11303 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11305 /* Sibcall epilogues don't want a return instruction. */
11308 m
->fs
= frame_state_save
;
/* -mrtd / stdcall-style callee-pops return handling.  */
11312 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11314 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11316 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11317 address, do explicit add, and jump indirectly to the caller. */
11319 if (crtl
->args
.pops_args
>= 65536)
11321 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11324 /* There is no "pascal" calling convention in any 64bit ABI. */
11325 gcc_assert (!TARGET_64BIT
);
11327 insn
= emit_insn (gen_pop (ecx
));
11328 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11329 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11331 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11332 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
/* Record that %ecx now holds the return address for the unwinder.  */
11333 add_reg_note (insn
, REG_CFA_REGISTER
,
11334 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11335 RTX_FRAME_RELATED_P (insn
) = 1;
11337 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11339 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11342 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11345 emit_jump_insn (gen_simple_return_internal ());
11347 /* Restore the state back to the state from the prologue,
11348 so that it's correct for the next epilogue. */
11349 m
->fs
= frame_state_save
;
11352 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook.  Restores the PIC register's
   regno (the prologue may have switched it to an alternate register)
   and, for Mach-O, pads the function with a nop when it would
   otherwise end on a label.  */
11355 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11356 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11358 if (pic_offset_table_rtx
)
11359 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11361 /* Mach-O doesn't support labels at the end of objects, so if
11362 it looks like we might want one, insert a NOP. */
11364 rtx insn
= get_last_insn ();
11365 rtx deleted_debug_label
= NULL_RTX
;
/* Scan backwards over trailing notes, remembering any deleted debug
   label so it can be neutralized instead of forcing a nop.  */
11368 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11370 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11371 notes only, instead set their CODE_LABEL_NUMBER to -1,
11372 otherwise there would be code generation differences
11373 in between -g and -g0. */
11374 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11375 deleted_debug_label
= insn
;
11376 insn
= PREV_INSN (insn
);
11381 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11382 fputs ("\tnop\n", file
);
11383 else if (deleted_debug_label
)
11384 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11385 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11386 CODE_LABEL_NUMBER (insn
) = -1;
11392 /* Return a scratch register to use in the split stack prologue. The
11393 split stack prologue is used for -fsplit-stack. It is the first
11394 instructions in the function, even before the regular prologue.
11395 The scratch register can be any caller-saved register which is not
11396 used for parameters or for the static chain. */
/* Returns INVALID_REGNUM (after issuing a sorry diagnostic) for the
   calling-convention combinations that leave no free caller-saved
   register: fastcall/thiscall or regparm with a static chain.  */
11398 static unsigned int
11399 split_stack_prologue_scratch_regno (void)
11405 bool is_fastcall
, is_thiscall
;
11408 is_fastcall
= (lookup_attribute ("fastcall",
11409 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11411 is_thiscall
= (lookup_attribute ("thiscall",
11412 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11414 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
/* fastcall: %eax holds the static chain, leaving no scratch register
   for a nested function.  */
11418 if (DECL_STATIC_CHAIN (cfun
->decl
))
11420 sorry ("-fsplit-stack does not support fastcall with "
11421 "nested function");
11422 return INVALID_REGNUM
;
11426 else if (is_thiscall
)
11428 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11432 else if (regparm
< 3)
11434 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11440 sorry ("-fsplit-stack does not support 2 register "
11441 " parameters for a nested function");
11442 return INVALID_REGNUM
;
11449 /* FIXME: We could make this work by pushing a register
11450 around the addition and comparison. */
11451 sorry ("-fsplit-stack does not support 3 register parameters");
11452 return INVALID_REGNUM
;
11457 /* A SYMBOL_REF for the function which allocates new stackspace for
/* Lazily created in ix86_expand_split_stack_prologue; GTY(()) makes
   these roots for the garbage collector so the cached SYMBOL_REFs
   survive across functions.  */
11460 static GTY(()) rtx split_stack_fn
;
11462 /* A SYMBOL_REF for the more stack function when using the large
11465 static GTY(()) rtx split_stack_fn_large
;
11467 /* Handle -fsplit-stack. These are the first instructions in the
11468 function, even before the regular prologue. */
/* Emits: a compare of (sp - frame size) against the stack limit in the
   TCB, a likely-taken branch over the slow path, a call to __morestack
   (or __morestack_large_model for CM_LARGE*), and the bookkeeping for
   the va_start scratch pointer.  See libgcc/config/i386/morestack.S
   for the runtime contract.
   NOTE(review): extraction dropped some interior lines; code text is
   preserved byte-identical, only comments added.  */
11471 ix86_expand_split_stack_prologue (void)
11473 struct ix86_frame frame
;
11474 HOST_WIDE_INT allocate
;
11475 unsigned HOST_WIDE_INT args_size
;
11476 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11477 rtx scratch_reg
= NULL_RTX
;
11478 rtx varargs_label
= NULL_RTX
;
11481 gcc_assert (flag_split_stack
&& reload_completed
);
11483 ix86_finalize_stack_realign_flags ();
11484 ix86_compute_frame_layout (&frame
);
11485 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11487 /* This is the label we will branch to if we have enough stack
11488 space. We expect the basic block reordering pass to reverse this
11489 branch if optimizing, so that we branch in the unlikely case. */
11490 label
= gen_label_rtx ();
11492 /* We need to compare the stack pointer minus the frame size with
11493 the stack boundary in the TCB. The stack boundary always gives
11494 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11495 can compare directly. Otherwise we need to do an addition. */
/* The UNSPEC_STACK_CHECK memory reference is the TCB stack-limit slot.  */
11497 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11498 UNSPEC_STACK_CHECK
);
11499 limit
= gen_rtx_CONST (Pmode
, limit
);
11500 limit
= gen_rtx_MEM (Pmode
, limit
);
11501 if (allocate
< SPLIT_STACK_AVAILABLE
)
11502 current
= stack_pointer_rtx
;
11505 unsigned int scratch_regno
;
11508 /* We need a scratch register to hold the stack pointer minus
11509 the required frame size. Since this is the very start of the
11510 function, the scratch register can be any caller-saved
11511 register which is not used for parameters. */
11512 offset
= GEN_INT (- allocate
);
11513 scratch_regno
= split_stack_prologue_scratch_regno ();
11514 if (scratch_regno
== INVALID_REGNUM
)
11516 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11517 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11519 /* We don't use ix86_gen_add3 in this case because it will
11520 want to split to lea, but when not optimizing the insn
11521 will not be split after this point. */
11522 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11523 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* Offset too wide for an immediate: materialize it first.  */
11528 emit_move_insn (scratch_reg
, offset
);
11529 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11530 stack_pointer_rtx
));
11532 current
= scratch_reg
;
11535 ix86_expand_branch (GEU
, current
, limit
, label
);
11536 jump_insn
= get_last_insn ();
11537 JUMP_LABEL (jump_insn
) = label
;
11539 /* Mark the jump as very likely to be taken. */
11540 add_int_reg_note (jump_insn
, REG_BR_PROB
,
11541 REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100);
11543 if (split_stack_fn
== NULL_RTX
)
11544 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11545 fn
= split_stack_fn
;
11547 /* Get more stack space. We pass in the desired stack space and the
11548 size of the arguments to copy to the new stack. In 32-bit mode
11549 we push the parameters; __morestack will return on a new stack
11550 anyhow. In 64-bit mode we pass the parameters in r10 and
11552 allocate_rtx
= GEN_INT (allocate
);
11553 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11554 call_fusage
= NULL_RTX
;
11559 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11560 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11562 /* If this function uses a static chain, it will be in %r10.
11563 Preserve it across the call to __morestack. */
11564 if (DECL_STATIC_CHAIN (cfun
->decl
))
11568 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11569 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11570 use_reg (&call_fusage
, rax
);
11573 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11576 HOST_WIDE_INT argval
;
11578 gcc_assert (Pmode
== DImode
);
11579 /* When using the large model we need to load the address
11580 into a register, and we've run out of registers. So we
11581 switch to a different calling convention, and we call a
11582 different function: __morestack_large. We pass the
11583 argument size in the upper 32 bits of r10 and pass the
11584 frame size in the lower 32 bits. */
11585 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11586 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11588 if (split_stack_fn_large
== NULL_RTX
)
11589 split_stack_fn_large
=
11590 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11592 if (ix86_cmodel
== CM_LARGE_PIC
)
/* Large PIC: compute the GOT base by hand (rip-relative label +
   GOT offset) and load the function address from the GOT.  */
11596 label
= gen_label_rtx ();
11597 emit_label (label
);
11598 LABEL_PRESERVE_P (label
) = 1;
11599 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11600 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11601 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11602 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11604 x
= gen_rtx_CONST (Pmode
, x
);
11605 emit_move_insn (reg11
, x
);
11606 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11607 x
= gen_const_mem (Pmode
, x
);
11608 emit_move_insn (reg11
, x
);
11611 emit_move_insn (reg11
, split_stack_fn_large
);
/* Pack args_size in the high 32 bits and allocate in the low 32.  */
11615 argval
= ((args_size
<< 16) << 16) + allocate
;
11616 emit_move_insn (reg10
, GEN_INT (argval
));
11620 emit_move_insn (reg10
, allocate_rtx
);
11621 emit_move_insn (reg11
, GEN_INT (args_size
));
11622 use_reg (&call_fusage
, reg11
);
11625 use_reg (&call_fusage
, reg10
);
/* 32-bit path: pass both arguments on the (old) stack.  */
11629 emit_insn (gen_push (GEN_INT (args_size
)));
11630 emit_insn (gen_push (allocate_rtx
));
11632 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11633 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11635 add_function_usage_to (call_insn
, call_fusage
);
11637 /* In order to make call/return prediction work right, we now need
11638 to execute a return instruction. See
11639 libgcc/config/i386/morestack.S for the details on how this works.
11641 For flow purposes gcc must not see this as a return
11642 instruction--we need control flow to continue at the subsequent
11643 label. Therefore, we use an unspec. */
11644 gcc_assert (crtl
->args
.pops_args
< 65536);
11645 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11647 /* If we are in 64-bit mode and this function uses a static chain,
11648 we saved %r10 in %rax before calling _morestack. */
11649 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11650 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11651 gen_rtx_REG (word_mode
, AX_REG
));
11653 /* If this function calls va_start, we need to store a pointer to
11654 the arguments on the old stack, because they may not have been
11655 all copied to the new stack. At this point the old stack can be
11656 found at the frame pointer value used by __morestack, because
11657 __morestack has set that up before calling back to us. Here we
11658 store that pointer in a scratch register, and in
11659 ix86_expand_prologue we store the scratch register in a stack
11661 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11663 unsigned int scratch_regno
;
11667 scratch_regno
= split_stack_prologue_scratch_regno ();
11668 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11669 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11673 return address within this function
11674 return address of caller of this function
11676 So we add three words to get to the stack arguments.
11680 return address within this function
11681 first argument to __morestack
11682 second argument to __morestack
11683 return address of caller of this function
11685 So we add five words to get to the stack arguments.
11687 words
= TARGET_64BIT
? 3 : 5;
11688 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11689 gen_rtx_PLUS (Pmode
, frame_reg
,
11690 GEN_INT (words
* UNITS_PER_WORD
))));
11692 varargs_label
= gen_label_rtx ();
11693 emit_jump_insn (gen_jump (varargs_label
));
11694 JUMP_LABEL (get_last_insn ()) = varargs_label
;
/* Fast path target: enough stack was available, no __morestack call.  */
11699 emit_label (label
);
11700 LABEL_NUSES (label
) = 1;
11702 /* If this function calls va_start, we now have to set the scratch
11703 register for the case where we do not call __morestack. In this
11704 case we need to set it based on the stack pointer. */
11705 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11707 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11708 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11709 GEN_INT (UNITS_PER_WORD
))));
11711 emit_label (varargs_label
);
11712 LABEL_NUSES (varargs_label
) = 1;
11716 /* We may have to tell the dataflow pass that the split stack prologue
11717 is initializing a scratch register. */
/* TARGET_EXTRA_LIVE_ON_ENTRY hook: marks the split-stack scratch
   register live on entry so dataflow does not treat its prologue
   initialization as dead.  */
11720 ix86_live_on_entry (bitmap regs
)
11722 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11724 gcc_assert (flag_split_stack
);
11725 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11729 /* Determine if op is suitable SUBREG RTX for address. */
/* Used by ix86_decompose_address to accept SUBREGs as base/index:
   only integer-mode SUBREGs no wider than a word, and only of hard
   registers that cannot be eliminated.  */
11732 ix86_address_subreg_operand (rtx op
)
11734 enum machine_mode mode
;
11739 mode
= GET_MODE (op
);
11741 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11744 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11745 failures when the register is one word out of a two word structure. */
11746 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11749 /* Allow only SUBREGs of non-eliminable hard registers. */
11750 return register_no_elim_operand (op
, mode
);
11753 /* Extract the parts of an RTL expression that is a valid memory address
11754 for an instruction. Return 0 if the structure of the address is
11755 grossly off. Return -1 if the address contains ASHIFT, so it is not
11756 strictly valid, but still used for computing length of lea instruction. */
11759 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11761 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11762 rtx base_reg
, index_reg
;
11763 HOST_WIDE_INT scale
= 1;
11764 rtx scale_rtx
= NULL_RTX
;
11767 enum ix86_address_seg seg
= SEG_DEFAULT
;
11769 /* Allow zero-extended SImode addresses,
11770 they will be emitted with addr32 prefix. */
11771 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11773 if (GET_CODE (addr
) == ZERO_EXTEND
11774 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11776 addr
= XEXP (addr
, 0);
11777 if (CONST_INT_P (addr
))
11780 else if (GET_CODE (addr
) == AND
11781 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11783 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11784 if (addr
== NULL_RTX
)
11787 if (CONST_INT_P (addr
))
11792 /* Allow SImode subregs of DImode addresses,
11793 they will be emitted with addr32 prefix. */
11794 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11796 if (GET_CODE (addr
) == SUBREG
11797 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11799 addr
= SUBREG_REG (addr
);
11800 if (CONST_INT_P (addr
))
11807 else if (GET_CODE (addr
) == SUBREG
)
11809 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11814 else if (GET_CODE (addr
) == PLUS
)
11816 rtx addends
[4], op
;
11824 addends
[n
++] = XEXP (op
, 1);
11827 while (GET_CODE (op
) == PLUS
);
11832 for (i
= n
; i
>= 0; --i
)
11835 switch (GET_CODE (op
))
11840 index
= XEXP (op
, 0);
11841 scale_rtx
= XEXP (op
, 1);
11847 index
= XEXP (op
, 0);
11848 tmp
= XEXP (op
, 1);
11849 if (!CONST_INT_P (tmp
))
11851 scale
= INTVAL (tmp
);
11852 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11854 scale
= 1 << scale
;
11859 if (GET_CODE (op
) != UNSPEC
)
11864 if (XINT (op
, 1) == UNSPEC_TP
11865 && TARGET_TLS_DIRECT_SEG_REFS
11866 && seg
== SEG_DEFAULT
)
11867 seg
= DEFAULT_TLS_SEG_REG
;
11873 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11900 else if (GET_CODE (addr
) == MULT
)
11902 index
= XEXP (addr
, 0); /* index*scale */
11903 scale_rtx
= XEXP (addr
, 1);
11905 else if (GET_CODE (addr
) == ASHIFT
)
11907 /* We're called for lea too, which implements ashift on occasion. */
11908 index
= XEXP (addr
, 0);
11909 tmp
= XEXP (addr
, 1);
11910 if (!CONST_INT_P (tmp
))
11912 scale
= INTVAL (tmp
);
11913 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11915 scale
= 1 << scale
;
11918 else if (CONST_INT_P (addr
))
11920 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11923 /* Constant addresses are sign extended to 64bit, we have to
11924 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11926 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11932 disp
= addr
; /* displacement */
11938 else if (GET_CODE (index
) == SUBREG
11939 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11945 /* Address override works only on the (%reg) part of %fs:(%reg). */
11946 if (seg
!= SEG_DEFAULT
11947 && ((base
&& GET_MODE (base
) != word_mode
)
11948 || (index
&& GET_MODE (index
) != word_mode
)))
11951 /* Extract the integral value of scale. */
11954 if (!CONST_INT_P (scale_rtx
))
11956 scale
= INTVAL (scale_rtx
);
11959 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11960 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11962 /* Avoid useless 0 displacement. */
11963 if (disp
== const0_rtx
&& (base
|| index
))
11966 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11967 if (base_reg
&& index_reg
&& scale
== 1
11968 && (index_reg
== arg_pointer_rtx
11969 || index_reg
== frame_pointer_rtx
11970 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11973 tmp
= base
, base
= index
, index
= tmp
;
11974 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11977 /* Special case: %ebp cannot be encoded as a base without a displacement.
11981 && (base_reg
== hard_frame_pointer_rtx
11982 || base_reg
== frame_pointer_rtx
11983 || base_reg
== arg_pointer_rtx
11984 || (REG_P (base_reg
)
11985 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11986 || REGNO (base_reg
) == R13_REG
))))
11989 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11990 Avoid this by transforming to [%esi+0].
11991 Reload calls address legitimization without cfun defined, so we need
11992 to test cfun for being non-NULL. */
11993 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11994 && base_reg
&& !index_reg
&& !disp
11995 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11998 /* Special case: encode reg+reg instead of reg*2. */
11999 if (!base
&& index
&& scale
== 2)
12000 base
= index
, base_reg
= index_reg
, scale
= 1;
12002 /* Special case: scaling cannot be encoded without base or displacement. */
12003 if (!base
&& !disp
&& index
&& scale
!= 1)
12007 out
->index
= index
;
12009 out
->scale
= scale
;
/* NOTE(review): lossy extraction -- interior lines are missing (embedded
   line numbers jump); bytes below are preserved verbatim.  */
12015 /* Return cost of the memory address x.
12016 For i386, it is better to use a complex address than let gcc copy
12017 the address into a reg and make a new pseudo. But not if the address
12018 requires to two regs - that would mean more pseudos with longer
/* TARGET_ADDRESS_COST hook: decomposes X and charges extra for addresses
   needing two registers and for K6-hostile ModR/M encodings below.
   The actual cost constants returned are missing from this extraction.  */
12021 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
12022 addr_space_t as ATTRIBUTE_UNUSED
,
12023 bool speed ATTRIBUTE_UNUSED
)
12025 struct ix86_address parts
;
12027 int ok
= ix86_decompose_address (x
, &parts
);
/* Look through SUBREGs so the pseudo/hard-register tests below see the
   underlying registers.  */
12031 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
12032 parts
.base
= SUBREG_REG (parts
.base
);
12033 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
12034 parts
.index
= SUBREG_REG (parts
.index
);
12036 /* Attempt to minimize number of registers in the address. */
12038 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
12040 && (!REG_P (parts
.index
)
12041 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
12045 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
12047 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
12048 && parts
.base
!= parts
.index
)
12051 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12052 since it's predecode logic can't detect the length of instructions
12053 and it degenerates to vector decoded. Increase cost of such
12054 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12055 to split such addresses or even refuse such addresses at all.
12057 Following addressing modes are affected:
12062 The first and last case may be avoidable by explicitly coding the zero in
12063 memory address, but I don't have AMD-K6 machine handy to check this
12067 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12068 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12069 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
/* NOTE(review): lossy extraction -- the return-type line, braces and the tail
   of the comment are missing; bytes below are preserved verbatim.  */
12075 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12076 this is used for to form addresses to local data when -fPIC is in
/* Predicate: true iff DISP is an UNSPEC_MACHOPIC_OFFSET wrapper.  */
12080 darwin_local_data_pic (rtx disp
)
12082 return (GET_CODE (disp
) == UNSPEC
12083 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
/* NOTE(review): lossy extraction -- case labels, braces and several return
   statements are missing; bytes below are preserved verbatim.  */
12086 /* Determine if a given RTX is a valid constant. We already know this
12087 satisfies CONSTANT_P. */
/* TARGET_LEGITIMATE_CONSTANT_P hook: rejects TLS and dllimport symbols and
   unspecs other than the whitelisted GOT/TLS offsets below.  */
12090 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
12092 switch (GET_CODE (x
))
12097 if (GET_CODE (x
) == PLUS
)
12099 if (!CONST_INT_P (XEXP (x
, 1)))
12104 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
12107 /* Only some unspecs are valid as "constants". */
12108 if (GET_CODE (x
) == UNSPEC
)
12109 switch (XINT (x
, 1))
12112 case UNSPEC_GOTOFF
:
12113 case UNSPEC_PLTOFF
:
12114 return TARGET_64BIT
;
12116 case UNSPEC_NTPOFF
:
12117 x
= XVECEXP (x
, 0, 0);
12118 return (GET_CODE (x
) == SYMBOL_REF
12119 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12120 case UNSPEC_DTPOFF
:
12121 x
= XVECEXP (x
, 0, 0);
12122 return (GET_CODE (x
) == SYMBOL_REF
12123 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
12128 /* We must have drilled down to a symbol. */
12129 if (GET_CODE (x
) == LABEL_REF
)
12131 if (GET_CODE (x
) != SYMBOL_REF
)
12136 /* TLS symbols are never valid. */
12137 if (SYMBOL_REF_TLS_MODEL (x
))
12140 /* DLLIMPORT symbols are never valid. */
12141 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12142 && SYMBOL_REF_DLLIMPORT_P (x
))
12146 /* mdynamic-no-pic */
12147 if (MACHO_DYNAMIC_NO_PIC_P
)
12148 return machopic_symbol_defined_p (x
);
12153 if (GET_MODE (x
) == TImode
12154 && x
!= CONST0_RTX (TImode
)
12160 if (!standard_sse_constant_p (x
))
12167 /* Otherwise we handle everything else in the move patterns. */
/* NOTE(review): lossy extraction -- the switch's case labels and braces are
   missing; bytes below are preserved verbatim.  */
12171 /* Determine if it's legal to put X into the constant pool. This
12172 is not possible for the address of thread-local symbols, which
12173 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: delegates the non-trivial cases to
   ix86_legitimate_constant_p (inverted).  */
12176 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
12178 /* We can always put integral constants and vectors in memory. */
12179 switch (GET_CODE (x
))
12189 return !ix86_legitimate_constant_p (mode
, x
);
/* NOTE(review): lossy extraction -- return type, braces and an early return
   are missing; bytes below are preserved verbatim.  */
12192 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
/* Predicate used by PE-COFF paths: true for dllimport or stub-variable
   SYMBOL_REFs when dllimport decl attributes are enabled.  */
12196 is_imported_p (rtx x
)
12198 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12199 || GET_CODE (x
) != SYMBOL_REF
)
12202 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
/* NOTE(review): lossy extraction -- case labels, braces and some returns are
   missing; bytes below are preserved verbatim.  */
12206 /* Nonzero if the constant value X is a legitimate general operand
12207 when generating PIC code. It is given that flag_pic is on and
12208 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12211 legitimate_pic_operand_p (rtx x
)
12215 switch (GET_CODE (x
))
/* CONST case: look through an outer PLUS-with-constant, then vet the
   inner UNSPEC against the whitelist below.  */
12218 inner
= XEXP (x
, 0);
12219 if (GET_CODE (inner
) == PLUS
12220 && CONST_INT_P (XEXP (inner
, 1)))
12221 inner
= XEXP (inner
, 0);
12223 /* Only some unspecs are valid as "constants". */
12224 if (GET_CODE (inner
) == UNSPEC
)
12225 switch (XINT (inner
, 1))
12228 case UNSPEC_GOTOFF
:
12229 case UNSPEC_PLTOFF
:
12230 return TARGET_64BIT
;
12232 x
= XVECEXP (inner
, 0, 0);
12233 return (GET_CODE (x
) == SYMBOL_REF
12234 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12235 case UNSPEC_MACHOPIC_OFFSET
:
12236 return legitimate_pic_address_disp_p (x
);
12244 return legitimate_pic_address_disp_p (x
);
/* NOTE(review): lossy extraction -- many lines are missing (returns, braces,
   case labels); bytes below are preserved verbatim.  */
12251 /* Determine if a given CONST RTX is a valid memory displacement
/* Validates DISP as a PIC-mode displacement: direct symbol/label refs under
   the 64-bit small/medium/large code models, then the whitelisted GOT/TLS
   UNSPECs.  Exact return values are partly missing from this extraction.  */
12255 legitimate_pic_address_disp_p (rtx disp
)
12259 /* In 64bit mode we can allow direct addresses of symbols and labels
12260 when they are not dynamic symbols. */
12263 rtx op0
= disp
, op1
;
12265 switch (GET_CODE (disp
))
/* CONST (sym + offset) case: offset must stay within +/-16MB.  */
12271 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12273 op0
= XEXP (XEXP (disp
, 0), 0);
12274 op1
= XEXP (XEXP (disp
, 0), 1);
12275 if (!CONST_INT_P (op1
)
12276 || INTVAL (op1
) >= 16*1024*1024
12277 || INTVAL (op1
) < -16*1024*1024)
12279 if (GET_CODE (op0
) == LABEL_REF
)
12281 if (GET_CODE (op0
) == CONST
12282 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12283 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12285 if (GET_CODE (op0
) == UNSPEC
12286 && XINT (op0
, 1) == UNSPEC_PCREL
)
12288 if (GET_CODE (op0
) != SYMBOL_REF
)
12293 /* TLS references should always be enclosed in UNSPEC.
12294 The dllimported symbol needs always to be resolved. */
12295 if (SYMBOL_REF_TLS_MODEL (op0
)
12296 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
12301 if (is_imported_p (op0
))
12304 if (SYMBOL_REF_FAR_ADDR_P (op0
)
12305 || !SYMBOL_REF_LOCAL_P (op0
))
12308 /* Function-symbols need to be resolved only for
12310 For the small-model we don't need to resolve anything
12312 if ((ix86_cmodel
!= CM_LARGE_PIC
12313 && SYMBOL_REF_FUNCTION_P (op0
))
12314 || ix86_cmodel
== CM_SMALL_PIC
)
12316 /* Non-external symbols don't need to be resolved for
12317 large, and medium-model. */
12318 if ((ix86_cmodel
== CM_LARGE_PIC
12319 || ix86_cmodel
== CM_MEDIUM_PIC
)
12320 && !SYMBOL_REF_EXTERNAL_P (op0
))
12323 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
12324 && SYMBOL_REF_LOCAL_P (op0
)
12325 && ix86_cmodel
!= CM_LARGE_PIC
)
12333 if (GET_CODE (disp
) != CONST
)
12335 disp
= XEXP (disp
, 0);
/* 64-bit: only a bare whitelisted UNSPEC wrapping a symbol/label is a
   legal displacement; arbitrary PLUS forms are refused.  */
12339 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12340 of GOT tables. We should not need these anyway. */
12341 if (GET_CODE (disp
) != UNSPEC
12342 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12343 && XINT (disp
, 1) != UNSPEC_GOTOFF
12344 && XINT (disp
, 1) != UNSPEC_PCREL
12345 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12348 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12349 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12355 if (GET_CODE (disp
) == PLUS
)
12357 if (!CONST_INT_P (XEXP (disp
, 1)))
12359 disp
= XEXP (disp
, 0);
12363 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12366 if (GET_CODE (disp
) != UNSPEC
)
12369 switch (XINT (disp
, 1))
12374 /* We need to check for both symbols and labels because VxWorks loads
12375 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12377 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12378 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12379 case UNSPEC_GOTOFF
:
12380 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12381 While ABI specify also 32bit relocation but we don't produce it in
12382 small PIC model at all. */
12383 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12384 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12386 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12388 case UNSPEC_GOTTPOFF
:
12389 case UNSPEC_GOTNTPOFF
:
12390 case UNSPEC_INDNTPOFF
:
12393 disp
= XVECEXP (disp
, 0, 0);
12394 return (GET_CODE (disp
) == SYMBOL_REF
12395 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12396 case UNSPEC_NTPOFF
:
12397 disp
= XVECEXP (disp
, 0, 0);
12398 return (GET_CODE (disp
) == SYMBOL_REF
12399 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12400 case UNSPEC_DTPOFF
:
12401 disp
= XVECEXP (disp
, 0, 0);
12402 return (GET_CODE (disp
) == SYMBOL_REF
12403 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
/* NOTE(review): lossy extraction -- declarations, braces and the *WIN /
   return handling are missing; bytes below are preserved verbatim.  */
12409 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12410 replace the input X, or the original X if no replacement is called for.
12411 The output parameter *WIN is 1 if the calling macro should goto WIN,
12412 0 if it should not. */
12415 ix86_legitimize_reload_address (rtx x
,
12416 enum machine_mode mode ATTRIBUTE_UNUSED
,
12417 int opnum
, int type
,
12418 int ind_levels ATTRIBUTE_UNUSED
)
12420 /* Reload can generate:
12422 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12426 This RTX is rejected from ix86_legitimate_address_p due to
12427 non-strictness of base register 97. Following this rejection,
12428 reload pushes all three components into separate registers,
12429 creating invalid memory address RTX.
12431 Following code reloads only the invalid part of the
12432 memory address RTX. */
12434 if (GET_CODE (x
) == PLUS
12435 && REG_P (XEXP (x
, 1))
12436 && GET_CODE (XEXP (x
, 0)) == PLUS
12437 && REG_P (XEXP (XEXP (x
, 0), 1)))
12440 bool something_reloaded
= false;
/* Reload the inner base register if it is not strictly valid.  */
12442 base
= XEXP (XEXP (x
, 0), 1);
12443 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12445 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12446 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12447 opnum
, (enum reload_type
) type
);
12448 something_reloaded
= true;
/* Likewise for the outer index register.  */
12451 index
= XEXP (x
, 1);
12452 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12454 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12455 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12456 opnum
, (enum reload_type
) type
);
12457 something_reloaded
= true;
12460 gcc_assert (something_reloaded
);
/* NOTE(review): lossy extraction -- the "return false" statements under each
   rejection comment, several braces and some conditions are missing; bytes
   below are preserved verbatim.  */
12467 /* Recognizes RTL expressions that are valid memory addresses for an
12468 instruction. The MODE argument is the machine mode for the MEM
12469 expression that wants to use this address.
12471 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12472 convert common non-canonical forms to canonical form so that they will
/* TARGET_LEGITIMATE_ADDRESS_P hook: decompose ADDR, then validate base,
   index, scale and displacement in turn.  STRICT selects strict vs.
   non-strict register checking.  */
12476 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12477 rtx addr
, bool strict
)
12479 struct ix86_address parts
;
12480 rtx base
, index
, disp
;
12481 HOST_WIDE_INT scale
;
12483 if (ix86_decompose_address (addr
, &parts
) <= 0)
12484 /* Decomposition failed. */
12488 index
= parts
.index
;
12490 scale
= parts
.scale
;
12492 /* Validate base register. */
12499 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12500 reg
= SUBREG_REG (base
);
12502 /* Base is not a register. */
12505 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12508 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12509 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12510 /* Base is not valid. */
12514 /* Validate index register. */
12521 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12522 reg
= SUBREG_REG (index
);
12524 /* Index is not a register. */
12527 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12530 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12531 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12532 /* Index is not valid. */
12536 /* Index and base should have the same mode. */
12538 && GET_MODE (base
) != GET_MODE (index
))
12541 /* Validate scale factor. */
12545 /* Scale without index. */
12548 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12549 /* Scale is not a valid multiplier. */
12553 /* Validate displacement. */
12556 if (GET_CODE (disp
) == CONST
12557 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12558 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12559 switch (XINT (XEXP (disp
, 0), 1))
12561 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12562 used. While ABI specify also 32bit relocations, we don't produce
12563 them at all and use IP relative instead. */
12565 case UNSPEC_GOTOFF
:
12566 gcc_assert (flag_pic
);
12568 goto is_legitimate_pic
;
12570 /* 64bit address unspec. */
12573 case UNSPEC_GOTPCREL
:
12575 gcc_assert (flag_pic
);
12576 goto is_legitimate_pic
;
12578 case UNSPEC_GOTTPOFF
:
12579 case UNSPEC_GOTNTPOFF
:
12580 case UNSPEC_INDNTPOFF
:
12581 case UNSPEC_NTPOFF
:
12582 case UNSPEC_DTPOFF
:
12585 case UNSPEC_STACK_CHECK
:
12586 gcc_assert (flag_split_stack
);
12590 /* Invalid address unspec. */
12594 else if (SYMBOLIC_CONST (disp
)
12598 && MACHOPIC_INDIRECT
12599 && !machopic_operand_p (disp
)
12605 if (TARGET_64BIT
&& (index
|| base
))
12607 /* foo@dtpoff(%rX) is ok. */
12608 if (GET_CODE (disp
) != CONST
12609 || GET_CODE (XEXP (disp
, 0)) != PLUS
12610 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12611 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12612 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12613 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12614 /* Non-constant pic memory reference. */
12617 else if ((!TARGET_MACHO
|| flag_pic
)
12618 && ! legitimate_pic_address_disp_p (disp
))
12619 /* Displacement is an invalid pic construct. */
12622 else if (MACHO_DYNAMIC_NO_PIC_P
12623 && !ix86_legitimate_constant_p (Pmode
, disp
))
12624 /* displacment must be referenced via non_lazy_pointer */
12628 /* This code used to verify that a symbolic pic displacement
12629 includes the pic_offset_table_rtx register.
12631 While this is good idea, unfortunately these constructs may
12632 be created by "adds using lea" optimization for incorrect
12641 This code is nonsensical, but results in addressing
12642 GOT table with pic_offset_table_rtx base. We can't
12643 just refuse it easily, since it gets matched by
12644 "addsi3" pattern, that later gets split to lea in the
12645 case output register differs from input. While this
12646 can be handled by separate addsi pattern for this case
12647 that never results in lea, this seems to be easier and
12648 correct fix for crash to disable this test. */
12650 else if (GET_CODE (disp
) != LABEL_REF
12651 && !CONST_INT_P (disp
)
12652 && (GET_CODE (disp
) != CONST
12653 || !ix86_legitimate_constant_p (Pmode
, disp
))
12654 && (GET_CODE (disp
) != SYMBOL_REF
12655 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12656 /* Displacement is not constant. */
12658 else if (TARGET_64BIT
12659 && !x86_64_immediate_operand (disp
, VOIDmode
))
12660 /* Displacement is out of range. */
12664 /* Everything looks valid. */
/* NOTE(review): lossy extraction -- return type and braces are missing;
   bytes below are preserved verbatim.  */
12668 /* Determine if a given RTX is a valid constant address. */
/* True iff X is a constant that is also a strict legitimate address.  */
12671 constant_address_p (rtx x
)
12673 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
/* NOTE(review): lossy extraction -- the guard around the lazy initialization
   and the return statement are missing; bytes below are preserved
   verbatim.  Presumably SET is created once and returned thereafter.  */
12676 /* Return a unique alias set for the GOT. */
12678 static alias_set_type
12679 ix86_GOT_alias_set (void)
12681 static alias_set_type set
= -1;
12683 set
= new_alias_set ();
/* NOTE(review): lossy extraction -- braces, `if (reg ...)` guards and several
   returns are missing (embedded line numbers jump); bytes below are
   preserved verbatim.  */
12687 /* Return a legitimate reference for ORIG (an address) using the
12688 register REG. If REG is 0, a new pseudo is generated.
12690 There are two types of references that must be handled:
12692 1. Global data references must load the address from the GOT, via
12693 the PIC reg. An insn is emitted to do this load, and the reg is
12696 2. Static data references, constant pool addresses, and code labels
12697 compute the address as an offset from the GOT, whose base is in
12698 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12699 differentiate them from global data objects. The returned
12700 address is the PIC reg + an unspec constant.
12702 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12703 reg also appears in the address. */
12706 legitimize_pic_address (rtx orig
, rtx reg
)
12709 rtx new_rtx
= orig
;
/* 32-bit Darwin is handled entirely by the generic Mach-O machinery.  */
12712 if (TARGET_MACHO
&& !TARGET_64BIT
)
12715 reg
= gen_reg_rtx (Pmode
);
12716 /* Use the generic Mach-O PIC machinery. */
12717 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12721 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12723 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12728 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12730 else if (TARGET_64BIT
&& !TARGET_PECOFF
12731 && ix86_cmodel
!= CM_SMALL_PIC
&& gotoff_operand (addr
, Pmode
))
12734 /* This symbol may be referenced via a displacement from the PIC
12735 base address (@GOTOFF). */
12737 if (reload_in_progress
)
12738 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12739 if (GET_CODE (addr
) == CONST
)
12740 addr
= XEXP (addr
, 0);
12741 if (GET_CODE (addr
) == PLUS
)
12743 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12745 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12748 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12749 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12751 tmpreg
= gen_reg_rtx (Pmode
);
12754 emit_move_insn (tmpreg
, new_rtx
);
12758 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12759 tmpreg
, 1, OPTAB_DIRECT
);
12763 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
/* 32-bit @GOTOFF: address is PIC reg + unspec offset.  */
12765 else if (!TARGET_64BIT
&& !TARGET_PECOFF
&& gotoff_operand (addr
, Pmode
))
12767 /* This symbol may be referenced via a displacement from the PIC
12768 base address (@GOTOFF). */
12770 if (reload_in_progress
)
12771 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12772 if (GET_CODE (addr
) == CONST
)
12773 addr
= XEXP (addr
, 0);
12774 if (GET_CODE (addr
) == PLUS
)
12776 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12778 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12781 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12782 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12783 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12787 emit_move_insn (reg
, new_rtx
);
12791 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12792 /* We can't use @GOTOFF for text labels on VxWorks;
12793 see gotoff_operand. */
12794 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12796 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12800 /* For x64 PE-COFF there is no GOT table. So we use address
12802 if (TARGET_64BIT
&& TARGET_PECOFF
)
12804 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12805 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12808 reg
= gen_reg_rtx (Pmode
);
12809 emit_move_insn (reg
, new_rtx
);
/* 64-bit small/medium model: RIP-relative load from the GOT.  */
12812 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12814 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12815 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12816 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12817 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12820 reg
= gen_reg_rtx (Pmode
);
12821 /* Use directly gen_movsi, otherwise the address is loaded
12822 into register for CSE. We don't want to CSE this addresses,
12823 instead we CSE addresses from the GOT table, so skip this. */
12824 emit_insn (gen_movsi (reg
, new_rtx
));
12829 /* This symbol must be referenced via a load from the
12830 Global Offset Table (@GOT). */
12832 if (reload_in_progress
)
12833 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12834 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12835 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12837 new_rtx
= force_reg (Pmode
, new_rtx
);
12838 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12839 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12840 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12843 reg
= gen_reg_rtx (Pmode
);
12844 emit_move_insn (reg
, new_rtx
);
12850 if (CONST_INT_P (addr
)
12851 && !x86_64_immediate_operand (addr
, VOIDmode
))
12855 emit_move_insn (reg
, addr
);
12859 new_rtx
= force_reg (Pmode
, addr
);
12861 else if (GET_CODE (addr
) == CONST
)
12863 addr
= XEXP (addr
, 0);
12865 /* We must match stuff we generate before. Assume the only
12866 unspecs that can get here are ours. Not that we could do
12867 anything with them anyway.... */
12868 if (GET_CODE (addr
) == UNSPEC
12869 || (GET_CODE (addr
) == PLUS
12870 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12872 gcc_assert (GET_CODE (addr
) == PLUS
);
12874 if (GET_CODE (addr
) == PLUS
)
12876 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12878 /* Check first to see if this is a constant offset from a @GOTOFF
12879 symbol reference. */
12880 if (!TARGET_PECOFF
&& gotoff_operand (op0
, Pmode
)
12881 && CONST_INT_P (op1
))
12885 if (reload_in_progress
)
12886 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12887 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12889 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12890 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12891 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12895 emit_move_insn (reg
, new_rtx
);
12901 if (INTVAL (op1
) < -16*1024*1024
12902 || INTVAL (op1
) >= 16*1024*1024)
12904 if (!x86_64_immediate_operand (op1
, Pmode
))
12905 op1
= force_reg (Pmode
, op1
);
12906 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
/* General PLUS: legitimize both operands recursively, then recombine,
   keeping any constant term outermost.  */
12912 rtx base
= legitimize_pic_address (op0
, reg
);
12913 enum machine_mode mode
= GET_MODE (base
);
12915 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
12917 if (CONST_INT_P (new_rtx
))
12919 if (INTVAL (new_rtx
) < -16*1024*1024
12920 || INTVAL (new_rtx
) >= 16*1024*1024)
12922 if (!x86_64_immediate_operand (new_rtx
, mode
))
12923 new_rtx
= force_reg (mode
, new_rtx
);
12925 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
12928 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
12932 if (GET_CODE (new_rtx
) == PLUS
12933 && CONSTANT_P (XEXP (new_rtx
, 1)))
12935 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
12936 new_rtx
= XEXP (new_rtx
, 1);
12938 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
/* NOTE(review): lossy extraction -- braces, the TO_REG guard and the return
   statement are missing; bytes below are preserved verbatim.  */
12946 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12949 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12951 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
/* Only SImode -> DImode widening is expected (x32: zero-extend ptr_mode
   thread pointer to DImode).  */
12953 if (GET_MODE (tp
) != tp_mode
)
12955 gcc_assert (GET_MODE (tp
) == SImode
);
12956 gcc_assert (tp_mode
== DImode
);
12958 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12962 tp
= copy_to_mode_reg (tp_mode
, tp
);
/* NOTE(review): lossy extraction -- braces and the `const char *sym`
   declaration line are missing; bytes below are preserved verbatim.  */
12967 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached across calls via GTY-rooted static.  */
12969 static GTY(()) rtx ix86_tls_symbol
;
12972 ix86_tls_get_addr (void)
12974 if (!ix86_tls_symbol
)
/* GNU TLS on 32-bit uses the triple-underscore entry point.  */
12977 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12978 ? "___tls_get_addr" : "__tls_get_addr");
12980 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
/* Large PIC model: reference the symbol via PLTOFF from the PIC base
   (the UNSPEC tag is missing from this extraction -- confirm).  */
12983 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
12985 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
12987 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
12988 gen_rtx_CONST (Pmode
, unspec
));
12991 return ix86_tls_symbol
;
/* NOTE(review): lossy extraction -- braces are missing; bytes below are
   preserved verbatim.  */
12994 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
/* Cached across calls via GTY-rooted static.  */
12996 static GTY(()) rtx ix86_tls_module_base_symbol
;
12999 ix86_tls_module_base (void)
13001 if (!ix86_tls_module_base_symbol
)
13003 ix86_tls_module_base_symbol
13004 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
/* Mark the symbol as global-dynamic TLS so later checks treat it as TLS.  */
13006 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
13007 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
13010 return ix86_tls_module_base_symbol
;
/* NOTE(review): lossy extraction -- the switch header, many braces,
   TARGET_64BIT guards and some returns are missing; bytes below are
   preserved verbatim.  */
13013 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13014 false if we expect this to be used for a memory address and true if
13015 we expect to load the address into a register. */
/* Expands a TLS reference X under MODEL (GD/LD/IE/LE -- see the ELF TLS
   ABI) into the corresponding access sequence.  */
13018 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
13020 rtx dest
, base
, off
;
13021 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
13022 enum machine_mode tp_mode
= Pmode
;
/* General dynamic: call __tls_get_addr (or use the GNU2 descriptor
   scheme) to obtain the address of X.  */
13027 case TLS_MODEL_GLOBAL_DYNAMIC
:
13028 dest
= gen_reg_rtx (Pmode
);
13032 if (flag_pic
&& !TARGET_PECOFF
)
13033 pic
= pic_offset_table_rtx
;
13036 pic
= gen_reg_rtx (Pmode
);
13037 emit_insn (gen_set_got (pic
));
13041 if (TARGET_GNU2_TLS
)
13044 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
13046 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
13048 tp
= get_thread_pointer (Pmode
, true);
13049 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
13051 if (GET_MODE (x
) != Pmode
)
13052 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13054 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13058 rtx caddr
= ix86_tls_get_addr ();
13062 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13067 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
13068 insns
= get_insns ();
13071 if (GET_MODE (x
) != Pmode
)
13072 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13074 RTL_CONST_CALL_P (insns
) = 1;
13075 emit_libcall_block (insns
, dest
, rax
, x
);
13078 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
/* Local dynamic: obtain the module base once, then add @DTPOFF.  */
13082 case TLS_MODEL_LOCAL_DYNAMIC
:
13083 base
= gen_reg_rtx (Pmode
);
13088 pic
= pic_offset_table_rtx
;
13091 pic
= gen_reg_rtx (Pmode
);
13092 emit_insn (gen_set_got (pic
));
13096 if (TARGET_GNU2_TLS
)
13098 rtx tmp
= ix86_tls_module_base ();
13101 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
13103 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
13105 tp
= get_thread_pointer (Pmode
, true);
13106 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
13107 gen_rtx_MINUS (Pmode
, tmp
, tp
));
13111 rtx caddr
= ix86_tls_get_addr ();
13115 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13120 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
13121 insns
= get_insns ();
13124 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13125 share the LD_BASE result with other LD model accesses. */
13126 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
13127 UNSPEC_TLS_LD_BASE
);
13129 RTL_CONST_CALL_P (insns
) = 1;
13130 emit_libcall_block (insns
, base
, rax
, eqv
);
13133 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
13136 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
13137 off
= gen_rtx_CONST (Pmode
, off
);
13139 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
13141 if (TARGET_GNU2_TLS
)
13143 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
13145 if (GET_MODE (x
) != Pmode
)
13146 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13148 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
/* Initial exec: load the TP offset from the GOT, add the thread
   pointer (or subtract, pre-GNU-TLS 32-bit).  */
13152 case TLS_MODEL_INITIAL_EXEC
:
13155 if (TARGET_SUN_TLS
&& !TARGET_X32
)
13157 /* The Sun linker took the AMD64 TLS spec literally
13158 and can only handle %rax as destination of the
13159 initial executable code sequence. */
13161 dest
= gen_reg_rtx (DImode
);
13162 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
13166 /* Generate DImode references to avoid %fs:(%reg32)
13167 problems and linker IE->LE relaxation bug. */
13170 type
= UNSPEC_GOTNTPOFF
;
13174 if (reload_in_progress
)
13175 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13176 pic
= pic_offset_table_rtx
;
13177 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
13179 else if (!TARGET_ANY_GNU_TLS
)
13181 pic
= gen_reg_rtx (Pmode
);
13182 emit_insn (gen_set_got (pic
));
13183 type
= UNSPEC_GOTTPOFF
;
13188 type
= UNSPEC_INDNTPOFF
;
13191 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
13192 off
= gen_rtx_CONST (tp_mode
, off
);
13194 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
13195 off
= gen_const_mem (tp_mode
, off
);
13196 set_mem_alias_set (off
, ix86_GOT_alias_set ());
13198 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13200 base
= get_thread_pointer (tp_mode
,
13201 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13202 off
= force_reg (tp_mode
, off
);
13203 return gen_rtx_PLUS (tp_mode
, base
, off
);
13207 base
= get_thread_pointer (Pmode
, true);
13208 dest
= gen_reg_rtx (Pmode
);
13209 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
/* Local exec: the offset is a link-time constant (@NTPOFF/@TPOFF).  */
13213 case TLS_MODEL_LOCAL_EXEC
:
13214 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13215 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13216 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
13217 off
= gen_rtx_CONST (Pmode
, off
);
13219 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13221 base
= get_thread_pointer (Pmode
,
13222 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13223 return gen_rtx_PLUS (Pmode
, base
, off
);
13227 base
= get_thread_pointer (Pmode
, true);
13228 dest
= gen_reg_rtx (Pmode
);
13229 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13234 gcc_unreachable ();
13240 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13241 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13242 unique refptr-DECL symbol corresponding to symbol DECL. */
13244 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13245 htab_t dllimport_map
;
13248 get_dllimport_decl (tree decl
, bool beimport
)
13250 struct tree_map
*h
, in
;
13253 const char *prefix
;
13254 size_t namelen
, prefixlen
;
13259 if (!dllimport_map
)
13260 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13262 in
.hash
= htab_hash_pointer (decl
);
13263 in
.base
.from
= decl
;
13264 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13265 h
= (struct tree_map
*) *loc
;
13269 *loc
= h
= ggc_alloc_tree_map ();
13271 h
->base
.from
= decl
;
13272 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13273 VAR_DECL
, NULL
, ptr_type_node
);
13274 DECL_ARTIFICIAL (to
) = 1;
13275 DECL_IGNORED_P (to
) = 1;
13276 DECL_EXTERNAL (to
) = 1;
13277 TREE_READONLY (to
) = 1;
13279 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13280 name
= targetm
.strip_name_encoding (name
);
13282 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13283 ? "*__imp_" : "*__imp__";
13285 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
13286 namelen
= strlen (name
);
13287 prefixlen
= strlen (prefix
);
13288 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13289 memcpy (imp_name
, prefix
, prefixlen
);
13290 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13292 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13293 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13294 SET_SYMBOL_REF_DECL (rtl
, to
);
13295 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
13298 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
13299 #ifdef SUB_TARGET_RECORD_STUB
13300 SUB_TARGET_RECORD_STUB (name
);
13304 rtl
= gen_const_mem (Pmode
, rtl
);
13305 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13307 SET_DECL_RTL (to
, rtl
);
13308 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13313 /* Expand SYMBOL into its corresponding far-addresse symbol.
13314 WANT_REG is true if we require the result be a register. */
13317 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
13322 gcc_assert (SYMBOL_REF_DECL (symbol
));
13323 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
13325 x
= DECL_RTL (imp_decl
);
13327 x
= force_reg (Pmode
, x
);
13331 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13332 true if we require the result be a register. */
13335 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13340 gcc_assert (SYMBOL_REF_DECL (symbol
));
13341 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
13343 x
= DECL_RTL (imp_decl
);
13345 x
= force_reg (Pmode
, x
);
13349 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13350 is true if we require the result be a register. */
13353 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
13355 if (!TARGET_PECOFF
)
13358 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13360 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
13361 return legitimize_dllimport_symbol (addr
, inreg
);
13362 if (GET_CODE (addr
) == CONST
13363 && GET_CODE (XEXP (addr
, 0)) == PLUS
13364 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13365 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
13367 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
13368 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13372 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
13374 if (GET_CODE (addr
) == SYMBOL_REF
13375 && !is_imported_p (addr
)
13376 && SYMBOL_REF_EXTERNAL_P (addr
)
13377 && SYMBOL_REF_DECL (addr
))
13378 return legitimize_pe_coff_extern_decl (addr
, inreg
);
13380 if (GET_CODE (addr
) == CONST
13381 && GET_CODE (XEXP (addr
, 0)) == PLUS
13382 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13383 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
13384 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
13385 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
13387 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
13388 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13393 /* Try machine-dependent ways of modifying an illegitimate address
13394 to be legitimate. If we find one, return the new, valid address.
13395 This macro is used in only one place: `memory_address' in explow.c.
13397 OLDX is the address as it was before break_out_memory_refs was called.
13398 In some cases it is useful to look at this to decide what needs to be done.
13400 It is always safe for this macro to do nothing. It exists to recognize
13401 opportunities to optimize the output.
13403 For the 80386, we handle X+REG by loading X into a register R and
13404 using R+REG. R will go in a general reg and indexing will be used.
13405 However, if REG is a broken-out memory address or multiplication,
13406 nothing needs to be done because REG can certainly go in a general reg.
13408 When -fpic is used, special handling is needed for symbolic references.
13409 See comments by legitimize_pic_address in i386.c for details. */
13412 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13413 enum machine_mode mode
)
13418 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13420 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13421 if (GET_CODE (x
) == CONST
13422 && GET_CODE (XEXP (x
, 0)) == PLUS
13423 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13424 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13426 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13427 (enum tls_model
) log
, false);
13428 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13431 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13433 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
13438 if (flag_pic
&& SYMBOLIC_CONST (x
))
13439 return legitimize_pic_address (x
, 0);
13442 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13443 return machopic_indirect_data_reference (x
, 0);
13446 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13447 if (GET_CODE (x
) == ASHIFT
13448 && CONST_INT_P (XEXP (x
, 1))
13449 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13452 log
= INTVAL (XEXP (x
, 1));
13453 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13454 GEN_INT (1 << log
));
13457 if (GET_CODE (x
) == PLUS
)
13459 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13461 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13462 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13463 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13466 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13467 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13468 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13469 GEN_INT (1 << log
));
13472 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13473 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13474 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13477 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13478 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13479 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13480 GEN_INT (1 << log
));
13483 /* Put multiply first if it isn't already. */
13484 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13486 rtx tmp
= XEXP (x
, 0);
13487 XEXP (x
, 0) = XEXP (x
, 1);
13492 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13493 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13494 created by virtual register instantiation, register elimination, and
13495 similar optimizations. */
13496 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13499 x
= gen_rtx_PLUS (Pmode
,
13500 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13501 XEXP (XEXP (x
, 1), 0)),
13502 XEXP (XEXP (x
, 1), 1));
13506 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13507 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13508 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13509 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13510 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13511 && CONSTANT_P (XEXP (x
, 1)))
13514 rtx other
= NULL_RTX
;
13516 if (CONST_INT_P (XEXP (x
, 1)))
13518 constant
= XEXP (x
, 1);
13519 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13521 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13523 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13524 other
= XEXP (x
, 1);
13532 x
= gen_rtx_PLUS (Pmode
,
13533 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13534 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13535 plus_constant (Pmode
, other
,
13536 INTVAL (constant
)));
13540 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13543 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13546 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13549 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13552 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13556 && REG_P (XEXP (x
, 1))
13557 && REG_P (XEXP (x
, 0)))
13560 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13563 x
= legitimize_pic_address (x
, 0);
13566 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13569 if (REG_P (XEXP (x
, 0)))
13571 rtx temp
= gen_reg_rtx (Pmode
);
13572 rtx val
= force_operand (XEXP (x
, 1), temp
);
13575 val
= convert_to_mode (Pmode
, val
, 1);
13576 emit_move_insn (temp
, val
);
13579 XEXP (x
, 1) = temp
;
13583 else if (REG_P (XEXP (x
, 1)))
13585 rtx temp
= gen_reg_rtx (Pmode
);
13586 rtx val
= force_operand (XEXP (x
, 0), temp
);
13589 val
= convert_to_mode (Pmode
, val
, 1);
13590 emit_move_insn (temp
, val
);
13593 XEXP (x
, 0) = temp
;
13601 /* Print an integer constant expression in assembler syntax. Addition
13602 and subtraction are the only arithmetic that may appear in these
13603 expressions. FILE is the stdio stream to write to, X is the rtx, and
13604 CODE is the operand print code from the output string. */
13607 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13611 switch (GET_CODE (x
))
13614 gcc_assert (flag_pic
);
13619 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13620 output_addr_const (file
, x
);
13623 const char *name
= XSTR (x
, 0);
13625 /* Mark the decl as referenced so that cgraph will
13626 output the function. */
13627 if (SYMBOL_REF_DECL (x
))
13628 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13631 if (MACHOPIC_INDIRECT
13632 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13633 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13635 assemble_name (file
, name
);
13637 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
13638 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13639 fputs ("@PLT", file
);
13646 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13647 assemble_name (asm_out_file
, buf
);
13651 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13655 /* This used to output parentheses around the expression,
13656 but that does not work on the 386 (either ATT or BSD assembler). */
13657 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13661 if (GET_MODE (x
) == VOIDmode
)
13663 /* We can use %d if the number is <32 bits and positive. */
13664 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13665 fprintf (file
, "0x%lx%08lx",
13666 (unsigned long) CONST_DOUBLE_HIGH (x
),
13667 (unsigned long) CONST_DOUBLE_LOW (x
));
13669 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13672 /* We can't handle floating point constants;
13673 TARGET_PRINT_OPERAND must handle them. */
13674 output_operand_lossage ("floating constant misused");
13678 /* Some assemblers need integer constants to appear first. */
13679 if (CONST_INT_P (XEXP (x
, 0)))
13681 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13683 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13687 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13688 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13690 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13696 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13697 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13699 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13701 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13705 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13707 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13712 gcc_assert (XVECLEN (x
, 0) == 1);
13713 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13714 switch (XINT (x
, 1))
13717 fputs ("@GOT", file
);
13719 case UNSPEC_GOTOFF
:
13720 fputs ("@GOTOFF", file
);
13722 case UNSPEC_PLTOFF
:
13723 fputs ("@PLTOFF", file
);
13726 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13727 "(%rip)" : "[rip]", file
);
13729 case UNSPEC_GOTPCREL
:
13730 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13731 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13733 case UNSPEC_GOTTPOFF
:
13734 /* FIXME: This might be @TPOFF in Sun ld too. */
13735 fputs ("@gottpoff", file
);
13738 fputs ("@tpoff", file
);
13740 case UNSPEC_NTPOFF
:
13742 fputs ("@tpoff", file
);
13744 fputs ("@ntpoff", file
);
13746 case UNSPEC_DTPOFF
:
13747 fputs ("@dtpoff", file
);
13749 case UNSPEC_GOTNTPOFF
:
13751 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13752 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13754 fputs ("@gotntpoff", file
);
13756 case UNSPEC_INDNTPOFF
:
13757 fputs ("@indntpoff", file
);
13760 case UNSPEC_MACHOPIC_OFFSET
:
13762 machopic_output_function_base_name (file
);
13766 output_operand_lossage ("invalid UNSPEC as operand");
13772 output_operand_lossage ("invalid expression as operand");
13776 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13777 We need to emit DTP-relative relocations. */
13779 static void ATTRIBUTE_UNUSED
13780 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13782 fputs (ASM_LONG
, file
);
13783 output_addr_const (file
, x
);
13784 fputs ("@dtpoff", file
);
13790 fputs (", 0", file
);
13793 gcc_unreachable ();
13797 /* Return true if X is a representation of the PIC register. This copes
13798 with calls from ix86_find_base_term, where the register might have
13799 been replaced by a cselib value. */
13802 ix86_pic_register_p (rtx x
)
13804 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13805 return (pic_offset_table_rtx
13806 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13808 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13811 /* Helper function for ix86_delegitimize_address.
13812 Attempt to delegitimize TLS local-exec accesses. */
13815 ix86_delegitimize_tls_address (rtx orig_x
)
13817 rtx x
= orig_x
, unspec
;
13818 struct ix86_address addr
;
13820 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13824 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13826 if (ix86_decompose_address (x
, &addr
) == 0
13827 || addr
.seg
!= DEFAULT_TLS_SEG_REG
13828 || addr
.disp
== NULL_RTX
13829 || GET_CODE (addr
.disp
) != CONST
)
13831 unspec
= XEXP (addr
.disp
, 0);
13832 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13833 unspec
= XEXP (unspec
, 0);
13834 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13836 x
= XVECEXP (unspec
, 0, 0);
13837 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13838 if (unspec
!= XEXP (addr
.disp
, 0))
13839 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13842 rtx idx
= addr
.index
;
13843 if (addr
.scale
!= 1)
13844 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13845 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13848 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13849 if (MEM_P (orig_x
))
13850 x
= replace_equiv_address_nv (orig_x
, x
);
13854 /* In the name of slightly smaller debug output, and to cater to
13855 general assembler lossage, recognize PIC+GOTOFF and turn it back
13856 into a direct symbol reference.
13858 On Darwin, this is necessary to avoid a crash, because Darwin
13859 has a different PIC label for each routine but the DWARF debugging
13860 information is not associated with any particular routine, so it's
13861 necessary to remove references to the PIC label from RTL stored by
13862 the DWARF output code. */
13865 ix86_delegitimize_address (rtx x
)
13867 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13868 /* addend is NULL or some rtx if x is something+GOTOFF where
13869 something doesn't include the PIC register. */
13870 rtx addend
= NULL_RTX
;
13871 /* reg_addend is NULL or a multiple of some register. */
13872 rtx reg_addend
= NULL_RTX
;
13873 /* const_addend is NULL or a const_int. */
13874 rtx const_addend
= NULL_RTX
;
13875 /* This is the result, or NULL. */
13876 rtx result
= NULL_RTX
;
13885 if (GET_CODE (x
) == CONST
13886 && GET_CODE (XEXP (x
, 0)) == PLUS
13887 && GET_MODE (XEXP (x
, 0)) == Pmode
13888 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13889 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13890 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13892 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13893 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13894 if (MEM_P (orig_x
))
13895 x
= replace_equiv_address_nv (orig_x
, x
);
13899 if (GET_CODE (x
) == CONST
13900 && GET_CODE (XEXP (x
, 0)) == UNSPEC
13901 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
13902 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
13903 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
13905 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13906 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13908 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13916 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
13917 return ix86_delegitimize_tls_address (orig_x
);
13919 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
13920 and -mcmodel=medium -fpic. */
13923 if (GET_CODE (x
) != PLUS
13924 || GET_CODE (XEXP (x
, 1)) != CONST
)
13925 return ix86_delegitimize_tls_address (orig_x
);
13927 if (ix86_pic_register_p (XEXP (x
, 0)))
13928 /* %ebx + GOT/GOTOFF */
13930 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13932 /* %ebx + %reg * scale + GOT/GOTOFF */
13933 reg_addend
= XEXP (x
, 0);
13934 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13935 reg_addend
= XEXP (reg_addend
, 1);
13936 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13937 reg_addend
= XEXP (reg_addend
, 0);
13940 reg_addend
= NULL_RTX
;
13941 addend
= XEXP (x
, 0);
13945 addend
= XEXP (x
, 0);
13947 x
= XEXP (XEXP (x
, 1), 0);
13948 if (GET_CODE (x
) == PLUS
13949 && CONST_INT_P (XEXP (x
, 1)))
13951 const_addend
= XEXP (x
, 1);
13955 if (GET_CODE (x
) == UNSPEC
13956 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13957 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
13958 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
13959 && !MEM_P (orig_x
) && !addend
)))
13960 result
= XVECEXP (x
, 0, 0);
13962 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
13963 && !MEM_P (orig_x
))
13964 result
= XVECEXP (x
, 0, 0);
13967 return ix86_delegitimize_tls_address (orig_x
);
13970 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13972 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13975 /* If the rest of original X doesn't involve the PIC register, add
13976 addend and subtract pic_offset_table_rtx. This can happen e.g.
13978 leal (%ebx, %ecx, 4), %ecx
13980 movl foo@GOTOFF(%ecx), %edx
13981 in which case we return (%ecx - %ebx) + foo. */
13982 if (pic_offset_table_rtx
)
13983 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13984 pic_offset_table_rtx
),
13989 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13991 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13992 if (result
== NULL_RTX
)
13998 /* If X is a machine specific address (i.e. a symbol or label being
13999 referenced as a displacement from the GOT implemented using an
14000 UNSPEC), then return the base term. Otherwise return X. */
14003 ix86_find_base_term (rtx x
)
14009 if (GET_CODE (x
) != CONST
)
14011 term
= XEXP (x
, 0);
14012 if (GET_CODE (term
) == PLUS
14013 && (CONST_INT_P (XEXP (term
, 1))
14014 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
14015 term
= XEXP (term
, 0);
14016 if (GET_CODE (term
) != UNSPEC
14017 || (XINT (term
, 1) != UNSPEC_GOTPCREL
14018 && XINT (term
, 1) != UNSPEC_PCREL
))
14021 return XVECEXP (term
, 0, 0);
14024 return ix86_delegitimize_address (x
);
14028 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
14029 bool fp
, FILE *file
)
14031 const char *suffix
;
14033 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
14035 code
= ix86_fp_compare_code_to_integer (code
);
14039 code
= reverse_condition (code
);
14090 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
14094 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14095 Those same assemblers have the same but opposite lossage on cmov. */
14096 if (mode
== CCmode
)
14097 suffix
= fp
? "nbe" : "a";
14098 else if (mode
== CCCmode
)
14101 gcc_unreachable ();
14117 gcc_unreachable ();
14121 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14138 gcc_unreachable ();
14142 /* ??? As above. */
14143 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14144 suffix
= fp
? "nb" : "ae";
14147 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
14151 /* ??? As above. */
14152 if (mode
== CCmode
)
14154 else if (mode
== CCCmode
)
14155 suffix
= fp
? "nb" : "ae";
14157 gcc_unreachable ();
14160 suffix
= fp
? "u" : "p";
14163 suffix
= fp
? "nu" : "np";
14166 gcc_unreachable ();
14168 fputs (suffix
, file
);
14171 /* Print the name of register X to FILE based on its machine mode and number.
14172 If CODE is 'w', pretend the mode is HImode.
14173 If CODE is 'b', pretend the mode is QImode.
14174 If CODE is 'k', pretend the mode is SImode.
14175 If CODE is 'q', pretend the mode is DImode.
14176 If CODE is 'x', pretend the mode is V4SFmode.
14177 If CODE is 't', pretend the mode is V8SFmode.
14178 If CODE is 'g', pretend the mode is V16SFmode.
14179 If CODE is 'h', pretend the reg is the 'high' byte register.
14180 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14181 If CODE is 'd', duplicate the operand for AVX instruction.
14185 print_reg (rtx x
, int code
, FILE *file
)
14188 unsigned int regno
;
14189 bool duplicated
= code
== 'd' && TARGET_AVX
;
14191 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14196 gcc_assert (TARGET_64BIT
);
14197 fputs ("rip", file
);
14201 regno
= true_regnum (x
);
14202 gcc_assert (regno
!= ARG_POINTER_REGNUM
14203 && regno
!= FRAME_POINTER_REGNUM
14204 && regno
!= FLAGS_REG
14205 && regno
!= FPSR_REG
14206 && regno
!= FPCR_REG
);
14208 if (code
== 'w' || MMX_REG_P (x
))
14210 else if (code
== 'b')
14212 else if (code
== 'k')
14214 else if (code
== 'q')
14216 else if (code
== 'y')
14218 else if (code
== 'h')
14220 else if (code
== 'x')
14222 else if (code
== 't')
14224 else if (code
== 'g')
14227 code
= GET_MODE_SIZE (GET_MODE (x
));
14229 /* Irritatingly, AMD extended registers use different naming convention
14230 from the normal registers: "r%d[bwd]" */
14231 if (REX_INT_REGNO_P (regno
))
14233 gcc_assert (TARGET_64BIT
);
14235 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
14239 error ("extended registers have no high halves");
14254 error ("unsupported operand size for extended register");
14264 if (STACK_TOP_P (x
))
14273 if (! ANY_FP_REG_P (x
))
14274 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14279 reg
= hi_reg_name
[regno
];
14282 if (regno
>= ARRAY_SIZE (qi_reg_name
))
14284 reg
= qi_reg_name
[regno
];
14287 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
14289 reg
= qi_high_reg_name
[regno
];
14294 gcc_assert (!duplicated
);
14296 fputs (hi_reg_name
[regno
] + 1, file
);
14302 gcc_assert (!duplicated
);
14304 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
14309 gcc_unreachable ();
14315 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14316 fprintf (file
, ", %%%s", reg
);
14318 fprintf (file
, ", %s", reg
);
14322 /* Locate some local-dynamic symbol still in use by this function
14323 so that we can print its name in some tls_local_dynamic_base
14327 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14331 if (GET_CODE (x
) == SYMBOL_REF
14332 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14334 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14341 static const char *
14342 get_some_local_dynamic_name (void)
14346 if (cfun
->machine
->some_ld_name
)
14347 return cfun
->machine
->some_ld_name
;
14349 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14350 if (NONDEBUG_INSN_P (insn
)
14351 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14352 return cfun
->machine
->some_ld_name
;
14357 /* Meaning of CODE:
14358 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14359 C -- print opcode suffix for set/cmov insn.
14360 c -- like C, but print reversed condition
14361 F,f -- likewise, but for floating-point.
14362 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14364 R -- print the prefix for register names.
14365 z -- print the opcode suffix for the size of the current operand.
14366 Z -- likewise, with special suffixes for x87 instructions.
14367 * -- print a star (in certain assembler syntax)
14368 A -- print an absolute memory reference.
14369 E -- print address with DImode register names if TARGET_64BIT.
14370 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14371 s -- print a shift double count, followed by the assemblers argument
14373 b -- print the QImode name of the register for the indicated operand.
14374 %b0 would print %al if operands[0] is reg 0.
14375 w -- likewise, print the HImode name of the register.
14376 k -- likewise, print the SImode name of the register.
14377 q -- likewise, print the DImode name of the register.
14378 x -- likewise, print the V4SFmode name of the register.
14379 t -- likewise, print the V8SFmode name of the register.
14380 g -- likewise, print the V16SFmode name of the register.
14381 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14382 y -- print "st(0)" instead of "st" as a register.
14383 d -- print duplicated register operand for AVX instruction.
14384 D -- print condition for SSE cmp instruction.
14385 P -- if PIC, print an @PLT suffix.
14386 p -- print raw symbol name.
14387 X -- don't print any sort of PIC '@' suffix for a symbol.
14388 & -- print some in-use local-dynamic symbol name.
14389 H -- print a memory address offset by 8; used for sse high-parts
14390 Y -- print condition for XOP pcom* instruction.
14391 + -- print a branch hint as 'cs' or 'ds' prefix
14392 ; -- print a semicolon (after prefixes due to bug in older gas).
14393 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14394 @ -- print a segment register of thread base pointer load
14395 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14399 ix86_print_operand (FILE *file
, rtx x
, int code
)
14406 switch (ASSEMBLER_DIALECT
)
14413 /* Intel syntax. For absolute addresses, registers should not
14414 be surrounded by braces. */
14418 ix86_print_operand (file
, x
, 0);
14425 gcc_unreachable ();
14428 ix86_print_operand (file
, x
, 0);
14432 /* Wrap address in an UNSPEC to declare special handling. */
14434 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14436 output_address (x
);
14440 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14445 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14450 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14455 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14460 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14465 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14470 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14471 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14474 switch (GET_MODE_SIZE (GET_MODE (x
)))
14489 output_operand_lossage
14490 ("invalid operand size for operand code 'O'");
14499 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14501 /* Opcodes don't get size suffixes if using Intel opcodes. */
14502 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14505 switch (GET_MODE_SIZE (GET_MODE (x
)))
14524 output_operand_lossage
14525 ("invalid operand size for operand code 'z'");
14530 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14532 (0, "non-integer operand used with operand code 'z'");
14536 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14537 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14540 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14542 switch (GET_MODE_SIZE (GET_MODE (x
)))
14545 #ifdef HAVE_AS_IX86_FILDS
14555 #ifdef HAVE_AS_IX86_FILDQ
14558 fputs ("ll", file
);
14566 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14568 /* 387 opcodes don't get size suffixes
14569 if the operands are registers. */
14570 if (STACK_REG_P (x
))
14573 switch (GET_MODE_SIZE (GET_MODE (x
)))
14594 output_operand_lossage
14595 ("invalid operand type used with operand code 'Z'");
14599 output_operand_lossage
14600 ("invalid operand size for operand code 'Z'");
14619 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14621 ix86_print_operand (file
, x
, 0);
14622 fputs (", ", file
);
14627 switch (GET_CODE (x
))
14630 fputs ("neq", file
);
14633 fputs ("eq", file
);
14637 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14641 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14645 fputs ("le", file
);
14649 fputs ("lt", file
);
14652 fputs ("unord", file
);
14655 fputs ("ord", file
);
14658 fputs ("ueq", file
);
14661 fputs ("nlt", file
);
14664 fputs ("nle", file
);
14667 fputs ("ule", file
);
14670 fputs ("ult", file
);
14673 fputs ("une", file
);
14676 output_operand_lossage ("operand is not a condition code, "
14677 "invalid operand code 'Y'");
14683 /* Little bit of braindamage here. The SSE compare instructions
14684 does use completely different names for the comparisons that the
14685 fp conditional moves. */
14686 switch (GET_CODE (x
))
14691 fputs ("eq_us", file
);
14695 fputs ("eq", file
);
14700 fputs ("nge", file
);
14704 fputs ("lt", file
);
14709 fputs ("ngt", file
);
14713 fputs ("le", file
);
14716 fputs ("unord", file
);
14721 fputs ("neq_oq", file
);
14725 fputs ("neq", file
);
14730 fputs ("ge", file
);
14734 fputs ("nlt", file
);
14739 fputs ("gt", file
);
14743 fputs ("nle", file
);
14746 fputs ("ord", file
);
14749 output_operand_lossage ("operand is not a condition code, "
14750 "invalid operand code 'D'");
14757 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14758 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14764 if (!COMPARISON_P (x
))
14766 output_operand_lossage ("operand is not a condition code, "
14767 "invalid operand code '%c'", code
);
14770 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14771 code
== 'c' || code
== 'f',
14772 code
== 'F' || code
== 'f',
14777 if (!offsettable_memref_p (x
))
14779 output_operand_lossage ("operand is not an offsettable memory "
14780 "reference, invalid operand code 'H'");
14783 /* It doesn't actually matter what mode we use here, as we're
14784 only going to use this for printing. */
14785 x
= adjust_address_nv (x
, DImode
, 8);
14786 /* Output 'qword ptr' for intel assembler dialect. */
14787 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14792 gcc_assert (CONST_INT_P (x
));
14794 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14795 #ifdef HAVE_AS_IX86_HLE
14796 fputs ("xacquire ", file
);
14798 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14800 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14801 #ifdef HAVE_AS_IX86_HLE
14802 fputs ("xrelease ", file
);
14804 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14806 /* We do not want to print value of the operand. */
14810 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14816 const char *name
= get_some_local_dynamic_name ();
14818 output_operand_lossage ("'%%&' used without any "
14819 "local dynamic TLS references");
14821 assemble_name (file
, name
);
14830 || optimize_function_for_size_p (cfun
)
14831 || !TARGET_BRANCH_PREDICTION_HINTS
)
14834 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14837 int pred_val
= XINT (x
, 0);
14839 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14840 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14842 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14844 = final_forward_branch_p (current_output_insn
) == 0;
14846 /* Emit hints only in the case default branch prediction
14847 heuristics would fail. */
14848 if (taken
!= cputaken
)
14850 /* We use 3e (DS) prefix for taken branches and
14851 2e (CS) prefix for not taken branches. */
14853 fputs ("ds ; ", file
);
14855 fputs ("cs ; ", file
);
14863 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14869 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14872 /* The kernel uses a different segment register for performance
14873 reasons; a system call would not have to trash the userspace
14874 segment register, which would be expensive. */
14875 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14876 fputs ("fs", file
);
14878 fputs ("gs", file
);
14882 putc (TARGET_AVX2
? 'i' : 'f', file
);
14886 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14887 fputs ("addr32 ", file
);
14891 output_operand_lossage ("invalid operand code '%c'", code
);
14896 print_reg (x
, code
, file
);
14898 else if (MEM_P (x
))
14900 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14901 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14902 && GET_MODE (x
) != BLKmode
)
14905 switch (GET_MODE_SIZE (GET_MODE (x
)))
14907 case 1: size
= "BYTE"; break;
14908 case 2: size
= "WORD"; break;
14909 case 4: size
= "DWORD"; break;
14910 case 8: size
= "QWORD"; break;
14911 case 12: size
= "TBYTE"; break;
14913 if (GET_MODE (x
) == XFmode
)
14918 case 32: size
= "YMMWORD"; break;
14919 case 64: size
= "ZMMWORD"; break;
14921 gcc_unreachable ();
14924 /* Check for explicit size override (codes 'b', 'w', 'k',
14928 else if (code
== 'w')
14930 else if (code
== 'k')
14932 else if (code
== 'q')
14934 else if (code
== 'x')
14937 fputs (size
, file
);
14938 fputs (" PTR ", file
);
14942 /* Avoid (%rip) for call operands. */
14943 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14944 && !CONST_INT_P (x
))
14945 output_addr_const (file
, x
);
14946 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14947 output_operand_lossage ("invalid constraints for operand");
14949 output_address (x
);
14952 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14957 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14958 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14960 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14962 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14964 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
14965 (unsigned long long) (int) l
);
14967 fprintf (file
, "0x%08x", (unsigned int) l
);
14970 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14975 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14976 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14978 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14980 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14983 /* These float cases don't actually occur as immediate operands. */
14984 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14988 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14989 fputs (dstr
, file
);
14994 /* We have patterns that allow zero sets of memory, for instance.
14995 In 64-bit mode, we should probably support all 8-byte vectors,
14996 since we can in fact encode that into an immediate. */
14997 if (GET_CODE (x
) == CONST_VECTOR
)
14999 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
15003 if (code
!= 'P' && code
!= 'p')
15005 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
15007 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15010 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
15011 || GET_CODE (x
) == LABEL_REF
)
15013 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15016 fputs ("OFFSET FLAT:", file
);
15019 if (CONST_INT_P (x
))
15020 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
15021 else if (flag_pic
|| MACHOPIC_INDIRECT
)
15022 output_pic_addr_const (file
, x
, code
);
15024 output_addr_const (file
, x
);
/* Implements TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true if CODE is
   one of the punctuation characters that ix86_print_operand handles
   specially ('@', '*', '+', '&', ';', '~', '^').  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
	  || code == ';' || code == '~' || code == '^');
}
15035 /* Print a memory operand whose address is ADDR. */
15038 ix86_print_operand_address (FILE *file
, rtx addr
)
15040 struct ix86_address parts
;
15041 rtx base
, index
, disp
;
15047 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
15049 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15050 gcc_assert (parts
.index
== NULL_RTX
);
15051 parts
.index
= XVECEXP (addr
, 0, 1);
15052 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
15053 addr
= XVECEXP (addr
, 0, 0);
15056 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
15058 gcc_assert (TARGET_64BIT
);
15059 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15063 ok
= ix86_decompose_address (addr
, &parts
);
15068 index
= parts
.index
;
15070 scale
= parts
.scale
;
15078 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15080 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
15083 gcc_unreachable ();
15086 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15087 if (TARGET_64BIT
&& !base
&& !index
)
15091 if (GET_CODE (disp
) == CONST
15092 && GET_CODE (XEXP (disp
, 0)) == PLUS
15093 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15094 symbol
= XEXP (XEXP (disp
, 0), 0);
15096 if (GET_CODE (symbol
) == LABEL_REF
15097 || (GET_CODE (symbol
) == SYMBOL_REF
15098 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
15101 if (!base
&& !index
)
15103 /* Displacement only requires special attention. */
15105 if (CONST_INT_P (disp
))
15107 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
15108 fputs ("ds:", file
);
15109 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
15112 output_pic_addr_const (file
, disp
, 0);
15114 output_addr_const (file
, disp
);
15118 /* Print SImode register names to force addr32 prefix. */
15119 if (SImode_address_operand (addr
, VOIDmode
))
15121 #ifdef ENABLE_CHECKING
15122 gcc_assert (TARGET_64BIT
);
15123 switch (GET_CODE (addr
))
15126 gcc_assert (GET_MODE (addr
) == SImode
);
15127 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
15131 gcc_assert (GET_MODE (addr
) == DImode
);
15134 gcc_unreachable ();
15137 gcc_assert (!code
);
15143 && CONST_INT_P (disp
)
15144 && INTVAL (disp
) < -16*1024*1024)
15146 /* X32 runs in 64-bit mode, where displacement, DISP, in
15147 address DISP(%r64), is encoded as 32-bit immediate sign-
15148 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15149 address is %r64 + 0xffffffffbffffd00. When %r64 <
15150 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15151 which is invalid for x32. The correct address is %r64
15152 - 0x40000300 == 0xf7ffdd64. To properly encode
15153 -0x40000300(%r64) for x32, we zero-extend negative
15154 displacement by forcing addr32 prefix which truncates
15155 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15156 zero-extend all negative displacements, including -1(%rsp).
15157 However, for small negative displacements, sign-extension
15158 won't cause overflow. We only zero-extend negative
15159 displacements if they < -16*1024*1024, which is also used
15160 to check legitimate address displacements for PIC. */
15164 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15169 output_pic_addr_const (file
, disp
, 0);
15170 else if (GET_CODE (disp
) == LABEL_REF
)
15171 output_asm_label (disp
);
15173 output_addr_const (file
, disp
);
15178 print_reg (base
, code
, file
);
15182 print_reg (index
, vsib
? 0 : code
, file
);
15183 if (scale
!= 1 || vsib
)
15184 fprintf (file
, ",%d", scale
);
15190 rtx offset
= NULL_RTX
;
15194 /* Pull out the offset of a symbol; print any symbol itself. */
15195 if (GET_CODE (disp
) == CONST
15196 && GET_CODE (XEXP (disp
, 0)) == PLUS
15197 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15199 offset
= XEXP (XEXP (disp
, 0), 1);
15200 disp
= gen_rtx_CONST (VOIDmode
,
15201 XEXP (XEXP (disp
, 0), 0));
15205 output_pic_addr_const (file
, disp
, 0);
15206 else if (GET_CODE (disp
) == LABEL_REF
)
15207 output_asm_label (disp
);
15208 else if (CONST_INT_P (disp
))
15211 output_addr_const (file
, disp
);
15217 print_reg (base
, code
, file
);
15220 if (INTVAL (offset
) >= 0)
15222 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15226 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15233 print_reg (index
, vsib
? 0 : code
, file
);
15234 if (scale
!= 1 || vsib
)
15235 fprintf (file
, "*%d", scale
);
15242 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15245 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
15249 if (GET_CODE (x
) != UNSPEC
)
15252 op
= XVECEXP (x
, 0, 0);
15253 switch (XINT (x
, 1))
15255 case UNSPEC_GOTTPOFF
:
15256 output_addr_const (file
, op
);
15257 /* FIXME: This might be @TPOFF in Sun ld. */
15258 fputs ("@gottpoff", file
);
15261 output_addr_const (file
, op
);
15262 fputs ("@tpoff", file
);
15264 case UNSPEC_NTPOFF
:
15265 output_addr_const (file
, op
);
15267 fputs ("@tpoff", file
);
15269 fputs ("@ntpoff", file
);
15271 case UNSPEC_DTPOFF
:
15272 output_addr_const (file
, op
);
15273 fputs ("@dtpoff", file
);
15275 case UNSPEC_GOTNTPOFF
:
15276 output_addr_const (file
, op
);
15278 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15279 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15281 fputs ("@gotntpoff", file
);
15283 case UNSPEC_INDNTPOFF
:
15284 output_addr_const (file
, op
);
15285 fputs ("@indntpoff", file
);
15288 case UNSPEC_MACHOPIC_OFFSET
:
15289 output_addr_const (file
, op
);
15291 machopic_output_function_base_name (file
);
15295 case UNSPEC_STACK_CHECK
:
15299 gcc_assert (flag_split_stack
);
15301 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15302 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15304 gcc_unreachable ();
15307 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15318 /* Split one or more double-mode RTL references into pairs of half-mode
15319 references. The RTL can be REG, offsettable MEM, integer constant, or
15320 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15321 split and "num" is its length. lo_half and hi_half are output arrays
15322 that parallel "operands". */
15325 split_double_mode (enum machine_mode mode
, rtx operands
[],
15326 int num
, rtx lo_half
[], rtx hi_half
[])
15328 enum machine_mode half_mode
;
15334 half_mode
= DImode
;
15337 half_mode
= SImode
;
15340 gcc_unreachable ();
15343 byte
= GET_MODE_SIZE (half_mode
);
15347 rtx op
= operands
[num
];
15349 /* simplify_subreg refuse to split volatile memory addresses,
15350 but we still have to handle it. */
15353 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15354 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15358 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15359 GET_MODE (op
) == VOIDmode
15360 ? mode
: GET_MODE (op
), 0);
15361 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15362 GET_MODE (op
) == VOIDmode
15363 ? mode
: GET_MODE (op
), byte
);
15368 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15369 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15370 is the expression of the binary operation. The output may either be
15371 emitted here, or returned to the caller, like all output_* functions.
15373 There is no guarantee that the operands are the same mode, as they
15374 might be within FLOAT or FLOAT_EXTEND expressions. */
15376 #ifndef SYSV386_COMPAT
15377 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15378 wants to fix the assemblers because that causes incompatibility
15379 with gcc. No-one wants to fix gcc because that causes
15380 incompatibility with assemblers... You can use the option of
15381 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15382 #define SYSV386_COMPAT 1
15386 output_387_binary_op (rtx insn
, rtx
*operands
)
15388 static char buf
[40];
15391 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15393 #ifdef ENABLE_CHECKING
15394 /* Even if we do not want to check the inputs, this documents input
15395 constraints. Which helps in understanding the following code. */
15396 if (STACK_REG_P (operands
[0])
15397 && ((REG_P (operands
[1])
15398 && REGNO (operands
[0]) == REGNO (operands
[1])
15399 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15400 || (REG_P (operands
[2])
15401 && REGNO (operands
[0]) == REGNO (operands
[2])
15402 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15403 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15406 gcc_assert (is_sse
);
15409 switch (GET_CODE (operands
[3]))
15412 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15413 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15421 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15422 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15430 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15431 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15439 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15440 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15448 gcc_unreachable ();
15455 strcpy (buf
, ssep
);
15456 if (GET_MODE (operands
[0]) == SFmode
)
15457 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15459 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15463 strcpy (buf
, ssep
+ 1);
15464 if (GET_MODE (operands
[0]) == SFmode
)
15465 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15467 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15473 switch (GET_CODE (operands
[3]))
15477 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15479 rtx temp
= operands
[2];
15480 operands
[2] = operands
[1];
15481 operands
[1] = temp
;
15484 /* know operands[0] == operands[1]. */
15486 if (MEM_P (operands
[2]))
15492 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15494 if (STACK_TOP_P (operands
[0]))
15495 /* How is it that we are storing to a dead operand[2]?
15496 Well, presumably operands[1] is dead too. We can't
15497 store the result to st(0) as st(0) gets popped on this
15498 instruction. Instead store to operands[2] (which I
15499 think has to be st(1)). st(1) will be popped later.
15500 gcc <= 2.8.1 didn't have this check and generated
15501 assembly code that the Unixware assembler rejected. */
15502 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15504 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15508 if (STACK_TOP_P (operands
[0]))
15509 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15511 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15516 if (MEM_P (operands
[1]))
15522 if (MEM_P (operands
[2]))
15528 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15531 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15532 derived assemblers, confusingly reverse the direction of
15533 the operation for fsub{r} and fdiv{r} when the
15534 destination register is not st(0). The Intel assembler
15535 doesn't have this brain damage. Read !SYSV386_COMPAT to
15536 figure out what the hardware really does. */
15537 if (STACK_TOP_P (operands
[0]))
15538 p
= "{p\t%0, %2|rp\t%2, %0}";
15540 p
= "{rp\t%2, %0|p\t%0, %2}";
15542 if (STACK_TOP_P (operands
[0]))
15543 /* As above for fmul/fadd, we can't store to st(0). */
15544 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15546 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15551 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15554 if (STACK_TOP_P (operands
[0]))
15555 p
= "{rp\t%0, %1|p\t%1, %0}";
15557 p
= "{p\t%1, %0|rp\t%0, %1}";
15559 if (STACK_TOP_P (operands
[0]))
15560 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15562 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15567 if (STACK_TOP_P (operands
[0]))
15569 if (STACK_TOP_P (operands
[1]))
15570 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15572 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15575 else if (STACK_TOP_P (operands
[1]))
15578 p
= "{\t%1, %0|r\t%0, %1}";
15580 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15586 p
= "{r\t%2, %0|\t%0, %2}";
15588 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15594 gcc_unreachable ();
15601 /* Check if a 256bit AVX register is referenced inside of EXP. */
15604 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15608 if (GET_CODE (exp
) == SUBREG
)
15609 exp
= SUBREG_REG (exp
);
15612 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15618 /* Return needed mode for entity in optimize_mode_switching pass. */
15621 ix86_avx_u128_mode_needed (rtx insn
)
15627 /* Needed mode is set to AVX_U128_CLEAN if there are
15628 no 256bit modes used in function arguments. */
15629 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15631 link
= XEXP (link
, 1))
15633 if (GET_CODE (XEXP (link
, 0)) == USE
)
15635 rtx arg
= XEXP (XEXP (link
, 0), 0);
15637 if (ix86_check_avx256_register (&arg
, NULL
))
15638 return AVX_U128_ANY
;
15642 return AVX_U128_CLEAN
;
15645 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15646 changes state only when a 256bit register is written to, but we need
15647 to prevent the compiler from moving optimal insertion point above
15648 eventual read from 256bit register. */
15649 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15650 return AVX_U128_DIRTY
;
15652 return AVX_U128_ANY
;
15655 /* Return mode that i387 must be switched into
15656 prior to the execution of insn. */
15659 ix86_i387_mode_needed (int entity
, rtx insn
)
15661 enum attr_i387_cw mode
;
15663 /* The mode UNINITIALIZED is used to store control word after a
15664 function call or ASM pattern. The mode ANY specify that function
15665 has no requirements on the control word and make no changes in the
15666 bits we are interested in. */
15669 || (NONJUMP_INSN_P (insn
)
15670 && (asm_noperands (PATTERN (insn
)) >= 0
15671 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15672 return I387_CW_UNINITIALIZED
;
15674 if (recog_memoized (insn
) < 0)
15675 return I387_CW_ANY
;
15677 mode
= get_attr_i387_cw (insn
);
15682 if (mode
== I387_CW_TRUNC
)
15687 if (mode
== I387_CW_FLOOR
)
15692 if (mode
== I387_CW_CEIL
)
15697 if (mode
== I387_CW_MASK_PM
)
15702 gcc_unreachable ();
15705 return I387_CW_ANY
;
15708 /* Return mode that entity must be switched into
15709 prior to the execution of insn. */
15712 ix86_mode_needed (int entity
, rtx insn
)
15717 return ix86_avx_u128_mode_needed (insn
);
15722 return ix86_i387_mode_needed (entity
, insn
);
15724 gcc_unreachable ();
15729 /* Check if a 256bit AVX register is referenced in stores. */
15732 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15734 if (ix86_check_avx256_register (&dest
, NULL
))
15736 bool *used
= (bool *) data
;
15741 /* Calculate mode of upper 128bit AVX registers after the insn. */
15744 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15746 rtx pat
= PATTERN (insn
);
15748 if (vzeroupper_operation (pat
, VOIDmode
)
15749 || vzeroall_operation (pat
, VOIDmode
))
15750 return AVX_U128_CLEAN
;
15752 /* We know that state is clean after CALL insn if there are no
15753 256bit registers used in the function return register. */
15756 bool avx_reg256_found
= false;
15757 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15758 if (!avx_reg256_found
)
15759 return AVX_U128_CLEAN
;
15762 /* Otherwise, return current mode. Remember that if insn
15763 references AVX 256bit registers, the mode was already changed
15764 to DIRTY from MODE_NEEDED. */
15768 /* Return the mode that an insn results in. */
15771 ix86_mode_after (int entity
, int mode
, rtx insn
)
15776 return ix86_avx_u128_mode_after (mode
, insn
);
15783 gcc_unreachable ();
15788 ix86_avx_u128_mode_entry (void)
15792 /* Entry mode is set to AVX_U128_DIRTY if there are
15793 256bit modes used in function arguments. */
15794 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15795 arg
= TREE_CHAIN (arg
))
15797 rtx incoming
= DECL_INCOMING_RTL (arg
);
15799 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15800 return AVX_U128_DIRTY
;
15803 return AVX_U128_CLEAN
;
15806 /* Return a mode that ENTITY is assumed to be
15807 switched to at function entry. */
15810 ix86_mode_entry (int entity
)
15815 return ix86_avx_u128_mode_entry ();
15820 return I387_CW_ANY
;
15822 gcc_unreachable ();
15827 ix86_avx_u128_mode_exit (void)
15829 rtx reg
= crtl
->return_rtx
;
15831 /* Exit mode is set to AVX_U128_DIRTY if there are
15832 256bit modes used in the function return register. */
15833 if (reg
&& ix86_check_avx256_register (®
, NULL
))
15834 return AVX_U128_DIRTY
;
15836 return AVX_U128_CLEAN
;
15839 /* Return a mode that ENTITY is assumed to be
15840 switched to at function exit. */
15843 ix86_mode_exit (int entity
)
15848 return ix86_avx_u128_mode_exit ();
15853 return I387_CW_ANY
;
15855 gcc_unreachable ();
15859 /* Output code to initialize control word copies used by trunc?f?i and
15860 rounding patterns. CURRENT_MODE is set to current control word,
15861 while NEW_MODE is set to new control word. */
15864 emit_i387_cw_initialization (int mode
)
15866 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15869 enum ix86_stack_slot slot
;
15871 rtx reg
= gen_reg_rtx (HImode
);
15873 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15874 emit_move_insn (reg
, copy_rtx (stored_mode
));
15876 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15877 || optimize_insn_for_size_p ())
15881 case I387_CW_TRUNC
:
15882 /* round toward zero (truncate) */
15883 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15884 slot
= SLOT_CW_TRUNC
;
15887 case I387_CW_FLOOR
:
15888 /* round down toward -oo */
15889 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15890 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15891 slot
= SLOT_CW_FLOOR
;
15895 /* round up toward +oo */
15896 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15897 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15898 slot
= SLOT_CW_CEIL
;
15901 case I387_CW_MASK_PM
:
15902 /* mask precision exception for nearbyint() */
15903 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15904 slot
= SLOT_CW_MASK_PM
;
15908 gcc_unreachable ();
15915 case I387_CW_TRUNC
:
15916 /* round toward zero (truncate) */
15917 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15918 slot
= SLOT_CW_TRUNC
;
15921 case I387_CW_FLOOR
:
15922 /* round down toward -oo */
15923 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15924 slot
= SLOT_CW_FLOOR
;
15928 /* round up toward +oo */
15929 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15930 slot
= SLOT_CW_CEIL
;
15933 case I387_CW_MASK_PM
:
15934 /* mask precision exception for nearbyint() */
15935 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15936 slot
= SLOT_CW_MASK_PM
;
15940 gcc_unreachable ();
15944 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15946 new_mode
= assign_386_stack_local (HImode
, slot
);
15947 emit_move_insn (new_mode
, reg
);
15950 /* Emit vzeroupper. */
15953 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
15957 /* Cancel automatic vzeroupper insertion if there are
15958 live call-saved SSE registers at the insertion point. */
15960 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15961 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15965 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15966 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15969 emit_insn (gen_avx_vzeroupper ());
15972 /* Generate one or more insns to set ENTITY to MODE. */
15975 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
15980 if (mode
== AVX_U128_CLEAN
)
15981 ix86_avx_emit_vzeroupper (regs_live
);
15987 if (mode
!= I387_CW_ANY
15988 && mode
!= I387_CW_UNINITIALIZED
)
15989 emit_i387_cw_initialization (mode
);
15992 gcc_unreachable ();
15996 /* Output code for INSN to convert a float to a signed int. OPERANDS
15997 are the insn operands. The output may be [HSD]Imode and the input
15998 operand may be [SDX]Fmode. */
16001 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
16003 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16004 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
16005 int round_mode
= get_attr_i387_cw (insn
);
16007 /* Jump through a hoop or two for DImode, since the hardware has no
16008 non-popping instruction. We used to do this a different way, but
16009 that was somewhat fragile and broke with post-reload splitters. */
16010 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
16011 output_asm_insn ("fld\t%y1", operands
);
16013 gcc_assert (STACK_TOP_P (operands
[1]));
16014 gcc_assert (MEM_P (operands
[0]));
16015 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
16018 output_asm_insn ("fisttp%Z0\t%0", operands
);
16021 if (round_mode
!= I387_CW_ANY
)
16022 output_asm_insn ("fldcw\t%3", operands
);
16023 if (stack_top_dies
|| dimode_p
)
16024 output_asm_insn ("fistp%Z0\t%0", operands
);
16026 output_asm_insn ("fist%Z0\t%0", operands
);
16027 if (round_mode
!= I387_CW_ANY
)
16028 output_asm_insn ("fldcw\t%2", operands
);
16034 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16035 have the values zero or one, indicates the ffreep insn's operand
16036 from the OPERANDS array. */
16038 static const char *
16039 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
16041 if (TARGET_USE_FFREEP
)
16042 #ifdef HAVE_AS_IX86_FFREEP
16043 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
16046 static char retval
[32];
16047 int regno
= REGNO (operands
[opno
]);
16049 gcc_assert (STACK_REGNO_P (regno
));
16051 regno
-= FIRST_STACK_REG
;
16053 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
16058 return opno
? "fstp\t%y1" : "fstp\t%y0";
16062 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16063 should be used. UNORDERED_P is true when fucom should be used. */
16066 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
16068 int stack_top_dies
;
16069 rtx cmp_op0
, cmp_op1
;
16070 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
16074 cmp_op0
= operands
[0];
16075 cmp_op1
= operands
[1];
16079 cmp_op0
= operands
[1];
16080 cmp_op1
= operands
[2];
16085 if (GET_MODE (operands
[0]) == SFmode
)
16087 return "%vucomiss\t{%1, %0|%0, %1}";
16089 return "%vcomiss\t{%1, %0|%0, %1}";
16092 return "%vucomisd\t{%1, %0|%0, %1}";
16094 return "%vcomisd\t{%1, %0|%0, %1}";
16097 gcc_assert (STACK_TOP_P (cmp_op0
));
16099 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16101 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
16103 if (stack_top_dies
)
16105 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
16106 return output_387_ffreep (operands
, 1);
16109 return "ftst\n\tfnstsw\t%0";
16112 if (STACK_REG_P (cmp_op1
)
16114 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
16115 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
16117 /* If both the top of the 387 stack dies, and the other operand
16118 is also a stack register that dies, then this must be a
16119 `fcompp' float compare */
16123 /* There is no double popping fcomi variant. Fortunately,
16124 eflags is immune from the fstp's cc clobbering. */
16126 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
16128 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
16129 return output_387_ffreep (operands
, 0);
16134 return "fucompp\n\tfnstsw\t%0";
16136 return "fcompp\n\tfnstsw\t%0";
16141 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16143 static const char * const alt
[16] =
16145 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16146 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16147 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16148 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16150 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16151 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16155 "fcomi\t{%y1, %0|%0, %y1}",
16156 "fcomip\t{%y1, %0|%0, %y1}",
16157 "fucomi\t{%y1, %0|%0, %y1}",
16158 "fucomip\t{%y1, %0|%0, %y1}",
16169 mask
= eflags_p
<< 3;
16170 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
16171 mask
|= unordered_p
<< 1;
16172 mask
|= stack_top_dies
;
16174 gcc_assert (mask
< 16);
16183 ix86_output_addr_vec_elt (FILE *file
, int value
)
16185 const char *directive
= ASM_LONG
;
16189 directive
= ASM_QUAD
;
16191 gcc_assert (!TARGET_64BIT
);
16194 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
16198 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
16200 const char *directive
= ASM_LONG
;
16203 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
16204 directive
= ASM_QUAD
;
16206 gcc_assert (!TARGET_64BIT
);
16208 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16209 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
16210 fprintf (file
, "%s%s%d-%s%d\n",
16211 directive
, LPREFIX
, value
, LPREFIX
, rel
);
16212 else if (HAVE_AS_GOTOFF_IN_DATA
)
16213 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
16215 else if (TARGET_MACHO
)
16217 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
16218 machopic_output_function_base_name (file
);
16223 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
16224 GOT_SYMBOL_NAME
, LPREFIX
, value
);
16227 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16231 ix86_expand_clear (rtx dest
)
16235 /* We play register width games, which are only valid after reload. */
16236 gcc_assert (reload_completed
);
16238 /* Avoid HImode and its attendant prefix byte. */
16239 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
16240 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
16241 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
16243 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16244 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
16246 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16247 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
16253 /* X is an unchanging MEM. If it is a constant pool reference, return
16254 the constant pool rtx, else NULL. */
16257 maybe_get_pool_constant (rtx x
)
16259 x
= ix86_delegitimize_address (XEXP (x
, 0));
16261 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
16262 return get_pool_constant (x
);
16268 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
16271 enum tls_model model
;
16276 if (GET_CODE (op1
) == SYMBOL_REF
)
16280 model
= SYMBOL_REF_TLS_MODEL (op1
);
16283 op1
= legitimize_tls_address (op1
, model
, true);
16284 op1
= force_operand (op1
, op0
);
16287 op1
= convert_to_mode (mode
, op1
, 1);
16289 else if ((tmp
= legitimize_pe_coff_symbol (op1
, false)) != NULL_RTX
)
16292 else if (GET_CODE (op1
) == CONST
16293 && GET_CODE (XEXP (op1
, 0)) == PLUS
16294 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
16296 rtx addend
= XEXP (XEXP (op1
, 0), 1);
16297 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
16300 model
= SYMBOL_REF_TLS_MODEL (symbol
);
16302 tmp
= legitimize_tls_address (symbol
, model
, true);
16304 tmp
= legitimize_pe_coff_symbol (symbol
, true);
16308 tmp
= force_operand (tmp
, NULL
);
16309 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
16310 op0
, 1, OPTAB_DIRECT
);
16313 op1
= convert_to_mode (mode
, tmp
, 1);
16317 if ((flag_pic
|| MACHOPIC_INDIRECT
)
16318 && symbolic_operand (op1
, mode
))
16320 if (TARGET_MACHO
&& !TARGET_64BIT
)
16323 /* dynamic-no-pic */
16324 if (MACHOPIC_INDIRECT
)
16326 rtx temp
= ((reload_in_progress
16327 || ((op0
&& REG_P (op0
))
16329 ? op0
: gen_reg_rtx (Pmode
));
16330 op1
= machopic_indirect_data_reference (op1
, temp
);
16332 op1
= machopic_legitimize_pic_address (op1
, mode
,
16333 temp
== op1
? 0 : temp
);
16335 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16337 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16341 if (GET_CODE (op0
) == MEM
)
16342 op1
= force_reg (Pmode
, op1
);
16346 if (GET_CODE (temp
) != REG
)
16347 temp
= gen_reg_rtx (Pmode
);
16348 temp
= legitimize_pic_address (op1
, temp
);
16353 /* dynamic-no-pic */
16359 op1
= force_reg (mode
, op1
);
16360 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16362 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16363 op1
= legitimize_pic_address (op1
, reg
);
16366 op1
= convert_to_mode (mode
, op1
, 1);
16373 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16374 || !push_operand (op0
, mode
))
16376 op1
= force_reg (mode
, op1
);
16378 if (push_operand (op0
, mode
)
16379 && ! general_no_elim_operand (op1
, mode
))
16380 op1
= copy_to_mode_reg (mode
, op1
);
16382 /* Force large constants in 64bit compilation into register
16383 to get them CSEed. */
16384 if (can_create_pseudo_p ()
16385 && (mode
== DImode
) && TARGET_64BIT
16386 && immediate_operand (op1
, mode
)
16387 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16388 && !register_operand (op0
, mode
)
16390 op1
= copy_to_mode_reg (mode
, op1
);
16392 if (can_create_pseudo_p ()
16393 && FLOAT_MODE_P (mode
)
16394 && GET_CODE (op1
) == CONST_DOUBLE
)
16396 /* If we are loading a floating point constant to a register,
16397 force the value to memory now, since we'll get better code
16398 out the back end. */
16400 op1
= validize_mem (force_const_mem (mode
, op1
));
16401 if (!register_operand (op0
, mode
))
16403 rtx temp
= gen_reg_rtx (mode
);
16404 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16405 emit_move_insn (op0
, temp
);
16411 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16415 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16417 rtx op0
= operands
[0], op1
= operands
[1];
16418 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16420 /* Force constants other than zero into memory. We do not know how
16421 the instructions used to build constants modify the upper 64 bits
16422 of the register, once we have that information we may be able
16423 to handle some of them more efficiently. */
16424 if (can_create_pseudo_p ()
16425 && register_operand (op0
, mode
)
16426 && (CONSTANT_P (op1
)
16427 || (GET_CODE (op1
) == SUBREG
16428 && CONSTANT_P (SUBREG_REG (op1
))))
16429 && !standard_sse_constant_p (op1
))
16430 op1
= validize_mem (force_const_mem (mode
, op1
));
16432 /* We need to check memory alignment for SSE mode since attribute
16433 can make operands unaligned. */
16434 if (can_create_pseudo_p ()
16435 && SSE_REG_MODE_P (mode
)
16436 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16437 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16441 /* ix86_expand_vector_move_misalign() does not like constants ... */
16442 if (CONSTANT_P (op1
)
16443 || (GET_CODE (op1
) == SUBREG
16444 && CONSTANT_P (SUBREG_REG (op1
))))
16445 op1
= validize_mem (force_const_mem (mode
, op1
));
16447 /* ... nor both arguments in memory. */
16448 if (!register_operand (op0
, mode
)
16449 && !register_operand (op1
, mode
))
16450 op1
= force_reg (mode
, op1
);
16452 tmp
[0] = op0
; tmp
[1] = op1
;
16453 ix86_expand_vector_move_misalign (mode
, tmp
);
16457 /* Make operand1 a register if it isn't already. */
16458 if (can_create_pseudo_p ()
16459 && !register_operand (op0
, mode
)
16460 && !register_operand (op1
, mode
))
16462 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16466 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16469 /* Split 32-byte AVX unaligned load and store if needed. */
16472 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16475 rtx (*extract
) (rtx
, rtx
, rtx
);
16476 rtx (*load_unaligned
) (rtx
, rtx
);
16477 rtx (*store_unaligned
) (rtx
, rtx
);
16478 enum machine_mode mode
;
16480 switch (GET_MODE (op0
))
16483 gcc_unreachable ();
16485 extract
= gen_avx_vextractf128v32qi
;
16486 load_unaligned
= gen_avx_loaddquv32qi
;
16487 store_unaligned
= gen_avx_storedquv32qi
;
16491 extract
= gen_avx_vextractf128v8sf
;
16492 load_unaligned
= gen_avx_loadups256
;
16493 store_unaligned
= gen_avx_storeups256
;
16497 extract
= gen_avx_vextractf128v4df
;
16498 load_unaligned
= gen_avx_loadupd256
;
16499 store_unaligned
= gen_avx_storeupd256
;
16506 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16508 rtx r
= gen_reg_rtx (mode
);
16509 m
= adjust_address (op1
, mode
, 0);
16510 emit_move_insn (r
, m
);
16511 m
= adjust_address (op1
, mode
, 16);
16512 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16513 emit_move_insn (op0
, r
);
16516 emit_insn (load_unaligned (op0
, op1
));
16518 else if (MEM_P (op0
))
16520 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16522 m
= adjust_address (op0
, mode
, 0);
16523 emit_insn (extract (m
, op1
, const0_rtx
));
16524 m
= adjust_address (op0
, mode
, 16);
16525 emit_insn (extract (m
, op1
, const1_rtx
));
16528 emit_insn (store_unaligned (op0
, op1
));
16531 gcc_unreachable ();
16534 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16535 straight to ix86_expand_vector_move. */
16536 /* Code generation for scalar reg-reg moves of single and double precision data:
16537 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16541 if (x86_sse_partial_reg_dependency == true)
16546 Code generation for scalar loads of double precision data:
16547 if (x86_sse_split_regs == true)
16548 movlpd mem, reg (gas syntax)
16552 Code generation for unaligned packed loads of single precision data
16553 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16554 if (x86_sse_unaligned_move_optimal)
16557 if (x86_sse_partial_reg_dependency == true)
16569 Code generation for unaligned packed loads of double precision data
16570 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16571 if (x86_sse_unaligned_move_optimal)
16574 if (x86_sse_split_regs == true)
16587 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16590 rtx (*load_unaligned
) (rtx
, rtx
);
16591 rtx (*store_unaligned
) (rtx
, rtx
);
16596 if (GET_MODE_SIZE (mode
) == 64)
16598 switch (GET_MODE_CLASS (mode
))
16600 case MODE_VECTOR_INT
:
16602 op0
= gen_lowpart (V16SImode
, op0
);
16603 op1
= gen_lowpart (V16SImode
, op1
);
16606 case MODE_VECTOR_FLOAT
:
16607 switch (GET_MODE (op0
))
16610 gcc_unreachable ();
16612 load_unaligned
= gen_avx512f_loaddquv16si
;
16613 store_unaligned
= gen_avx512f_storedquv16si
;
16616 load_unaligned
= gen_avx512f_loadups512
;
16617 store_unaligned
= gen_avx512f_storeups512
;
16620 load_unaligned
= gen_avx512f_loadupd512
;
16621 store_unaligned
= gen_avx512f_storeupd512
;
16626 emit_insn (load_unaligned (op0
, op1
));
16627 else if (MEM_P (op0
))
16628 emit_insn (store_unaligned (op0
, op1
));
16630 gcc_unreachable ();
16634 gcc_unreachable ();
16641 && GET_MODE_SIZE (mode
) == 32)
16643 switch (GET_MODE_CLASS (mode
))
16645 case MODE_VECTOR_INT
:
16647 op0
= gen_lowpart (V32QImode
, op0
);
16648 op1
= gen_lowpart (V32QImode
, op1
);
16651 case MODE_VECTOR_FLOAT
:
16652 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16656 gcc_unreachable ();
16664 /* ??? If we have typed data, then it would appear that using
16665 movdqu is the only way to get unaligned data loaded with
16667 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16669 op0
= gen_lowpart (V16QImode
, op0
);
16670 op1
= gen_lowpart (V16QImode
, op1
);
16671 /* We will eventually emit movups based on insn attributes. */
16672 emit_insn (gen_sse2_loaddquv16qi (op0
, op1
));
16674 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16679 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16680 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16681 || optimize_insn_for_size_p ())
16683 /* We will eventually emit movups based on insn attributes. */
16684 emit_insn (gen_sse2_loadupd (op0
, op1
));
16688 /* When SSE registers are split into halves, we can avoid
16689 writing to the top half twice. */
16690 if (TARGET_SSE_SPLIT_REGS
)
16692 emit_clobber (op0
);
16697 /* ??? Not sure about the best option for the Intel chips.
16698 The following would seem to satisfy; the register is
16699 entirely cleared, breaking the dependency chain. We
16700 then store to the upper half, with a dependency depth
16701 of one. A rumor has it that Intel recommends two movsd
16702 followed by an unpacklpd, but this is unconfirmed. And
16703 given that the dependency depth of the unpacklpd would
16704 still be one, I'm not sure why this would be better. */
16705 zero
= CONST0_RTX (V2DFmode
);
16708 m
= adjust_address (op1
, DFmode
, 0);
16709 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16710 m
= adjust_address (op1
, DFmode
, 8);
16711 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16716 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16717 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16718 || optimize_insn_for_size_p ())
16720 op0
= gen_lowpart (V4SFmode
, op0
);
16721 op1
= gen_lowpart (V4SFmode
, op1
);
16722 emit_insn (gen_sse_loadups (op0
, op1
));
16726 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16727 emit_move_insn (op0
, CONST0_RTX (mode
));
16729 emit_clobber (op0
);
16731 if (mode
!= V4SFmode
)
16732 op0
= gen_lowpart (V4SFmode
, op0
);
16734 m
= adjust_address (op1
, V2SFmode
, 0);
16735 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16736 m
= adjust_address (op1
, V2SFmode
, 8);
16737 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16740 else if (MEM_P (op0
))
16742 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16744 op0
= gen_lowpart (V16QImode
, op0
);
16745 op1
= gen_lowpart (V16QImode
, op1
);
16746 /* We will eventually emit movups based on insn attributes. */
16747 emit_insn (gen_sse2_storedquv16qi (op0
, op1
));
16749 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16752 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16753 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16754 || optimize_insn_for_size_p ())
16755 /* We will eventually emit movups based on insn attributes. */
16756 emit_insn (gen_sse2_storeupd (op0
, op1
));
16759 m
= adjust_address (op0
, DFmode
, 0);
16760 emit_insn (gen_sse2_storelpd (m
, op1
));
16761 m
= adjust_address (op0
, DFmode
, 8);
16762 emit_insn (gen_sse2_storehpd (m
, op1
));
16767 if (mode
!= V4SFmode
)
16768 op1
= gen_lowpart (V4SFmode
, op1
);
16771 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16772 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16773 || optimize_insn_for_size_p ())
16775 op0
= gen_lowpart (V4SFmode
, op0
);
16776 emit_insn (gen_sse_storeups (op0
, op1
));
16780 m
= adjust_address (op0
, V2SFmode
, 0);
16781 emit_insn (gen_sse_storelps (m
, op1
));
16782 m
= adjust_address (op0
, V2SFmode
, 8);
16783 emit_insn (gen_sse_storehps (m
, op1
));
16788 gcc_unreachable ();
16791 /* Expand a push in MODE. This is some mode for which we do not support
16792 proper push instructions, at least from the registers that we expect
16793 the value to live in. */
16796 ix86_expand_push (enum machine_mode mode
, rtx x
)
16800 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16801 GEN_INT (-GET_MODE_SIZE (mode
)),
16802 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16803 if (tmp
!= stack_pointer_rtx
)
16804 emit_move_insn (stack_pointer_rtx
, tmp
);
16806 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16808 /* When we push an operand onto stack, it has to be aligned at least
16809 at the function argument boundary. However since we don't have
16810 the argument type, we can't determine the actual argument
16812 emit_move_insn (tmp
, x
);
16815 /* Helper function of ix86_fixup_binary_operands to canonicalize
16816 operand order. Returns true if the operands should be swapped. */
16819 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16822 rtx dst
= operands
[0];
16823 rtx src1
= operands
[1];
16824 rtx src2
= operands
[2];
16826 /* If the operation is not commutative, we can't do anything. */
16827 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16830 /* Highest priority is that src1 should match dst. */
16831 if (rtx_equal_p (dst
, src1
))
16833 if (rtx_equal_p (dst
, src2
))
16836 /* Next highest priority is that immediate constants come second. */
16837 if (immediate_operand (src2
, mode
))
16839 if (immediate_operand (src1
, mode
))
16842 /* Lowest priority is that memory references should come second. */
16852 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16853 destination to use for the operation. If different from the true
16854 destination in operands[0], a copy operation will be required. */
16857 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16860 rtx dst
= operands
[0];
16861 rtx src1
= operands
[1];
16862 rtx src2
= operands
[2];
16864 /* Canonicalize operand order. */
16865 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16869 /* It is invalid to swap operands of different modes. */
16870 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16877 /* Both source operands cannot be in memory. */
16878 if (MEM_P (src1
) && MEM_P (src2
))
16880 /* Optimization: Only read from memory once. */
16881 if (rtx_equal_p (src1
, src2
))
16883 src2
= force_reg (mode
, src2
);
16886 else if (rtx_equal_p (dst
, src1
))
16887 src2
= force_reg (mode
, src2
);
16889 src1
= force_reg (mode
, src1
);
16892 /* If the destination is memory, and we do not have matching source
16893 operands, do things in registers. */
16894 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16895 dst
= gen_reg_rtx (mode
);
16897 /* Source 1 cannot be a constant. */
16898 if (CONSTANT_P (src1
))
16899 src1
= force_reg (mode
, src1
);
16901 /* Source 1 cannot be a non-matching memory. */
16902 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16903 src1
= force_reg (mode
, src1
);
16905 /* Improve address combine. */
16907 && GET_MODE_CLASS (mode
) == MODE_INT
16909 src2
= force_reg (mode
, src2
);
16911 operands
[1] = src1
;
16912 operands
[2] = src2
;
16916 /* Similarly, but assume that the destination has already been
16917 set up properly. */
16920 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16921 enum machine_mode mode
, rtx operands
[])
16923 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16924 gcc_assert (dst
== operands
[0]);
16927 /* Attempt to expand a binary operator. Make the expansion closer to the
16928 actual machine, then just general_operand, which will allow 3 separate
16929 memory references (one output, two input) in a single insn. */
16932 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16935 rtx src1
, src2
, dst
, op
, clob
;
16937 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16938 src1
= operands
[1];
16939 src2
= operands
[2];
16941 /* Emit the instruction. */
16943 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16944 if (reload_in_progress
)
16946 /* Reload doesn't know about the flags register, and doesn't know that
16947 it doesn't want to clobber it. We can only do this with PLUS. */
16948 gcc_assert (code
== PLUS
);
16951 else if (reload_completed
16953 && !rtx_equal_p (dst
, src1
))
16955 /* This is going to be an LEA; avoid splitting it later. */
16960 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16961 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16964 /* Fix up the destination if needed. */
16965 if (dst
!= operands
[0])
16966 emit_move_insn (operands
[0], dst
);
16969 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16970 the given OPERANDS. */
16973 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
16976 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
16977 if (GET_CODE (operands
[1]) == SUBREG
)
16982 else if (GET_CODE (operands
[2]) == SUBREG
)
16987 /* Optimize (__m128i) d | (__m128i) e and similar code
16988 when d and e are float vectors into float vector logical
16989 insn. In C/C++ without using intrinsics there is no other way
16990 to express vector logical operation on float vectors than
16991 to cast them temporarily to integer vectors. */
16993 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16994 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
16995 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
16996 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
16997 && SUBREG_BYTE (op1
) == 0
16998 && (GET_CODE (op2
) == CONST_VECTOR
16999 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
17000 && SUBREG_BYTE (op2
) == 0))
17001 && can_create_pseudo_p ())
17004 switch (GET_MODE (SUBREG_REG (op1
)))
17010 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
17011 if (GET_CODE (op2
) == CONST_VECTOR
)
17013 op2
= gen_lowpart (GET_MODE (dst
), op2
);
17014 op2
= force_reg (GET_MODE (dst
), op2
);
17019 op2
= SUBREG_REG (operands
[2]);
17020 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
17021 op2
= force_reg (GET_MODE (dst
), op2
);
17023 op1
= SUBREG_REG (op1
);
17024 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
17025 op1
= force_reg (GET_MODE (dst
), op1
);
17026 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
17027 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
17029 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
17035 if (!nonimmediate_operand (operands
[1], mode
))
17036 operands
[1] = force_reg (mode
, operands
[1]);
17037 if (!nonimmediate_operand (operands
[2], mode
))
17038 operands
[2] = force_reg (mode
, operands
[2]);
17039 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
17040 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
17041 gen_rtx_fmt_ee (code
, mode
, operands
[1],
17045 /* Return TRUE or FALSE depending on whether the binary operator meets the
17046 appropriate constraints. */
17049 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
17052 rtx dst
= operands
[0];
17053 rtx src1
= operands
[1];
17054 rtx src2
= operands
[2];
17056 /* Both source operands cannot be in memory. */
17057 if (MEM_P (src1
) && MEM_P (src2
))
17060 /* Canonicalize operand order for commutative operators. */
17061 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
17068 /* If the destination is memory, we must have a matching source operand. */
17069 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
17072 /* Source 1 cannot be a constant. */
17073 if (CONSTANT_P (src1
))
17076 /* Source 1 cannot be a non-matching memory. */
17077 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17078 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17079 return (code
== AND
17082 || (TARGET_64BIT
&& mode
== DImode
))
17083 && satisfies_constraint_L (src2
));
17088 /* Attempt to expand a unary operator. Make the expansion closer to the
17089 actual machine, then just general_operand, which will allow 2 separate
17090 memory references (one output, one input) in a single insn. */
17093 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
17096 int matching_memory
;
17097 rtx src
, dst
, op
, clob
;
17102 /* If the destination is memory, and we do not have matching source
17103 operands, do things in registers. */
17104 matching_memory
= 0;
17107 if (rtx_equal_p (dst
, src
))
17108 matching_memory
= 1;
17110 dst
= gen_reg_rtx (mode
);
17113 /* When source operand is memory, destination must match. */
17114 if (MEM_P (src
) && !matching_memory
)
17115 src
= force_reg (mode
, src
);
17117 /* Emit the instruction. */
17119 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
17120 if (reload_in_progress
|| code
== NOT
)
17122 /* Reload doesn't know about the flags register, and doesn't know that
17123 it doesn't want to clobber it. */
17124 gcc_assert (code
== NOT
);
17129 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17130 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17133 /* Fix up the destination if needed. */
17134 if (dst
!= operands
[0])
17135 emit_move_insn (operands
[0], dst
);
17138 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17139 divisor are within the range [0-255]. */
17142 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
17145 rtx end_label
, qimode_label
;
17146 rtx insn
, div
, mod
;
17147 rtx scratch
, tmp0
, tmp1
, tmp2
;
17148 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
17149 rtx (*gen_zero_extend
) (rtx
, rtx
);
17150 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
17155 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
17156 gen_test_ccno_1
= gen_testsi_ccno_1
;
17157 gen_zero_extend
= gen_zero_extendqisi2
;
17160 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
17161 gen_test_ccno_1
= gen_testdi_ccno_1
;
17162 gen_zero_extend
= gen_zero_extendqidi2
;
17165 gcc_unreachable ();
17168 end_label
= gen_label_rtx ();
17169 qimode_label
= gen_label_rtx ();
17171 scratch
= gen_reg_rtx (mode
);
17173 /* Use 8bit unsigned divimod if dividend and divisor are within
17174 the range [0-255]. */
17175 emit_move_insn (scratch
, operands
[2]);
17176 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
17177 scratch
, 1, OPTAB_DIRECT
);
17178 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
17179 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
17180 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
17181 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
17182 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
17184 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
17185 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
17186 JUMP_LABEL (insn
) = qimode_label
;
17188 /* Generate original signed/unsigned divimod. */
17189 div
= gen_divmod4_1 (operands
[0], operands
[1],
17190 operands
[2], operands
[3]);
17193 /* Branch to the end. */
17194 emit_jump_insn (gen_jump (end_label
));
17197 /* Generate 8bit unsigned divide. */
17198 emit_label (qimode_label
);
17199 /* Don't use operands[0] for result of 8bit divide since not all
17200 registers support QImode ZERO_EXTRACT. */
17201 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
17202 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
17203 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
17204 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
17208 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
17209 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
17213 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
17214 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
17217 /* Extract remainder from AH. */
17218 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
17219 if (REG_P (operands
[1]))
17220 insn
= emit_move_insn (operands
[1], tmp1
);
17223 /* Need a new scratch register since the old one has result
17225 scratch
= gen_reg_rtx (mode
);
17226 emit_move_insn (scratch
, tmp1
);
17227 insn
= emit_move_insn (operands
[1], scratch
);
17229 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
17231 /* Zero extend quotient from AL. */
17232 tmp1
= gen_lowpart (QImode
, tmp0
);
17233 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
17234 set_unique_reg_note (insn
, REG_EQUAL
, div
);
17236 emit_label (end_label
);
17239 #define LEA_MAX_STALL (3)
17240 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17242 /* Increase given DISTANCE in half-cycles according to
17243 dependencies between PREV and NEXT instructions.
17244 Add 1 half-cycle if there is no dependency and
17245 go to next cycle if there is some dependecy. */
17247 static unsigned int
17248 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
17253 if (!prev
|| !next
)
17254 return distance
+ (distance
& 1) + 2;
17256 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
17257 return distance
+ 1;
17259 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
17260 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
17261 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
17262 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
17263 return distance
+ (distance
& 1) + 2;
17265 return distance
+ 1;
17268 /* Function checks if instruction INSN defines register number
17269 REGNO1 or REGNO2. */
17272 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
17277 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
17278 if (DF_REF_REG_DEF_P (*def_rec
)
17279 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
17280 && (regno1
== DF_REF_REGNO (*def_rec
)
17281 || regno2
== DF_REF_REGNO (*def_rec
)))
17289 /* Function checks if instruction INSN uses register number
17290 REGNO as a part of address expression. */
17293 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
17297 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
17298 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
17304 /* Search backward for non-agu definition of register number REGNO1
17305 or register number REGNO2 in basic block starting from instruction
17306 START up to head of basic block or instruction INSN.
17308 Function puts true value into *FOUND var if definition was found
17309 and false otherwise.
17311 Distance in half-cycles between START and found instruction or head
17312 of BB is added to DISTANCE and returned. */
17315 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
17316 rtx insn
, int distance
,
17317 rtx start
, bool *found
)
17319 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17327 && distance
< LEA_SEARCH_THRESHOLD
)
17329 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
17331 distance
= increase_distance (prev
, next
, distance
);
17332 if (insn_defines_reg (regno1
, regno2
, prev
))
17334 if (recog_memoized (prev
) < 0
17335 || get_attr_type (prev
) != TYPE_LEA
)
17344 if (prev
== BB_HEAD (bb
))
17347 prev
= PREV_INSN (prev
);
17353 /* Search backward for non-agu definition of register number REGNO1
17354 or register number REGNO2 in INSN's basic block until
17355 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17356 2. Reach neighbour BBs boundary, or
17357 3. Reach agu definition.
17358 Returns the distance between the non-agu definition point and INSN.
17359 If no definition point, returns -1. */
17362 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
17365 basic_block bb
= BLOCK_FOR_INSN (insn
);
17367 bool found
= false;
17369 if (insn
!= BB_HEAD (bb
))
17370 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
17371 distance
, PREV_INSN (insn
),
17374 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
17378 bool simple_loop
= false;
17380 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17383 simple_loop
= true;
17388 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
17390 BB_END (bb
), &found
);
17393 int shortest_dist
= -1;
17394 bool found_in_bb
= false;
17396 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17399 = distance_non_agu_define_in_bb (regno1
, regno2
,
17405 if (shortest_dist
< 0)
17406 shortest_dist
= bb_dist
;
17407 else if (bb_dist
> 0)
17408 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17414 distance
= shortest_dist
;
17418 /* get_attr_type may modify recog data. We want to make sure
17419 that recog data is valid for instruction INSN, on which
17420 distance_non_agu_define is called. INSN is unchanged here. */
17421 extract_insn_cached (insn
);
17426 return distance
>> 1;
17429 /* Return the distance in half-cycles between INSN and the next
17430 insn that uses register number REGNO in memory address added
17431 to DISTANCE. Return -1 if REGNO0 is set.
17433 Put true value into *FOUND if register usage was found and
17435 Put true value into *REDEFINED if register redefinition was
17436 found and false otherwise. */
17439 distance_agu_use_in_bb (unsigned int regno
,
17440 rtx insn
, int distance
, rtx start
,
17441 bool *found
, bool *redefined
)
17443 basic_block bb
= NULL
;
17448 *redefined
= false;
17450 if (start
!= NULL_RTX
)
17452 bb
= BLOCK_FOR_INSN (start
);
17453 if (start
!= BB_HEAD (bb
))
17454 /* If insn and start belong to the same bb, set prev to insn,
17455 so the call to increase_distance will increase the distance
17456 between insns by 1. */
17462 && distance
< LEA_SEARCH_THRESHOLD
)
17464 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17466 distance
= increase_distance(prev
, next
, distance
);
17467 if (insn_uses_reg_mem (regno
, next
))
17469 /* Return DISTANCE if OP0 is used in memory
17470 address in NEXT. */
17475 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17477 /* Return -1 if OP0 is set in NEXT. */
17485 if (next
== BB_END (bb
))
17488 next
= NEXT_INSN (next
);
17494 /* Return the distance between INSN and the next insn that uses
17495 register number REGNO0 in memory address. Return -1 if no such
17496 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17499 distance_agu_use (unsigned int regno0
, rtx insn
)
17501 basic_block bb
= BLOCK_FOR_INSN (insn
);
17503 bool found
= false;
17504 bool redefined
= false;
17506 if (insn
!= BB_END (bb
))
17507 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17509 &found
, &redefined
);
17511 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17515 bool simple_loop
= false;
17517 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17520 simple_loop
= true;
17525 distance
= distance_agu_use_in_bb (regno0
, insn
,
17526 distance
, BB_HEAD (bb
),
17527 &found
, &redefined
);
17530 int shortest_dist
= -1;
17531 bool found_in_bb
= false;
17532 bool redefined_in_bb
= false;
17534 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17537 = distance_agu_use_in_bb (regno0
, insn
,
17538 distance
, BB_HEAD (e
->dest
),
17539 &found_in_bb
, &redefined_in_bb
);
17542 if (shortest_dist
< 0)
17543 shortest_dist
= bb_dist
;
17544 else if (bb_dist
> 0)
17545 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17551 distance
= shortest_dist
;
17555 if (!found
|| redefined
)
17558 return distance
>> 1;
17561 /* Define this macro to tune LEA priority vs ADD, it take effect when
17562 there is a dilemma of choicing LEA or ADD
17563 Negative value: ADD is more preferred than LEA
17565 Positive value: LEA is more preferred than ADD*/
17566 #define IX86_LEA_PRIORITY 0
17568 /* Return true if usage of lea INSN has performance advantage
17569 over a sequence of instructions. Instructions sequence has
17570 SPLIT_COST cycles higher latency than lea latency. */
17573 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17574 unsigned int regno2
, int split_cost
, bool has_scale
)
17576 int dist_define
, dist_use
;
17578 /* For Silvermont if using a 2-source or 3-source LEA for
17579 non-destructive destination purposes, or due to wanting
17580 ability to use SCALE, the use of LEA is justified. */
17581 if (ix86_tune
== PROCESSOR_SLM
)
17585 if (split_cost
< 1)
17587 if (regno0
== regno1
|| regno0
== regno2
)
17592 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17593 dist_use
= distance_agu_use (regno0
, insn
);
17595 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17597 /* If there is no non AGU operand definition, no AGU
17598 operand usage and split cost is 0 then both lea
17599 and non lea variants have same priority. Currently
17600 we prefer lea for 64 bit code and non lea on 32 bit
17602 if (dist_use
< 0 && split_cost
== 0)
17603 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17608 /* With longer definitions distance lea is more preferable.
17609 Here we change it to take into account splitting cost and
17611 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17613 /* If there is no use in memory addess then we just check
17614 that split cost exceeds AGU stall. */
17616 return dist_define
> LEA_MAX_STALL
;
17618 /* If this insn has both backward non-agu dependence and forward
17619 agu dependence, the one with short distance takes effect. */
17620 return dist_define
>= dist_use
;
17623 /* Return true if it is legal to clobber flags by INSN and
17624 false otherwise. */
17627 ix86_ok_to_clobber_flags (rtx insn
)
17629 basic_block bb
= BLOCK_FOR_INSN (insn
);
17635 if (NONDEBUG_INSN_P (insn
))
17637 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17638 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17641 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17645 if (insn
== BB_END (bb
))
17648 insn
= NEXT_INSN (insn
);
17651 live
= df_get_live_out(bb
);
17652 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17655 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17656 move and add to avoid AGU stalls. */
17659 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17661 unsigned int regno0
, regno1
, regno2
;
17663 /* Check if we need to optimize. */
17664 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17667 /* Check it is correct to split here. */
17668 if (!ix86_ok_to_clobber_flags(insn
))
17671 regno0
= true_regnum (operands
[0]);
17672 regno1
= true_regnum (operands
[1]);
17673 regno2
= true_regnum (operands
[2]);
17675 /* We need to split only adds with non destructive
17676 destination operand. */
17677 if (regno0
== regno1
|| regno0
== regno2
)
17680 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
17683 /* Return true if we should emit lea instruction instead of mov
17687 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17689 unsigned int regno0
, regno1
;
17691 /* Check if we need to optimize. */
17692 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17695 /* Use lea for reg to reg moves only. */
17696 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17699 regno0
= true_regnum (operands
[0]);
17700 regno1
= true_regnum (operands
[1]);
17702 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
17705 /* Return true if we need to split lea into a sequence of
17706 instructions to avoid AGU stalls. */
17709 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17711 unsigned int regno0
, regno1
, regno2
;
17713 struct ix86_address parts
;
17716 /* Check we need to optimize. */
17717 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17720 /* Check it is correct to split here. */
17721 if (!ix86_ok_to_clobber_flags(insn
))
17724 ok
= ix86_decompose_address (operands
[1], &parts
);
17727 /* There should be at least two components in the address. */
17728 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17729 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17732 /* We should not split into add if non legitimate pic
17733 operand is used as displacement. */
17734 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17737 regno0
= true_regnum (operands
[0]) ;
17738 regno1
= INVALID_REGNUM
;
17739 regno2
= INVALID_REGNUM
;
17742 regno1
= true_regnum (parts
.base
);
17744 regno2
= true_regnum (parts
.index
);
17748 /* Compute how many cycles we will add to execution time
17749 if split lea into a sequence of instructions. */
17750 if (parts
.base
|| parts
.index
)
17752 /* Have to use mov instruction if non desctructive
17753 destination form is used. */
17754 if (regno1
!= regno0
&& regno2
!= regno0
)
17757 /* Have to add index to base if both exist. */
17758 if (parts
.base
&& parts
.index
)
17761 /* Have to use shift and adds if scale is 2 or greater. */
17762 if (parts
.scale
> 1)
17764 if (regno0
!= regno1
)
17766 else if (regno2
== regno0
)
17769 split_cost
+= parts
.scale
;
17772 /* Have to use add instruction with immediate if
17773 disp is non zero. */
17774 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17777 /* Subtract the price of lea. */
17781 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
17785 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17786 matches destination. RTX includes clobber of FLAGS_REG. */
17789 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17794 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17795 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17797 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17800 /* Return true if regno1 def is nearest to the insn. */
17803 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17806 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17810 while (prev
&& prev
!= start
)
17812 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17814 prev
= PREV_INSN (prev
);
17817 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17819 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17821 prev
= PREV_INSN (prev
);
17824 /* None of the regs is defined in the bb. */
17828 /* Split lea instructions into a sequence of instructions
17829 which are executed on ALU to avoid AGU stalls.
17830 It is assumed that it is allowed to clobber flags register
17831 at lea position. */
17834 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17836 unsigned int regno0
, regno1
, regno2
;
17837 struct ix86_address parts
;
17841 ok
= ix86_decompose_address (operands
[1], &parts
);
17844 target
= gen_lowpart (mode
, operands
[0]);
17846 regno0
= true_regnum (target
);
17847 regno1
= INVALID_REGNUM
;
17848 regno2
= INVALID_REGNUM
;
17852 parts
.base
= gen_lowpart (mode
, parts
.base
);
17853 regno1
= true_regnum (parts
.base
);
17858 parts
.index
= gen_lowpart (mode
, parts
.index
);
17859 regno2
= true_regnum (parts
.index
);
17863 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
17865 if (parts
.scale
> 1)
17867 /* Case r1 = r1 + ... */
17868 if (regno1
== regno0
)
17870 /* If we have a case r1 = r1 + C * r1 then we
17871 should use multiplication which is very
17872 expensive. Assume cost model is wrong if we
17873 have such case here. */
17874 gcc_assert (regno2
!= regno0
);
17876 for (adds
= parts
.scale
; adds
> 0; adds
--)
17877 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17881 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17882 if (regno0
!= regno2
)
17883 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17885 /* Use shift for scaling. */
17886 ix86_emit_binop (ASHIFT
, mode
, target
,
17887 GEN_INT (exact_log2 (parts
.scale
)));
17890 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17892 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17893 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17896 else if (!parts
.base
&& !parts
.index
)
17898 gcc_assert(parts
.disp
);
17899 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
17905 if (regno0
!= regno2
)
17906 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17908 else if (!parts
.index
)
17910 if (regno0
!= regno1
)
17911 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17915 if (regno0
== regno1
)
17917 else if (regno0
== regno2
)
17923 /* Find better operand for SET instruction, depending
17924 on which definition is farther from the insn. */
17925 if (find_nearest_reg_def (insn
, regno1
, regno2
))
17926 tmp
= parts
.index
, tmp1
= parts
.base
;
17928 tmp
= parts
.base
, tmp1
= parts
.index
;
17930 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
17932 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17933 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17935 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
17939 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17942 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17943 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17947 /* Return true if it is ok to optimize an ADD operation to LEA
17948 operation to avoid flag register consumation. For most processors,
17949 ADD is faster than LEA. For the processors like ATOM, if the
17950 destination register of LEA holds an actual address which will be
17951 used soon, LEA is better and otherwise ADD is better. */
17954 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17956 unsigned int regno0
= true_regnum (operands
[0]);
17957 unsigned int regno1
= true_regnum (operands
[1]);
17958 unsigned int regno2
= true_regnum (operands
[2]);
17960 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17961 if (regno0
!= regno1
&& regno0
!= regno2
)
17964 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17967 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
17970 /* Return true if destination reg of SET_BODY is shift count of
17974 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17980 /* Retrieve destination of SET_BODY. */
17981 switch (GET_CODE (set_body
))
17984 set_dest
= SET_DEST (set_body
);
17985 if (!set_dest
|| !REG_P (set_dest
))
17989 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17990 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17998 /* Retrieve shift count of USE_BODY. */
17999 switch (GET_CODE (use_body
))
18002 shift_rtx
= XEXP (use_body
, 1);
18005 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
18006 if (ix86_dep_by_shift_count_body (set_body
,
18007 XVECEXP (use_body
, 0, i
)))
18015 && (GET_CODE (shift_rtx
) == ASHIFT
18016 || GET_CODE (shift_rtx
) == LSHIFTRT
18017 || GET_CODE (shift_rtx
) == ASHIFTRT
18018 || GET_CODE (shift_rtx
) == ROTATE
18019 || GET_CODE (shift_rtx
) == ROTATERT
))
18021 rtx shift_count
= XEXP (shift_rtx
, 1);
18023 /* Return true if shift count is dest of SET_BODY. */
18024 if (REG_P (shift_count
))
18026 /* Add check since it can be invoked before register
18027 allocation in pre-reload schedule. */
18028 if (reload_completed
18029 && true_regnum (set_dest
) == true_regnum (shift_count
))
18031 else if (REGNO(set_dest
) == REGNO(shift_count
))
18039 /* Return true if destination reg of SET_INSN is shift count of
18043 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
18045 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
18046 PATTERN (use_insn
));
18049 /* Return TRUE or FALSE depending on whether the unary operator meets the
18050 appropriate constraints. */
18053 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
18054 enum machine_mode mode ATTRIBUTE_UNUSED
,
18057 /* If one of operands is memory, source and destination must match. */
18058 if ((MEM_P (operands
[0])
18059 || MEM_P (operands
[1]))
18060 && ! rtx_equal_p (operands
[0], operands
[1]))
18065 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18066 are ok, keeping in mind the possible movddup alternative. */
18069 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
18071 if (MEM_P (operands
[0]))
18072 return rtx_equal_p (operands
[0], operands
[1 + high
]);
18073 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
18074 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
18078 /* Post-reload splitter for converting an SF or DFmode value in an
18079 SSE register into an unsigned SImode. */
18082 ix86_split_convert_uns_si_sse (rtx operands
[])
18084 enum machine_mode vecmode
;
18085 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
18087 large
= operands
[1];
18088 zero_or_two31
= operands
[2];
18089 input
= operands
[3];
18090 two31
= operands
[4];
18091 vecmode
= GET_MODE (large
);
18092 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
18094 /* Load up the value into the low element. We must ensure that the other
18095 elements are valid floats -- zero is the easiest such value. */
18098 if (vecmode
== V4SFmode
)
18099 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
18101 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
18105 input
= gen_rtx_REG (vecmode
, REGNO (input
));
18106 emit_move_insn (value
, CONST0_RTX (vecmode
));
18107 if (vecmode
== V4SFmode
)
18108 emit_insn (gen_sse_movss (value
, value
, input
));
18110 emit_insn (gen_sse2_movsd (value
, value
, input
));
18113 emit_move_insn (large
, two31
);
18114 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
18116 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
18117 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
18119 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
18120 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
18122 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
18123 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
18125 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
18126 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
18128 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
18129 if (vecmode
== V4SFmode
)
18130 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
18132 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
18135 emit_insn (gen_xorv4si3 (value
, value
, large
));
18138 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18139 Expects the 64-bit DImode to be supplied in a pair of integral
18140 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18141 -mfpmath=sse, !optimize_size only. */
18144 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
18146 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
18147 rtx int_xmm
, fp_xmm
;
18148 rtx biases
, exponents
;
18151 int_xmm
= gen_reg_rtx (V4SImode
);
18152 if (TARGET_INTER_UNIT_MOVES_TO_VEC
)
18153 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
18154 else if (TARGET_SSE_SPLIT_REGS
)
18156 emit_clobber (int_xmm
);
18157 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
18161 x
= gen_reg_rtx (V2DImode
);
18162 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
18163 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
18166 x
= gen_rtx_CONST_VECTOR (V4SImode
,
18167 gen_rtvec (4, GEN_INT (0x43300000UL
),
18168 GEN_INT (0x45300000UL
),
18169 const0_rtx
, const0_rtx
));
18170 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
18172 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18173 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
18175 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18176 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18177 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18178 (0x1.0p84 + double(fp_value_hi_xmm)).
18179 Note these exponents differ by 32. */
18181 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
18183 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18184 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18185 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
18186 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
18187 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
18188 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
18189 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
18190 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
18191 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
18193 /* Add the upper and lower DFmode values together. */
18195 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
18198 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
18199 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
18200 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
18203 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
18206 /* Not used, but eases macroization of patterns. */
18208 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
18209 rtx input ATTRIBUTE_UNUSED
)
18211 gcc_unreachable ();
18214 /* Convert an unsigned SImode value into a DFmode. Only currently used
18215 for SSE, but applicable anywhere. */
18218 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
18220 REAL_VALUE_TYPE TWO31r
;
18223 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
18224 NULL
, 1, OPTAB_DIRECT
);
18226 fp
= gen_reg_rtx (DFmode
);
18227 emit_insn (gen_floatsidf2 (fp
, x
));
18229 real_ldexp (&TWO31r
, &dconst1
, 31);
18230 x
= const_double_from_real_value (TWO31r
, DFmode
);
18232 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
18234 emit_move_insn (target
, x
);
18237 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18238 32-bit mode; otherwise we have a direct convert instruction. */
18241 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
18243 REAL_VALUE_TYPE TWO32r
;
18244 rtx fp_lo
, fp_hi
, x
;
18246 fp_lo
= gen_reg_rtx (DFmode
);
18247 fp_hi
= gen_reg_rtx (DFmode
);
18249 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
18251 real_ldexp (&TWO32r
, &dconst1
, 32);
18252 x
= const_double_from_real_value (TWO32r
, DFmode
);
18253 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
18255 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
18257 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18260 emit_move_insn (target
, x
);
18263 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18264 For x86_32, -mfpmath=sse, !optimize_size only. */
18266 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
18268 REAL_VALUE_TYPE ONE16r
;
18269 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
18271 real_ldexp (&ONE16r
, &dconst1
, 16);
18272 x
= const_double_from_real_value (ONE16r
, SFmode
);
18273 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
18274 NULL
, 0, OPTAB_DIRECT
);
18275 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
18276 NULL
, 0, OPTAB_DIRECT
);
18277 fp_hi
= gen_reg_rtx (SFmode
);
18278 fp_lo
= gen_reg_rtx (SFmode
);
18279 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
18280 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
18281 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
18283 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18285 if (!rtx_equal_p (target
, fp_hi
))
18286 emit_move_insn (target
, fp_hi
);
18289 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18290 a vector of unsigned ints VAL to vector of floats TARGET. */
18293 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
18296 REAL_VALUE_TYPE TWO16r
;
18297 enum machine_mode intmode
= GET_MODE (val
);
18298 enum machine_mode fltmode
= GET_MODE (target
);
18299 rtx (*cvt
) (rtx
, rtx
);
18301 if (intmode
== V4SImode
)
18302 cvt
= gen_floatv4siv4sf2
;
18304 cvt
= gen_floatv8siv8sf2
;
18305 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
18306 tmp
[0] = force_reg (intmode
, tmp
[0]);
18307 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
18309 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
18310 NULL_RTX
, 1, OPTAB_DIRECT
);
18311 tmp
[3] = gen_reg_rtx (fltmode
);
18312 emit_insn (cvt (tmp
[3], tmp
[1]));
18313 tmp
[4] = gen_reg_rtx (fltmode
);
18314 emit_insn (cvt (tmp
[4], tmp
[2]));
18315 real_ldexp (&TWO16r
, &dconst1
, 16);
18316 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
18317 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
18318 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
18320 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
18322 if (tmp
[7] != target
)
18323 emit_move_insn (target
, tmp
[7]);
18326 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18327 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18328 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18329 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18332 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
18334 REAL_VALUE_TYPE TWO31r
;
18335 rtx two31r
, tmp
[4];
18336 enum machine_mode mode
= GET_MODE (val
);
18337 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
18338 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
18339 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
18342 for (i
= 0; i
< 3; i
++)
18343 tmp
[i
] = gen_reg_rtx (mode
);
18344 real_ldexp (&TWO31r
, &dconst1
, 31);
18345 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
18346 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
18347 two31r
= force_reg (mode
, two31r
);
18350 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
18351 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
18352 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
18353 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
18354 default: gcc_unreachable ();
18356 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
18357 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
18358 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
18360 if (intmode
== V4SImode
|| TARGET_AVX2
)
18361 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
18362 gen_lowpart (intmode
, tmp
[0]),
18363 GEN_INT (31), NULL_RTX
, 0,
18367 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
18368 two31
= ix86_build_const_vector (intmode
, 1, two31
);
18369 *xorp
= expand_simple_binop (intmode
, AND
,
18370 gen_lowpart (intmode
, tmp
[0]),
18371 two31
, NULL_RTX
, 0,
18374 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
18378 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18379 then replicate the value for all elements of the vector
18383 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
18387 enum machine_mode scalar_mode
;
18404 n_elt
= GET_MODE_NUNITS (mode
);
18405 v
= rtvec_alloc (n_elt
);
18406 scalar_mode
= GET_MODE_INNER (mode
);
18408 RTVEC_ELT (v
, 0) = value
;
18410 for (i
= 1; i
< n_elt
; ++i
)
18411 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
18413 return gen_rtx_CONST_VECTOR (mode
, v
);
18416 gcc_unreachable ();
18420 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18421 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18422 for an SSE register. If VECT is true, then replicate the mask for
18423 all elements of the vector register. If INVERT is true, then create
18424 a mask excluding the sign bit. */
18427 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
18429 enum machine_mode vec_mode
, imode
;
18430 HOST_WIDE_INT hi
, lo
;
18435 /* Find the sign bit, sign extended to 2*HWI. */
18443 mode
= GET_MODE_INNER (mode
);
18445 lo
= 0x80000000, hi
= lo
< 0;
18453 mode
= GET_MODE_INNER (mode
);
18455 if (HOST_BITS_PER_WIDE_INT
>= 64)
18456 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18458 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18463 vec_mode
= VOIDmode
;
18464 if (HOST_BITS_PER_WIDE_INT
>= 64)
18467 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18474 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18478 lo
= ~lo
, hi
= ~hi
;
18484 mask
= immed_double_const (lo
, hi
, imode
);
18486 vec
= gen_rtvec (2, v
, mask
);
18487 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18488 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18495 gcc_unreachable ();
18499 lo
= ~lo
, hi
= ~hi
;
18501 /* Force this value into the low part of a fp vector constant. */
18502 mask
= immed_double_const (lo
, hi
, imode
);
18503 mask
= gen_lowpart (mode
, mask
);
18505 if (vec_mode
== VOIDmode
)
18506 return force_reg (mode
, mask
);
18508 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18509 return force_reg (vec_mode
, v
);
18512 /* Generate code for floating point ABS or NEG. */
18515 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18518 rtx mask
, set
, dst
, src
;
18519 bool use_sse
= false;
18520 bool vector_mode
= VECTOR_MODE_P (mode
);
18521 enum machine_mode vmode
= mode
;
18525 else if (mode
== TFmode
)
18527 else if (TARGET_SSE_MATH
)
18529 use_sse
= SSE_FLOAT_MODE_P (mode
);
18530 if (mode
== SFmode
)
18532 else if (mode
== DFmode
)
18536 /* NEG and ABS performed with SSE use bitwise mask operations.
18537 Create the appropriate mask now. */
18539 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18546 set
= gen_rtx_fmt_e (code
, mode
, src
);
18547 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18554 use
= gen_rtx_USE (VOIDmode
, mask
);
18556 par
= gen_rtvec (2, set
, use
);
18559 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18560 par
= gen_rtvec (3, set
, use
, clob
);
18562 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18568 /* Expand a copysign operation. Special case operand 0 being a constant. */
18571 ix86_expand_copysign (rtx operands
[])
18573 enum machine_mode mode
, vmode
;
18574 rtx dest
, op0
, op1
, mask
, nmask
;
18576 dest
= operands
[0];
18580 mode
= GET_MODE (dest
);
18582 if (mode
== SFmode
)
18584 else if (mode
== DFmode
)
18589 if (GET_CODE (op0
) == CONST_DOUBLE
)
18591 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18593 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18594 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18596 if (mode
== SFmode
|| mode
== DFmode
)
18598 if (op0
== CONST0_RTX (mode
))
18599 op0
= CONST0_RTX (vmode
);
18602 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18604 op0
= force_reg (vmode
, v
);
18607 else if (op0
!= CONST0_RTX (mode
))
18608 op0
= force_reg (mode
, op0
);
18610 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18612 if (mode
== SFmode
)
18613 copysign_insn
= gen_copysignsf3_const
;
18614 else if (mode
== DFmode
)
18615 copysign_insn
= gen_copysigndf3_const
;
18617 copysign_insn
= gen_copysigntf3_const
;
18619 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18623 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18625 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18626 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18628 if (mode
== SFmode
)
18629 copysign_insn
= gen_copysignsf3_var
;
18630 else if (mode
== DFmode
)
18631 copysign_insn
= gen_copysigndf3_var
;
18633 copysign_insn
= gen_copysigntf3_var
;
18635 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18639 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18640 be a constant, and so has already been expanded into a vector constant. */
18643 ix86_split_copysign_const (rtx operands
[])
18645 enum machine_mode mode
, vmode
;
18646 rtx dest
, op0
, mask
, x
;
18648 dest
= operands
[0];
18650 mask
= operands
[3];
18652 mode
= GET_MODE (dest
);
18653 vmode
= GET_MODE (mask
);
18655 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18656 x
= gen_rtx_AND (vmode
, dest
, mask
);
18657 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18659 if (op0
!= CONST0_RTX (vmode
))
18661 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18662 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18666 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18667 so we have to do two masks. */
18670 ix86_split_copysign_var (rtx operands
[])
18672 enum machine_mode mode
, vmode
;
18673 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18675 dest
= operands
[0];
18676 scratch
= operands
[1];
18679 nmask
= operands
[4];
18680 mask
= operands
[5];
18682 mode
= GET_MODE (dest
);
18683 vmode
= GET_MODE (mask
);
18685 if (rtx_equal_p (op0
, op1
))
18687 /* Shouldn't happen often (it's useless, obviously), but when it does
18688 we'd generate incorrect code if we continue below. */
18689 emit_move_insn (dest
, op0
);
18693 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18695 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18697 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18698 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18701 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18702 x
= gen_rtx_NOT (vmode
, dest
);
18703 x
= gen_rtx_AND (vmode
, x
, op0
);
18704 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18708 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18710 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18712 else /* alternative 2,4 */
18714 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18715 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18716 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18718 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18720 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18722 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18723 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18725 else /* alternative 3,4 */
18727 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18729 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18730 x
= gen_rtx_AND (vmode
, dest
, op0
);
18732 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18735 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18736 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18739 /* Return TRUE or FALSE depending on whether the first SET in INSN
18740 has source and destination with matching CC modes, and that the
18741 CC mode is at least as constrained as REQ_MODE. */
18744 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18747 enum machine_mode set_mode
;
18749 set
= PATTERN (insn
);
18750 if (GET_CODE (set
) == PARALLEL
)
18751 set
= XVECEXP (set
, 0, 0);
18752 gcc_assert (GET_CODE (set
) == SET
);
18753 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18755 set_mode
= GET_MODE (SET_DEST (set
));
18759 if (req_mode
!= CCNOmode
18760 && (req_mode
!= CCmode
18761 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18765 if (req_mode
== CCGCmode
)
18769 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18773 if (req_mode
== CCZmode
)
18783 if (set_mode
!= req_mode
)
18788 gcc_unreachable ();
18791 return GET_MODE (SET_SRC (set
)) == set_mode
;
18794 /* Generate insn patterns to do an integer compare of OPERANDS. */
18797 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18799 enum machine_mode cmpmode
;
18802 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18803 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18805 /* This is very simple, but making the interface the same as in the
18806 FP case makes the rest of the code easier. */
18807 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18808 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18810 /* Return the test that should be put into the flags user, i.e.
18811 the bcc, scc, or cmov instruction. */
18812 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18815 /* Figure out whether to use ordered or unordered fp comparisons.
18816 Return the appropriate mode to use. */
18819 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18821 /* ??? In order to make all comparisons reversible, we do all comparisons
18822 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18823 all forms trapping and nontrapping comparisons, we can make inequality
18824 comparisons trapping again, since it results in better code when using
18825 FCOM based compares. */
18826 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
18830 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18832 enum machine_mode mode
= GET_MODE (op0
);
18834 if (SCALAR_FLOAT_MODE_P (mode
))
18836 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18837 return ix86_fp_compare_mode (code
);
18842 /* Only zero flag is needed. */
18843 case EQ
: /* ZF=0 */
18844 case NE
: /* ZF!=0 */
18846 /* Codes needing carry flag. */
18847 case GEU
: /* CF=0 */
18848 case LTU
: /* CF=1 */
18849 /* Detect overflow checks. They need just the carry flag. */
18850 if (GET_CODE (op0
) == PLUS
18851 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18855 case GTU
: /* CF=0 & ZF=0 */
18856 case LEU
: /* CF=1 | ZF=1 */
18857 /* Detect overflow checks. They need just the carry flag. */
18858 if (GET_CODE (op0
) == MINUS
18859 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18863 /* Codes possibly doable only with sign flag when
18864 comparing against zero. */
18865 case GE
: /* SF=OF or SF=0 */
18866 case LT
: /* SF<>OF or SF=1 */
18867 if (op1
== const0_rtx
)
18870 /* For other cases Carry flag is not required. */
18872 /* Codes doable only with sign flag when comparing
18873 against zero, but we miss jump instruction for it
18874 so we need to use relational tests against overflow
18875 that thus needs to be zero. */
18876 case GT
: /* ZF=0 & SF=OF */
18877 case LE
: /* ZF=1 | SF<>OF */
18878 if (op1
== const0_rtx
)
18882 /* strcmp pattern do (use flags) and combine may ask us for proper
18887 gcc_unreachable ();
/* NOTE(review): only the comment and the parameter list of
   ix86_fixed_condition_code_regs survive here — original lines
   18895-18899 (the body assigning through P1/P2 and the return) were
   lost in extraction.  Text unchanged; comments added.  */
18891 /* Return the fixed registers used for condition codes. */
18894 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
/* NOTE(review): extraction-damaged fragment of ix86_cc_modes_compatible.
   The return statements between the visible conditions (original lines
   18916, 18919, 18921-18925, 18927-18955) are missing; from the visible
   text, CCGCmode/CCGOCmode pairs and CCZmode-vs-CCGC/CCGOC pairs are
   the recognized compatible combinations.  Text unchanged; comments
   added.  */
18901 /* If two condition code modes are compatible, return a condition code
18902 mode which is compatible with both. Otherwise, return
18905 static enum machine_mode
18906 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
/* Non-CC modes are handled first (result line missing from view).  */
18911 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
18914 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18915 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
18918 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18920 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18926 gcc_unreachable ();
18956 /* These are only compatible with themselves, which we already
/* NOTE(review): ix86_fp_swap_condition, mostly intact apart from the
   missing opening brace / switch head (original lines 18970-18972).
   Maps GT->UNLT, GE->UNLE, UNLT->GT, UNLE->GE, but refuses (returns
   UNKNOWN) under TARGET_IEEE_FP because those swaps change orderedness;
   all other codes fall through to plain swap_condition.  Text
   unchanged; comments added.  */
18963 /* Return a comparison we can do and that it is equivalent to
18964 swap_condition (code) apart possibly from orderedness.
18965 But, never change orderedness if TARGET_IEEE_FP, returning
18966 UNKNOWN in that case if necessary. */
18968 static enum rtx_code
18969 ix86_fp_swap_condition (enum rtx_code code
)
18973 case GT
: /* GTU - CF=0 & ZF=0 */
18974 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18975 case GE
: /* GEU - CF=0 */
18976 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18977 case UNLT
: /* LTU - CF=1 */
18978 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18979 case UNLE
: /* LEU - CF=1 | ZF=1 */
18980 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
/* Default: orderedness-preserving swap.  */
18982 return swap_condition (code
);
/* NOTE(review): extraction-damaged fragment of ix86_fp_comparison_cost.
   The switch over CODE that selects arith_cost (original lines
   18997-19012, 19014-19016, 19018-19019) and the IX86_FPCMP_ARITH
   default return are missing.  Visible logic: arith_cost is 4-6
   depending on TARGET_IEEE_FP, and COMI/SAHF strategies cost 2-4
   instructions depending on whether the arithmetic fallback would
   exceed 4.  Text unchanged; comments added.  */
18986 /* Return cost of comparison CODE using the best strategy for performance.
18987 All following functions do use number of instructions as a cost metrics.
18988 In future this should be tweaked to compute bytes for optimize_size and
18989 take into account performance of various instructions on various CPUs. */
18992 ix86_fp_comparison_cost (enum rtx_code code
)
18996 /* The cost of code using bit-twiddling on %ah. */
19013 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
19017 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
19020 gcc_unreachable ();
19023 switch (ix86_fp_comparison_strategy (code
))
19025 case IX86_FPCMP_COMI
:
19026 return arith_cost
> 4 ? 3 : 2;
19027 case IX86_FPCMP_SAHF
:
19028 return arith_cost
> 4 ? 4 : 3;
/* NOTE(review): ix86_fp_comparison_strategy, missing its opening brace
   and the condition guarding the COMI return (original lines
   19040, 19042-19043 — presumably a TARGET_CMOVE check; confirm against
   upstream).  Visible logic: prefer fcomi, then fnstsw+sahf when
   TARGET_SAHF and either TARGET_USE_SAHF or optimizing for size, else
   fall back to the arithmetic bit-twiddling strategy.  Text unchanged;
   comments added.  */
19034 /* Return strategy to use for floating-point. We assume that fcomi is always
19035 preferrable where available, since that is also true when looking at size
19036 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19038 enum ix86_fpcmp_strategy
19039 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
19041 /* Do fcomi/sahf based test when profitable. */
19044 return IX86_FPCMP_COMI
;
19046 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
19047 return IX86_FPCMP_SAHF
;
19049 return IX86_FPCMP_ARITH
;
/* NOTE(review): extraction-damaged fragment of
   ix86_prepare_fp_compare_args.  Several interior lines are missing
   (e.g. 19068-19070 including the SSE branch head, 19088, 19090-19091,
   19094-19101, 19105, 19107, 19109-19111, the final `return code`), so
   conditions below appear truncated.  Text unchanged; comments added.  */
19052 /* Swap, force into registers, or otherwise massage the two operands
19053 to a fp comparison. The operands are updated in place; the new
19054 comparison code is returned. */
19056 static enum rtx_code
19057 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
19059 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
19060 rtx op0
= *pop0
, op1
= *pop1
;
19061 enum machine_mode op_mode
= GET_MODE (op0
);
/* is_sse: operands live in SSE registers when SSE math covers this
   FP mode.  */
19062 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
19064 /* All of the unordered compare instructions only work on registers.
19065 The same is true of the fcomi compare instructions. The XFmode
19066 compare instructions require registers except when comparing
19067 against zero or when converting operand 1 from fixed point to
/* Force both operands into registers for unordered (CCFPUmode),
   non-trivial XFmode, or COMI-strategy compares.  */
19071 && (fpcmp_mode
== CCFPUmode
19072 || (op_mode
== XFmode
19073 && ! (standard_80387_constant_p (op0
) == 1
19074 || standard_80387_constant_p (op1
) == 1)
19075 && GET_CODE (op1
) != FLOAT
)
19076 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
19078 op0
= force_reg (op_mode
, op0
);
19079 op1
= force_reg (op_mode
, op1
);
19083 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19084 things around if they appear profitable, otherwise force op0
19085 into a register. */
19087 if (standard_80387_constant_p (op0
) == 0
19089 && ! (standard_80387_constant_p (op1
) == 0
/* Try an orderedness-safe swap via ix86_fp_swap_condition before
   forcing op0 into a register.  */
19092 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
19093 if (new_code
!= UNKNOWN
)
19096 tmp
= op0
, op0
= op1
, op1
= tmp
;
19102 op0
= force_reg (op_mode
, op0
);
/* Constants: load via the 80387 constant pool or memory, else force
   into a register.  */
19104 if (CONSTANT_P (op1
))
19106 int tmp
= standard_80387_constant_p (op1
);
19108 op1
= validize_mem (force_const_mem (op_mode
, op1
));
19112 op1
= force_reg (op_mode
, op1
);
19115 op1
= force_reg (op_mode
, op1
);
19119 /* Try to rearrange the comparison to make it cheaper. */
19120 if (ix86_fp_comparison_cost (code
)
19121 > ix86_fp_comparison_cost (swap_condition (code
))
19122 && (REG_P (op1
) || can_create_pseudo_p ()))
19125 tmp
= op0
, op0
= op1
, op1
= tmp
;
19126 code
= swap_condition (code
);
19128 op0
= force_reg (op_mode
, op0
);
/* NOTE(review): only the header comment and signature of
   ix86_fp_compare_code_to_integer remain — the switch mapping FP codes
   to integer condition codes (original lines 19142-19168) was lost in
   extraction.  Text unchanged; comments added.  */
19136 /* Convert comparison codes we use to represent FP comparison to integer
19137 code that will result in proper branch. Return UNKNOWN if no such code
19141 ix86_fp_compare_code_to_integer (enum rtx_code code
)
19170 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19173 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
19175 enum machine_mode fpcmp_mode
, intcmp_mode
;
19178 fpcmp_mode
= ix86_fp_compare_mode (code
);
19179 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
19181 /* Do fcomi/sahf based test when profitable. */
19182 switch (ix86_fp_comparison_strategy (code
))
19184 case IX86_FPCMP_COMI
:
19185 intcmp_mode
= fpcmp_mode
;
19186 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19187 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19192 case IX86_FPCMP_SAHF
:
19193 intcmp_mode
= fpcmp_mode
;
19194 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19195 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19199 scratch
= gen_reg_rtx (HImode
);
19200 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
19201 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
19204 case IX86_FPCMP_ARITH
:
19205 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19206 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19207 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
19209 scratch
= gen_reg_rtx (HImode
);
19210 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
19212 /* In the unordered case, we have to check C2 for NaN's, which
19213 doesn't happen to work out to anything nice combination-wise.
19214 So do some bit twiddling on the value we've got in AH to come
19215 up with an appropriate set of condition codes. */
19217 intcmp_mode
= CCNOmode
;
19222 if (code
== GT
|| !TARGET_IEEE_FP
)
19224 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19229 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19230 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19231 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
19232 intcmp_mode
= CCmode
;
19238 if (code
== LT
&& TARGET_IEEE_FP
)
19240 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19241 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
19242 intcmp_mode
= CCmode
;
19247 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
19253 if (code
== GE
|| !TARGET_IEEE_FP
)
19255 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
19260 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19261 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
19267 if (code
== LE
&& TARGET_IEEE_FP
)
19269 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19270 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19271 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19272 intcmp_mode
= CCmode
;
19277 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19283 if (code
== EQ
&& TARGET_IEEE_FP
)
19285 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19286 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19287 intcmp_mode
= CCmode
;
19292 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19298 if (code
== NE
&& TARGET_IEEE_FP
)
19300 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19301 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
19307 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19313 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19317 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19322 gcc_unreachable ();
19330 /* Return the test that should be put into the flags user, i.e.
19331 the bcc, scc, or cmov instruction. */
19332 return gen_rtx_fmt_ee (code
, VOIDmode
,
19333 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
/* NOTE(review): ix86_expand_compare fragment; the return type line,
   braces, and the final `return ret;` (original lines 19336-19341,
   19344, 19346, 19349-19350, 19352-19353) are missing.  Dispatch is
   visible: MODE_CC operands are wrapped directly, scalar FP goes
   through ix86_expand_fp_compare, everything else through
   ix86_expand_int_compare.  Text unchanged; comments added.  */
19338 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
/* Already-computed flags: just build the comparison rtx.  */
19342 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
19343 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
19345 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
19347 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
19348 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19351 ret
= ix86_expand_int_compare (code
, op0
, op1
);
19357 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
19359 enum machine_mode mode
= GET_MODE (op0
);
19371 tmp
= ix86_expand_compare (code
, op0
, op1
);
19372 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19373 gen_rtx_LABEL_REF (VOIDmode
, label
),
19375 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19382 /* Expand DImode branch into multiple compare+branch. */
19384 rtx lo
[2], hi
[2], label2
;
19385 enum rtx_code code1
, code2
, code3
;
19386 enum machine_mode submode
;
19388 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
19390 tmp
= op0
, op0
= op1
, op1
= tmp
;
19391 code
= swap_condition (code
);
19394 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
19395 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
19397 submode
= mode
== DImode
? SImode
: DImode
;
19399 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19400 avoid two branches. This costs one extra insn, so disable when
19401 optimizing for size. */
19403 if ((code
== EQ
|| code
== NE
)
19404 && (!optimize_insn_for_size_p ()
19405 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
19410 if (hi
[1] != const0_rtx
)
19411 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
19412 NULL_RTX
, 0, OPTAB_WIDEN
);
19415 if (lo
[1] != const0_rtx
)
19416 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
19417 NULL_RTX
, 0, OPTAB_WIDEN
);
19419 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
19420 NULL_RTX
, 0, OPTAB_WIDEN
);
19422 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
19426 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19427 op1 is a constant and the low word is zero, then we can just
19428 examine the high word. Similarly for low word -1 and
19429 less-or-equal-than or greater-than. */
19431 if (CONST_INT_P (hi
[1]))
19434 case LT
: case LTU
: case GE
: case GEU
:
19435 if (lo
[1] == const0_rtx
)
19437 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19441 case LE
: case LEU
: case GT
: case GTU
:
19442 if (lo
[1] == constm1_rtx
)
19444 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19452 /* Otherwise, we need two or three jumps. */
19454 label2
= gen_label_rtx ();
19457 code2
= swap_condition (code
);
19458 code3
= unsigned_condition (code
);
19462 case LT
: case GT
: case LTU
: case GTU
:
19465 case LE
: code1
= LT
; code2
= GT
; break;
19466 case GE
: code1
= GT
; code2
= LT
; break;
19467 case LEU
: code1
= LTU
; code2
= GTU
; break;
19468 case GEU
: code1
= GTU
; code2
= LTU
; break;
19470 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19471 case NE
: code2
= UNKNOWN
; break;
19474 gcc_unreachable ();
19479 * if (hi(a) < hi(b)) goto true;
19480 * if (hi(a) > hi(b)) goto false;
19481 * if (lo(a) < lo(b)) goto true;
19485 if (code1
!= UNKNOWN
)
19486 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19487 if (code2
!= UNKNOWN
)
19488 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19490 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19492 if (code2
!= UNKNOWN
)
19493 emit_label (label2
);
19498 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
/* NOTE(review): ix86_split_fp_branch fragment; return type, braces and
   a few interior lines (19507-19510, 19512-19513, 19515-19518,
   19520-19521, 19523, 19525, 19527) are missing.  Visible logic: if
   the fall-through target is target2 rather than pc_rtx, the condition
   is reversed (maybe_unordered to stay FP-safe), the compare is
   expanded, a pushed operand is popped off the 80387 stack, and the
   conditional jump is emitted with an optional branch-probability
   note.  Text unchanged; comments added.  */
19503 /* Split branch based on floating point condition. */
19505 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19506 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19511 if (target2
!= pc_rtx
)
19514 code
= reverse_condition_maybe_unordered (code
);
19519 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19522 /* Remove pushed operand from stack. */
19524 ix86_free_from_memory (GET_MODE (pushed
));
19526 i
= emit_jump_insn (gen_rtx_SET
19528 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19529 condition
, target1
, target2
)));
/* split_branch_probability < 0 means "unknown" — only attach a note
   when a probability was recorded.  */
19530 if (split_branch_probability
>= 0)
19531 add_int_reg_note (i
, REG_BR_PROB
, split_branch_probability
);
/* NOTE(review): ix86_expand_setcc fragment (return type, braces and the
   declaration of `ret` — original lines 19533-19538, 19540 — missing).
   Expands a setcc: DEST must be QImode; the comparison is expanded,
   retagged to QImode, and stored into DEST.  Text unchanged; comments
   added.  */
19535 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19539 gcc_assert (GET_MODE (dest
) == QImode
);
19541 ret
= ix86_expand_compare (code
, op0
, op1
);
/* The comparison rtx comes back VOIDmode; setcc wants a QImode value.  */
19542 PUT_MODE (ret
, QImode
);
19543 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19546 /* Expand comparison setting or clearing carry flag. Return true when
19547 successful and set pop for the operation. */
19549 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19551 enum machine_mode mode
=
19552 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19554 /* Do not handle double-mode compares that go through special path. */
19555 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19558 if (SCALAR_FLOAT_MODE_P (mode
))
19560 rtx compare_op
, compare_seq
;
19562 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19564 /* Shortcut: following common codes never translate
19565 into carry flag compares. */
19566 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19567 || code
== ORDERED
|| code
== UNORDERED
)
19570 /* These comparisons require zero flag; swap operands so they won't. */
19571 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19572 && !TARGET_IEEE_FP
)
19577 code
= swap_condition (code
);
19580 /* Try to expand the comparison and verify that we end up with
19581 carry flag based comparison. This fails to be true only when
19582 we decide to expand comparison using arithmetic that is not
19583 too common scenario. */
19585 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19586 compare_seq
= get_insns ();
19589 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19590 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19591 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19593 code
= GET_CODE (compare_op
);
19595 if (code
!= LTU
&& code
!= GEU
)
19598 emit_insn (compare_seq
);
19603 if (!INTEGRAL_MODE_P (mode
))
19612 /* Convert a==0 into (unsigned)a<1. */
19615 if (op1
!= const0_rtx
)
19618 code
= (code
== EQ
? LTU
: GEU
);
19621 /* Convert a>b into b<a or a>=b-1. */
19624 if (CONST_INT_P (op1
))
19626 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19627 /* Bail out on overflow. We still can swap operands but that
19628 would force loading of the constant into register. */
19629 if (op1
== const0_rtx
19630 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19632 code
= (code
== GTU
? GEU
: LTU
);
19639 code
= (code
== GTU
? LTU
: GEU
);
19643 /* Convert a>=0 into (unsigned)a<0x80000000. */
19646 if (mode
== DImode
|| op1
!= const0_rtx
)
19648 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19649 code
= (code
== LT
? GEU
: LTU
);
19653 if (mode
== DImode
|| op1
!= constm1_rtx
)
19655 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19656 code
= (code
== LE
? GEU
: LTU
);
19662 /* Swapping operands may cause constant to appear as first operand. */
19663 if (!nonimmediate_operand (op0
, VOIDmode
))
19665 if (!can_create_pseudo_p ())
19667 op0
= force_reg (mode
, op0
);
19669 *pop
= ix86_expand_compare (code
, op0
, op1
);
19670 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19675 ix86_expand_int_movcc (rtx operands
[])
19677 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19678 rtx compare_seq
, compare_op
;
19679 enum machine_mode mode
= GET_MODE (operands
[0]);
19680 bool sign_bit_compare_p
= false;
19681 rtx op0
= XEXP (operands
[1], 0);
19682 rtx op1
= XEXP (operands
[1], 1);
19684 if (GET_MODE (op0
) == TImode
19685 || (GET_MODE (op0
) == DImode
19690 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19691 compare_seq
= get_insns ();
19694 compare_code
= GET_CODE (compare_op
);
19696 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19697 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19698 sign_bit_compare_p
= true;
19700 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19701 HImode insns, we'd be swallowed in word prefix ops. */
19703 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19704 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19705 && CONST_INT_P (operands
[2])
19706 && CONST_INT_P (operands
[3]))
19708 rtx out
= operands
[0];
19709 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19710 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19711 HOST_WIDE_INT diff
;
19714 /* Sign bit compares are better done using shifts than we do by using
19716 if (sign_bit_compare_p
19717 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19719 /* Detect overlap between destination and compare sources. */
19722 if (!sign_bit_compare_p
)
19725 bool fpcmp
= false;
19727 compare_code
= GET_CODE (compare_op
);
19729 flags
= XEXP (compare_op
, 0);
19731 if (GET_MODE (flags
) == CCFPmode
19732 || GET_MODE (flags
) == CCFPUmode
)
19736 = ix86_fp_compare_code_to_integer (compare_code
);
19739 /* To simplify rest of code, restrict to the GEU case. */
19740 if (compare_code
== LTU
)
19742 HOST_WIDE_INT tmp
= ct
;
19745 compare_code
= reverse_condition (compare_code
);
19746 code
= reverse_condition (code
);
19751 PUT_CODE (compare_op
,
19752 reverse_condition_maybe_unordered
19753 (GET_CODE (compare_op
)));
19755 PUT_CODE (compare_op
,
19756 reverse_condition (GET_CODE (compare_op
)));
19760 if (reg_overlap_mentioned_p (out
, op0
)
19761 || reg_overlap_mentioned_p (out
, op1
))
19762 tmp
= gen_reg_rtx (mode
);
19764 if (mode
== DImode
)
19765 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19767 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19768 flags
, compare_op
));
19772 if (code
== GT
|| code
== GE
)
19773 code
= reverse_condition (code
);
19776 HOST_WIDE_INT tmp
= ct
;
19781 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19794 tmp
= expand_simple_binop (mode
, PLUS
,
19796 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19807 tmp
= expand_simple_binop (mode
, IOR
,
19809 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19811 else if (diff
== -1 && ct
)
19821 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19823 tmp
= expand_simple_binop (mode
, PLUS
,
19824 copy_rtx (tmp
), GEN_INT (cf
),
19825 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19833 * andl cf - ct, dest
19843 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19846 tmp
= expand_simple_binop (mode
, AND
,
19848 gen_int_mode (cf
- ct
, mode
),
19849 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19851 tmp
= expand_simple_binop (mode
, PLUS
,
19852 copy_rtx (tmp
), GEN_INT (ct
),
19853 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19856 if (!rtx_equal_p (tmp
, out
))
19857 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19864 enum machine_mode cmp_mode
= GET_MODE (op0
);
19867 tmp
= ct
, ct
= cf
, cf
= tmp
;
19870 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19872 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19874 /* We may be reversing unordered compare to normal compare, that
19875 is not valid in general (we may convert non-trapping condition
19876 to trapping one), however on i386 we currently emit all
19877 comparisons unordered. */
19878 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19879 code
= reverse_condition_maybe_unordered (code
);
19883 compare_code
= reverse_condition (compare_code
);
19884 code
= reverse_condition (code
);
19888 compare_code
= UNKNOWN
;
19889 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19890 && CONST_INT_P (op1
))
19892 if (op1
== const0_rtx
19893 && (code
== LT
|| code
== GE
))
19894 compare_code
= code
;
19895 else if (op1
== constm1_rtx
)
19899 else if (code
== GT
)
19904 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19905 if (compare_code
!= UNKNOWN
19906 && GET_MODE (op0
) == GET_MODE (out
)
19907 && (cf
== -1 || ct
== -1))
19909 /* If lea code below could be used, only optimize
19910 if it results in a 2 insn sequence. */
19912 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19913 || diff
== 3 || diff
== 5 || diff
== 9)
19914 || (compare_code
== LT
&& ct
== -1)
19915 || (compare_code
== GE
&& cf
== -1))
19918 * notl op1 (if necessary)
19926 code
= reverse_condition (code
);
19929 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19931 out
= expand_simple_binop (mode
, IOR
,
19933 out
, 1, OPTAB_DIRECT
);
19934 if (out
!= operands
[0])
19935 emit_move_insn (operands
[0], out
);
19942 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19943 || diff
== 3 || diff
== 5 || diff
== 9)
19944 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19946 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19952 * lea cf(dest*(ct-cf)),dest
19956 * This also catches the degenerate setcc-only case.
19962 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19965 /* On x86_64 the lea instruction operates on Pmode, so we need
19966 to get arithmetics done in proper mode to match. */
19968 tmp
= copy_rtx (out
);
19972 out1
= copy_rtx (out
);
19973 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19977 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19983 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19986 if (!rtx_equal_p (tmp
, out
))
19989 out
= force_operand (tmp
, copy_rtx (out
));
19991 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19993 if (!rtx_equal_p (out
, operands
[0]))
19994 emit_move_insn (operands
[0], copy_rtx (out
));
20000 * General case: Jumpful:
20001 * xorl dest,dest cmpl op1, op2
20002 * cmpl op1, op2 movl ct, dest
20003 * setcc dest jcc 1f
20004 * decl dest movl cf, dest
20005 * andl (cf-ct),dest 1:
20008 * Size 20. Size 14.
20010 * This is reasonably steep, but branch mispredict costs are
20011 * high on modern cpus, so consider failing only if optimizing
20015 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20016 && BRANCH_COST (optimize_insn_for_speed_p (),
20021 enum machine_mode cmp_mode
= GET_MODE (op0
);
20026 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
20028 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
20030 /* We may be reversing unordered compare to normal compare,
20031 that is not valid in general (we may convert non-trapping
20032 condition to trapping one), however on i386 we currently
20033 emit all comparisons unordered. */
20034 code
= reverse_condition_maybe_unordered (code
);
20038 code
= reverse_condition (code
);
20039 if (compare_code
!= UNKNOWN
)
20040 compare_code
= reverse_condition (compare_code
);
20044 if (compare_code
!= UNKNOWN
)
20046 /* notl op1 (if needed)
20051 For x < 0 (resp. x <= -1) there will be no notl,
20052 so if possible swap the constants to get rid of the
20054 True/false will be -1/0 while code below (store flag
20055 followed by decrement) is 0/-1, so the constants need
20056 to be exchanged once more. */
20058 if (compare_code
== GE
|| !cf
)
20060 code
= reverse_condition (code
);
20065 HOST_WIDE_INT tmp
= cf
;
20070 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
20074 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20076 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
20078 copy_rtx (out
), 1, OPTAB_DIRECT
);
20081 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
20082 gen_int_mode (cf
- ct
, mode
),
20083 copy_rtx (out
), 1, OPTAB_DIRECT
);
20085 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
20086 copy_rtx (out
), 1, OPTAB_DIRECT
);
20087 if (!rtx_equal_p (out
, operands
[0]))
20088 emit_move_insn (operands
[0], copy_rtx (out
));
20094 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20096 /* Try a few things more with specific constants and a variable. */
20099 rtx var
, orig_out
, out
, tmp
;
20101 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20104 /* If one of the two operands is an interesting constant, load a
20105 constant with the above and mask it in with a logical operation. */
20107 if (CONST_INT_P (operands
[2]))
20110 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
20111 operands
[3] = constm1_rtx
, op
= and_optab
;
20112 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
20113 operands
[3] = const0_rtx
, op
= ior_optab
;
20117 else if (CONST_INT_P (operands
[3]))
20120 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
20121 operands
[2] = constm1_rtx
, op
= and_optab
;
20122 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
20123 operands
[2] = const0_rtx
, op
= ior_optab
;
20130 orig_out
= operands
[0];
20131 tmp
= gen_reg_rtx (mode
);
20134 /* Recurse to get the constant loaded. */
20135 if (ix86_expand_int_movcc (operands
) == 0)
20138 /* Mask in the interesting variable. */
20139 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
20141 if (!rtx_equal_p (out
, orig_out
))
20142 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
20148 * For comparison with above,
20158 if (! nonimmediate_operand (operands
[2], mode
))
20159 operands
[2] = force_reg (mode
, operands
[2]);
20160 if (! nonimmediate_operand (operands
[3], mode
))
20161 operands
[3] = force_reg (mode
, operands
[3]);
20163 if (! register_operand (operands
[2], VOIDmode
)
20165 || ! register_operand (operands
[3], VOIDmode
)))
20166 operands
[2] = force_reg (mode
, operands
[2]);
20169 && ! register_operand (operands
[3], VOIDmode
))
20170 operands
[3] = force_reg (mode
, operands
[3]);
20172 emit_insn (compare_seq
);
20173 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20174 gen_rtx_IF_THEN_ELSE (mode
,
20175 compare_op
, operands
[2],
/* NOTE(review): extraction-damaged fragment of
   ix86_prepare_sse_fp_compare_args.  The switch head, case labels and
   return statements (original lines 20191-20197, 20199-20200,
   20204-20209, 20211-20216, 20218-20219, 20224-20230, 20234-20237,
   20239-20241) are missing — only the comments and a few statements
   survive.  Text unchanged; comments added.  */
20180 /* Swap, force into registers, or otherwise massage the two operands
20181 to an sse comparison with a mask result. Thus we differ a bit from
20182 ix86_prepare_fp_compare_args which expects to produce a flags result.
20184 The DEST operand exists to help determine whether to commute commutative
20185 operators. The POP0/POP1 operands are updated in place. The new
20186 comparison code is returned, or UNKNOWN if not implementable. */
20188 static enum rtx_code
20189 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
20190 rtx
*pop0
, rtx
*pop1
)
20198 /* AVX supports all the needed comparisons. */
20201 /* We have no LTGT as an operator. We could implement it with
20202 NE & ORDERED, but this requires an extra temporary. It's
20203 not clear that it's worth it. */
20210 /* These are supported directly. */
20217 /* AVX has 3 operand comparisons, no need to swap anything. */
20220 /* For commutative operators, try to canonicalize the destination
20221 operand to be first in the comparison - this helps reload to
20222 avoid extra moves. */
20223 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
20231 /* These are not supported directly before AVX, and furthermore
20232 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20233 comparison operands to transform into something that is
20238 code
= swap_condition (code
);
20242 gcc_unreachable ();
/* NOTE(review): extraction-damaged fragment of
   ix86_expand_sse_fp_minmax.  Interior lines are missing (e.g.
   20260-20264 including the LT check that sets is_min, 20266-20267,
   20269-20273, 20275, 20277-20280, the `return false` paths, and the
   final `return true`), so the is_min/is_max bookkeeping is only
   partially visible.  Text unchanged; comments added.  */
20248 /* Detect conditional moves that exactly match min/max operational
20249 semantics. Note that this is IEEE safe, as long as we don't
20250 interchange the operands.
20252 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20253 and TRUE if the operation is successful and instructions are emitted. */
20256 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
20257 rtx cmp_op1
, rtx if_true
, rtx if_false
)
20259 enum machine_mode mode
;
/* UNGE is handled by swapping the arms (turning it into the LT form).  */
20265 else if (code
== UNGE
)
20268 if_true
= if_false
;
/* Operands must line up exactly with the comparison for min/max
   semantics to hold.  */
20274 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
20276 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
20281 mode
= GET_MODE (dest
);
20283 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20284 but MODE may be a vector mode and thus not appropriate. */
20285 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
/* IEEE-honoring path: emit an UNSPEC so the exact min/max NaN/zero
   behavior of the SSE instruction is preserved.  */
20287 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
20290 if_true
= force_reg (mode
, if_true
);
20291 v
= gen_rtvec (2, if_true
, if_false
);
20292 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
/* Fast-math path: plain SMIN/SMAX rtl is fine.  */
20296 code
= is_min
? SMIN
: SMAX
;
20297 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
20300 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
/* NOTE(review): ix86_expand_sse_cmp fragment; missing interior lines
   (20305-20306 return type, 20309, 20312-20313 including the condition
   before forcing cmp_op0, 20317-20318 the first clause of the
   dest-overlap test, 20322, 20325, 20328-20329, and the final
   `return dest`).  Visible logic: force operands into registers as
   needed, allocate a fresh DEST when it overlaps the inputs, build the
   comparison rtx in the compare mode, and convert_move when the
   compare mode differs from the destination mode.  Text unchanged;
   comments added.  */
20304 /* Expand an sse vector comparison. Return the register with the result. */
20307 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
20308 rtx op_true
, rtx op_false
)
20310 enum machine_mode mode
= GET_MODE (dest
);
20311 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
20314 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
20315 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
20316 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
/* Writing DEST early would clobber inputs it overlaps; use a scratch.  */
20319 || reg_overlap_mentioned_p (dest
, op_true
)
20320 || reg_overlap_mentioned_p (dest
, op_false
))
20321 dest
= gen_reg_rtx (mode
);
20323 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
20324 if (cmp_mode
!= mode
)
20326 x
= force_reg (cmp_mode
, x
);
20327 convert_move (dest
, x
, false);
20330 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20335 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20336 operations. This is used for both scalar and vector conditional moves. */
20339 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
20341 enum machine_mode mode
= GET_MODE (dest
);
20344 if (vector_all_ones_operand (op_true
, mode
)
20345 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
20347 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
20349 else if (op_false
== CONST0_RTX (mode
))
20351 op_true
= force_reg (mode
, op_true
);
20352 x
= gen_rtx_AND (mode
, cmp
, op_true
);
20353 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20355 else if (op_true
== CONST0_RTX (mode
))
20357 op_false
= force_reg (mode
, op_false
);
20358 x
= gen_rtx_NOT (mode
, cmp
);
20359 x
= gen_rtx_AND (mode
, x
, op_false
);
20360 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20362 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
20364 op_false
= force_reg (mode
, op_false
);
20365 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
20366 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20368 else if (TARGET_XOP
)
20370 op_true
= force_reg (mode
, op_true
);
20372 if (!nonimmediate_operand (op_false
, mode
))
20373 op_false
= force_reg (mode
, op_false
);
20375 emit_insn (gen_rtx_SET (mode
, dest
,
20376 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
20382 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
20384 if (!nonimmediate_operand (op_true
, mode
))
20385 op_true
= force_reg (mode
, op_true
);
20387 op_false
= force_reg (mode
, op_false
);
20393 gen
= gen_sse4_1_blendvps
;
20397 gen
= gen_sse4_1_blendvpd
;
20405 gen
= gen_sse4_1_pblendvb
;
20406 dest
= gen_lowpart (V16QImode
, dest
);
20407 op_false
= gen_lowpart (V16QImode
, op_false
);
20408 op_true
= gen_lowpart (V16QImode
, op_true
);
20409 cmp
= gen_lowpart (V16QImode
, cmp
);
20414 gen
= gen_avx_blendvps256
;
20418 gen
= gen_avx_blendvpd256
;
20426 gen
= gen_avx2_pblendvb
;
20427 dest
= gen_lowpart (V32QImode
, dest
);
20428 op_false
= gen_lowpart (V32QImode
, op_false
);
20429 op_true
= gen_lowpart (V32QImode
, op_true
);
20430 cmp
= gen_lowpart (V32QImode
, cmp
);
20438 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
20441 op_true
= force_reg (mode
, op_true
);
20443 t2
= gen_reg_rtx (mode
);
20445 t3
= gen_reg_rtx (mode
);
20449 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20450 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20452 x
= gen_rtx_NOT (mode
, cmp
);
20453 x
= gen_rtx_AND (mode
, x
, op_false
);
20454 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20456 x
= gen_rtx_IOR (mode
, t3
, t2
);
20457 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20462 /* Expand a floating-point conditional move. Return true if successful. */
20465 ix86_expand_fp_movcc (rtx operands
[])
20467 enum machine_mode mode
= GET_MODE (operands
[0]);
20468 enum rtx_code code
= GET_CODE (operands
[1]);
20469 rtx tmp
, compare_op
;
20470 rtx op0
= XEXP (operands
[1], 0);
20471 rtx op1
= XEXP (operands
[1], 1);
20473 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20475 enum machine_mode cmode
;
20477 /* Since we've no cmove for sse registers, don't force bad register
20478 allocation just to gain access to it. Deny movcc when the
20479 comparison mode doesn't match the move mode. */
20480 cmode
= GET_MODE (op0
);
20481 if (cmode
== VOIDmode
)
20482 cmode
= GET_MODE (op1
);
20486 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20487 if (code
== UNKNOWN
)
20490 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20491 operands
[2], operands
[3]))
20494 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20495 operands
[2], operands
[3]);
20496 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20500 if (GET_MODE (op0
) == TImode
20501 || (GET_MODE (op0
) == DImode
20505 /* The floating point conditional move instructions don't directly
20506 support conditions resulting from a signed integer comparison. */
20508 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20509 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20511 tmp
= gen_reg_rtx (QImode
);
20512 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20514 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20517 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20518 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20519 operands
[2], operands
[3])));
20524 /* Expand a floating-point vector conditional move; a vcond operation
20525 rather than a movcc operation. */
20528 ix86_expand_fp_vcond (rtx operands
[])
20530 enum rtx_code code
= GET_CODE (operands
[3]);
20533 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20534 &operands
[4], &operands
[5]);
20535 if (code
== UNKNOWN
)
20538 switch (GET_CODE (operands
[3]))
20541 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20542 operands
[5], operands
[0], operands
[0]);
20543 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20544 operands
[5], operands
[1], operands
[2]);
20548 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20549 operands
[5], operands
[0], operands
[0]);
20550 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20551 operands
[5], operands
[1], operands
[2]);
20555 gcc_unreachable ();
20557 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20559 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20563 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20564 operands
[5], operands
[1], operands
[2]))
20567 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20568 operands
[1], operands
[2]);
20569 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20573 /* Expand a signed/unsigned integral vector conditional move. */
20576 ix86_expand_int_vcond (rtx operands
[])
20578 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20579 enum machine_mode mode
= GET_MODE (operands
[4]);
20580 enum rtx_code code
= GET_CODE (operands
[3]);
20581 bool negate
= false;
20584 cop0
= operands
[4];
20585 cop1
= operands
[5];
20587 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20588 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20589 if ((code
== LT
|| code
== GE
)
20590 && data_mode
== mode
20591 && cop1
== CONST0_RTX (mode
)
20592 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20593 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20594 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20595 && (GET_MODE_SIZE (data_mode
) == 16
20596 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20598 rtx negop
= operands
[2 - (code
== LT
)];
20599 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20600 if (negop
== CONST1_RTX (data_mode
))
20602 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20603 operands
[0], 1, OPTAB_DIRECT
);
20604 if (res
!= operands
[0])
20605 emit_move_insn (operands
[0], res
);
20608 else if (GET_MODE_INNER (data_mode
) != DImode
20609 && vector_all_ones_operand (negop
, data_mode
))
20611 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20612 operands
[0], 0, OPTAB_DIRECT
);
20613 if (res
!= operands
[0])
20614 emit_move_insn (operands
[0], res
);
20619 if (!nonimmediate_operand (cop1
, mode
))
20620 cop1
= force_reg (mode
, cop1
);
20621 if (!general_operand (operands
[1], data_mode
))
20622 operands
[1] = force_reg (data_mode
, operands
[1]);
20623 if (!general_operand (operands
[2], data_mode
))
20624 operands
[2] = force_reg (data_mode
, operands
[2]);
20626 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20628 && (mode
== V16QImode
|| mode
== V8HImode
20629 || mode
== V4SImode
|| mode
== V2DImode
))
20633 /* Canonicalize the comparison to EQ, GT, GTU. */
20644 code
= reverse_condition (code
);
20650 code
= reverse_condition (code
);
20656 code
= swap_condition (code
);
20657 x
= cop0
, cop0
= cop1
, cop1
= x
;
20661 gcc_unreachable ();
20664 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20665 if (mode
== V2DImode
)
20670 /* SSE4.1 supports EQ. */
20671 if (!TARGET_SSE4_1
)
20677 /* SSE4.2 supports GT/GTU. */
20678 if (!TARGET_SSE4_2
)
20683 gcc_unreachable ();
20687 /* Unsigned parallel compare is not supported by the hardware.
20688 Play some tricks to turn this into a signed comparison
20692 cop0
= force_reg (mode
, cop0
);
20702 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20706 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20707 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20708 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20709 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20711 gcc_unreachable ();
20713 /* Subtract (-(INT MAX) - 1) from both operands to make
20715 mask
= ix86_build_signbit_mask (mode
, true, false);
20716 t1
= gen_reg_rtx (mode
);
20717 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20719 t2
= gen_reg_rtx (mode
);
20720 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20732 /* Perform a parallel unsigned saturating subtraction. */
20733 x
= gen_reg_rtx (mode
);
20734 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20735 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20738 cop1
= CONST0_RTX (mode
);
20744 gcc_unreachable ();
20749 /* Allow the comparison to be done in one mode, but the movcc to
20750 happen in another mode. */
20751 if (data_mode
== mode
)
20753 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20754 operands
[1+negate
], operands
[2-negate
]);
20758 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20759 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20761 operands
[1+negate
], operands
[2-negate
]);
20762 x
= gen_lowpart (data_mode
, x
);
20765 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20766 operands
[2-negate
]);
20770 /* Expand a variable vector permutation. */
20773 ix86_expand_vec_perm (rtx operands
[])
20775 rtx target
= operands
[0];
20776 rtx op0
= operands
[1];
20777 rtx op1
= operands
[2];
20778 rtx mask
= operands
[3];
20779 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20780 enum machine_mode mode
= GET_MODE (op0
);
20781 enum machine_mode maskmode
= GET_MODE (mask
);
20783 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20785 /* Number of elements in the vector. */
20786 w
= GET_MODE_NUNITS (mode
);
20787 e
= GET_MODE_UNIT_SIZE (mode
);
20788 gcc_assert (w
<= 32);
20792 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20794 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20795 an constant shuffle operand. With a tiny bit of effort we can
20796 use VPERMD instead. A re-interpretation stall for V4DFmode is
20797 unfortunate but there's no avoiding it.
20798 Similarly for V16HImode we don't have instructions for variable
20799 shuffling, while for V32QImode we can use after preparing suitable
20800 masks vpshufb; vpshufb; vpermq; vpor. */
20802 if (mode
== V16HImode
)
20804 maskmode
= mode
= V32QImode
;
20810 maskmode
= mode
= V8SImode
;
20814 t1
= gen_reg_rtx (maskmode
);
20816 /* Replicate the low bits of the V4DImode mask into V8SImode:
20818 t1 = { A A B B C C D D }. */
20819 for (i
= 0; i
< w
/ 2; ++i
)
20820 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20821 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20822 vt
= force_reg (maskmode
, vt
);
20823 mask
= gen_lowpart (maskmode
, mask
);
20824 if (maskmode
== V8SImode
)
20825 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20827 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20829 /* Multiply the shuffle indicies by two. */
20830 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20833 /* Add one to the odd shuffle indicies:
20834 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20835 for (i
= 0; i
< w
/ 2; ++i
)
20837 vec
[i
* 2] = const0_rtx
;
20838 vec
[i
* 2 + 1] = const1_rtx
;
20840 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20841 vt
= validize_mem (force_const_mem (maskmode
, vt
));
20842 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20845 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20846 operands
[3] = mask
= t1
;
20847 target
= gen_lowpart (mode
, target
);
20848 op0
= gen_lowpart (mode
, op0
);
20849 op1
= gen_lowpart (mode
, op1
);
20855 /* The VPERMD and VPERMPS instructions already properly ignore
20856 the high bits of the shuffle elements. No need for us to
20857 perform an AND ourselves. */
20858 if (one_operand_shuffle
)
20859 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
20862 t1
= gen_reg_rtx (V8SImode
);
20863 t2
= gen_reg_rtx (V8SImode
);
20864 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20865 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
20871 mask
= gen_lowpart (V8SFmode
, mask
);
20872 if (one_operand_shuffle
)
20873 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20876 t1
= gen_reg_rtx (V8SFmode
);
20877 t2
= gen_reg_rtx (V8SFmode
);
20878 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20879 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
20885 /* By combining the two 128-bit input vectors into one 256-bit
20886 input vector, we can use VPERMD and VPERMPS for the full
20887 two-operand shuffle. */
20888 t1
= gen_reg_rtx (V8SImode
);
20889 t2
= gen_reg_rtx (V8SImode
);
20890 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20891 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20892 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20893 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20897 t1
= gen_reg_rtx (V8SFmode
);
20898 t2
= gen_reg_rtx (V8SImode
);
20899 mask
= gen_lowpart (V4SImode
, mask
);
20900 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20901 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20902 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20903 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
20907 t1
= gen_reg_rtx (V32QImode
);
20908 t2
= gen_reg_rtx (V32QImode
);
20909 t3
= gen_reg_rtx (V32QImode
);
20910 vt2
= GEN_INT (128);
20911 for (i
= 0; i
< 32; i
++)
20913 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20914 vt
= force_reg (V32QImode
, vt
);
20915 for (i
= 0; i
< 32; i
++)
20916 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20917 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20918 vt2
= force_reg (V32QImode
, vt2
);
20919 /* From mask create two adjusted masks, which contain the same
20920 bits as mask in the low 7 bits of each vector element.
20921 The first mask will have the most significant bit clear
20922 if it requests element from the same 128-bit lane
20923 and MSB set if it requests element from the other 128-bit lane.
20924 The second mask will have the opposite values of the MSB,
20925 and additionally will have its 128-bit lanes swapped.
20926 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20927 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20928 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20929 stands for other 12 bytes. */
20930 /* The bit whether element is from the same lane or the other
20931 lane is bit 4, so shift it up by 3 to the MSB position. */
20932 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20933 gen_lowpart (V4DImode
, mask
),
20935 /* Clear MSB bits from the mask just in case it had them set. */
20936 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20937 /* After this t1 will have MSB set for elements from other lane. */
20938 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20939 /* Clear bits other than MSB. */
20940 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20941 /* Or in the lower bits from mask into t3. */
20942 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20943 /* And invert MSB bits in t1, so MSB is set for elements from the same
20945 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20946 /* Swap 128-bit lanes in t3. */
20947 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20948 gen_lowpart (V4DImode
, t3
),
20949 const2_rtx
, GEN_INT (3),
20950 const0_rtx
, const1_rtx
));
20951 /* And or in the lower bits from mask into t1. */
20952 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20953 if (one_operand_shuffle
)
20955 /* Each of these shuffles will put 0s in places where
20956 element from the other 128-bit lane is needed, otherwise
20957 will shuffle in the requested value. */
20958 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20959 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20960 /* For t3 the 128-bit lanes are swapped again. */
20961 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20962 gen_lowpart (V4DImode
, t3
),
20963 const2_rtx
, GEN_INT (3),
20964 const0_rtx
, const1_rtx
));
20965 /* And oring both together leads to the result. */
20966 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20970 t4
= gen_reg_rtx (V32QImode
);
20971 /* Similarly to the above one_operand_shuffle code,
20972 just for repeated twice for each operand. merge_two:
20973 code will merge the two results together. */
20974 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20975 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20976 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20977 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20978 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20979 gen_lowpart (V4DImode
, t4
),
20980 const2_rtx
, GEN_INT (3),
20981 const0_rtx
, const1_rtx
));
20982 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20983 gen_lowpart (V4DImode
, t3
),
20984 const2_rtx
, GEN_INT (3),
20985 const0_rtx
, const1_rtx
));
20986 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20987 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20993 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
21000 /* The XOP VPPERM insn supports three inputs. By ignoring the
21001 one_operand_shuffle special case, we avoid creating another
21002 set of constant vectors in memory. */
21003 one_operand_shuffle
= false;
21005 /* mask = mask & {2*w-1, ...} */
21006 vt
= GEN_INT (2*w
- 1);
21010 /* mask = mask & {w-1, ...} */
21011 vt
= GEN_INT (w
- 1);
21014 for (i
= 0; i
< w
; i
++)
21016 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21017 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21018 NULL_RTX
, 0, OPTAB_DIRECT
);
21020 /* For non-QImode operations, convert the word permutation control
21021 into a byte permutation control. */
21022 if (mode
!= V16QImode
)
21024 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
21025 GEN_INT (exact_log2 (e
)),
21026 NULL_RTX
, 0, OPTAB_DIRECT
);
21028 /* Convert mask to vector of chars. */
21029 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
21031 /* Replicate each of the input bytes into byte positions:
21032 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21033 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21034 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21035 for (i
= 0; i
< 16; ++i
)
21036 vec
[i
] = GEN_INT (i
/e
* e
);
21037 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21038 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21040 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
21042 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
21044 /* Convert it into the byte positions by doing
21045 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21046 for (i
= 0; i
< 16; ++i
)
21047 vec
[i
] = GEN_INT (i
% e
);
21048 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21049 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21050 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
21053 /* The actual shuffle operations all operate on V16QImode. */
21054 op0
= gen_lowpart (V16QImode
, op0
);
21055 op1
= gen_lowpart (V16QImode
, op1
);
21056 target
= gen_lowpart (V16QImode
, target
);
21060 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
21062 else if (one_operand_shuffle
)
21064 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
21071 /* Shuffle the two input vectors independently. */
21072 t1
= gen_reg_rtx (V16QImode
);
21073 t2
= gen_reg_rtx (V16QImode
);
21074 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
21075 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
21078 /* Then merge them together. The key is whether any given control
21079 element contained a bit set that indicates the second word. */
21080 mask
= operands
[3];
21082 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
21084 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21085 more shuffle to convert the V2DI input mask into a V4SI
21086 input mask. At which point the masking that expand_int_vcond
21087 will work as desired. */
21088 rtx t3
= gen_reg_rtx (V4SImode
);
21089 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
21090 const0_rtx
, const0_rtx
,
21091 const2_rtx
, const2_rtx
));
21093 maskmode
= V4SImode
;
21097 for (i
= 0; i
< w
; i
++)
21099 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21100 vt
= force_reg (maskmode
, vt
);
21101 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21102 NULL_RTX
, 0, OPTAB_DIRECT
);
21104 xops
[0] = gen_lowpart (mode
, operands
[0]);
21105 xops
[1] = gen_lowpart (mode
, t2
);
21106 xops
[2] = gen_lowpart (mode
, t1
);
21107 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
21110 ok
= ix86_expand_int_vcond (xops
);
21115 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
21116 true if we should do zero extension, else sign extension. HIGH_P is
21117 true if we want the N/2 high elements, else the low elements. */
21120 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
21122 enum machine_mode imode
= GET_MODE (src
);
21127 rtx (*unpack
)(rtx
, rtx
);
21128 rtx (*extract
)(rtx
, rtx
) = NULL
;
21129 enum machine_mode halfmode
= BLKmode
;
21135 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
21137 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
21138 halfmode
= V16QImode
;
21140 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
21144 unpack
= gen_avx2_zero_extendv8hiv8si2
;
21146 unpack
= gen_avx2_sign_extendv8hiv8si2
;
21147 halfmode
= V8HImode
;
21149 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
21153 unpack
= gen_avx2_zero_extendv4siv4di2
;
21155 unpack
= gen_avx2_sign_extendv4siv4di2
;
21156 halfmode
= V4SImode
;
21158 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
21162 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
21164 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
21168 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
21170 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
21174 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
21176 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
21179 gcc_unreachable ();
21182 if (GET_MODE_SIZE (imode
) == 32)
21184 tmp
= gen_reg_rtx (halfmode
);
21185 emit_insn (extract (tmp
, src
));
21189 /* Shift higher 8 bytes to lower 8 bytes. */
21190 tmp
= gen_reg_rtx (imode
);
21191 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
21192 gen_lowpart (V1TImode
, src
),
21198 emit_insn (unpack (dest
, tmp
));
21202 rtx (*unpack
)(rtx
, rtx
, rtx
);
21208 unpack
= gen_vec_interleave_highv16qi
;
21210 unpack
= gen_vec_interleave_lowv16qi
;
21214 unpack
= gen_vec_interleave_highv8hi
;
21216 unpack
= gen_vec_interleave_lowv8hi
;
21220 unpack
= gen_vec_interleave_highv4si
;
21222 unpack
= gen_vec_interleave_lowv4si
;
21225 gcc_unreachable ();
21229 tmp
= force_reg (imode
, CONST0_RTX (imode
));
21231 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
21232 src
, pc_rtx
, pc_rtx
);
21234 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
21238 /* Expand conditional increment or decrement using adb/sbb instructions.
21239 The default case using setcc followed by the conditional move can be
21240 done by generic code. */
21242 ix86_expand_int_addcc (rtx operands
[])
21244 enum rtx_code code
= GET_CODE (operands
[1]);
21246 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
21248 rtx val
= const0_rtx
;
21249 bool fpcmp
= false;
21250 enum machine_mode mode
;
21251 rtx op0
= XEXP (operands
[1], 0);
21252 rtx op1
= XEXP (operands
[1], 1);
21254 if (operands
[3] != const1_rtx
21255 && operands
[3] != constm1_rtx
)
21257 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
21259 code
= GET_CODE (compare_op
);
21261 flags
= XEXP (compare_op
, 0);
21263 if (GET_MODE (flags
) == CCFPmode
21264 || GET_MODE (flags
) == CCFPUmode
)
21267 code
= ix86_fp_compare_code_to_integer (code
);
21274 PUT_CODE (compare_op
,
21275 reverse_condition_maybe_unordered
21276 (GET_CODE (compare_op
)));
21278 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
21281 mode
= GET_MODE (operands
[0]);
21283 /* Construct either adc or sbb insn. */
21284 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
21289 insn
= gen_subqi3_carry
;
21292 insn
= gen_subhi3_carry
;
21295 insn
= gen_subsi3_carry
;
21298 insn
= gen_subdi3_carry
;
21301 gcc_unreachable ();
21309 insn
= gen_addqi3_carry
;
21312 insn
= gen_addhi3_carry
;
21315 insn
= gen_addsi3_carry
;
21318 insn
= gen_adddi3_carry
;
21321 gcc_unreachable ();
21324 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
21330 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21331 but works for floating pointer parameters and nonoffsetable memories.
21332 For pushes, it returns just stack offsets; the values will be saved
21333 in the right order. Maximally three parts are generated. */
21336 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
21341 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
21343 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
21345 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
21346 gcc_assert (size
>= 2 && size
<= 4);
21348 /* Optimize constant pool reference to immediates. This is used by fp
21349 moves, that force all constants to memory to allow combining. */
21350 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
21352 rtx tmp
= maybe_get_pool_constant (operand
);
21357 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
21359 /* The only non-offsetable memories we handle are pushes. */
21360 int ok
= push_operand (operand
, VOIDmode
);
21364 operand
= copy_rtx (operand
);
21365 PUT_MODE (operand
, word_mode
);
21366 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
21370 if (GET_CODE (operand
) == CONST_VECTOR
)
21372 enum machine_mode imode
= int_mode_for_mode (mode
);
21373 /* Caution: if we looked through a constant pool memory above,
21374 the operand may actually have a different mode now. That's
21375 ok, since we want to pun this all the way back to an integer. */
21376 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
21377 gcc_assert (operand
!= NULL
);
21383 if (mode
== DImode
)
21384 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21389 if (REG_P (operand
))
21391 gcc_assert (reload_completed
);
21392 for (i
= 0; i
< size
; i
++)
21393 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
21395 else if (offsettable_memref_p (operand
))
21397 operand
= adjust_address (operand
, SImode
, 0);
21398 parts
[0] = operand
;
21399 for (i
= 1; i
< size
; i
++)
21400 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
21402 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21407 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21411 real_to_target (l
, &r
, mode
);
21412 parts
[3] = gen_int_mode (l
[3], SImode
);
21413 parts
[2] = gen_int_mode (l
[2], SImode
);
21416 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21417 long double may not be 80-bit. */
21418 real_to_target (l
, &r
, mode
);
21419 parts
[2] = gen_int_mode (l
[2], SImode
);
21422 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
21425 gcc_unreachable ();
21427 parts
[1] = gen_int_mode (l
[1], SImode
);
21428 parts
[0] = gen_int_mode (l
[0], SImode
);
21431 gcc_unreachable ();
21436 if (mode
== TImode
)
21437 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21438 if (mode
== XFmode
|| mode
== TFmode
)
21440 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21441 if (REG_P (operand
))
21443 gcc_assert (reload_completed
);
21444 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21445 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21447 else if (offsettable_memref_p (operand
))
21449 operand
= adjust_address (operand
, DImode
, 0);
21450 parts
[0] = operand
;
21451 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21453 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21458 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21459 real_to_target (l
, &r
, mode
);
21461 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21462 if (HOST_BITS_PER_WIDE_INT
>= 64)
21465 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21466 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21469 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21471 if (upper_mode
== SImode
)
21472 parts
[1] = gen_int_mode (l
[2], SImode
);
21473 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21476 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21477 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21480 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21483 gcc_unreachable ();
21490 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21491 Return false when normal moves are needed; true when all required
21492 insns have been emitted. Operands 2-4 contain the input values
21493 int the correct order; operands 5-7 contain the output values. */
21496 ix86_split_long_move (rtx operands
[])
21501 int collisions
= 0;
21502 enum machine_mode mode
= GET_MODE (operands
[0]);
21503 bool collisionparts
[4];
21505 /* The DFmode expanders may ask us to move double.
21506 For 64bit target this is single move. By hiding the fact
21507 here we simplify i386.md splitters. */
21508 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21510 /* Optimize constant pool reference to immediates. This is used by
21511 fp moves, that force all constants to memory to allow combining. */
21513 if (MEM_P (operands
[1])
21514 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21515 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21516 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21517 if (push_operand (operands
[0], VOIDmode
))
21519 operands
[0] = copy_rtx (operands
[0]);
21520 PUT_MODE (operands
[0], word_mode
);
21523 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21524 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21525 emit_move_insn (operands
[0], operands
[1]);
21529 /* The only non-offsettable memory we handle is push. */
21530 if (push_operand (operands
[0], VOIDmode
))
21533 gcc_assert (!MEM_P (operands
[0])
21534 || offsettable_memref_p (operands
[0]));
21536 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21537 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21539 /* When emitting push, take care for source operands on the stack. */
21540 if (push
&& MEM_P (operands
[1])
21541 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21543 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21545 /* Compensate for the stack decrement by 4. */
21546 if (!TARGET_64BIT
&& nparts
== 3
21547 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21548 src_base
= plus_constant (Pmode
, src_base
, 4);
21550 /* src_base refers to the stack pointer and is
21551 automatically decreased by emitted push. */
21552 for (i
= 0; i
< nparts
; i
++)
21553 part
[1][i
] = change_address (part
[1][i
],
21554 GET_MODE (part
[1][i
]), src_base
);
21557 /* We need to do copy in the right order in case an address register
21558 of the source overlaps the destination. */
21559 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21563 for (i
= 0; i
< nparts
; i
++)
21566 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21567 if (collisionparts
[i
])
21571 /* Collision in the middle part can be handled by reordering. */
21572 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21574 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21575 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21577 else if (collisions
== 1
21579 && (collisionparts
[1] || collisionparts
[2]))
21581 if (collisionparts
[1])
21583 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21584 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21588 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21589 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21593 /* If there are more collisions, we can't handle it by reordering.
21594 Do an lea to the last part and use only one colliding move. */
21595 else if (collisions
> 1)
21601 base
= part
[0][nparts
- 1];
21603 /* Handle the case when the last part isn't valid for lea.
21604 Happens in 64-bit mode storing the 12-byte XFmode. */
21605 if (GET_MODE (base
) != Pmode
)
21606 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21608 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21609 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21610 for (i
= 1; i
< nparts
; i
++)
21612 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21613 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21624 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21625 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21626 stack_pointer_rtx
, GEN_INT (-4)));
21627 emit_move_insn (part
[0][2], part
[1][2]);
21629 else if (nparts
== 4)
21631 emit_move_insn (part
[0][3], part
[1][3]);
21632 emit_move_insn (part
[0][2], part
[1][2]);
21637 /* In 64bit mode we don't have 32bit push available. In case this is
21638 register, it is OK - we will just use larger counterpart. We also
21639 retype memory - these comes from attempt to avoid REX prefix on
21640 moving of second half of TFmode value. */
21641 if (GET_MODE (part
[1][1]) == SImode
)
21643 switch (GET_CODE (part
[1][1]))
21646 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21650 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21654 gcc_unreachable ();
21657 if (GET_MODE (part
[1][0]) == SImode
)
21658 part
[1][0] = part
[1][1];
21661 emit_move_insn (part
[0][1], part
[1][1]);
21662 emit_move_insn (part
[0][0], part
[1][0]);
21666 /* Choose correct order to not overwrite the source before it is copied. */
21667 if ((REG_P (part
[0][0])
21668 && REG_P (part
[1][1])
21669 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21671 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21673 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21675 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21677 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21679 operands
[2 + i
] = part
[0][j
];
21680 operands
[6 + i
] = part
[1][j
];
21685 for (i
= 0; i
< nparts
; i
++)
21687 operands
[2 + i
] = part
[0][i
];
21688 operands
[6 + i
] = part
[1][i
];
21692 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21693 if (optimize_insn_for_size_p ())
21695 for (j
= 0; j
< nparts
- 1; j
++)
21696 if (CONST_INT_P (operands
[6 + j
])
21697 && operands
[6 + j
] != const0_rtx
21698 && REG_P (operands
[2 + j
]))
21699 for (i
= j
; i
< nparts
- 1; i
++)
21700 if (CONST_INT_P (operands
[7 + i
])
21701 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21702 operands
[7 + i
] = operands
[2 + j
];
21705 for (i
= 0; i
< nparts
; i
++)
21706 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21711 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21712 left shift by a constant, either using a single shift or
21713 a sequence of add instructions. */
21716 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21718 rtx (*insn
)(rtx
, rtx
, rtx
);
21721 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21722 && !optimize_insn_for_size_p ()))
21724 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21725 while (count
-- > 0)
21726 emit_insn (insn (operand
, operand
, operand
));
21730 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21731 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21736 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21738 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21739 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21740 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21742 rtx low
[2], high
[2];
21745 if (CONST_INT_P (operands
[2]))
21747 split_double_mode (mode
, operands
, 2, low
, high
);
21748 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21750 if (count
>= half_width
)
21752 emit_move_insn (high
[0], low
[1]);
21753 emit_move_insn (low
[0], const0_rtx
);
21755 if (count
> half_width
)
21756 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21760 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21762 if (!rtx_equal_p (operands
[0], operands
[1]))
21763 emit_move_insn (operands
[0], operands
[1]);
21765 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21766 ix86_expand_ashl_const (low
[0], count
, mode
);
21771 split_double_mode (mode
, operands
, 1, low
, high
);
21773 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21775 if (operands
[1] == const1_rtx
)
21777 /* Assuming we've chosen a QImode capable registers, then 1 << N
21778 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21779 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21781 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21783 ix86_expand_clear (low
[0]);
21784 ix86_expand_clear (high
[0]);
21785 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21787 d
= gen_lowpart (QImode
, low
[0]);
21788 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21789 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21790 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21792 d
= gen_lowpart (QImode
, high
[0]);
21793 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21794 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21795 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21798 /* Otherwise, we can get the same results by manually performing
21799 a bit extract operation on bit 5/6, and then performing the two
21800 shifts. The two methods of getting 0/1 into low/high are exactly
21801 the same size. Avoiding the shift in the bit extract case helps
21802 pentium4 a bit; no one else seems to care much either way. */
21805 enum machine_mode half_mode
;
21806 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21807 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21808 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21809 HOST_WIDE_INT bits
;
21812 if (mode
== DImode
)
21814 half_mode
= SImode
;
21815 gen_lshr3
= gen_lshrsi3
;
21816 gen_and3
= gen_andsi3
;
21817 gen_xor3
= gen_xorsi3
;
21822 half_mode
= DImode
;
21823 gen_lshr3
= gen_lshrdi3
;
21824 gen_and3
= gen_anddi3
;
21825 gen_xor3
= gen_xordi3
;
21829 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21830 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21832 x
= gen_lowpart (half_mode
, operands
[2]);
21833 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21835 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21836 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21837 emit_move_insn (low
[0], high
[0]);
21838 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21841 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21842 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21846 if (operands
[1] == constm1_rtx
)
21848 /* For -1 << N, we can avoid the shld instruction, because we
21849 know that we're shifting 0...31/63 ones into a -1. */
21850 emit_move_insn (low
[0], constm1_rtx
);
21851 if (optimize_insn_for_size_p ())
21852 emit_move_insn (high
[0], low
[0]);
21854 emit_move_insn (high
[0], constm1_rtx
);
21858 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21860 if (!rtx_equal_p (operands
[0], operands
[1]))
21861 emit_move_insn (operands
[0], operands
[1]);
21863 split_double_mode (mode
, operands
, 1, low
, high
);
21864 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21867 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21869 if (TARGET_CMOVE
&& scratch
)
21871 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21872 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21874 ix86_expand_clear (scratch
);
21875 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21879 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21880 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21882 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
21887 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21889 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21890 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21891 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21892 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21894 rtx low
[2], high
[2];
21897 if (CONST_INT_P (operands
[2]))
21899 split_double_mode (mode
, operands
, 2, low
, high
);
21900 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21902 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21904 emit_move_insn (high
[0], high
[1]);
21905 emit_insn (gen_ashr3 (high
[0], high
[0],
21906 GEN_INT (half_width
- 1)));
21907 emit_move_insn (low
[0], high
[0]);
21910 else if (count
>= half_width
)
21912 emit_move_insn (low
[0], high
[1]);
21913 emit_move_insn (high
[0], low
[0]);
21914 emit_insn (gen_ashr3 (high
[0], high
[0],
21915 GEN_INT (half_width
- 1)));
21917 if (count
> half_width
)
21918 emit_insn (gen_ashr3 (low
[0], low
[0],
21919 GEN_INT (count
- half_width
)));
21923 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21925 if (!rtx_equal_p (operands
[0], operands
[1]))
21926 emit_move_insn (operands
[0], operands
[1]);
21928 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21929 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
21934 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21936 if (!rtx_equal_p (operands
[0], operands
[1]))
21937 emit_move_insn (operands
[0], operands
[1]);
21939 split_double_mode (mode
, operands
, 1, low
, high
);
21941 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21942 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21944 if (TARGET_CMOVE
&& scratch
)
21946 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21947 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21949 emit_move_insn (scratch
, high
[0]);
21950 emit_insn (gen_ashr3 (scratch
, scratch
,
21951 GEN_INT (half_width
- 1)));
21952 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21957 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21958 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21960 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21966 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21968 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21969 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21970 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21971 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21973 rtx low
[2], high
[2];
21976 if (CONST_INT_P (operands
[2]))
21978 split_double_mode (mode
, operands
, 2, low
, high
);
21979 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21981 if (count
>= half_width
)
21983 emit_move_insn (low
[0], high
[1]);
21984 ix86_expand_clear (high
[0]);
21986 if (count
> half_width
)
21987 emit_insn (gen_lshr3 (low
[0], low
[0],
21988 GEN_INT (count
- half_width
)));
21992 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21994 if (!rtx_equal_p (operands
[0], operands
[1]))
21995 emit_move_insn (operands
[0], operands
[1]);
21997 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21998 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
22003 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22005 if (!rtx_equal_p (operands
[0], operands
[1]))
22006 emit_move_insn (operands
[0], operands
[1]);
22008 split_double_mode (mode
, operands
, 1, low
, high
);
22010 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
22011 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
22013 if (TARGET_CMOVE
&& scratch
)
22015 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22016 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22018 ix86_expand_clear (scratch
);
22019 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
22024 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
22025 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
22027 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
22032 /* Predict just emitted jump instruction to be taken with probability PROB. */
22034 predict_jump (int prob
)
22036 rtx insn
= get_last_insn ();
22037 gcc_assert (JUMP_P (insn
));
22038 add_int_reg_note (insn
, REG_BR_PROB
, prob
);
22041 /* Helper function for the string operations below. Dest VARIABLE whether
22042 it is aligned to VALUE bytes. If true, jump to the label. */
22044 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
22046 rtx label
= gen_label_rtx ();
22047 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
22048 if (GET_MODE (variable
) == DImode
)
22049 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
22051 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
22052 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
22055 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
22057 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22061 /* Adjust COUNTER by the VALUE. */
22063 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
22065 rtx (*gen_add
)(rtx
, rtx
, rtx
)
22066 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
22068 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
22071 /* Zero extend possibly SImode EXP to Pmode register. */
22073 ix86_zero_extend_to_Pmode (rtx exp
)
22075 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
22078 /* Divide COUNTREG by SCALE. */
22080 scale_counter (rtx countreg
, int scale
)
22086 if (CONST_INT_P (countreg
))
22087 return GEN_INT (INTVAL (countreg
) / scale
);
22088 gcc_assert (REG_P (countreg
));
22090 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
22091 GEN_INT (exact_log2 (scale
)),
22092 NULL
, 1, OPTAB_DIRECT
);
22096 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22097 DImode for constant loop counts. */
22099 static enum machine_mode
22100 counter_mode (rtx count_exp
)
22102 if (GET_MODE (count_exp
) != VOIDmode
)
22103 return GET_MODE (count_exp
);
22104 if (!CONST_INT_P (count_exp
))
22106 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
22111 /* Copy the address to a Pmode register. This is used for x32 to
22112 truncate DImode TLS address to a SImode register. */
22115 ix86_copy_addr_to_reg (rtx addr
)
22117 if (GET_MODE (addr
) == Pmode
)
22118 return copy_addr_to_reg (addr
);
22121 gcc_assert (GET_MODE (addr
) == DImode
&& Pmode
== SImode
);
22122 return gen_rtx_SUBREG (SImode
, copy_to_mode_reg (DImode
, addr
), 0);
22126 /* When SRCPTR is non-NULL, output simple loop to move memory
22127 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
22128 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
22129 equivalent loop to set memory by VALUE (supposed to be in MODE).
22131 The size is rounded down to whole number of chunk size moved at once.
22132 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
22136 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
22137 rtx destptr
, rtx srcptr
, rtx value
,
22138 rtx count
, enum machine_mode mode
, int unroll
,
22141 rtx out_label
, top_label
, iter
, tmp
;
22142 enum machine_mode iter_mode
= counter_mode (count
);
22143 int piece_size_n
= GET_MODE_SIZE (mode
) * unroll
;
22144 rtx piece_size
= GEN_INT (piece_size_n
);
22145 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
22149 top_label
= gen_label_rtx ();
22150 out_label
= gen_label_rtx ();
22151 iter
= gen_reg_rtx (iter_mode
);
22153 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
22154 NULL
, 1, OPTAB_DIRECT
);
22155 /* Those two should combine. */
22156 if (piece_size
== const1_rtx
)
22158 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
22160 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
22162 emit_move_insn (iter
, const0_rtx
);
22164 emit_label (top_label
);
22166 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
22168 /* This assert could be relaxed - in this case we'll need to compute
22169 smallest power of two, containing in PIECE_SIZE_N and pass it to
22171 gcc_assert ((piece_size_n
& (piece_size_n
- 1)) == 0);
22172 destmem
= offset_address (destmem
, tmp
, piece_size_n
);
22173 destmem
= adjust_address (destmem
, mode
, 0);
22177 srcmem
= offset_address (srcmem
, copy_rtx (tmp
), piece_size_n
);
22178 srcmem
= adjust_address (srcmem
, mode
, 0);
22180 /* When unrolling for chips that reorder memory reads and writes,
22181 we can save registers by using single temporary.
22182 Also using 4 temporaries is overkill in 32bit mode. */
22183 if (!TARGET_64BIT
&& 0)
22185 for (i
= 0; i
< unroll
; i
++)
22190 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22192 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22194 emit_move_insn (destmem
, srcmem
);
22200 gcc_assert (unroll
<= 4);
22201 for (i
= 0; i
< unroll
; i
++)
22203 tmpreg
[i
] = gen_reg_rtx (mode
);
22207 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22209 emit_move_insn (tmpreg
[i
], srcmem
);
22211 for (i
= 0; i
< unroll
; i
++)
22216 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22218 emit_move_insn (destmem
, tmpreg
[i
]);
22223 for (i
= 0; i
< unroll
; i
++)
22227 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22228 emit_move_insn (destmem
, value
);
22231 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
22232 true, OPTAB_LIB_WIDEN
);
22234 emit_move_insn (iter
, tmp
);
22236 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
22238 if (expected_size
!= -1)
22240 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
22241 if (expected_size
== 0)
22243 else if (expected_size
> REG_BR_PROB_BASE
)
22244 predict_jump (REG_BR_PROB_BASE
- 1);
22246 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
22249 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
22250 iter
= ix86_zero_extend_to_Pmode (iter
);
22251 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
22252 true, OPTAB_LIB_WIDEN
);
22253 if (tmp
!= destptr
)
22254 emit_move_insn (destptr
, tmp
);
22257 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
22258 true, OPTAB_LIB_WIDEN
);
22260 emit_move_insn (srcptr
, tmp
);
22262 emit_label (out_label
);
22265 /* Output "rep; mov" instruction.
22266 Arguments have same meaning as for previous function */
22268 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
22269 rtx destptr
, rtx srcptr
,
22271 enum machine_mode mode
)
22276 HOST_WIDE_INT rounded_count
;
22278 /* If the size is known, it is shorter to use rep movs. */
22279 if (mode
== QImode
&& CONST_INT_P (count
)
22280 && !(INTVAL (count
) & 3))
22283 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
22284 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
22285 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
22286 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
22287 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
22288 if (mode
!= QImode
)
22290 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22291 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22292 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
22293 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22294 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22295 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
22299 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
22300 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
22302 if (CONST_INT_P (count
))
22304 rounded_count
= (INTVAL (count
)
22305 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22306 destmem
= shallow_copy_rtx (destmem
);
22307 srcmem
= shallow_copy_rtx (srcmem
);
22308 set_mem_size (destmem
, rounded_count
);
22309 set_mem_size (srcmem
, rounded_count
);
22313 if (MEM_SIZE_KNOWN_P (destmem
))
22314 clear_mem_size (destmem
);
22315 if (MEM_SIZE_KNOWN_P (srcmem
))
22316 clear_mem_size (srcmem
);
22318 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
22322 /* Output "rep; stos" instruction.
22323 Arguments have same meaning as for previous function */
22325 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
22326 rtx count
, enum machine_mode mode
,
22331 HOST_WIDE_INT rounded_count
;
22333 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
22334 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
22335 value
= force_reg (mode
, gen_lowpart (mode
, value
));
22336 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
22337 if (mode
!= QImode
)
22339 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22340 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22341 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
22344 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
22345 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
22347 rounded_count
= (INTVAL (count
)
22348 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22349 destmem
= shallow_copy_rtx (destmem
);
22350 set_mem_size (destmem
, rounded_count
);
22352 else if (MEM_SIZE_KNOWN_P (destmem
))
22353 clear_mem_size (destmem
);
22354 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
22357 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22359 SRC is passed by pointer to be updated on return.
22360 Return value is updated DST. */
22362 emit_memmov (rtx destmem
, rtx
*srcmem
, rtx destptr
, rtx srcptr
,
22363 HOST_WIDE_INT size_to_move
)
22365 rtx dst
= destmem
, src
= *srcmem
, adjust
, tempreg
;
22366 enum insn_code code
;
22367 enum machine_mode move_mode
;
22370 /* Find the widest mode in which we could perform moves.
22371 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22372 it until move of such size is supported. */
22373 piece_size
= 1 << floor_log2 (size_to_move
);
22374 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22375 code
= optab_handler (mov_optab
, move_mode
);
22376 while (code
== CODE_FOR_nothing
&& piece_size
> 1)
22379 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22380 code
= optab_handler (mov_optab
, move_mode
);
22383 /* Find the corresponding vector mode with the same size as MOVE_MODE.
22384 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
22385 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
22387 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
22388 move_mode
= mode_for_vector (word_mode
, nunits
);
22389 code
= optab_handler (mov_optab
, move_mode
);
22390 if (code
== CODE_FOR_nothing
)
22392 move_mode
= word_mode
;
22393 piece_size
= GET_MODE_SIZE (move_mode
);
22394 code
= optab_handler (mov_optab
, move_mode
);
22397 gcc_assert (code
!= CODE_FOR_nothing
);
22399 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
22400 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
, 0);
22402 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
22403 gcc_assert (size_to_move
% piece_size
== 0);
22404 adjust
= GEN_INT (piece_size
);
22405 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
22407 /* We move from memory to memory, so we'll need to do it via
22408 a temporary register. */
22409 tempreg
= gen_reg_rtx (move_mode
);
22410 emit_insn (GEN_FCN (code
) (tempreg
, src
));
22411 emit_insn (GEN_FCN (code
) (dst
, tempreg
));
22413 emit_move_insn (destptr
,
22414 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
22415 emit_move_insn (srcptr
,
22416 gen_rtx_PLUS (Pmode
, copy_rtx (srcptr
), adjust
));
22418 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22420 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
,
22424 /* Update DST and SRC rtx. */
22429 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
22431 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
22432 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
22435 if (CONST_INT_P (count
))
22437 HOST_WIDE_INT countval
= INTVAL (count
);
22438 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
22441 /* For now MAX_SIZE should be a power of 2. This assert could be
22442 relaxed, but it'll require a bit more complicated epilogue
22444 gcc_assert ((max_size
& (max_size
- 1)) == 0);
22445 for (i
= max_size
; i
>= 1; i
>>= 1)
22447 if (epilogue_size
& i
)
22448 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22454 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
22455 count
, 1, OPTAB_DIRECT
);
22456 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
22457 count
, QImode
, 1, 4);
22461 /* When there are stringops, we can cheaply increase dest and src pointers.
22462 Otherwise we save code size by maintaining offset (zero is readily
22463 available from preceding rep operation) and using x86 addressing modes.
22465 if (TARGET_SINGLE_STRINGOP
)
22469 rtx label
= ix86_expand_aligntest (count
, 4, true);
22470 src
= change_address (srcmem
, SImode
, srcptr
);
22471 dest
= change_address (destmem
, SImode
, destptr
);
22472 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22473 emit_label (label
);
22474 LABEL_NUSES (label
) = 1;
22478 rtx label
= ix86_expand_aligntest (count
, 2, true);
22479 src
= change_address (srcmem
, HImode
, srcptr
);
22480 dest
= change_address (destmem
, HImode
, destptr
);
22481 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22482 emit_label (label
);
22483 LABEL_NUSES (label
) = 1;
22487 rtx label
= ix86_expand_aligntest (count
, 1, true);
22488 src
= change_address (srcmem
, QImode
, srcptr
);
22489 dest
= change_address (destmem
, QImode
, destptr
);
22490 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22491 emit_label (label
);
22492 LABEL_NUSES (label
) = 1;
22497 rtx offset
= force_reg (Pmode
, const0_rtx
);
22502 rtx label
= ix86_expand_aligntest (count
, 4, true);
22503 src
= change_address (srcmem
, SImode
, srcptr
);
22504 dest
= change_address (destmem
, SImode
, destptr
);
22505 emit_move_insn (dest
, src
);
22506 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
22507 true, OPTAB_LIB_WIDEN
);
22509 emit_move_insn (offset
, tmp
);
22510 emit_label (label
);
22511 LABEL_NUSES (label
) = 1;
22515 rtx label
= ix86_expand_aligntest (count
, 2, true);
22516 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22517 src
= change_address (srcmem
, HImode
, tmp
);
22518 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22519 dest
= change_address (destmem
, HImode
, tmp
);
22520 emit_move_insn (dest
, src
);
22521 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22522 true, OPTAB_LIB_WIDEN
);
22524 emit_move_insn (offset
, tmp
);
22525 emit_label (label
);
22526 LABEL_NUSES (label
) = 1;
22530 rtx label
= ix86_expand_aligntest (count
, 1, true);
22531 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22532 src
= change_address (srcmem
, QImode
, tmp
);
22533 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22534 dest
= change_address (destmem
, QImode
, tmp
);
22535 emit_move_insn (dest
, src
);
22536 emit_label (label
);
22537 LABEL_NUSES (label
) = 1;
22542 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22544 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22545 rtx count
, int max_size
)
22548 expand_simple_binop (counter_mode (count
), AND
, count
,
22549 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22550 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22551 gen_lowpart (QImode
, value
), count
, QImode
,
22555 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22557 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
22561 if (CONST_INT_P (count
))
22563 HOST_WIDE_INT countval
= INTVAL (count
);
22566 if ((countval
& 0x10) && max_size
> 16)
22570 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22571 emit_insn (gen_strset (destptr
, dest
, value
));
22572 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
22573 emit_insn (gen_strset (destptr
, dest
, value
));
22576 gcc_unreachable ();
22579 if ((countval
& 0x08) && max_size
> 8)
22583 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22584 emit_insn (gen_strset (destptr
, dest
, value
));
22588 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22589 emit_insn (gen_strset (destptr
, dest
, value
));
22590 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
22591 emit_insn (gen_strset (destptr
, dest
, value
));
22595 if ((countval
& 0x04) && max_size
> 4)
22597 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22598 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22601 if ((countval
& 0x02) && max_size
> 2)
22603 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
22604 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22607 if ((countval
& 0x01) && max_size
> 1)
22609 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
22610 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22617 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22622 rtx label
= ix86_expand_aligntest (count
, 16, true);
22625 dest
= change_address (destmem
, DImode
, destptr
);
22626 emit_insn (gen_strset (destptr
, dest
, value
));
22627 emit_insn (gen_strset (destptr
, dest
, value
));
22631 dest
= change_address (destmem
, SImode
, destptr
);
22632 emit_insn (gen_strset (destptr
, dest
, value
));
22633 emit_insn (gen_strset (destptr
, dest
, value
));
22634 emit_insn (gen_strset (destptr
, dest
, value
));
22635 emit_insn (gen_strset (destptr
, dest
, value
));
22637 emit_label (label
);
22638 LABEL_NUSES (label
) = 1;
22642 rtx label
= ix86_expand_aligntest (count
, 8, true);
22645 dest
= change_address (destmem
, DImode
, destptr
);
22646 emit_insn (gen_strset (destptr
, dest
, value
));
22650 dest
= change_address (destmem
, SImode
, destptr
);
22651 emit_insn (gen_strset (destptr
, dest
, value
));
22652 emit_insn (gen_strset (destptr
, dest
, value
));
22654 emit_label (label
);
22655 LABEL_NUSES (label
) = 1;
22659 rtx label
= ix86_expand_aligntest (count
, 4, true);
22660 dest
= change_address (destmem
, SImode
, destptr
);
22661 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22662 emit_label (label
);
22663 LABEL_NUSES (label
) = 1;
22667 rtx label
= ix86_expand_aligntest (count
, 2, true);
22668 dest
= change_address (destmem
, HImode
, destptr
);
22669 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22670 emit_label (label
);
22671 LABEL_NUSES (label
) = 1;
22675 rtx label
= ix86_expand_aligntest (count
, 1, true);
22676 dest
= change_address (destmem
, QImode
, destptr
);
22677 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22678 emit_label (label
);
22679 LABEL_NUSES (label
) = 1;
22683 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
22685 Return value is updated DESTMEM. */
22687 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
22688 rtx destptr
, rtx srcptr
, rtx count
,
22689 int align
, int desired_alignment
)
22692 for (i
= 1; i
< desired_alignment
; i
<<= 1)
22696 rtx label
= ix86_expand_aligntest (destptr
, i
, false);
22697 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22698 ix86_adjust_counter (count
, i
);
22699 emit_label (label
);
22700 LABEL_NUSES (label
) = 1;
22701 set_mem_align (destmem
, i
* 2 * BITS_PER_UNIT
);
22707 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
22708 ALIGN_BYTES is how many bytes need to be copied.
22709 The function updates DST and SRC, namely, it sets proper alignment.
22710 DST is returned via return value, SRC is updated via pointer SRCP. */
22712 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
22713 int desired_align
, int align_bytes
)
22716 rtx orig_dst
= dst
;
22717 rtx orig_src
= src
;
22718 int piece_size
= 1;
22719 int copied_bytes
= 0;
22720 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
22721 if (src_align_bytes
>= 0)
22722 src_align_bytes
= desired_align
- src_align_bytes
;
22724 for (piece_size
= 1;
22725 piece_size
<= desired_align
&& copied_bytes
< align_bytes
;
22728 if (align_bytes
& piece_size
)
22730 dst
= emit_memmov (dst
, &src
, destreg
, srcreg
, piece_size
);
22731 copied_bytes
+= piece_size
;
22735 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22736 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22737 if (src_align_bytes
>= 0)
22739 unsigned int src_align
;
22740 for (src_align
= desired_align
; src_align
>= 2; src_align
>>= 1)
22742 if ((src_align_bytes
& (src_align
- 1))
22743 == (align_bytes
& (src_align
- 1)))
22746 if (src_align
> (unsigned int) desired_align
)
22747 src_align
= desired_align
;
22748 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22749 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22751 if (MEM_SIZE_KNOWN_P (orig_dst
))
22752 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22753 if (MEM_SIZE_KNOWN_P (orig_src
))
22754 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
22759 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
22760 DESIRED_ALIGNMENT. */
22762 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
22763 int align
, int desired_alignment
)
22765 if (align
<= 1 && desired_alignment
> 1)
22767 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22768 destmem
= change_address (destmem
, QImode
, destptr
);
22769 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
22770 ix86_adjust_counter (count
, 1);
22771 emit_label (label
);
22772 LABEL_NUSES (label
) = 1;
22774 if (align
<= 2 && desired_alignment
> 2)
22776 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22777 destmem
= change_address (destmem
, HImode
, destptr
);
22778 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
22779 ix86_adjust_counter (count
, 2);
22780 emit_label (label
);
22781 LABEL_NUSES (label
) = 1;
22783 if (align
<= 4 && desired_alignment
> 4)
22785 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22786 destmem
= change_address (destmem
, SImode
, destptr
);
22787 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22788 ix86_adjust_counter (count
, 4);
22789 emit_label (label
);
22790 LABEL_NUSES (label
) = 1;
22792 gcc_assert (desired_alignment
<= 8);
22795 /* Set enough from DST to align DST known to by aligned by ALIGN to
22796 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22798 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22799 int desired_align
, int align_bytes
)
22802 rtx orig_dst
= dst
;
22803 if (align_bytes
& 1)
22805 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22807 emit_insn (gen_strset (destreg
, dst
,
22808 gen_lowpart (QImode
, value
)));
22810 if (align_bytes
& 2)
22812 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22813 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22814 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22816 emit_insn (gen_strset (destreg
, dst
,
22817 gen_lowpart (HImode
, value
)));
22819 if (align_bytes
& 4)
22821 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22822 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22823 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22825 emit_insn (gen_strset (destreg
, dst
,
22826 gen_lowpart (SImode
, value
)));
22828 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22829 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22830 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22831 if (MEM_SIZE_KNOWN_P (orig_dst
))
22832 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22836 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22837 static enum stringop_alg
22838 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22839 int *dynamic_check
, bool *noalign
)
22841 const struct stringop_algs
* algs
;
22842 bool optimize_for_speed
;
22843 /* Algorithms using the rep prefix want at least edi and ecx;
22844 additionally, memset wants eax and memcpy wants esi. Don't
22845 consider such algorithms if the user has appropriated those
22846 registers for their own purposes. */
22847 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22849 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22852 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22853 || (alg != rep_prefix_1_byte \
22854 && alg != rep_prefix_4_byte \
22855 && alg != rep_prefix_8_byte))
22856 const struct processor_costs
*cost
;
22858 /* Even if the string operation call is cold, we still might spend a lot
22859 of time processing large blocks. */
22860 if (optimize_function_for_size_p (cfun
)
22861 || (optimize_insn_for_size_p ()
22862 && expected_size
!= -1 && expected_size
< 256))
22863 optimize_for_speed
= false;
22865 optimize_for_speed
= true;
22867 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
22869 *dynamic_check
= -1;
22871 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
22873 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
22874 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
22875 return ix86_stringop_alg
;
22876 /* rep; movq or rep; movl is the smallest variant. */
22877 else if (!optimize_for_speed
)
22879 if (!count
|| (count
& 3))
22880 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
22882 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
22884 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22886 else if (expected_size
!= -1 && expected_size
< 4)
22887 return loop_1_byte
;
22888 else if (expected_size
!= -1)
22891 enum stringop_alg alg
= libcall
;
22892 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22894 /* We get here if the algorithms that were not libcall-based
22895 were rep-prefix based and we are unable to use rep prefixes
22896 based on global register usage. Break out of the loop and
22897 use the heuristic below. */
22898 if (algs
->size
[i
].max
== 0)
22900 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
22902 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22904 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
22906 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22907 last non-libcall inline algorithm. */
22908 if (TARGET_INLINE_ALL_STRINGOPS
)
22910 /* When the current size is best to be copied by a libcall,
22911 but we are still forced to inline, run the heuristic below
22912 that will pick code for medium sized blocks. */
22913 if (alg
!= libcall
)
22917 else if (ALG_USABLE_P (candidate
))
22919 *noalign
= algs
->size
[i
].noalign
;
22924 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
22926 /* When asked to inline the call anyway, try to pick meaningful choice.
22927 We look for maximal size of block that is faster to copy by hand and
22928 take blocks of at most of that size guessing that average size will
22929 be roughly half of the block.
22931 If this turns out to be bad, we might simply specify the preferred
22932 choice in ix86_costs. */
22933 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22934 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
22937 enum stringop_alg alg
;
22939 bool any_alg_usable_p
= true;
22941 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22943 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22944 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
22946 if (candidate
!= libcall
&& candidate
22947 && ALG_USABLE_P (candidate
))
22948 max
= algs
->size
[i
].max
;
22950 /* If there aren't any usable algorithms, then recursing on
22951 smaller sizes isn't going to find anything. Just return the
22952 simple byte-at-a-time copy loop. */
22953 if (!any_alg_usable_p
)
22955 /* Pick something reasonable. */
22956 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22957 *dynamic_check
= 128;
22958 return loop_1_byte
;
22962 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
, noalign
);
22963 gcc_assert (*dynamic_check
== -1);
22964 gcc_assert (alg
!= libcall
);
22965 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22966 *dynamic_check
= max
;
22969 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22970 #undef ALG_USABLE_P
22973 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22974 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22976 decide_alignment (int align
,
22977 enum stringop_alg alg
,
22979 enum machine_mode move_mode
)
22981 int desired_align
= 0;
22983 gcc_assert (alg
!= no_stringop
);
22985 if (alg
== libcall
)
22987 if (move_mode
== VOIDmode
)
22990 desired_align
= GET_MODE_SIZE (move_mode
);
22991 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22992 copying whole cacheline at once. */
22993 if (TARGET_PENTIUMPRO
22994 && (alg
== rep_prefix_4_byte
|| alg
== rep_prefix_1_byte
))
22999 if (desired_align
< align
)
23000 desired_align
= align
;
23001 if (expected_size
!= -1 && expected_size
< 4)
23002 desired_align
= align
;
23004 return desired_align
;
23007 /* Expand string move (memcpy) operation. Use i386 string operations
23008 when profitable. expand_setmem contains similar code. The code
23009 depends upon architecture, block size and alignment, but always has
23010 the same overall structure:
23012 1) Prologue guard: Conditional that jumps up to epilogues for small
23013 blocks that can be handled by epilogue alone. This is faster
23014 but also needed for correctness, since prologue assume the block
23015 is larger than the desired alignment.
23017 Optional dynamic check for size and libcall for large
23018 blocks is emitted here too, with -minline-stringops-dynamically.
23020 2) Prologue: copy first few bytes in order to get destination
23021 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
23022 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
23023 copied. We emit either a jump tree on power of two sized
23024 blocks, or a byte loop.
23026 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23027 with specified algorithm.
23029 4) Epilogue: code copying tail of the block that is too small to be
23030 handled by main body (or up to size guarded by prologue guard). */
23033 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
23034 rtx expected_align_exp
, rtx expected_size_exp
)
23040 rtx jump_around_label
= NULL
;
23041 HOST_WIDE_INT align
= 1;
23042 unsigned HOST_WIDE_INT count
= 0;
23043 HOST_WIDE_INT expected_size
= -1;
23044 int size_needed
= 0, epilogue_size_needed
;
23045 int desired_align
= 0, align_bytes
= 0;
23046 enum stringop_alg alg
;
23048 bool need_zero_guard
= false;
23050 enum machine_mode move_mode
= VOIDmode
;
23051 int unroll_factor
= 1;
23053 if (CONST_INT_P (align_exp
))
23054 align
= INTVAL (align_exp
);
23055 /* i386 can do misaligned access on reasonably increased cost. */
23056 if (CONST_INT_P (expected_align_exp
)
23057 && INTVAL (expected_align_exp
) > align
)
23058 align
= INTVAL (expected_align_exp
);
23059 /* ALIGN is the minimum of destination and source alignment, but we care here
23060 just about destination alignment. */
23061 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
23062 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
23064 if (CONST_INT_P (count_exp
))
23065 count
= expected_size
= INTVAL (count_exp
);
23066 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23067 expected_size
= INTVAL (expected_size_exp
);
23069 /* Make sure we don't need to care about overflow later on. */
23070 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23073 /* Step 0: Decide on preferred algorithm, desired alignment and
23074 size of chunks to be copied by main loop. */
23075 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
, &noalign
);
23076 if (alg
== libcall
)
23078 gcc_assert (alg
!= no_stringop
);
23081 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
23082 destreg
= ix86_copy_addr_to_reg (XEXP (dst
, 0));
23083 srcreg
= ix86_copy_addr_to_reg (XEXP (src
, 0));
23086 move_mode
= word_mode
;
23092 gcc_unreachable ();
23094 need_zero_guard
= true;
23095 move_mode
= QImode
;
23098 need_zero_guard
= true;
23100 case unrolled_loop
:
23101 need_zero_guard
= true;
23102 unroll_factor
= (TARGET_64BIT
? 4 : 2);
23105 need_zero_guard
= true;
23107 /* Find the widest supported mode. */
23108 move_mode
= word_mode
;
23109 while (optab_handler (mov_optab
, GET_MODE_WIDER_MODE (move_mode
))
23110 != CODE_FOR_nothing
)
23111 move_mode
= GET_MODE_WIDER_MODE (move_mode
);
23113 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23114 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23115 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
23117 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
23118 move_mode
= mode_for_vector (word_mode
, nunits
);
23119 if (optab_handler (mov_optab
, move_mode
) == CODE_FOR_nothing
)
23120 move_mode
= word_mode
;
23122 gcc_assert (optab_handler (mov_optab
, move_mode
) != CODE_FOR_nothing
);
23124 case rep_prefix_8_byte
:
23125 move_mode
= DImode
;
23127 case rep_prefix_4_byte
:
23128 move_mode
= SImode
;
23130 case rep_prefix_1_byte
:
23131 move_mode
= QImode
;
23134 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
23135 epilogue_size_needed
= size_needed
;
23137 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
23138 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23139 align
= desired_align
;
23141 /* Step 1: Prologue guard. */
23143 /* Alignment code needs count to be in register. */
23144 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23146 if (INTVAL (count_exp
) > desired_align
23147 && INTVAL (count_exp
) > size_needed
)
23150 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23151 if (align_bytes
<= 0)
23154 align_bytes
= desired_align
- align_bytes
;
23156 if (align_bytes
== 0)
23157 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
23159 gcc_assert (desired_align
>= 1 && align
>= 1);
23161 /* Ensure that alignment prologue won't copy past end of block. */
23162 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23164 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23165 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
23166 Make sure it is power of 2. */
23167 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
23171 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23173 /* If main algorithm works on QImode, no epilogue is needed.
23174 For small sizes just don't align anything. */
23175 if (size_needed
== 1)
23176 desired_align
= align
;
23183 label
= gen_label_rtx ();
23184 emit_cmp_and_jump_insns (count_exp
,
23185 GEN_INT (epilogue_size_needed
),
23186 LTU
, 0, counter_mode (count_exp
), 1, label
);
23187 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
23188 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23190 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23194 /* Emit code to decide on runtime whether library call or inline should be
23196 if (dynamic_check
!= -1)
23198 if (CONST_INT_P (count_exp
))
23200 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
23202 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
23203 count_exp
= const0_rtx
;
23209 rtx hot_label
= gen_label_rtx ();
23210 jump_around_label
= gen_label_rtx ();
23211 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23212 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
23213 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23214 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
23215 emit_jump (jump_around_label
);
23216 emit_label (hot_label
);
23220 /* Step 2: Alignment prologue. */
23222 if (desired_align
> align
)
23224 if (align_bytes
== 0)
23226 /* Except for the first move in epilogue, we no longer know
23227 constant offset in aliasing info. It don't seems to worth
23228 the pain to maintain it for the first move, so throw away
23230 src
= change_address (src
, BLKmode
, srcreg
);
23231 dst
= change_address (dst
, BLKmode
, destreg
);
23232 dst
= expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
23237 /* If we know how many bytes need to be stored before dst is
23238 sufficiently aligned, maintain aliasing info accurately. */
23239 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
23240 desired_align
, align_bytes
);
23241 count_exp
= plus_constant (counter_mode (count_exp
),
23242 count_exp
, -align_bytes
);
23243 count
-= align_bytes
;
23245 if (need_zero_guard
23246 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23247 || (align_bytes
== 0
23248 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23249 + desired_align
- align
))))
23251 /* It is possible that we copied enough so the main loop will not
23253 gcc_assert (size_needed
> 1);
23254 if (label
== NULL_RTX
)
23255 label
= gen_label_rtx ();
23256 emit_cmp_and_jump_insns (count_exp
,
23257 GEN_INT (size_needed
),
23258 LTU
, 0, counter_mode (count_exp
), 1, label
);
23259 if (expected_size
== -1
23260 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23261 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23263 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23266 if (label
&& size_needed
== 1)
23268 emit_label (label
);
23269 LABEL_NUSES (label
) = 1;
23271 epilogue_size_needed
= 1;
23273 else if (label
== NULL_RTX
)
23274 epilogue_size_needed
= size_needed
;
23276 /* Step 3: Main loop. */
23283 gcc_unreachable ();
23286 case unrolled_loop
:
23288 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
23289 count_exp
, move_mode
, unroll_factor
,
23292 case rep_prefix_8_byte
:
23293 case rep_prefix_4_byte
:
23294 case rep_prefix_1_byte
:
23295 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
23299 /* Adjust properly the offset of src and dest memory for aliasing. */
23300 if (CONST_INT_P (count_exp
))
23302 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
23303 (count
/ size_needed
) * size_needed
);
23304 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23305 (count
/ size_needed
) * size_needed
);
23309 src
= change_address (src
, BLKmode
, srcreg
);
23310 dst
= change_address (dst
, BLKmode
, destreg
);
23313 /* Step 4: Epilogue to copy the remaining bytes. */
23317 /* When the main loop is done, COUNT_EXP might hold original count,
23318 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23319 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23320 bytes. Compensate if needed. */
23322 if (size_needed
< epilogue_size_needed
)
23325 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23326 GEN_INT (size_needed
- 1), count_exp
, 1,
23328 if (tmp
!= count_exp
)
23329 emit_move_insn (count_exp
, tmp
);
23331 emit_label (label
);
23332 LABEL_NUSES (label
) = 1;
23335 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23336 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
23337 epilogue_size_needed
);
23338 if (jump_around_label
)
23339 emit_label (jump_around_label
);
23343 /* Helper function for memcpy. For QImode value 0xXY produce
23344 0xXYXYXYXY of wide specified by MODE. This is essentially
23345 a * 0x10101010, but we can do slightly better than
23346 synth_mult by unwinding the sequence by hand on CPUs with
23349 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
23351 enum machine_mode valmode
= GET_MODE (val
);
23353 int nops
= mode
== DImode
? 3 : 2;
23355 gcc_assert (mode
== SImode
|| mode
== DImode
);
23356 if (val
== const0_rtx
)
23357 return copy_to_mode_reg (mode
, const0_rtx
);
23358 if (CONST_INT_P (val
))
23360 HOST_WIDE_INT v
= INTVAL (val
) & 255;
23364 if (mode
== DImode
)
23365 v
|= (v
<< 16) << 16;
23366 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
23369 if (valmode
== VOIDmode
)
23371 if (valmode
!= QImode
)
23372 val
= gen_lowpart (QImode
, val
);
23373 if (mode
== QImode
)
23375 if (!TARGET_PARTIAL_REG_STALL
)
23377 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
23378 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
23379 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
23380 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
23382 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23383 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
23384 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
23389 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23391 if (!TARGET_PARTIAL_REG_STALL
)
23392 if (mode
== SImode
)
23393 emit_insn (gen_movsi_insv_1 (reg
, reg
));
23395 emit_insn (gen_movdi_insv_1 (reg
, reg
));
23398 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
23399 NULL
, 1, OPTAB_DIRECT
);
23401 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23403 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
23404 NULL
, 1, OPTAB_DIRECT
);
23405 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23406 if (mode
== SImode
)
23408 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
23409 NULL
, 1, OPTAB_DIRECT
);
23410 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23415 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23416 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23417 alignment from ALIGN to DESIRED_ALIGN. */
23419 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
23424 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
23425 promoted_val
= promote_duplicated_reg (DImode
, val
);
23426 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
23427 promoted_val
= promote_duplicated_reg (SImode
, val
);
23428 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
23429 promoted_val
= promote_duplicated_reg (HImode
, val
);
23431 promoted_val
= val
;
23433 return promoted_val
;
23436 /* Expand string clear operation (bzero). Use i386 string operations when
23437 profitable. See expand_movmem comment for explanation of individual
23438 steps performed. */
23440 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
23441 rtx expected_align_exp
, rtx expected_size_exp
)
23446 rtx jump_around_label
= NULL
;
23447 HOST_WIDE_INT align
= 1;
23448 unsigned HOST_WIDE_INT count
= 0;
23449 HOST_WIDE_INT expected_size
= -1;
23450 int size_needed
= 0, epilogue_size_needed
;
23451 int desired_align
= 0, align_bytes
= 0;
23452 enum stringop_alg alg
;
23453 rtx promoted_val
= NULL
;
23454 bool force_loopy_epilogue
= false;
23456 bool need_zero_guard
= false;
23458 enum machine_mode move_mode
= VOIDmode
;
23461 if (CONST_INT_P (align_exp
))
23462 align
= INTVAL (align_exp
);
23463 /* i386 can do misaligned access on reasonably increased cost. */
23464 if (CONST_INT_P (expected_align_exp
)
23465 && INTVAL (expected_align_exp
) > align
)
23466 align
= INTVAL (expected_align_exp
);
23467 if (CONST_INT_P (count_exp
))
23468 count
= expected_size
= INTVAL (count_exp
);
23469 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23470 expected_size
= INTVAL (expected_size_exp
);
23472 /* Make sure we don't need to care about overflow later on. */
23473 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23476 /* Step 0: Decide on preferred algorithm, desired alignment and
23477 size of chunks to be copied by main loop. */
23479 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
, &noalign
);
23480 if (alg
== libcall
)
23482 gcc_assert (alg
!= no_stringop
);
23485 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
23486 destreg
= ix86_copy_addr_to_reg (XEXP (dst
, 0));
23488 move_mode
= word_mode
;
23495 gcc_unreachable ();
23497 need_zero_guard
= true;
23500 case unrolled_loop
:
23501 need_zero_guard
= true;
23504 case rep_prefix_8_byte
:
23505 move_mode
= DImode
;
23507 case rep_prefix_4_byte
:
23508 move_mode
= SImode
;
23510 case rep_prefix_1_byte
:
23511 move_mode
= QImode
;
23514 need_zero_guard
= true;
23515 move_mode
= QImode
;
23518 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
23519 epilogue_size_needed
= size_needed
;
23521 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
23522 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23523 align
= desired_align
;
23525 /* Step 1: Prologue guard. */
23527 /* Alignment code needs count to be in register. */
23528 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23530 if (INTVAL (count_exp
) > desired_align
23531 && INTVAL (count_exp
) > size_needed
)
23534 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23535 if (align_bytes
<= 0)
23538 align_bytes
= desired_align
- align_bytes
;
23540 if (align_bytes
== 0)
23542 enum machine_mode mode
= SImode
;
23543 if (TARGET_64BIT
&& (count
& ~0xffffffff))
23545 count_exp
= force_reg (mode
, count_exp
);
23548 /* Do the cheap promotion to allow better CSE across the
23549 main loop and epilogue (ie one load of the big constant in the
23550 front of all code. */
23551 if (CONST_INT_P (val_exp
))
23552 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23553 desired_align
, align
);
23554 /* Ensure that alignment prologue won't copy past end of block. */
23555 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23557 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23558 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
23559 Make sure it is power of 2. */
23560 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
23562 /* To improve performance of small blocks, we jump around the VAL
23563 promoting mode. This mean that if the promoted VAL is not constant,
23564 we might not use it in the epilogue and have to use byte
23566 if (epilogue_size_needed
> 2 && !promoted_val
)
23567 force_loopy_epilogue
= true;
23570 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23572 /* If main algorithm works on QImode, no epilogue is needed.
23573 For small sizes just don't align anything. */
23574 if (size_needed
== 1)
23575 desired_align
= align
;
23582 label
= gen_label_rtx ();
23583 emit_cmp_and_jump_insns (count_exp
,
23584 GEN_INT (epilogue_size_needed
),
23585 LTU
, 0, counter_mode (count_exp
), 1, label
);
23586 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23587 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23589 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23592 if (dynamic_check
!= -1)
23594 rtx hot_label
= gen_label_rtx ();
23595 jump_around_label
= gen_label_rtx ();
23596 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23597 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23598 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23599 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23600 emit_jump (jump_around_label
);
23601 emit_label (hot_label
);
23604 /* Step 2: Alignment prologue. */
23606 /* Do the expensive promotion once we branched off the small blocks. */
23608 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23609 desired_align
, align
);
23610 gcc_assert (desired_align
>= 1 && align
>= 1);
23612 if (desired_align
> align
)
23614 if (align_bytes
== 0)
23616 /* Except for the first move in epilogue, we no longer know
23617 constant offset in aliasing info. It don't seems to worth
23618 the pain to maintain it for the first move, so throw away
23620 dst
= change_address (dst
, BLKmode
, destreg
);
23621 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
23626 /* If we know how many bytes need to be stored before dst is
23627 sufficiently aligned, maintain aliasing info accurately. */
23628 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
23629 desired_align
, align_bytes
);
23630 count_exp
= plus_constant (counter_mode (count_exp
),
23631 count_exp
, -align_bytes
);
23632 count
-= align_bytes
;
23634 if (need_zero_guard
23635 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23636 || (align_bytes
== 0
23637 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23638 + desired_align
- align
))))
23640 /* It is possible that we copied enough so the main loop will not
23642 gcc_assert (size_needed
> 1);
23643 if (label
== NULL_RTX
)
23644 label
= gen_label_rtx ();
23645 emit_cmp_and_jump_insns (count_exp
,
23646 GEN_INT (size_needed
),
23647 LTU
, 0, counter_mode (count_exp
), 1, label
);
23648 if (expected_size
== -1
23649 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23650 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23652 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23655 if (label
&& size_needed
== 1)
23657 emit_label (label
);
23658 LABEL_NUSES (label
) = 1;
23660 promoted_val
= val_exp
;
23661 epilogue_size_needed
= 1;
23663 else if (label
== NULL_RTX
)
23664 epilogue_size_needed
= size_needed
;
23666 /* Step 3: Main loop. */
23673 gcc_unreachable ();
23677 case unrolled_loop
:
23678 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23679 count_exp
, move_mode
, unroll_factor
,
23682 case rep_prefix_8_byte
:
23683 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23686 case rep_prefix_4_byte
:
23687 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23690 case rep_prefix_1_byte
:
23691 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23695 /* Adjust properly the offset of src and dest memory for aliasing. */
23696 if (CONST_INT_P (count_exp
))
23697 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23698 (count
/ size_needed
) * size_needed
);
23700 dst
= change_address (dst
, BLKmode
, destreg
);
23702 /* Step 4: Epilogue to copy the remaining bytes. */
23706 /* When the main loop is done, COUNT_EXP might hold original count,
23707 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23708 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23709 bytes. Compensate if needed. */
23711 if (size_needed
< epilogue_size_needed
)
23714 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23715 GEN_INT (size_needed
- 1), count_exp
, 1,
23717 if (tmp
!= count_exp
)
23718 emit_move_insn (count_exp
, tmp
);
23720 emit_label (label
);
23721 LABEL_NUSES (label
) = 1;
23724 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23726 if (force_loopy_epilogue
)
23727 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23728 epilogue_size_needed
);
23730 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23731 epilogue_size_needed
);
23733 if (jump_around_label
)
23734 emit_label (jump_around_label
);
23738 /* Expand the appropriate insns for doing strlen if not just doing
23741 out = result, initialized with the start address
23742 align_rtx = alignment of the address.
23743 scratch = scratch register, initialized with the startaddress when
23744 not aligned, otherwise undefined
23746 This is just the body. It needs the initializations mentioned above and
23747 some address computing at the end. These things are done in i386.md. */
23750 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23754 rtx align_2_label
= NULL_RTX
;
23755 rtx align_3_label
= NULL_RTX
;
23756 rtx align_4_label
= gen_label_rtx ();
23757 rtx end_0_label
= gen_label_rtx ();
23759 rtx tmpreg
= gen_reg_rtx (SImode
);
23760 rtx scratch
= gen_reg_rtx (SImode
);
23764 if (CONST_INT_P (align_rtx
))
23765 align
= INTVAL (align_rtx
);
23767 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23769 /* Is there a known alignment and is it less than 4? */
23772 rtx scratch1
= gen_reg_rtx (Pmode
);
23773 emit_move_insn (scratch1
, out
);
23774 /* Is there a known alignment and is it not 2? */
23777 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23778 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23780 /* Leave just the 3 lower bits. */
23781 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23782 NULL_RTX
, 0, OPTAB_WIDEN
);
23784 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23785 Pmode
, 1, align_4_label
);
23786 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23787 Pmode
, 1, align_2_label
);
23788 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23789 Pmode
, 1, align_3_label
);
23793 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23794 check if is aligned to 4 - byte. */
23796 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23797 NULL_RTX
, 0, OPTAB_WIDEN
);
23799 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23800 Pmode
, 1, align_4_label
);
23803 mem
= change_address (src
, QImode
, out
);
23805 /* Now compare the bytes. */
23807 /* Compare the first n unaligned byte on a byte per byte basis. */
23808 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23809 QImode
, 1, end_0_label
);
23811 /* Increment the address. */
23812 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23814 /* Not needed with an alignment of 2 */
23817 emit_label (align_2_label
);
23819 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23822 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23824 emit_label (align_3_label
);
23827 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23830 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23833 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23834 align this loop. It gives only huge programs, but does not help to
23836 emit_label (align_4_label
);
23838 mem
= change_address (src
, SImode
, out
);
23839 emit_move_insn (scratch
, mem
);
23840 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23842 /* This formula yields a nonzero result iff one of the bytes is zero.
23843 This saves three branches inside loop and many cycles. */
23845 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23846 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23847 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23848 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23849 gen_int_mode (0x80808080, SImode
)));
23850 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23855 rtx reg
= gen_reg_rtx (SImode
);
23856 rtx reg2
= gen_reg_rtx (Pmode
);
23857 emit_move_insn (reg
, tmpreg
);
23858 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23860 /* If zero is not in the first two bytes, move two bytes forward. */
23861 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23862 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23863 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23864 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23865 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23868 /* Emit lea manually to avoid clobbering of flags. */
23869 emit_insn (gen_rtx_SET (SImode
, reg2
,
23870 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23872 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23873 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23874 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23875 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23881 rtx end_2_label
= gen_label_rtx ();
23882 /* Is zero in the first two bytes? */
23884 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23885 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23886 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23887 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23888 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23890 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23891 JUMP_LABEL (tmp
) = end_2_label
;
23893 /* Not in the first two. Move two bytes forward. */
23894 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23895 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23897 emit_label (end_2_label
);
23901 /* Avoid branch in fixing the byte. */
23902 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23903 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23904 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23905 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23906 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23908 emit_label (end_0_label
);
23911 /* Expand strlen. */
23914 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23916 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23918 /* The generic case of strlen expander is long. Avoid it's
23919 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23921 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23922 && !TARGET_INLINE_ALL_STRINGOPS
23923 && !optimize_insn_for_size_p ()
23924 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23927 addr
= force_reg (Pmode
, XEXP (src
, 0));
23928 scratch1
= gen_reg_rtx (Pmode
);
23930 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23931 && !optimize_insn_for_size_p ())
23933 /* Well it seems that some optimizer does not combine a call like
23934 foo(strlen(bar), strlen(bar));
23935 when the move and the subtraction is done here. It does calculate
23936 the length just once when these instructions are done inside of
23937 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23938 often used and I use one fewer register for the lifetime of
23939 output_strlen_unroll() this is better. */
23941 emit_move_insn (out
, addr
);
23943 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23945 /* strlensi_unroll_1 returns the address of the zero at the end of
23946 the string, like memchr(), so compute the length by subtracting
23947 the start address. */
23948 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23954 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23955 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23958 scratch2
= gen_reg_rtx (Pmode
);
23959 scratch3
= gen_reg_rtx (Pmode
);
23960 scratch4
= force_reg (Pmode
, constm1_rtx
);
23962 emit_move_insn (scratch3
, addr
);
23963 eoschar
= force_reg (QImode
, eoschar
);
23965 src
= replace_equiv_address_nv (src
, scratch3
);
23967 /* If .md starts supporting :P, this can be done in .md. */
23968 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23969 scratch4
), UNSPEC_SCAS
);
23970 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23971 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23972 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23977 /* For given symbol (function) construct code to compute address of it's PLT
23978 entry in large x86-64 PIC model. */
23980 construct_plt_address (rtx symbol
)
23984 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23985 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
);
23986 gcc_assert (Pmode
== DImode
);
23988 tmp
= gen_reg_rtx (Pmode
);
23989 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23991 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23992 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23997 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23999 rtx pop
, bool sibcall
)
24001 unsigned int const cregs_size
24002 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers
);
24003 rtx vec
[3 + cregs_size
];
24004 rtx use
= NULL
, call
;
24005 unsigned int vec_len
= 0;
24007 if (pop
== const0_rtx
)
24009 gcc_assert (!TARGET_64BIT
|| !pop
);
24011 if (TARGET_MACHO
&& !TARGET_64BIT
)
24014 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
24015 fnaddr
= machopic_indirect_call_target (fnaddr
);
24020 /* Static functions and indirect calls don't need the pic register. */
24023 || (ix86_cmodel
== CM_LARGE_PIC
24024 && DEFAULT_ABI
!= MS_ABI
))
24025 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24026 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
24027 use_reg (&use
, pic_offset_table_rtx
);
24030 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
24032 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
24033 emit_move_insn (al
, callarg2
);
24034 use_reg (&use
, al
);
24037 if (ix86_cmodel
== CM_LARGE_PIC
24040 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24041 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
24042 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
24044 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
24045 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
24047 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
24048 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
24051 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
24053 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
24054 vec
[vec_len
++] = call
;
24058 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
24059 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
24060 vec
[vec_len
++] = pop
;
24063 if (TARGET_64BIT_MS_ABI
24064 && (!callarg2
|| INTVAL (callarg2
) != -2))
24068 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
24069 UNSPEC_MS_TO_SYSV_CALL
);
24071 for (i
= 0; i
< cregs_size
; i
++)
24073 int regno
= x86_64_ms_sysv_extra_clobbered_registers
[i
];
24074 enum machine_mode mode
= SSE_REGNO_P (regno
) ? TImode
: DImode
;
24077 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (mode
, regno
));
24082 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
24083 call
= emit_call_insn (call
);
24085 CALL_INSN_FUNCTION_USAGE (call
) = use
;
24090 /* Output the assembly for a call instruction. */
24093 ix86_output_call_insn (rtx insn
, rtx call_op
)
24095 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
24096 bool seh_nop_p
= false;
24099 if (SIBLING_CALL_P (insn
))
24103 /* SEH epilogue detection requires the indirect branch case
24104 to include REX.W. */
24105 else if (TARGET_SEH
)
24106 xasm
= "rex.W jmp %A0";
24110 output_asm_insn (xasm
, &call_op
);
24114 /* SEH unwinding can require an extra nop to be emitted in several
24115 circumstances. Determine if we have one of those. */
24120 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
24122 /* If we get to another real insn, we don't need the nop. */
24126 /* If we get to the epilogue note, prevent a catch region from
24127 being adjacent to the standard epilogue sequence. If non-
24128 call-exceptions, we'll have done this during epilogue emission. */
24129 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
24130 && !flag_non_call_exceptions
24131 && !can_throw_internal (insn
))
24138 /* If we didn't find a real insn following the call, prevent the
24139 unwinder from looking into the next function. */
24145 xasm
= "call\t%P0";
24147 xasm
= "call\t%A0";
24149 output_asm_insn (xasm
, &call_op
);
24157 /* Clear stack slot assignments remembered from previous functions.
24158 This is called from INIT_EXPANDERS once before RTL is emitted for each
24161 static struct machine_function
*
24162 ix86_init_machine_status (void)
24164 struct machine_function
*f
;
24166 f
= ggc_alloc_cleared_machine_function ();
24167 f
->use_fast_prologue_epilogue_nregs
= -1;
24168 f
->call_abi
= ix86_abi
;
24173 /* Return a MEM corresponding to a stack slot with mode MODE.
24174 Allocate a new slot if necessary.
24176 The RTL for a function can have several slots available: N is
24177 which slot to use. */
24180 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
24182 struct stack_local_entry
*s
;
24184 gcc_assert (n
< MAX_386_STACK_LOCALS
);
24186 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24187 if (s
->mode
== mode
&& s
->n
== n
)
24188 return validize_mem (copy_rtx (s
->rtl
));
24190 s
= ggc_alloc_stack_local_entry ();
24193 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
24195 s
->next
= ix86_stack_locals
;
24196 ix86_stack_locals
= s
;
24197 return validize_mem (s
->rtl
);
24201 ix86_instantiate_decls (void)
24203 struct stack_local_entry
*s
;
24205 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24206 if (s
->rtl
!= NULL_RTX
)
24207 instantiate_decl_rtl (s
->rtl
);
24210 /* Calculate the length of the memory address in the instruction encoding.
24211 Includes addr32 prefix, does not include the one-byte modrm, opcode,
24212 or other prefixes. We never generate addr32 prefix for LEA insn. */
24215 memory_address_length (rtx addr
, bool lea
)
24217 struct ix86_address parts
;
24218 rtx base
, index
, disp
;
24222 if (GET_CODE (addr
) == PRE_DEC
24223 || GET_CODE (addr
) == POST_INC
24224 || GET_CODE (addr
) == PRE_MODIFY
24225 || GET_CODE (addr
) == POST_MODIFY
)
24228 ok
= ix86_decompose_address (addr
, &parts
);
24231 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
24233 /* If this is not LEA instruction, add the length of addr32 prefix. */
24234 if (TARGET_64BIT
&& !lea
24235 && (SImode_address_operand (addr
, VOIDmode
)
24236 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
24237 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
24241 index
= parts
.index
;
24244 if (base
&& GET_CODE (base
) == SUBREG
)
24245 base
= SUBREG_REG (base
);
24246 if (index
&& GET_CODE (index
) == SUBREG
)
24247 index
= SUBREG_REG (index
);
24249 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
24250 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
24253 - esp as the base always wants an index,
24254 - ebp as the base always wants a displacement,
24255 - r12 as the base always wants an index,
24256 - r13 as the base always wants a displacement. */
24258 /* Register Indirect. */
24259 if (base
&& !index
&& !disp
)
24261 /* esp (for its index) and ebp (for its displacement) need
24262 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
24264 if (base
== arg_pointer_rtx
24265 || base
== frame_pointer_rtx
24266 || REGNO (base
) == SP_REG
24267 || REGNO (base
) == BP_REG
24268 || REGNO (base
) == R12_REG
24269 || REGNO (base
) == R13_REG
)
24273 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
24274 is not disp32, but disp32(%rip), so for disp32
24275 SIB byte is needed, unless print_operand_address
24276 optimizes it into disp32(%rip) or (%rip) is implied
24278 else if (disp
&& !base
&& !index
)
24285 if (GET_CODE (disp
) == CONST
)
24286 symbol
= XEXP (disp
, 0);
24287 if (GET_CODE (symbol
) == PLUS
24288 && CONST_INT_P (XEXP (symbol
, 1)))
24289 symbol
= XEXP (symbol
, 0);
24291 if (GET_CODE (symbol
) != LABEL_REF
24292 && (GET_CODE (symbol
) != SYMBOL_REF
24293 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
24294 && (GET_CODE (symbol
) != UNSPEC
24295 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
24296 && XINT (symbol
, 1) != UNSPEC_PCREL
24297 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
24303 /* Find the length of the displacement constant. */
24306 if (base
&& satisfies_constraint_K (disp
))
24311 /* ebp always wants a displacement. Similarly r13. */
24312 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
24315 /* An index requires the two-byte modrm form.... */
24317 /* ...like esp (or r12), which always wants an index. */
24318 || base
== arg_pointer_rtx
24319 || base
== frame_pointer_rtx
24320 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
24327 /* Compute default value for "length_immediate" attribute. When SHORTFORM
24328 is set, expect that insn have 8bit immediate alternative. */
24330 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
24334 extract_insn_cached (insn
);
24335 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24336 if (CONSTANT_P (recog_data
.operand
[i
]))
24338 enum attr_mode mode
= get_attr_mode (insn
);
24341 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
24343 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
24350 ival
= trunc_int_for_mode (ival
, HImode
);
24353 ival
= trunc_int_for_mode (ival
, SImode
);
24358 if (IN_RANGE (ival
, -128, 127))
24375 /* Immediates for DImode instructions are encoded
24376 as 32bit sign extended values. */
24381 fatal_insn ("unknown insn mode", insn
);
24387 /* Compute default value for "length_address" attribute. */
24389 ix86_attr_length_address_default (rtx insn
)
24393 if (get_attr_type (insn
) == TYPE_LEA
)
24395 rtx set
= PATTERN (insn
), addr
;
24397 if (GET_CODE (set
) == PARALLEL
)
24398 set
= XVECEXP (set
, 0, 0);
24400 gcc_assert (GET_CODE (set
) == SET
);
24402 addr
= SET_SRC (set
);
24404 return memory_address_length (addr
, true);
24407 extract_insn_cached (insn
);
24408 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24409 if (MEM_P (recog_data
.operand
[i
]))
24411 constrain_operands_cached (reload_completed
);
24412 if (which_alternative
!= -1)
24414 const char *constraints
= recog_data
.constraints
[i
];
24415 int alt
= which_alternative
;
24417 while (*constraints
== '=' || *constraints
== '+')
24420 while (*constraints
++ != ',')
24422 /* Skip ignored operands. */
24423 if (*constraints
== 'X')
24426 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
24431 /* Compute default value for "length_vex" attribute. It includes
24432 2 or 3 byte VEX prefix and 1 opcode byte. */
24435 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24439 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24440 byte VEX prefix. */
24441 if (!has_0f_opcode
|| has_vex_w
)
24444 /* We can always use 2 byte VEX prefix in 32bit. */
24448 extract_insn_cached (insn
);
24450 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24451 if (REG_P (recog_data
.operand
[i
]))
24453 /* REX.W bit uses 3 byte VEX prefix. */
24454 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24455 && GENERAL_REG_P (recog_data
.operand
[i
]))
24460 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24461 if (MEM_P (recog_data
.operand
[i
])
24462 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24469 /* Return the maximum number of instructions a cpu can issue. */
24472 ix86_issue_rate (void)
24476 case PROCESSOR_PENTIUM
:
24477 case PROCESSOR_ATOM
:
24478 case PROCESSOR_SLM
:
24480 case PROCESSOR_BTVER2
:
24481 case PROCESSOR_PENTIUM4
:
24482 case PROCESSOR_NOCONA
:
24485 case PROCESSOR_PENTIUMPRO
:
24486 case PROCESSOR_ATHLON
:
24488 case PROCESSOR_AMDFAM10
:
24489 case PROCESSOR_GENERIC
:
24490 case PROCESSOR_BDVER1
:
24491 case PROCESSOR_BDVER2
:
24492 case PROCESSOR_BDVER3
:
24493 case PROCESSOR_BTVER1
:
24496 case PROCESSOR_CORE2
:
24497 case PROCESSOR_COREI7
:
24498 case PROCESSOR_HASWELL
:
24506 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24507 by DEP_INSN and nothing set by DEP_INSN. */
24510 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24514 /* Simplify the test for uninteresting insns. */
24515 if (insn_type
!= TYPE_SETCC
24516 && insn_type
!= TYPE_ICMOV
24517 && insn_type
!= TYPE_FCMOV
24518 && insn_type
!= TYPE_IBR
)
24521 if ((set
= single_set (dep_insn
)) != 0)
24523 set
= SET_DEST (set
);
24526 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24527 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24528 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24529 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24531 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24532 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24537 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24540 /* This test is true if the dependent insn reads the flags but
24541 not any other potentially set register. */
24542 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24545 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24551 /* Return true iff USE_INSN has a memory address with operands set by
24555 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24558 extract_insn_cached (use_insn
);
24559 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24560 if (MEM_P (recog_data
.operand
[i
]))
24562 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24563 return modified_in_p (addr
, set_insn
) != 0;
24568 /* Helper function for exact_store_load_dependency.
24569 Return true if addr is found in insn. */
24571 exact_dependency_1 (rtx addr
, rtx insn
)
24573 enum rtx_code code
;
24574 const char *format_ptr
;
24577 code
= GET_CODE (insn
);
24581 if (rtx_equal_p (addr
, insn
))
24596 format_ptr
= GET_RTX_FORMAT (code
);
24597 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++)
24599 switch (*format_ptr
++)
24602 if (exact_dependency_1 (addr
, XEXP (insn
, i
)))
24606 for (j
= 0; j
< XVECLEN (insn
, i
); j
++)
24607 if (exact_dependency_1 (addr
, XVECEXP (insn
, i
, j
)))
24615 /* Return true if there exists exact dependency for store & load, i.e.
24616 the same memory address is used in them. */
24618 exact_store_load_dependency (rtx store
, rtx load
)
24622 set1
= single_set (store
);
24625 if (!MEM_P (SET_DEST (set1
)))
24627 set2
= single_set (load
);
24630 if (exact_dependency_1 (SET_DEST (set1
), SET_SRC (set2
)))
24636 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24638 enum attr_type insn_type
, dep_insn_type
;
24639 enum attr_memory memory
;
24641 int dep_insn_code_number
;
24643 /* Anti and output dependencies have zero cost on all CPUs. */
24644 if (REG_NOTE_KIND (link
) != 0)
24647 dep_insn_code_number
= recog_memoized (dep_insn
);
24649 /* If we can't recognize the insns, we can't really do anything. */
24650 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24653 insn_type
= get_attr_type (insn
);
24654 dep_insn_type
= get_attr_type (dep_insn
);
24658 case PROCESSOR_PENTIUM
:
24659 /* Address Generation Interlock adds a cycle of latency. */
24660 if (insn_type
== TYPE_LEA
)
24662 rtx addr
= PATTERN (insn
);
24664 if (GET_CODE (addr
) == PARALLEL
)
24665 addr
= XVECEXP (addr
, 0, 0);
24667 gcc_assert (GET_CODE (addr
) == SET
);
24669 addr
= SET_SRC (addr
);
24670 if (modified_in_p (addr
, dep_insn
))
24673 else if (ix86_agi_dependent (dep_insn
, insn
))
24676 /* ??? Compares pair with jump/setcc. */
24677 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24680 /* Floating point stores require value to be ready one cycle earlier. */
24681 if (insn_type
== TYPE_FMOV
24682 && get_attr_memory (insn
) == MEMORY_STORE
24683 && !ix86_agi_dependent (dep_insn
, insn
))
24687 case PROCESSOR_PENTIUMPRO
:
24688 memory
= get_attr_memory (insn
);
24690 /* INT->FP conversion is expensive. */
24691 if (get_attr_fp_int_src (dep_insn
))
24694 /* There is one cycle extra latency between an FP op and a store. */
24695 if (insn_type
== TYPE_FMOV
24696 && (set
= single_set (dep_insn
)) != NULL_RTX
24697 && (set2
= single_set (insn
)) != NULL_RTX
24698 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24699 && MEM_P (SET_DEST (set2
)))
24702 /* Show ability of reorder buffer to hide latency of load by executing
24703 in parallel with previous instruction in case
24704 previous instruction is not needed to compute the address. */
24705 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24706 && !ix86_agi_dependent (dep_insn
, insn
))
24708 /* Claim moves to take one cycle, as core can issue one load
24709 at time and the next load can start cycle later. */
24710 if (dep_insn_type
== TYPE_IMOV
24711 || dep_insn_type
== TYPE_FMOV
)
24719 memory
= get_attr_memory (insn
);
24721 /* The esp dependency is resolved before the instruction is really
24723 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24724 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24727 /* INT->FP conversion is expensive. */
24728 if (get_attr_fp_int_src (dep_insn
))
24731 /* Show ability of reorder buffer to hide latency of load by executing
24732 in parallel with previous instruction in case
24733 previous instruction is not needed to compute the address. */
24734 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24735 && !ix86_agi_dependent (dep_insn
, insn
))
24737 /* Claim moves to take one cycle, as core can issue one load
24738 at time and the next load can start cycle later. */
24739 if (dep_insn_type
== TYPE_IMOV
24740 || dep_insn_type
== TYPE_FMOV
)
24749 case PROCESSOR_ATHLON
:
24751 case PROCESSOR_AMDFAM10
:
24752 case PROCESSOR_BDVER1
:
24753 case PROCESSOR_BDVER2
:
24754 case PROCESSOR_BDVER3
:
24755 case PROCESSOR_BTVER1
:
24756 case PROCESSOR_BTVER2
:
24757 case PROCESSOR_GENERIC
:
24758 memory
= get_attr_memory (insn
);
24760 /* Stack engine allows to execute push&pop instructions in parall. */
24761 if (((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24762 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24763 && (ix86_tune
!= PROCESSOR_ATHLON
&& ix86_tune
!= PROCESSOR_K8
))
24766 /* Show ability of reorder buffer to hide latency of load by executing
24767 in parallel with previous instruction in case
24768 previous instruction is not needed to compute the address. */
24769 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24770 && !ix86_agi_dependent (dep_insn
, insn
))
24772 enum attr_unit unit
= get_attr_unit (insn
);
24775 /* Because of the difference between the length of integer and
24776 floating unit pipeline preparation stages, the memory operands
24777 for floating point are cheaper.
24779 ??? For Athlon it the difference is most probably 2. */
24780 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24783 loadcost
= TARGET_ATHLON
? 2 : 0;
24785 if (cost
>= loadcost
)
24792 case PROCESSOR_CORE2
:
24793 case PROCESSOR_COREI7
:
24794 case PROCESSOR_HASWELL
:
24795 memory
= get_attr_memory (insn
);
24797 /* Stack engine allows to execute push&pop instructions in parall. */
24798 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24799 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24802 /* Show ability of reorder buffer to hide latency of load by executing
24803 in parallel with previous instruction in case
24804 previous instruction is not needed to compute the address. */
24805 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24806 && !ix86_agi_dependent (dep_insn
, insn
))
24815 case PROCESSOR_SLM
:
24816 if (!reload_completed
)
24819 /* Increase cost of integer loads. */
24820 memory
= get_attr_memory (dep_insn
);
24821 if (memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24823 enum attr_unit unit
= get_attr_unit (dep_insn
);
24824 if (unit
== UNIT_INTEGER
&& cost
== 1)
24826 if (memory
== MEMORY_LOAD
)
24830 /* Increase cost of ld/st for short int types only
24831 because of store forwarding issue. */
24832 rtx set
= single_set (dep_insn
);
24833 if (set
&& (GET_MODE (SET_DEST (set
)) == QImode
24834 || GET_MODE (SET_DEST (set
)) == HImode
))
24836 /* Increase cost of store/load insn if exact
24837 dependence exists and it is load insn. */
24838 enum attr_memory insn_memory
= get_attr_memory (insn
);
24839 if (insn_memory
== MEMORY_LOAD
24840 && exact_store_load_dependency (dep_insn
, insn
))
24854 /* How many alternative schedules to try. This should be as wide as the
24855 scheduling freedom in the DFA, but no wider. Making this value too
24856 large results extra work for the scheduler. */
24859 ia32_multipass_dfa_lookahead (void)
24863 case PROCESSOR_PENTIUM
:
24866 case PROCESSOR_PENTIUMPRO
:
24870 case PROCESSOR_CORE2
:
24871 case PROCESSOR_COREI7
:
24872 case PROCESSOR_HASWELL
:
24873 case PROCESSOR_ATOM
:
24874 case PROCESSOR_SLM
:
24875 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24876 as many instructions can be executed on a cycle, i.e.,
24877 issue_rate. I wonder why tuning for many CPUs does not do this. */
24878 if (reload_completed
)
24879 return ix86_issue_rate ();
24880 /* Don't use lookahead for pre-reload schedule to save compile time. */
24888 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24889 execution. It is applied if
24890 (1) IMUL instruction is on the top of list;
24891 (2) There exists the only producer of independent IMUL instruction in
24893 Return index of IMUL producer if it was found and -1 otherwise. */
24895 do_reorder_for_imul (rtx
*ready
, int n_ready
)
24897 rtx insn
, set
, insn1
, insn2
;
24898 sd_iterator_def sd_it
;
24903 if (ix86_tune
!= PROCESSOR_ATOM
)
24906 /* Check that IMUL instruction is on the top of ready list. */
24907 insn
= ready
[n_ready
- 1];
24908 set
= single_set (insn
);
24911 if (!(GET_CODE (SET_SRC (set
)) == MULT
24912 && GET_MODE (SET_SRC (set
)) == SImode
))
24915 /* Search for producer of independent IMUL instruction. */
24916 for (i
= n_ready
- 2; i
>= 0; i
--)
24919 if (!NONDEBUG_INSN_P (insn
))
24921 /* Skip IMUL instruction. */
24922 insn2
= PATTERN (insn
);
24923 if (GET_CODE (insn2
) == PARALLEL
)
24924 insn2
= XVECEXP (insn2
, 0, 0);
24925 if (GET_CODE (insn2
) == SET
24926 && GET_CODE (SET_SRC (insn2
)) == MULT
24927 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24930 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24933 con
= DEP_CON (dep
);
24934 if (!NONDEBUG_INSN_P (con
))
24936 insn1
= PATTERN (con
);
24937 if (GET_CODE (insn1
) == PARALLEL
)
24938 insn1
= XVECEXP (insn1
, 0, 0);
24940 if (GET_CODE (insn1
) == SET
24941 && GET_CODE (SET_SRC (insn1
)) == MULT
24942 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24944 sd_iterator_def sd_it1
;
24946 /* Check if there is no other dependee for IMUL. */
24948 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24951 pro
= DEP_PRO (dep1
);
24952 if (!NONDEBUG_INSN_P (pro
))
24967 /* Try to find the best candidate on the top of ready list if two insns
24968 have the same priority - candidate is best if its dependees were
24969 scheduled earlier. Applied for Silvermont only.
24970 Return true if top 2 insns must be interchanged. */
24972 swap_top_of_ready_list (rtx
*ready
, int n_ready
)
24974 rtx top
= ready
[n_ready
- 1];
24975 rtx next
= ready
[n_ready
- 2];
24977 sd_iterator_def sd_it
;
24981 #define INSN_TICK(INSN) (HID (INSN)->tick)
24983 if (ix86_tune
!= PROCESSOR_SLM
)
24986 if (!NONDEBUG_INSN_P (top
))
24988 if (!NONJUMP_INSN_P (top
))
24990 if (!NONDEBUG_INSN_P (next
))
24992 if (!NONJUMP_INSN_P (next
))
24994 set
= single_set (top
);
24997 set
= single_set (next
);
25001 if (INSN_PRIORITY_KNOWN (top
) && INSN_PRIORITY_KNOWN (next
))
25003 if (INSN_PRIORITY (top
) != INSN_PRIORITY (next
))
25005 /* Determine winner more precise. */
25006 FOR_EACH_DEP (top
, SD_LIST_RES_BACK
, sd_it
, dep
)
25009 pro
= DEP_PRO (dep
);
25010 if (!NONDEBUG_INSN_P (pro
))
25012 if (INSN_TICK (pro
) > clock1
)
25013 clock1
= INSN_TICK (pro
);
25015 FOR_EACH_DEP (next
, SD_LIST_RES_BACK
, sd_it
, dep
)
25018 pro
= DEP_PRO (dep
);
25019 if (!NONDEBUG_INSN_P (pro
))
25021 if (INSN_TICK (pro
) > clock2
)
25022 clock2
= INSN_TICK (pro
);
25025 if (clock1
== clock2
)
25027 /* Determine winner - load must win. */
25028 enum attr_memory memory1
, memory2
;
25029 memory1
= get_attr_memory (top
);
25030 memory2
= get_attr_memory (next
);
25031 if (memory2
== MEMORY_LOAD
&& memory1
!= MEMORY_LOAD
)
25034 return (bool) (clock2
< clock1
);
25040 /* Perform possible reodering of ready list for Atom/Silvermont only.
25041 Return issue rate. */
25043 ix86_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
25046 int issue_rate
= -1;
25047 int n_ready
= *pn_ready
;
25052 /* Set up issue rate. */
25053 issue_rate
= ix86_issue_rate ();
25055 /* Do reodering for Atom/SLM only. */
25056 if (ix86_tune
!= PROCESSOR_ATOM
&& ix86_tune
!= PROCESSOR_SLM
)
25059 /* Nothing to do if ready list contains only 1 instruction. */
25063 /* Do reodering for post-reload scheduler only. */
25064 if (!reload_completed
)
25067 if ((index
= do_reorder_for_imul (ready
, n_ready
)) >= 0)
25069 if (sched_verbose
> 1)
25070 fprintf (dump
, ";;\tatom sched_reorder: put %d insn on top\n",
25071 INSN_UID (ready
[index
]));
25073 /* Put IMUL producer (ready[index]) at the top of ready list. */
25074 insn
= ready
[index
];
25075 for (i
= index
; i
< n_ready
- 1; i
++)
25076 ready
[i
] = ready
[i
+ 1];
25077 ready
[n_ready
- 1] = insn
;
25080 if (clock_var
!= 0 && swap_top_of_ready_list (ready
, n_ready
))
25082 if (sched_verbose
> 1)
25083 fprintf (dump
, ";;\tslm sched_reorder: swap %d and %d insns\n",
25084 INSN_UID (ready
[n_ready
- 1]), INSN_UID (ready
[n_ready
- 2]));
25085 /* Swap 2 top elements of ready list. */
25086 insn
= ready
[n_ready
- 1];
25087 ready
[n_ready
- 1] = ready
[n_ready
- 2];
25088 ready
[n_ready
- 2] = insn
;
25094 ix86_class_likely_spilled_p (reg_class_t
);
25096 /* Returns true if lhs of insn is HW function argument register and set up
25097 is_spilled to true if it is likely spilled HW register. */
25099 insn_is_function_arg (rtx insn
, bool* is_spilled
)
25103 if (!NONDEBUG_INSN_P (insn
))
25105 /* Call instructions are not movable, ignore it. */
25108 insn
= PATTERN (insn
);
25109 if (GET_CODE (insn
) == PARALLEL
)
25110 insn
= XVECEXP (insn
, 0, 0);
25111 if (GET_CODE (insn
) != SET
)
25113 dst
= SET_DEST (insn
);
25114 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
25115 && ix86_function_arg_regno_p (REGNO (dst
)))
25117 /* Is it likely spilled HW register? */
25118 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
25119 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
25120 *is_spilled
= true;
25126 /* Add output dependencies for chain of function adjacent arguments if only
25127 there is a move to likely spilled HW register. Return first argument
25128 if at least one dependence was added or NULL otherwise. */
25130 add_parameter_dependencies (rtx call
, rtx head
)
25134 rtx first_arg
= NULL
;
25135 bool is_spilled
= false;
25137 head
= PREV_INSN (head
);
25139 /* Find nearest to call argument passing instruction. */
25142 last
= PREV_INSN (last
);
25145 if (!NONDEBUG_INSN_P (last
))
25147 if (insn_is_function_arg (last
, &is_spilled
))
25155 insn
= PREV_INSN (last
);
25156 if (!INSN_P (insn
))
25160 if (!NONDEBUG_INSN_P (insn
))
25165 if (insn_is_function_arg (insn
, &is_spilled
))
25167 /* Add output depdendence between two function arguments if chain
25168 of output arguments contains likely spilled HW registers. */
25170 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25171 first_arg
= last
= insn
;
25181 /* Add output or anti dependency from insn to first_arg to restrict its code
25184 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
25189 set
= single_set (insn
);
25192 tmp
= SET_DEST (set
);
25195 /* Add output dependency to the first function argument. */
25196 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25199 /* Add anti dependency. */
25200 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
25203 /* Avoid cross block motion of function argument through adding dependency
25204 from the first non-jump instruction in bb. */
25206 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
25208 rtx insn
= BB_END (bb
);
25212 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
25214 rtx set
= single_set (insn
);
25217 avoid_func_arg_motion (arg
, insn
);
25221 if (insn
== BB_HEAD (bb
))
25223 insn
= PREV_INSN (insn
);
25227 /* Hook for pre-reload schedule - avoid motion of function arguments
25228 passed in likely spilled HW registers. */
25230 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
25233 rtx first_arg
= NULL
;
25234 if (reload_completed
)
25236 while (head
!= tail
&& DEBUG_INSN_P (head
))
25237 head
= NEXT_INSN (head
);
25238 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
25239 if (INSN_P (insn
) && CALL_P (insn
))
25241 first_arg
= add_parameter_dependencies (insn
, head
);
25244 /* Add dependee for first argument to predecessors if only
25245 region contains more than one block. */
25246 basic_block bb
= BLOCK_FOR_INSN (insn
);
25247 int rgn
= CONTAINING_RGN (bb
->index
);
25248 int nr_blks
= RGN_NR_BLOCKS (rgn
);
25249 /* Skip trivial regions and region head blocks that can have
25250 predecessors outside of region. */
25251 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
25255 /* Assume that region is SCC, i.e. all immediate predecessors
25256 of non-head block are in the same region. */
25257 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
25259 /* Avoid creating of loop-carried dependencies through
25260 using topological odering in region. */
25261 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
25262 add_dependee_for_func_arg (first_arg
, e
->src
);
25270 else if (first_arg
)
25271 avoid_func_arg_motion (first_arg
, insn
);
25274 /* Hook for pre-reload schedule - set priority of moves from likely spilled
25275 HW registers to maximum, to schedule them at soon as possible. These are
25276 moves from function argument registers at the top of the function entry
25277 and moves from function return value registers after call. */
25279 ix86_adjust_priority (rtx insn
, int priority
)
25283 if (reload_completed
)
25286 if (!NONDEBUG_INSN_P (insn
))
25289 set
= single_set (insn
);
25292 rtx tmp
= SET_SRC (set
);
25294 && HARD_REGISTER_P (tmp
)
25295 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
25296 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
25297 return current_sched_info
->sched_max_insns_priority
;
25303 /* Model decoder of Core 2/i7.
25304 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
25305 track the instruction fetch block boundaries and make sure that long
25306 (9+ bytes) instructions are assigned to D0. */
25308 /* Maximum length of an insn that can be handled by
25309 a secondary decoder unit. '8' for Core 2/i7. */
25310 static int core2i7_secondary_decoder_max_insn_size
;
25312 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
25313 '16' for Core 2/i7. */
25314 static int core2i7_ifetch_block_size
;
25316 /* Maximum number of instructions decoder can handle per cycle.
25317 '6' for Core 2/i7. */
25318 static int core2i7_ifetch_block_max_insns
;
25320 typedef struct ix86_first_cycle_multipass_data_
*
25321 ix86_first_cycle_multipass_data_t
;
25322 typedef const struct ix86_first_cycle_multipass_data_
*
25323 const_ix86_first_cycle_multipass_data_t
;
25325 /* A variable to store target state across calls to max_issue within
25327 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
25328 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
25330 /* Initialize DATA. */
25332 core2i7_first_cycle_multipass_init (void *_data
)
25334 ix86_first_cycle_multipass_data_t data
25335 = (ix86_first_cycle_multipass_data_t
) _data
;
25337 data
->ifetch_block_len
= 0;
25338 data
->ifetch_block_n_insns
= 0;
25339 data
->ready_try_change
= NULL
;
25340 data
->ready_try_change_size
= 0;
25343 /* Advancing the cycle; reset ifetch block counts. */
25345 core2i7_dfa_post_advance_cycle (void)
25347 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
25349 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
25351 data
->ifetch_block_len
= 0;
25352 data
->ifetch_block_n_insns
= 0;
25355 static int min_insn_size (rtx
);
25357 /* Filter out insns from ready_try that the core will not be able to issue
25358 on current cycle due to decoder. */
25360 core2i7_first_cycle_multipass_filter_ready_try
25361 (const_ix86_first_cycle_multipass_data_t data
,
25362 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
25369 if (ready_try
[n_ready
])
25372 insn
= get_ready_element (n_ready
);
25373 insn_size
= min_insn_size (insn
);
25375 if (/* If this is a too long an insn for a secondary decoder ... */
25376 (!first_cycle_insn_p
25377 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
25378 /* ... or it would not fit into the ifetch block ... */
25379 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
25380 /* ... or the decoder is full already ... */
25381 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
25382 /* ... mask the insn out. */
25384 ready_try
[n_ready
] = 1;
25386 if (data
->ready_try_change
)
25387 bitmap_set_bit (data
->ready_try_change
, n_ready
);
25392 /* Prepare for a new round of multipass lookahead scheduling. */
25394 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
25395 bool first_cycle_insn_p
)
25397 ix86_first_cycle_multipass_data_t data
25398 = (ix86_first_cycle_multipass_data_t
) _data
;
25399 const_ix86_first_cycle_multipass_data_t prev_data
25400 = ix86_first_cycle_multipass_data
;
25402 /* Restore the state from the end of the previous round. */
25403 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
25404 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
25406 /* Filter instructions that cannot be issued on current cycle due to
25407 decoder restrictions. */
25408 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
25409 first_cycle_insn_p
);
25412 /* INSN is being issued in current solution. Account for its impact on
25413 the decoder model. */
25415 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
25416 rtx insn
, const void *_prev_data
)
25418 ix86_first_cycle_multipass_data_t data
25419 = (ix86_first_cycle_multipass_data_t
) _data
;
25420 const_ix86_first_cycle_multipass_data_t prev_data
25421 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
25423 int insn_size
= min_insn_size (insn
);
25425 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
25426 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
25427 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
25428 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
25430 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
25431 if (!data
->ready_try_change
)
25433 data
->ready_try_change
= sbitmap_alloc (n_ready
);
25434 data
->ready_try_change_size
= n_ready
;
25436 else if (data
->ready_try_change_size
< n_ready
)
25438 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
25440 data
->ready_try_change_size
= n_ready
;
25442 bitmap_clear (data
->ready_try_change
);
25444 /* Filter out insns from ready_try that the core will not be able to issue
25445 on current cycle due to decoder. */
25446 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
25450 /* Revert the effect on ready_try. */
25452 core2i7_first_cycle_multipass_backtrack (const void *_data
,
25454 int n_ready ATTRIBUTE_UNUSED
)
25456 const_ix86_first_cycle_multipass_data_t data
25457 = (const_ix86_first_cycle_multipass_data_t
) _data
;
25458 unsigned int i
= 0;
25459 sbitmap_iterator sbi
;
25461 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
25462 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
25468 /* Save the result of multipass lookahead scheduling for the next round. */
25470 core2i7_first_cycle_multipass_end (const void *_data
)
25472 const_ix86_first_cycle_multipass_data_t data
25473 = (const_ix86_first_cycle_multipass_data_t
) _data
;
25474 ix86_first_cycle_multipass_data_t next_data
25475 = ix86_first_cycle_multipass_data
;
25479 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
25480 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
25484 /* Deallocate target data. */
25486 core2i7_first_cycle_multipass_fini (void *_data
)
25488 ix86_first_cycle_multipass_data_t data
25489 = (ix86_first_cycle_multipass_data_t
) _data
;
25491 if (data
->ready_try_change
)
25493 sbitmap_free (data
->ready_try_change
);
25494 data
->ready_try_change
= NULL
;
25495 data
->ready_try_change_size
= 0;
25499 /* Prepare for scheduling pass. */
25501 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
25502 int verbose ATTRIBUTE_UNUSED
,
25503 int max_uid ATTRIBUTE_UNUSED
)
25505 /* Install scheduling hooks for current CPU. Some of these hooks are used
25506 in time-critical parts of the scheduler, so we only set them up when
25507 they are actually used. */
25510 case PROCESSOR_CORE2
:
25511 case PROCESSOR_COREI7
:
25512 case PROCESSOR_HASWELL
:
25513 /* Do not perform multipass scheduling for pre-reload schedule
25514 to save compile time. */
25515 if (reload_completed
)
25517 targetm
.sched
.dfa_post_advance_cycle
25518 = core2i7_dfa_post_advance_cycle
;
25519 targetm
.sched
.first_cycle_multipass_init
25520 = core2i7_first_cycle_multipass_init
;
25521 targetm
.sched
.first_cycle_multipass_begin
25522 = core2i7_first_cycle_multipass_begin
;
25523 targetm
.sched
.first_cycle_multipass_issue
25524 = core2i7_first_cycle_multipass_issue
;
25525 targetm
.sched
.first_cycle_multipass_backtrack
25526 = core2i7_first_cycle_multipass_backtrack
;
25527 targetm
.sched
.first_cycle_multipass_end
25528 = core2i7_first_cycle_multipass_end
;
25529 targetm
.sched
.first_cycle_multipass_fini
25530 = core2i7_first_cycle_multipass_fini
;
25532 /* Set decoder parameters. */
25533 core2i7_secondary_decoder_max_insn_size
= 8;
25534 core2i7_ifetch_block_size
= 16;
25535 core2i7_ifetch_block_max_insns
= 6;
25538 /* ... Fall through ... */
25540 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
25541 targetm
.sched
.first_cycle_multipass_init
= NULL
;
25542 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
25543 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
25544 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
25545 targetm
.sched
.first_cycle_multipass_end
= NULL
;
25546 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
25552 /* Compute the alignment given to a constant that is being placed in memory.
25553 EXP is the constant and ALIGN is the alignment that the object would
25555 The value of this function is used instead of that alignment to align
25559 ix86_constant_alignment (tree exp
, int align
)
25561 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
25562 || TREE_CODE (exp
) == INTEGER_CST
)
25564 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
25566 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
25569 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
25570 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
25571 return BITS_PER_WORD
;
25576 /* Compute the alignment for a static variable.
25577 TYPE is the data type, and ALIGN is the alignment that
25578 the object would ordinarily have. The value of this function is used
25579 instead of that alignment to align the object. */
25582 ix86_data_alignment (tree type
, int align
, bool opt
)
25584 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
25587 && AGGREGATE_TYPE_P (type
)
25588 && TYPE_SIZE (type
)
25589 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25590 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
25591 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
25592 && align
< max_align
)
25595 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25596 to 16byte boundary. */
25599 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
25600 && TYPE_SIZE (type
)
25601 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25602 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
25603 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25610 if (TREE_CODE (type
) == ARRAY_TYPE
)
25612 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25614 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25617 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25620 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25622 if ((TYPE_MODE (type
) == XCmode
25623 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25626 else if ((TREE_CODE (type
) == RECORD_TYPE
25627 || TREE_CODE (type
) == UNION_TYPE
25628 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25629 && TYPE_FIELDS (type
))
25631 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25633 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25636 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25637 || TREE_CODE (type
) == INTEGER_TYPE
)
25639 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25641 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25648 /* Compute the alignment for a local variable or a stack slot. EXP is
25649 the data type or decl itself, MODE is the widest mode available and
25650 ALIGN is the alignment that the object would ordinarily have. The
25651 value of this macro is used instead of that alignment to align the
25655 ix86_local_alignment (tree exp
, enum machine_mode mode
,
25656 unsigned int align
)
25660 if (exp
&& DECL_P (exp
))
25662 type
= TREE_TYPE (exp
);
25671 /* Don't do dynamic stack realignment for long long objects with
25672 -mpreferred-stack-boundary=2. */
25675 && ix86_preferred_stack_boundary
< 64
25676 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25677 && (!type
|| !TYPE_USER_ALIGN (type
))
25678 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25681 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25682 register in MODE. We will return the largest alignment of XF
25686 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
25687 align
= GET_MODE_ALIGNMENT (DFmode
);
25691 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25692 to 16byte boundary. Exact wording is:
25694 An array uses the same alignment as its elements, except that a local or
25695 global array variable of length at least 16 bytes or
25696 a C99 variable-length array variable always has alignment of at least 16 bytes.
25698 This was added to allow use of aligned SSE instructions at arrays. This
25699 rule is meant for static storage (where compiler can not do the analysis
25700 by itself). We follow it for automatic variables only when convenient.
25701 We fully control everything in the function compiled and functions from
25702 other unit can not rely on the alignment.
25704 Exclude va_list type. It is the common case of local array where
25705 we can not benefit from the alignment.
25707 TODO: Probably one should optimize for size only when var is not escaping. */
25708 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
25711 if (AGGREGATE_TYPE_P (type
)
25712 && (va_list_type_node
== NULL_TREE
25713 || (TYPE_MAIN_VARIANT (type
)
25714 != TYPE_MAIN_VARIANT (va_list_type_node
)))
25715 && TYPE_SIZE (type
)
25716 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25717 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
25718 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25721 if (TREE_CODE (type
) == ARRAY_TYPE
)
25723 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25725 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25728 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25730 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25732 if ((TYPE_MODE (type
) == XCmode
25733 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25736 else if ((TREE_CODE (type
) == RECORD_TYPE
25737 || TREE_CODE (type
) == UNION_TYPE
25738 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25739 && TYPE_FIELDS (type
))
25741 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25743 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25746 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25747 || TREE_CODE (type
) == INTEGER_TYPE
)
25750 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25752 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25758 /* Compute the minimum required alignment for dynamic stack realignment
25759 purposes for a local variable, parameter or a stack slot. EXP is
25760 the data type or decl itself, MODE is its mode and ALIGN is the
25761 alignment that the object would ordinarily have. */
25764 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
25765 unsigned int align
)
25769 if (exp
&& DECL_P (exp
))
25771 type
= TREE_TYPE (exp
);
25780 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
25783 /* Don't do dynamic stack realignment for long long objects with
25784 -mpreferred-stack-boundary=2. */
25785 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25786 && (!type
|| !TYPE_USER_ALIGN (type
))
25787 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25793 /* Find a location for the static chain incoming to a nested function.
25794 This is a register, unless all free registers are used by arguments. */
25797 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
25801 if (!DECL_STATIC_CHAIN (fndecl
))
25806 /* We always use R10 in 64-bit mode. */
25814 /* By default in 32-bit mode we use ECX to pass the static chain. */
25817 fntype
= TREE_TYPE (fndecl
);
25818 ccvt
= ix86_get_callcvt (fntype
);
25819 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
25821 /* Fastcall functions use ecx/edx for arguments, which leaves
25822 us with EAX for the static chain.
25823 Thiscall functions use ecx for arguments, which also
25824 leaves us with EAX for the static chain. */
25827 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
25829 /* Thiscall functions use ecx for arguments, which leaves
25830 us with EAX and EDX for the static chain.
25831 We are using for abi-compatibility EAX. */
25834 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
25836 /* For regparm 3, we have no free call-clobbered registers in
25837 which to store the static chain. In order to implement this,
25838 we have the trampoline push the static chain to the stack.
25839 However, we can't push a value below the return address when
25840 we call the nested function directly, so we have to use an
25841 alternate entry point. For this we use ESI, and have the
25842 alternate entry point push ESI, so that things appear the
25843 same once we're executing the nested function. */
25846 if (fndecl
== current_function_decl
)
25847 ix86_static_chain_on_stack
= true;
25848 return gen_frame_mem (SImode
,
25849 plus_constant (Pmode
,
25850 arg_pointer_rtx
, -8));
25856 return gen_rtx_REG (Pmode
, regno
);
25859 /* Emit RTL insns to initialize the variable parts of a trampoline.
25860 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25861 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25862 to be passed to the target function. */
25865 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
25871 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
25877 /* Load the function address to r11. Try to load address using
25878 the shorter movl instead of movabs. We may want to support
25879 movq for kernel mode, but kernel does not use trampolines at
25880 the moment. FNADDR is a 32bit address and may not be in
25881 DImode when ptr_mode == SImode. Always use movl in this
25883 if (ptr_mode
== SImode
25884 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
25886 fnaddr
= copy_addr_to_reg (fnaddr
);
25888 mem
= adjust_address (m_tramp
, HImode
, offset
);
25889 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
25891 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
25892 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
25897 mem
= adjust_address (m_tramp
, HImode
, offset
);
25898 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
25900 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
25901 emit_move_insn (mem
, fnaddr
);
25905 /* Load static chain using movabs to r10. Use the shorter movl
25906 instead of movabs when ptr_mode == SImode. */
25907 if (ptr_mode
== SImode
)
25918 mem
= adjust_address (m_tramp
, HImode
, offset
);
25919 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
25921 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
25922 emit_move_insn (mem
, chain_value
);
25925 /* Jump to r11; the last (unused) byte is a nop, only there to
25926 pad the write out to a single 32-bit store. */
25927 mem
= adjust_address (m_tramp
, SImode
, offset
);
25928 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
25935 /* Depending on the static chain location, either load a register
25936 with a constant, or push the constant to the stack. All of the
25937 instructions are the same size. */
25938 chain
= ix86_static_chain (fndecl
, true);
25941 switch (REGNO (chain
))
25944 opcode
= 0xb8; break;
25946 opcode
= 0xb9; break;
25948 gcc_unreachable ();
25954 mem
= adjust_address (m_tramp
, QImode
, offset
);
25955 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
25957 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25958 emit_move_insn (mem
, chain_value
);
25961 mem
= adjust_address (m_tramp
, QImode
, offset
);
25962 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
25964 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25966 /* Compute offset from the end of the jmp to the target function.
25967 In the case in which the trampoline stores the static chain on
25968 the stack, we need to skip the first insn which pushes the
25969 (call-saved) register static chain; this push is 1 byte. */
25971 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
25972 plus_constant (Pmode
, XEXP (m_tramp
, 0),
25973 offset
- (MEM_P (chain
) ? 1 : 0)),
25974 NULL_RTX
, 1, OPTAB_DIRECT
);
25975 emit_move_insn (mem
, disp
);
25978 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
25980 #ifdef HAVE_ENABLE_EXECUTE_STACK
25981 #ifdef CHECK_EXECUTE_STACK_ENABLED
25982 if (CHECK_EXECUTE_STACK_ENABLED
)
25984 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
25985 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
25989 /* The following file contains several enumerations and data structures
25990 built from the definitions in i386-builtin-types.def. */
25992 #include "i386-builtin-types.inc"
25994 /* Table for the ix86 builtin non-function types. */
25995 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
25997 /* Retrieve an element from the above table, building some of
25998 the types lazily. */
26001 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
26003 unsigned int index
;
26006 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
26008 type
= ix86_builtin_type_tab
[(int) tcode
];
26012 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
26013 if (tcode
<= IX86_BT_LAST_VECT
)
26015 enum machine_mode mode
;
26017 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
26018 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
26019 mode
= ix86_builtin_type_vect_mode
[index
];
26021 type
= build_vector_type_for_mode (itype
, mode
);
26027 index
= tcode
- IX86_BT_LAST_VECT
- 1;
26028 if (tcode
<= IX86_BT_LAST_PTR
)
26029 quals
= TYPE_UNQUALIFIED
;
26031 quals
= TYPE_QUAL_CONST
;
26033 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
26034 if (quals
!= TYPE_UNQUALIFIED
)
26035 itype
= build_qualified_type (itype
, quals
);
26037 type
= build_pointer_type (itype
);
26040 ix86_builtin_type_tab
[(int) tcode
] = type
;
26044 /* Table for the ix86 builtin function types. */
26045 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
26047 /* Retrieve an element from the above table, building some of
26048 the types lazily. */
26051 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
26055 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
26057 type
= ix86_builtin_func_type_tab
[(int) tcode
];
26061 if (tcode
<= IX86_BT_LAST_FUNC
)
26063 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
26064 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
26065 tree rtype
, atype
, args
= void_list_node
;
26068 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
26069 for (i
= after
- 1; i
> start
; --i
)
26071 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
26072 args
= tree_cons (NULL
, atype
, args
);
26075 type
= build_function_type (rtype
, args
);
26079 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
26080 enum ix86_builtin_func_type icode
;
26082 icode
= ix86_builtin_func_alias_base
[index
];
26083 type
= ix86_get_builtin_func_type (icode
);
26086 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
26091 /* Codes for all the SSE/MMX builtins. */
26094 IX86_BUILTIN_ADDPS
,
26095 IX86_BUILTIN_ADDSS
,
26096 IX86_BUILTIN_DIVPS
,
26097 IX86_BUILTIN_DIVSS
,
26098 IX86_BUILTIN_MULPS
,
26099 IX86_BUILTIN_MULSS
,
26100 IX86_BUILTIN_SUBPS
,
26101 IX86_BUILTIN_SUBSS
,
26103 IX86_BUILTIN_CMPEQPS
,
26104 IX86_BUILTIN_CMPLTPS
,
26105 IX86_BUILTIN_CMPLEPS
,
26106 IX86_BUILTIN_CMPGTPS
,
26107 IX86_BUILTIN_CMPGEPS
,
26108 IX86_BUILTIN_CMPNEQPS
,
26109 IX86_BUILTIN_CMPNLTPS
,
26110 IX86_BUILTIN_CMPNLEPS
,
26111 IX86_BUILTIN_CMPNGTPS
,
26112 IX86_BUILTIN_CMPNGEPS
,
26113 IX86_BUILTIN_CMPORDPS
,
26114 IX86_BUILTIN_CMPUNORDPS
,
26115 IX86_BUILTIN_CMPEQSS
,
26116 IX86_BUILTIN_CMPLTSS
,
26117 IX86_BUILTIN_CMPLESS
,
26118 IX86_BUILTIN_CMPNEQSS
,
26119 IX86_BUILTIN_CMPNLTSS
,
26120 IX86_BUILTIN_CMPNLESS
,
26121 IX86_BUILTIN_CMPORDSS
,
26122 IX86_BUILTIN_CMPUNORDSS
,
26124 IX86_BUILTIN_COMIEQSS
,
26125 IX86_BUILTIN_COMILTSS
,
26126 IX86_BUILTIN_COMILESS
,
26127 IX86_BUILTIN_COMIGTSS
,
26128 IX86_BUILTIN_COMIGESS
,
26129 IX86_BUILTIN_COMINEQSS
,
26130 IX86_BUILTIN_UCOMIEQSS
,
26131 IX86_BUILTIN_UCOMILTSS
,
26132 IX86_BUILTIN_UCOMILESS
,
26133 IX86_BUILTIN_UCOMIGTSS
,
26134 IX86_BUILTIN_UCOMIGESS
,
26135 IX86_BUILTIN_UCOMINEQSS
,
26137 IX86_BUILTIN_CVTPI2PS
,
26138 IX86_BUILTIN_CVTPS2PI
,
26139 IX86_BUILTIN_CVTSI2SS
,
26140 IX86_BUILTIN_CVTSI642SS
,
26141 IX86_BUILTIN_CVTSS2SI
,
26142 IX86_BUILTIN_CVTSS2SI64
,
26143 IX86_BUILTIN_CVTTPS2PI
,
26144 IX86_BUILTIN_CVTTSS2SI
,
26145 IX86_BUILTIN_CVTTSS2SI64
,
26147 IX86_BUILTIN_MAXPS
,
26148 IX86_BUILTIN_MAXSS
,
26149 IX86_BUILTIN_MINPS
,
26150 IX86_BUILTIN_MINSS
,
26152 IX86_BUILTIN_LOADUPS
,
26153 IX86_BUILTIN_STOREUPS
,
26154 IX86_BUILTIN_MOVSS
,
26156 IX86_BUILTIN_MOVHLPS
,
26157 IX86_BUILTIN_MOVLHPS
,
26158 IX86_BUILTIN_LOADHPS
,
26159 IX86_BUILTIN_LOADLPS
,
26160 IX86_BUILTIN_STOREHPS
,
26161 IX86_BUILTIN_STORELPS
,
26163 IX86_BUILTIN_MASKMOVQ
,
26164 IX86_BUILTIN_MOVMSKPS
,
26165 IX86_BUILTIN_PMOVMSKB
,
26167 IX86_BUILTIN_MOVNTPS
,
26168 IX86_BUILTIN_MOVNTQ
,
26170 IX86_BUILTIN_LOADDQU
,
26171 IX86_BUILTIN_STOREDQU
,
26173 IX86_BUILTIN_PACKSSWB
,
26174 IX86_BUILTIN_PACKSSDW
,
26175 IX86_BUILTIN_PACKUSWB
,
26177 IX86_BUILTIN_PADDB
,
26178 IX86_BUILTIN_PADDW
,
26179 IX86_BUILTIN_PADDD
,
26180 IX86_BUILTIN_PADDQ
,
26181 IX86_BUILTIN_PADDSB
,
26182 IX86_BUILTIN_PADDSW
,
26183 IX86_BUILTIN_PADDUSB
,
26184 IX86_BUILTIN_PADDUSW
,
26185 IX86_BUILTIN_PSUBB
,
26186 IX86_BUILTIN_PSUBW
,
26187 IX86_BUILTIN_PSUBD
,
26188 IX86_BUILTIN_PSUBQ
,
26189 IX86_BUILTIN_PSUBSB
,
26190 IX86_BUILTIN_PSUBSW
,
26191 IX86_BUILTIN_PSUBUSB
,
26192 IX86_BUILTIN_PSUBUSW
,
26195 IX86_BUILTIN_PANDN
,
26199 IX86_BUILTIN_PAVGB
,
26200 IX86_BUILTIN_PAVGW
,
26202 IX86_BUILTIN_PCMPEQB
,
26203 IX86_BUILTIN_PCMPEQW
,
26204 IX86_BUILTIN_PCMPEQD
,
26205 IX86_BUILTIN_PCMPGTB
,
26206 IX86_BUILTIN_PCMPGTW
,
26207 IX86_BUILTIN_PCMPGTD
,
26209 IX86_BUILTIN_PMADDWD
,
26211 IX86_BUILTIN_PMAXSW
,
26212 IX86_BUILTIN_PMAXUB
,
26213 IX86_BUILTIN_PMINSW
,
26214 IX86_BUILTIN_PMINUB
,
26216 IX86_BUILTIN_PMULHUW
,
26217 IX86_BUILTIN_PMULHW
,
26218 IX86_BUILTIN_PMULLW
,
26220 IX86_BUILTIN_PSADBW
,
26221 IX86_BUILTIN_PSHUFW
,
26223 IX86_BUILTIN_PSLLW
,
26224 IX86_BUILTIN_PSLLD
,
26225 IX86_BUILTIN_PSLLQ
,
26226 IX86_BUILTIN_PSRAW
,
26227 IX86_BUILTIN_PSRAD
,
26228 IX86_BUILTIN_PSRLW
,
26229 IX86_BUILTIN_PSRLD
,
26230 IX86_BUILTIN_PSRLQ
,
26231 IX86_BUILTIN_PSLLWI
,
26232 IX86_BUILTIN_PSLLDI
,
26233 IX86_BUILTIN_PSLLQI
,
26234 IX86_BUILTIN_PSRAWI
,
26235 IX86_BUILTIN_PSRADI
,
26236 IX86_BUILTIN_PSRLWI
,
26237 IX86_BUILTIN_PSRLDI
,
26238 IX86_BUILTIN_PSRLQI
,
26240 IX86_BUILTIN_PUNPCKHBW
,
26241 IX86_BUILTIN_PUNPCKHWD
,
26242 IX86_BUILTIN_PUNPCKHDQ
,
26243 IX86_BUILTIN_PUNPCKLBW
,
26244 IX86_BUILTIN_PUNPCKLWD
,
26245 IX86_BUILTIN_PUNPCKLDQ
,
26247 IX86_BUILTIN_SHUFPS
,
26249 IX86_BUILTIN_RCPPS
,
26250 IX86_BUILTIN_RCPSS
,
26251 IX86_BUILTIN_RSQRTPS
,
26252 IX86_BUILTIN_RSQRTPS_NR
,
26253 IX86_BUILTIN_RSQRTSS
,
26254 IX86_BUILTIN_RSQRTF
,
26255 IX86_BUILTIN_SQRTPS
,
26256 IX86_BUILTIN_SQRTPS_NR
,
26257 IX86_BUILTIN_SQRTSS
,
26259 IX86_BUILTIN_UNPCKHPS
,
26260 IX86_BUILTIN_UNPCKLPS
,
26262 IX86_BUILTIN_ANDPS
,
26263 IX86_BUILTIN_ANDNPS
,
26265 IX86_BUILTIN_XORPS
,
26268 IX86_BUILTIN_LDMXCSR
,
26269 IX86_BUILTIN_STMXCSR
,
26270 IX86_BUILTIN_SFENCE
,
26272 IX86_BUILTIN_FXSAVE
,
26273 IX86_BUILTIN_FXRSTOR
,
26274 IX86_BUILTIN_FXSAVE64
,
26275 IX86_BUILTIN_FXRSTOR64
,
26277 IX86_BUILTIN_XSAVE
,
26278 IX86_BUILTIN_XRSTOR
,
26279 IX86_BUILTIN_XSAVE64
,
26280 IX86_BUILTIN_XRSTOR64
,
26282 IX86_BUILTIN_XSAVEOPT
,
26283 IX86_BUILTIN_XSAVEOPT64
,
26285 /* 3DNow! Original */
26286 IX86_BUILTIN_FEMMS
,
26287 IX86_BUILTIN_PAVGUSB
,
26288 IX86_BUILTIN_PF2ID
,
26289 IX86_BUILTIN_PFACC
,
26290 IX86_BUILTIN_PFADD
,
26291 IX86_BUILTIN_PFCMPEQ
,
26292 IX86_BUILTIN_PFCMPGE
,
26293 IX86_BUILTIN_PFCMPGT
,
26294 IX86_BUILTIN_PFMAX
,
26295 IX86_BUILTIN_PFMIN
,
26296 IX86_BUILTIN_PFMUL
,
26297 IX86_BUILTIN_PFRCP
,
26298 IX86_BUILTIN_PFRCPIT1
,
26299 IX86_BUILTIN_PFRCPIT2
,
26300 IX86_BUILTIN_PFRSQIT1
,
26301 IX86_BUILTIN_PFRSQRT
,
26302 IX86_BUILTIN_PFSUB
,
26303 IX86_BUILTIN_PFSUBR
,
26304 IX86_BUILTIN_PI2FD
,
26305 IX86_BUILTIN_PMULHRW
,
26307 /* 3DNow! Athlon Extensions */
26308 IX86_BUILTIN_PF2IW
,
26309 IX86_BUILTIN_PFNACC
,
26310 IX86_BUILTIN_PFPNACC
,
26311 IX86_BUILTIN_PI2FW
,
26312 IX86_BUILTIN_PSWAPDSI
,
26313 IX86_BUILTIN_PSWAPDSF
,
26316 IX86_BUILTIN_ADDPD
,
26317 IX86_BUILTIN_ADDSD
,
26318 IX86_BUILTIN_DIVPD
,
26319 IX86_BUILTIN_DIVSD
,
26320 IX86_BUILTIN_MULPD
,
26321 IX86_BUILTIN_MULSD
,
26322 IX86_BUILTIN_SUBPD
,
26323 IX86_BUILTIN_SUBSD
,
26325 IX86_BUILTIN_CMPEQPD
,
26326 IX86_BUILTIN_CMPLTPD
,
26327 IX86_BUILTIN_CMPLEPD
,
26328 IX86_BUILTIN_CMPGTPD
,
26329 IX86_BUILTIN_CMPGEPD
,
26330 IX86_BUILTIN_CMPNEQPD
,
26331 IX86_BUILTIN_CMPNLTPD
,
26332 IX86_BUILTIN_CMPNLEPD
,
26333 IX86_BUILTIN_CMPNGTPD
,
26334 IX86_BUILTIN_CMPNGEPD
,
26335 IX86_BUILTIN_CMPORDPD
,
26336 IX86_BUILTIN_CMPUNORDPD
,
26337 IX86_BUILTIN_CMPEQSD
,
26338 IX86_BUILTIN_CMPLTSD
,
26339 IX86_BUILTIN_CMPLESD
,
26340 IX86_BUILTIN_CMPNEQSD
,
26341 IX86_BUILTIN_CMPNLTSD
,
26342 IX86_BUILTIN_CMPNLESD
,
26343 IX86_BUILTIN_CMPORDSD
,
26344 IX86_BUILTIN_CMPUNORDSD
,
26346 IX86_BUILTIN_COMIEQSD
,
26347 IX86_BUILTIN_COMILTSD
,
26348 IX86_BUILTIN_COMILESD
,
26349 IX86_BUILTIN_COMIGTSD
,
26350 IX86_BUILTIN_COMIGESD
,
26351 IX86_BUILTIN_COMINEQSD
,
26352 IX86_BUILTIN_UCOMIEQSD
,
26353 IX86_BUILTIN_UCOMILTSD
,
26354 IX86_BUILTIN_UCOMILESD
,
26355 IX86_BUILTIN_UCOMIGTSD
,
26356 IX86_BUILTIN_UCOMIGESD
,
26357 IX86_BUILTIN_UCOMINEQSD
,
26359 IX86_BUILTIN_MAXPD
,
26360 IX86_BUILTIN_MAXSD
,
26361 IX86_BUILTIN_MINPD
,
26362 IX86_BUILTIN_MINSD
,
26364 IX86_BUILTIN_ANDPD
,
26365 IX86_BUILTIN_ANDNPD
,
26367 IX86_BUILTIN_XORPD
,
26369 IX86_BUILTIN_SQRTPD
,
26370 IX86_BUILTIN_SQRTSD
,
26372 IX86_BUILTIN_UNPCKHPD
,
26373 IX86_BUILTIN_UNPCKLPD
,
26375 IX86_BUILTIN_SHUFPD
,
26377 IX86_BUILTIN_LOADUPD
,
26378 IX86_BUILTIN_STOREUPD
,
26379 IX86_BUILTIN_MOVSD
,
26381 IX86_BUILTIN_LOADHPD
,
26382 IX86_BUILTIN_LOADLPD
,
26384 IX86_BUILTIN_CVTDQ2PD
,
26385 IX86_BUILTIN_CVTDQ2PS
,
26387 IX86_BUILTIN_CVTPD2DQ
,
26388 IX86_BUILTIN_CVTPD2PI
,
26389 IX86_BUILTIN_CVTPD2PS
,
26390 IX86_BUILTIN_CVTTPD2DQ
,
26391 IX86_BUILTIN_CVTTPD2PI
,
26393 IX86_BUILTIN_CVTPI2PD
,
26394 IX86_BUILTIN_CVTSI2SD
,
26395 IX86_BUILTIN_CVTSI642SD
,
26397 IX86_BUILTIN_CVTSD2SI
,
26398 IX86_BUILTIN_CVTSD2SI64
,
26399 IX86_BUILTIN_CVTSD2SS
,
26400 IX86_BUILTIN_CVTSS2SD
,
26401 IX86_BUILTIN_CVTTSD2SI
,
26402 IX86_BUILTIN_CVTTSD2SI64
,
26404 IX86_BUILTIN_CVTPS2DQ
,
26405 IX86_BUILTIN_CVTPS2PD
,
26406 IX86_BUILTIN_CVTTPS2DQ
,
26408 IX86_BUILTIN_MOVNTI
,
26409 IX86_BUILTIN_MOVNTI64
,
26410 IX86_BUILTIN_MOVNTPD
,
26411 IX86_BUILTIN_MOVNTDQ
,
26413 IX86_BUILTIN_MOVQ128
,
26416 IX86_BUILTIN_MASKMOVDQU
,
26417 IX86_BUILTIN_MOVMSKPD
,
26418 IX86_BUILTIN_PMOVMSKB128
,
26420 IX86_BUILTIN_PACKSSWB128
,
26421 IX86_BUILTIN_PACKSSDW128
,
26422 IX86_BUILTIN_PACKUSWB128
,
26424 IX86_BUILTIN_PADDB128
,
26425 IX86_BUILTIN_PADDW128
,
26426 IX86_BUILTIN_PADDD128
,
26427 IX86_BUILTIN_PADDQ128
,
26428 IX86_BUILTIN_PADDSB128
,
26429 IX86_BUILTIN_PADDSW128
,
26430 IX86_BUILTIN_PADDUSB128
,
26431 IX86_BUILTIN_PADDUSW128
,
26432 IX86_BUILTIN_PSUBB128
,
26433 IX86_BUILTIN_PSUBW128
,
26434 IX86_BUILTIN_PSUBD128
,
26435 IX86_BUILTIN_PSUBQ128
,
26436 IX86_BUILTIN_PSUBSB128
,
26437 IX86_BUILTIN_PSUBSW128
,
26438 IX86_BUILTIN_PSUBUSB128
,
26439 IX86_BUILTIN_PSUBUSW128
,
26441 IX86_BUILTIN_PAND128
,
26442 IX86_BUILTIN_PANDN128
,
26443 IX86_BUILTIN_POR128
,
26444 IX86_BUILTIN_PXOR128
,
26446 IX86_BUILTIN_PAVGB128
,
26447 IX86_BUILTIN_PAVGW128
,
26449 IX86_BUILTIN_PCMPEQB128
,
26450 IX86_BUILTIN_PCMPEQW128
,
26451 IX86_BUILTIN_PCMPEQD128
,
26452 IX86_BUILTIN_PCMPGTB128
,
26453 IX86_BUILTIN_PCMPGTW128
,
26454 IX86_BUILTIN_PCMPGTD128
,
26456 IX86_BUILTIN_PMADDWD128
,
26458 IX86_BUILTIN_PMAXSW128
,
26459 IX86_BUILTIN_PMAXUB128
,
26460 IX86_BUILTIN_PMINSW128
,
26461 IX86_BUILTIN_PMINUB128
,
26463 IX86_BUILTIN_PMULUDQ
,
26464 IX86_BUILTIN_PMULUDQ128
,
26465 IX86_BUILTIN_PMULHUW128
,
26466 IX86_BUILTIN_PMULHW128
,
26467 IX86_BUILTIN_PMULLW128
,
26469 IX86_BUILTIN_PSADBW128
,
26470 IX86_BUILTIN_PSHUFHW
,
26471 IX86_BUILTIN_PSHUFLW
,
26472 IX86_BUILTIN_PSHUFD
,
26474 IX86_BUILTIN_PSLLDQI128
,
26475 IX86_BUILTIN_PSLLWI128
,
26476 IX86_BUILTIN_PSLLDI128
,
26477 IX86_BUILTIN_PSLLQI128
,
26478 IX86_BUILTIN_PSRAWI128
,
26479 IX86_BUILTIN_PSRADI128
,
26480 IX86_BUILTIN_PSRLDQI128
,
26481 IX86_BUILTIN_PSRLWI128
,
26482 IX86_BUILTIN_PSRLDI128
,
26483 IX86_BUILTIN_PSRLQI128
,
26485 IX86_BUILTIN_PSLLDQ128
,
26486 IX86_BUILTIN_PSLLW128
,
26487 IX86_BUILTIN_PSLLD128
,
26488 IX86_BUILTIN_PSLLQ128
,
26489 IX86_BUILTIN_PSRAW128
,
26490 IX86_BUILTIN_PSRAD128
,
26491 IX86_BUILTIN_PSRLW128
,
26492 IX86_BUILTIN_PSRLD128
,
26493 IX86_BUILTIN_PSRLQ128
,
26495 IX86_BUILTIN_PUNPCKHBW128
,
26496 IX86_BUILTIN_PUNPCKHWD128
,
26497 IX86_BUILTIN_PUNPCKHDQ128
,
26498 IX86_BUILTIN_PUNPCKHQDQ128
,
26499 IX86_BUILTIN_PUNPCKLBW128
,
26500 IX86_BUILTIN_PUNPCKLWD128
,
26501 IX86_BUILTIN_PUNPCKLDQ128
,
26502 IX86_BUILTIN_PUNPCKLQDQ128
,
26504 IX86_BUILTIN_CLFLUSH
,
26505 IX86_BUILTIN_MFENCE
,
26506 IX86_BUILTIN_LFENCE
,
26507 IX86_BUILTIN_PAUSE
,
26509 IX86_BUILTIN_BSRSI
,
26510 IX86_BUILTIN_BSRDI
,
26511 IX86_BUILTIN_RDPMC
,
26512 IX86_BUILTIN_RDTSC
,
26513 IX86_BUILTIN_RDTSCP
,
26514 IX86_BUILTIN_ROLQI
,
26515 IX86_BUILTIN_ROLHI
,
26516 IX86_BUILTIN_RORQI
,
26517 IX86_BUILTIN_RORHI
,
26520 IX86_BUILTIN_ADDSUBPS
,
26521 IX86_BUILTIN_HADDPS
,
26522 IX86_BUILTIN_HSUBPS
,
26523 IX86_BUILTIN_MOVSHDUP
,
26524 IX86_BUILTIN_MOVSLDUP
,
26525 IX86_BUILTIN_ADDSUBPD
,
26526 IX86_BUILTIN_HADDPD
,
26527 IX86_BUILTIN_HSUBPD
,
26528 IX86_BUILTIN_LDDQU
,
26530 IX86_BUILTIN_MONITOR
,
26531 IX86_BUILTIN_MWAIT
,
26534 IX86_BUILTIN_PHADDW
,
26535 IX86_BUILTIN_PHADDD
,
26536 IX86_BUILTIN_PHADDSW
,
26537 IX86_BUILTIN_PHSUBW
,
26538 IX86_BUILTIN_PHSUBD
,
26539 IX86_BUILTIN_PHSUBSW
,
26540 IX86_BUILTIN_PMADDUBSW
,
26541 IX86_BUILTIN_PMULHRSW
,
26542 IX86_BUILTIN_PSHUFB
,
26543 IX86_BUILTIN_PSIGNB
,
26544 IX86_BUILTIN_PSIGNW
,
26545 IX86_BUILTIN_PSIGND
,
26546 IX86_BUILTIN_PALIGNR
,
26547 IX86_BUILTIN_PABSB
,
26548 IX86_BUILTIN_PABSW
,
26549 IX86_BUILTIN_PABSD
,
26551 IX86_BUILTIN_PHADDW128
,
26552 IX86_BUILTIN_PHADDD128
,
26553 IX86_BUILTIN_PHADDSW128
,
26554 IX86_BUILTIN_PHSUBW128
,
26555 IX86_BUILTIN_PHSUBD128
,
26556 IX86_BUILTIN_PHSUBSW128
,
26557 IX86_BUILTIN_PMADDUBSW128
,
26558 IX86_BUILTIN_PMULHRSW128
,
26559 IX86_BUILTIN_PSHUFB128
,
26560 IX86_BUILTIN_PSIGNB128
,
26561 IX86_BUILTIN_PSIGNW128
,
26562 IX86_BUILTIN_PSIGND128
,
26563 IX86_BUILTIN_PALIGNR128
,
26564 IX86_BUILTIN_PABSB128
,
26565 IX86_BUILTIN_PABSW128
,
26566 IX86_BUILTIN_PABSD128
,
26568 /* AMDFAM10 - SSE4A New Instructions. */
26569 IX86_BUILTIN_MOVNTSD
,
26570 IX86_BUILTIN_MOVNTSS
,
26571 IX86_BUILTIN_EXTRQI
,
26572 IX86_BUILTIN_EXTRQ
,
26573 IX86_BUILTIN_INSERTQI
,
26574 IX86_BUILTIN_INSERTQ
,
26577 IX86_BUILTIN_BLENDPD
,
26578 IX86_BUILTIN_BLENDPS
,
26579 IX86_BUILTIN_BLENDVPD
,
26580 IX86_BUILTIN_BLENDVPS
,
26581 IX86_BUILTIN_PBLENDVB128
,
26582 IX86_BUILTIN_PBLENDW128
,
26587 IX86_BUILTIN_INSERTPS128
,
26589 IX86_BUILTIN_MOVNTDQA
,
26590 IX86_BUILTIN_MPSADBW128
,
26591 IX86_BUILTIN_PACKUSDW128
,
26592 IX86_BUILTIN_PCMPEQQ
,
26593 IX86_BUILTIN_PHMINPOSUW128
,
26595 IX86_BUILTIN_PMAXSB128
,
26596 IX86_BUILTIN_PMAXSD128
,
26597 IX86_BUILTIN_PMAXUD128
,
26598 IX86_BUILTIN_PMAXUW128
,
26600 IX86_BUILTIN_PMINSB128
,
26601 IX86_BUILTIN_PMINSD128
,
26602 IX86_BUILTIN_PMINUD128
,
26603 IX86_BUILTIN_PMINUW128
,
26605 IX86_BUILTIN_PMOVSXBW128
,
26606 IX86_BUILTIN_PMOVSXBD128
,
26607 IX86_BUILTIN_PMOVSXBQ128
,
26608 IX86_BUILTIN_PMOVSXWD128
,
26609 IX86_BUILTIN_PMOVSXWQ128
,
26610 IX86_BUILTIN_PMOVSXDQ128
,
26612 IX86_BUILTIN_PMOVZXBW128
,
26613 IX86_BUILTIN_PMOVZXBD128
,
26614 IX86_BUILTIN_PMOVZXBQ128
,
26615 IX86_BUILTIN_PMOVZXWD128
,
26616 IX86_BUILTIN_PMOVZXWQ128
,
26617 IX86_BUILTIN_PMOVZXDQ128
,
26619 IX86_BUILTIN_PMULDQ128
,
26620 IX86_BUILTIN_PMULLD128
,
26622 IX86_BUILTIN_ROUNDSD
,
26623 IX86_BUILTIN_ROUNDSS
,
26625 IX86_BUILTIN_ROUNDPD
,
26626 IX86_BUILTIN_ROUNDPS
,
26628 IX86_BUILTIN_FLOORPD
,
26629 IX86_BUILTIN_CEILPD
,
26630 IX86_BUILTIN_TRUNCPD
,
26631 IX86_BUILTIN_RINTPD
,
26632 IX86_BUILTIN_ROUNDPD_AZ
,
26634 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
26635 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
26636 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
26638 IX86_BUILTIN_FLOORPS
,
26639 IX86_BUILTIN_CEILPS
,
26640 IX86_BUILTIN_TRUNCPS
,
26641 IX86_BUILTIN_RINTPS
,
26642 IX86_BUILTIN_ROUNDPS_AZ
,
26644 IX86_BUILTIN_FLOORPS_SFIX
,
26645 IX86_BUILTIN_CEILPS_SFIX
,
26646 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
26648 IX86_BUILTIN_PTESTZ
,
26649 IX86_BUILTIN_PTESTC
,
26650 IX86_BUILTIN_PTESTNZC
,
26652 IX86_BUILTIN_VEC_INIT_V2SI
,
26653 IX86_BUILTIN_VEC_INIT_V4HI
,
26654 IX86_BUILTIN_VEC_INIT_V8QI
,
26655 IX86_BUILTIN_VEC_EXT_V2DF
,
26656 IX86_BUILTIN_VEC_EXT_V2DI
,
26657 IX86_BUILTIN_VEC_EXT_V4SF
,
26658 IX86_BUILTIN_VEC_EXT_V4SI
,
26659 IX86_BUILTIN_VEC_EXT_V8HI
,
26660 IX86_BUILTIN_VEC_EXT_V2SI
,
26661 IX86_BUILTIN_VEC_EXT_V4HI
,
26662 IX86_BUILTIN_VEC_EXT_V16QI
,
26663 IX86_BUILTIN_VEC_SET_V2DI
,
26664 IX86_BUILTIN_VEC_SET_V4SF
,
26665 IX86_BUILTIN_VEC_SET_V4SI
,
26666 IX86_BUILTIN_VEC_SET_V8HI
,
26667 IX86_BUILTIN_VEC_SET_V4HI
,
26668 IX86_BUILTIN_VEC_SET_V16QI
,
26670 IX86_BUILTIN_VEC_PACK_SFIX
,
26671 IX86_BUILTIN_VEC_PACK_SFIX256
,
26674 IX86_BUILTIN_CRC32QI
,
26675 IX86_BUILTIN_CRC32HI
,
26676 IX86_BUILTIN_CRC32SI
,
26677 IX86_BUILTIN_CRC32DI
,
26679 IX86_BUILTIN_PCMPESTRI128
,
26680 IX86_BUILTIN_PCMPESTRM128
,
26681 IX86_BUILTIN_PCMPESTRA128
,
26682 IX86_BUILTIN_PCMPESTRC128
,
26683 IX86_BUILTIN_PCMPESTRO128
,
26684 IX86_BUILTIN_PCMPESTRS128
,
26685 IX86_BUILTIN_PCMPESTRZ128
,
26686 IX86_BUILTIN_PCMPISTRI128
,
26687 IX86_BUILTIN_PCMPISTRM128
,
26688 IX86_BUILTIN_PCMPISTRA128
,
26689 IX86_BUILTIN_PCMPISTRC128
,
26690 IX86_BUILTIN_PCMPISTRO128
,
26691 IX86_BUILTIN_PCMPISTRS128
,
26692 IX86_BUILTIN_PCMPISTRZ128
,
26694 IX86_BUILTIN_PCMPGTQ
,
26696 /* AES instructions */
26697 IX86_BUILTIN_AESENC128
,
26698 IX86_BUILTIN_AESENCLAST128
,
26699 IX86_BUILTIN_AESDEC128
,
26700 IX86_BUILTIN_AESDECLAST128
,
26701 IX86_BUILTIN_AESIMC128
,
26702 IX86_BUILTIN_AESKEYGENASSIST128
,
26704 /* PCLMUL instruction */
26705 IX86_BUILTIN_PCLMULQDQ128
,
26708 IX86_BUILTIN_ADDPD256
,
26709 IX86_BUILTIN_ADDPS256
,
26710 IX86_BUILTIN_ADDSUBPD256
,
26711 IX86_BUILTIN_ADDSUBPS256
,
26712 IX86_BUILTIN_ANDPD256
,
26713 IX86_BUILTIN_ANDPS256
,
26714 IX86_BUILTIN_ANDNPD256
,
26715 IX86_BUILTIN_ANDNPS256
,
26716 IX86_BUILTIN_BLENDPD256
,
26717 IX86_BUILTIN_BLENDPS256
,
26718 IX86_BUILTIN_BLENDVPD256
,
26719 IX86_BUILTIN_BLENDVPS256
,
26720 IX86_BUILTIN_DIVPD256
,
26721 IX86_BUILTIN_DIVPS256
,
26722 IX86_BUILTIN_DPPS256
,
26723 IX86_BUILTIN_HADDPD256
,
26724 IX86_BUILTIN_HADDPS256
,
26725 IX86_BUILTIN_HSUBPD256
,
26726 IX86_BUILTIN_HSUBPS256
,
26727 IX86_BUILTIN_MAXPD256
,
26728 IX86_BUILTIN_MAXPS256
,
26729 IX86_BUILTIN_MINPD256
,
26730 IX86_BUILTIN_MINPS256
,
26731 IX86_BUILTIN_MULPD256
,
26732 IX86_BUILTIN_MULPS256
,
26733 IX86_BUILTIN_ORPD256
,
26734 IX86_BUILTIN_ORPS256
,
26735 IX86_BUILTIN_SHUFPD256
,
26736 IX86_BUILTIN_SHUFPS256
,
26737 IX86_BUILTIN_SUBPD256
,
26738 IX86_BUILTIN_SUBPS256
,
26739 IX86_BUILTIN_XORPD256
,
26740 IX86_BUILTIN_XORPS256
,
26741 IX86_BUILTIN_CMPSD
,
26742 IX86_BUILTIN_CMPSS
,
26743 IX86_BUILTIN_CMPPD
,
26744 IX86_BUILTIN_CMPPS
,
26745 IX86_BUILTIN_CMPPD256
,
26746 IX86_BUILTIN_CMPPS256
,
26747 IX86_BUILTIN_CVTDQ2PD256
,
26748 IX86_BUILTIN_CVTDQ2PS256
,
26749 IX86_BUILTIN_CVTPD2PS256
,
26750 IX86_BUILTIN_CVTPS2DQ256
,
26751 IX86_BUILTIN_CVTPS2PD256
,
26752 IX86_BUILTIN_CVTTPD2DQ256
,
26753 IX86_BUILTIN_CVTPD2DQ256
,
26754 IX86_BUILTIN_CVTTPS2DQ256
,
26755 IX86_BUILTIN_EXTRACTF128PD256
,
26756 IX86_BUILTIN_EXTRACTF128PS256
,
26757 IX86_BUILTIN_EXTRACTF128SI256
,
26758 IX86_BUILTIN_VZEROALL
,
26759 IX86_BUILTIN_VZEROUPPER
,
26760 IX86_BUILTIN_VPERMILVARPD
,
26761 IX86_BUILTIN_VPERMILVARPS
,
26762 IX86_BUILTIN_VPERMILVARPD256
,
26763 IX86_BUILTIN_VPERMILVARPS256
,
26764 IX86_BUILTIN_VPERMILPD
,
26765 IX86_BUILTIN_VPERMILPS
,
26766 IX86_BUILTIN_VPERMILPD256
,
26767 IX86_BUILTIN_VPERMILPS256
,
26768 IX86_BUILTIN_VPERMIL2PD
,
26769 IX86_BUILTIN_VPERMIL2PS
,
26770 IX86_BUILTIN_VPERMIL2PD256
,
26771 IX86_BUILTIN_VPERMIL2PS256
,
26772 IX86_BUILTIN_VPERM2F128PD256
,
26773 IX86_BUILTIN_VPERM2F128PS256
,
26774 IX86_BUILTIN_VPERM2F128SI256
,
26775 IX86_BUILTIN_VBROADCASTSS
,
26776 IX86_BUILTIN_VBROADCASTSD256
,
26777 IX86_BUILTIN_VBROADCASTSS256
,
26778 IX86_BUILTIN_VBROADCASTPD256
,
26779 IX86_BUILTIN_VBROADCASTPS256
,
26780 IX86_BUILTIN_VINSERTF128PD256
,
26781 IX86_BUILTIN_VINSERTF128PS256
,
26782 IX86_BUILTIN_VINSERTF128SI256
,
26783 IX86_BUILTIN_LOADUPD256
,
26784 IX86_BUILTIN_LOADUPS256
,
26785 IX86_BUILTIN_STOREUPD256
,
26786 IX86_BUILTIN_STOREUPS256
,
26787 IX86_BUILTIN_LDDQU256
,
26788 IX86_BUILTIN_MOVNTDQ256
,
26789 IX86_BUILTIN_MOVNTPD256
,
26790 IX86_BUILTIN_MOVNTPS256
,
26791 IX86_BUILTIN_LOADDQU256
,
26792 IX86_BUILTIN_STOREDQU256
,
26793 IX86_BUILTIN_MASKLOADPD
,
26794 IX86_BUILTIN_MASKLOADPS
,
26795 IX86_BUILTIN_MASKSTOREPD
,
26796 IX86_BUILTIN_MASKSTOREPS
,
26797 IX86_BUILTIN_MASKLOADPD256
,
26798 IX86_BUILTIN_MASKLOADPS256
,
26799 IX86_BUILTIN_MASKSTOREPD256
,
26800 IX86_BUILTIN_MASKSTOREPS256
,
26801 IX86_BUILTIN_MOVSHDUP256
,
26802 IX86_BUILTIN_MOVSLDUP256
,
26803 IX86_BUILTIN_MOVDDUP256
,
26805 IX86_BUILTIN_SQRTPD256
,
26806 IX86_BUILTIN_SQRTPS256
,
26807 IX86_BUILTIN_SQRTPS_NR256
,
26808 IX86_BUILTIN_RSQRTPS256
,
26809 IX86_BUILTIN_RSQRTPS_NR256
,
26811 IX86_BUILTIN_RCPPS256
,
26813 IX86_BUILTIN_ROUNDPD256
,
26814 IX86_BUILTIN_ROUNDPS256
,
26816 IX86_BUILTIN_FLOORPD256
,
26817 IX86_BUILTIN_CEILPD256
,
26818 IX86_BUILTIN_TRUNCPD256
,
26819 IX86_BUILTIN_RINTPD256
,
26820 IX86_BUILTIN_ROUNDPD_AZ256
,
26822 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
26823 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
26824 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
26826 IX86_BUILTIN_FLOORPS256
,
26827 IX86_BUILTIN_CEILPS256
,
26828 IX86_BUILTIN_TRUNCPS256
,
26829 IX86_BUILTIN_RINTPS256
,
26830 IX86_BUILTIN_ROUNDPS_AZ256
,
26832 IX86_BUILTIN_FLOORPS_SFIX256
,
26833 IX86_BUILTIN_CEILPS_SFIX256
,
26834 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
26836 IX86_BUILTIN_UNPCKHPD256
,
26837 IX86_BUILTIN_UNPCKLPD256
,
26838 IX86_BUILTIN_UNPCKHPS256
,
26839 IX86_BUILTIN_UNPCKLPS256
,
26841 IX86_BUILTIN_SI256_SI
,
26842 IX86_BUILTIN_PS256_PS
,
26843 IX86_BUILTIN_PD256_PD
,
26844 IX86_BUILTIN_SI_SI256
,
26845 IX86_BUILTIN_PS_PS256
,
26846 IX86_BUILTIN_PD_PD256
,
26848 IX86_BUILTIN_VTESTZPD
,
26849 IX86_BUILTIN_VTESTCPD
,
26850 IX86_BUILTIN_VTESTNZCPD
,
26851 IX86_BUILTIN_VTESTZPS
,
26852 IX86_BUILTIN_VTESTCPS
,
26853 IX86_BUILTIN_VTESTNZCPS
,
26854 IX86_BUILTIN_VTESTZPD256
,
26855 IX86_BUILTIN_VTESTCPD256
,
26856 IX86_BUILTIN_VTESTNZCPD256
,
26857 IX86_BUILTIN_VTESTZPS256
,
26858 IX86_BUILTIN_VTESTCPS256
,
26859 IX86_BUILTIN_VTESTNZCPS256
,
26860 IX86_BUILTIN_PTESTZ256
,
26861 IX86_BUILTIN_PTESTC256
,
26862 IX86_BUILTIN_PTESTNZC256
,
26864 IX86_BUILTIN_MOVMSKPD256
,
26865 IX86_BUILTIN_MOVMSKPS256
,
26868 IX86_BUILTIN_MPSADBW256
,
26869 IX86_BUILTIN_PABSB256
,
26870 IX86_BUILTIN_PABSW256
,
26871 IX86_BUILTIN_PABSD256
,
26872 IX86_BUILTIN_PACKSSDW256
,
26873 IX86_BUILTIN_PACKSSWB256
,
26874 IX86_BUILTIN_PACKUSDW256
,
26875 IX86_BUILTIN_PACKUSWB256
,
26876 IX86_BUILTIN_PADDB256
,
26877 IX86_BUILTIN_PADDW256
,
26878 IX86_BUILTIN_PADDD256
,
26879 IX86_BUILTIN_PADDQ256
,
26880 IX86_BUILTIN_PADDSB256
,
26881 IX86_BUILTIN_PADDSW256
,
26882 IX86_BUILTIN_PADDUSB256
,
26883 IX86_BUILTIN_PADDUSW256
,
26884 IX86_BUILTIN_PALIGNR256
,
26885 IX86_BUILTIN_AND256I
,
26886 IX86_BUILTIN_ANDNOT256I
,
26887 IX86_BUILTIN_PAVGB256
,
26888 IX86_BUILTIN_PAVGW256
,
26889 IX86_BUILTIN_PBLENDVB256
,
26890 IX86_BUILTIN_PBLENDVW256
,
26891 IX86_BUILTIN_PCMPEQB256
,
26892 IX86_BUILTIN_PCMPEQW256
,
26893 IX86_BUILTIN_PCMPEQD256
,
26894 IX86_BUILTIN_PCMPEQQ256
,
26895 IX86_BUILTIN_PCMPGTB256
,
26896 IX86_BUILTIN_PCMPGTW256
,
26897 IX86_BUILTIN_PCMPGTD256
,
26898 IX86_BUILTIN_PCMPGTQ256
,
26899 IX86_BUILTIN_PHADDW256
,
26900 IX86_BUILTIN_PHADDD256
,
26901 IX86_BUILTIN_PHADDSW256
,
26902 IX86_BUILTIN_PHSUBW256
,
26903 IX86_BUILTIN_PHSUBD256
,
26904 IX86_BUILTIN_PHSUBSW256
,
26905 IX86_BUILTIN_PMADDUBSW256
,
26906 IX86_BUILTIN_PMADDWD256
,
26907 IX86_BUILTIN_PMAXSB256
,
26908 IX86_BUILTIN_PMAXSW256
,
26909 IX86_BUILTIN_PMAXSD256
,
26910 IX86_BUILTIN_PMAXUB256
,
26911 IX86_BUILTIN_PMAXUW256
,
26912 IX86_BUILTIN_PMAXUD256
,
26913 IX86_BUILTIN_PMINSB256
,
26914 IX86_BUILTIN_PMINSW256
,
26915 IX86_BUILTIN_PMINSD256
,
26916 IX86_BUILTIN_PMINUB256
,
26917 IX86_BUILTIN_PMINUW256
,
26918 IX86_BUILTIN_PMINUD256
,
26919 IX86_BUILTIN_PMOVMSKB256
,
26920 IX86_BUILTIN_PMOVSXBW256
,
26921 IX86_BUILTIN_PMOVSXBD256
,
26922 IX86_BUILTIN_PMOVSXBQ256
,
26923 IX86_BUILTIN_PMOVSXWD256
,
26924 IX86_BUILTIN_PMOVSXWQ256
,
26925 IX86_BUILTIN_PMOVSXDQ256
,
26926 IX86_BUILTIN_PMOVZXBW256
,
26927 IX86_BUILTIN_PMOVZXBD256
,
26928 IX86_BUILTIN_PMOVZXBQ256
,
26929 IX86_BUILTIN_PMOVZXWD256
,
26930 IX86_BUILTIN_PMOVZXWQ256
,
26931 IX86_BUILTIN_PMOVZXDQ256
,
26932 IX86_BUILTIN_PMULDQ256
,
26933 IX86_BUILTIN_PMULHRSW256
,
26934 IX86_BUILTIN_PMULHUW256
,
26935 IX86_BUILTIN_PMULHW256
,
26936 IX86_BUILTIN_PMULLW256
,
26937 IX86_BUILTIN_PMULLD256
,
26938 IX86_BUILTIN_PMULUDQ256
,
26939 IX86_BUILTIN_POR256
,
26940 IX86_BUILTIN_PSADBW256
,
26941 IX86_BUILTIN_PSHUFB256
,
26942 IX86_BUILTIN_PSHUFD256
,
26943 IX86_BUILTIN_PSHUFHW256
,
26944 IX86_BUILTIN_PSHUFLW256
,
26945 IX86_BUILTIN_PSIGNB256
,
26946 IX86_BUILTIN_PSIGNW256
,
26947 IX86_BUILTIN_PSIGND256
,
26948 IX86_BUILTIN_PSLLDQI256
,
26949 IX86_BUILTIN_PSLLWI256
,
26950 IX86_BUILTIN_PSLLW256
,
26951 IX86_BUILTIN_PSLLDI256
,
26952 IX86_BUILTIN_PSLLD256
,
26953 IX86_BUILTIN_PSLLQI256
,
26954 IX86_BUILTIN_PSLLQ256
,
26955 IX86_BUILTIN_PSRAWI256
,
26956 IX86_BUILTIN_PSRAW256
,
26957 IX86_BUILTIN_PSRADI256
,
26958 IX86_BUILTIN_PSRAD256
,
26959 IX86_BUILTIN_PSRLDQI256
,
26960 IX86_BUILTIN_PSRLWI256
,
26961 IX86_BUILTIN_PSRLW256
,
26962 IX86_BUILTIN_PSRLDI256
,
26963 IX86_BUILTIN_PSRLD256
,
26964 IX86_BUILTIN_PSRLQI256
,
26965 IX86_BUILTIN_PSRLQ256
,
26966 IX86_BUILTIN_PSUBB256
,
26967 IX86_BUILTIN_PSUBW256
,
26968 IX86_BUILTIN_PSUBD256
,
26969 IX86_BUILTIN_PSUBQ256
,
26970 IX86_BUILTIN_PSUBSB256
,
26971 IX86_BUILTIN_PSUBSW256
,
26972 IX86_BUILTIN_PSUBUSB256
,
26973 IX86_BUILTIN_PSUBUSW256
,
26974 IX86_BUILTIN_PUNPCKHBW256
,
26975 IX86_BUILTIN_PUNPCKHWD256
,
26976 IX86_BUILTIN_PUNPCKHDQ256
,
26977 IX86_BUILTIN_PUNPCKHQDQ256
,
26978 IX86_BUILTIN_PUNPCKLBW256
,
26979 IX86_BUILTIN_PUNPCKLWD256
,
26980 IX86_BUILTIN_PUNPCKLDQ256
,
26981 IX86_BUILTIN_PUNPCKLQDQ256
,
26982 IX86_BUILTIN_PXOR256
,
26983 IX86_BUILTIN_MOVNTDQA256
,
26984 IX86_BUILTIN_VBROADCASTSS_PS
,
26985 IX86_BUILTIN_VBROADCASTSS_PS256
,
26986 IX86_BUILTIN_VBROADCASTSD_PD256
,
26987 IX86_BUILTIN_VBROADCASTSI256
,
26988 IX86_BUILTIN_PBLENDD256
,
26989 IX86_BUILTIN_PBLENDD128
,
26990 IX86_BUILTIN_PBROADCASTB256
,
26991 IX86_BUILTIN_PBROADCASTW256
,
26992 IX86_BUILTIN_PBROADCASTD256
,
26993 IX86_BUILTIN_PBROADCASTQ256
,
26994 IX86_BUILTIN_PBROADCASTB128
,
26995 IX86_BUILTIN_PBROADCASTW128
,
26996 IX86_BUILTIN_PBROADCASTD128
,
26997 IX86_BUILTIN_PBROADCASTQ128
,
26998 IX86_BUILTIN_VPERMVARSI256
,
26999 IX86_BUILTIN_VPERMDF256
,
27000 IX86_BUILTIN_VPERMVARSF256
,
27001 IX86_BUILTIN_VPERMDI256
,
27002 IX86_BUILTIN_VPERMTI256
,
27003 IX86_BUILTIN_VEXTRACT128I256
,
27004 IX86_BUILTIN_VINSERT128I256
,
27005 IX86_BUILTIN_MASKLOADD
,
27006 IX86_BUILTIN_MASKLOADQ
,
27007 IX86_BUILTIN_MASKLOADD256
,
27008 IX86_BUILTIN_MASKLOADQ256
,
27009 IX86_BUILTIN_MASKSTORED
,
27010 IX86_BUILTIN_MASKSTOREQ
,
27011 IX86_BUILTIN_MASKSTORED256
,
27012 IX86_BUILTIN_MASKSTOREQ256
,
27013 IX86_BUILTIN_PSLLVV4DI
,
27014 IX86_BUILTIN_PSLLVV2DI
,
27015 IX86_BUILTIN_PSLLVV8SI
,
27016 IX86_BUILTIN_PSLLVV4SI
,
27017 IX86_BUILTIN_PSRAVV8SI
,
27018 IX86_BUILTIN_PSRAVV4SI
,
27019 IX86_BUILTIN_PSRLVV4DI
,
27020 IX86_BUILTIN_PSRLVV2DI
,
27021 IX86_BUILTIN_PSRLVV8SI
,
27022 IX86_BUILTIN_PSRLVV4SI
,
27024 IX86_BUILTIN_GATHERSIV2DF
,
27025 IX86_BUILTIN_GATHERSIV4DF
,
27026 IX86_BUILTIN_GATHERDIV2DF
,
27027 IX86_BUILTIN_GATHERDIV4DF
,
27028 IX86_BUILTIN_GATHERSIV4SF
,
27029 IX86_BUILTIN_GATHERSIV8SF
,
27030 IX86_BUILTIN_GATHERDIV4SF
,
27031 IX86_BUILTIN_GATHERDIV8SF
,
27032 IX86_BUILTIN_GATHERSIV2DI
,
27033 IX86_BUILTIN_GATHERSIV4DI
,
27034 IX86_BUILTIN_GATHERDIV2DI
,
27035 IX86_BUILTIN_GATHERDIV4DI
,
27036 IX86_BUILTIN_GATHERSIV4SI
,
27037 IX86_BUILTIN_GATHERSIV8SI
,
27038 IX86_BUILTIN_GATHERDIV4SI
,
27039 IX86_BUILTIN_GATHERDIV8SI
,
27041 /* Alternate 4 element gather for the vectorizer where
27042 all operands are 32-byte wide. */
27043 IX86_BUILTIN_GATHERALTSIV4DF
,
27044 IX86_BUILTIN_GATHERALTDIV8SF
,
27045 IX86_BUILTIN_GATHERALTSIV4DI
,
27046 IX86_BUILTIN_GATHERALTDIV8SI
,
27048 /* TFmode support builtins. */
27050 IX86_BUILTIN_HUGE_VALQ
,
27051 IX86_BUILTIN_FABSQ
,
27052 IX86_BUILTIN_COPYSIGNQ
,
27054 /* Vectorizer support builtins. */
27055 IX86_BUILTIN_CPYSGNPS
,
27056 IX86_BUILTIN_CPYSGNPD
,
27057 IX86_BUILTIN_CPYSGNPS256
,
27058 IX86_BUILTIN_CPYSGNPD256
,
27060 /* FMA4 instructions. */
27061 IX86_BUILTIN_VFMADDSS
,
27062 IX86_BUILTIN_VFMADDSD
,
27063 IX86_BUILTIN_VFMADDPS
,
27064 IX86_BUILTIN_VFMADDPD
,
27065 IX86_BUILTIN_VFMADDPS256
,
27066 IX86_BUILTIN_VFMADDPD256
,
27067 IX86_BUILTIN_VFMADDSUBPS
,
27068 IX86_BUILTIN_VFMADDSUBPD
,
27069 IX86_BUILTIN_VFMADDSUBPS256
,
27070 IX86_BUILTIN_VFMADDSUBPD256
,
27072 /* FMA3 instructions. */
27073 IX86_BUILTIN_VFMADDSS3
,
27074 IX86_BUILTIN_VFMADDSD3
,
27076 /* XOP instructions. */
27077 IX86_BUILTIN_VPCMOV
,
27078 IX86_BUILTIN_VPCMOV_V2DI
,
27079 IX86_BUILTIN_VPCMOV_V4SI
,
27080 IX86_BUILTIN_VPCMOV_V8HI
,
27081 IX86_BUILTIN_VPCMOV_V16QI
,
27082 IX86_BUILTIN_VPCMOV_V4SF
,
27083 IX86_BUILTIN_VPCMOV_V2DF
,
27084 IX86_BUILTIN_VPCMOV256
,
27085 IX86_BUILTIN_VPCMOV_V4DI256
,
27086 IX86_BUILTIN_VPCMOV_V8SI256
,
27087 IX86_BUILTIN_VPCMOV_V16HI256
,
27088 IX86_BUILTIN_VPCMOV_V32QI256
,
27089 IX86_BUILTIN_VPCMOV_V8SF256
,
27090 IX86_BUILTIN_VPCMOV_V4DF256
,
27092 IX86_BUILTIN_VPPERM
,
27094 IX86_BUILTIN_VPMACSSWW
,
27095 IX86_BUILTIN_VPMACSWW
,
27096 IX86_BUILTIN_VPMACSSWD
,
27097 IX86_BUILTIN_VPMACSWD
,
27098 IX86_BUILTIN_VPMACSSDD
,
27099 IX86_BUILTIN_VPMACSDD
,
27100 IX86_BUILTIN_VPMACSSDQL
,
27101 IX86_BUILTIN_VPMACSSDQH
,
27102 IX86_BUILTIN_VPMACSDQL
,
27103 IX86_BUILTIN_VPMACSDQH
,
27104 IX86_BUILTIN_VPMADCSSWD
,
27105 IX86_BUILTIN_VPMADCSWD
,
27107 IX86_BUILTIN_VPHADDBW
,
27108 IX86_BUILTIN_VPHADDBD
,
27109 IX86_BUILTIN_VPHADDBQ
,
27110 IX86_BUILTIN_VPHADDWD
,
27111 IX86_BUILTIN_VPHADDWQ
,
27112 IX86_BUILTIN_VPHADDDQ
,
27113 IX86_BUILTIN_VPHADDUBW
,
27114 IX86_BUILTIN_VPHADDUBD
,
27115 IX86_BUILTIN_VPHADDUBQ
,
27116 IX86_BUILTIN_VPHADDUWD
,
27117 IX86_BUILTIN_VPHADDUWQ
,
27118 IX86_BUILTIN_VPHADDUDQ
,
27119 IX86_BUILTIN_VPHSUBBW
,
27120 IX86_BUILTIN_VPHSUBWD
,
27121 IX86_BUILTIN_VPHSUBDQ
,
27123 IX86_BUILTIN_VPROTB
,
27124 IX86_BUILTIN_VPROTW
,
27125 IX86_BUILTIN_VPROTD
,
27126 IX86_BUILTIN_VPROTQ
,
27127 IX86_BUILTIN_VPROTB_IMM
,
27128 IX86_BUILTIN_VPROTW_IMM
,
27129 IX86_BUILTIN_VPROTD_IMM
,
27130 IX86_BUILTIN_VPROTQ_IMM
,
27132 IX86_BUILTIN_VPSHLB
,
27133 IX86_BUILTIN_VPSHLW
,
27134 IX86_BUILTIN_VPSHLD
,
27135 IX86_BUILTIN_VPSHLQ
,
27136 IX86_BUILTIN_VPSHAB
,
27137 IX86_BUILTIN_VPSHAW
,
27138 IX86_BUILTIN_VPSHAD
,
27139 IX86_BUILTIN_VPSHAQ
,
27141 IX86_BUILTIN_VFRCZSS
,
27142 IX86_BUILTIN_VFRCZSD
,
27143 IX86_BUILTIN_VFRCZPS
,
27144 IX86_BUILTIN_VFRCZPD
,
27145 IX86_BUILTIN_VFRCZPS256
,
27146 IX86_BUILTIN_VFRCZPD256
,
27148 IX86_BUILTIN_VPCOMEQUB
,
27149 IX86_BUILTIN_VPCOMNEUB
,
27150 IX86_BUILTIN_VPCOMLTUB
,
27151 IX86_BUILTIN_VPCOMLEUB
,
27152 IX86_BUILTIN_VPCOMGTUB
,
27153 IX86_BUILTIN_VPCOMGEUB
,
27154 IX86_BUILTIN_VPCOMFALSEUB
,
27155 IX86_BUILTIN_VPCOMTRUEUB
,
27157 IX86_BUILTIN_VPCOMEQUW
,
27158 IX86_BUILTIN_VPCOMNEUW
,
27159 IX86_BUILTIN_VPCOMLTUW
,
27160 IX86_BUILTIN_VPCOMLEUW
,
27161 IX86_BUILTIN_VPCOMGTUW
,
27162 IX86_BUILTIN_VPCOMGEUW
,
27163 IX86_BUILTIN_VPCOMFALSEUW
,
27164 IX86_BUILTIN_VPCOMTRUEUW
,
27166 IX86_BUILTIN_VPCOMEQUD
,
27167 IX86_BUILTIN_VPCOMNEUD
,
27168 IX86_BUILTIN_VPCOMLTUD
,
27169 IX86_BUILTIN_VPCOMLEUD
,
27170 IX86_BUILTIN_VPCOMGTUD
,
27171 IX86_BUILTIN_VPCOMGEUD
,
27172 IX86_BUILTIN_VPCOMFALSEUD
,
27173 IX86_BUILTIN_VPCOMTRUEUD
,
27175 IX86_BUILTIN_VPCOMEQUQ
,
27176 IX86_BUILTIN_VPCOMNEUQ
,
27177 IX86_BUILTIN_VPCOMLTUQ
,
27178 IX86_BUILTIN_VPCOMLEUQ
,
27179 IX86_BUILTIN_VPCOMGTUQ
,
27180 IX86_BUILTIN_VPCOMGEUQ
,
27181 IX86_BUILTIN_VPCOMFALSEUQ
,
27182 IX86_BUILTIN_VPCOMTRUEUQ
,
27184 IX86_BUILTIN_VPCOMEQB
,
27185 IX86_BUILTIN_VPCOMNEB
,
27186 IX86_BUILTIN_VPCOMLTB
,
27187 IX86_BUILTIN_VPCOMLEB
,
27188 IX86_BUILTIN_VPCOMGTB
,
27189 IX86_BUILTIN_VPCOMGEB
,
27190 IX86_BUILTIN_VPCOMFALSEB
,
27191 IX86_BUILTIN_VPCOMTRUEB
,
27193 IX86_BUILTIN_VPCOMEQW
,
27194 IX86_BUILTIN_VPCOMNEW
,
27195 IX86_BUILTIN_VPCOMLTW
,
27196 IX86_BUILTIN_VPCOMLEW
,
27197 IX86_BUILTIN_VPCOMGTW
,
27198 IX86_BUILTIN_VPCOMGEW
,
27199 IX86_BUILTIN_VPCOMFALSEW
,
27200 IX86_BUILTIN_VPCOMTRUEW
,
27202 IX86_BUILTIN_VPCOMEQD
,
27203 IX86_BUILTIN_VPCOMNED
,
27204 IX86_BUILTIN_VPCOMLTD
,
27205 IX86_BUILTIN_VPCOMLED
,
27206 IX86_BUILTIN_VPCOMGTD
,
27207 IX86_BUILTIN_VPCOMGED
,
27208 IX86_BUILTIN_VPCOMFALSED
,
27209 IX86_BUILTIN_VPCOMTRUED
,
27211 IX86_BUILTIN_VPCOMEQQ
,
27212 IX86_BUILTIN_VPCOMNEQ
,
27213 IX86_BUILTIN_VPCOMLTQ
,
27214 IX86_BUILTIN_VPCOMLEQ
,
27215 IX86_BUILTIN_VPCOMGTQ
,
27216 IX86_BUILTIN_VPCOMGEQ
,
27217 IX86_BUILTIN_VPCOMFALSEQ
,
27218 IX86_BUILTIN_VPCOMTRUEQ
,
27220 /* LWP instructions. */
27221 IX86_BUILTIN_LLWPCB
,
27222 IX86_BUILTIN_SLWPCB
,
27223 IX86_BUILTIN_LWPVAL32
,
27224 IX86_BUILTIN_LWPVAL64
,
27225 IX86_BUILTIN_LWPINS32
,
27226 IX86_BUILTIN_LWPINS64
,
27231 IX86_BUILTIN_XBEGIN
,
27233 IX86_BUILTIN_XABORT
,
27234 IX86_BUILTIN_XTEST
,
27236 /* BMI instructions. */
27237 IX86_BUILTIN_BEXTR32
,
27238 IX86_BUILTIN_BEXTR64
,
27241 /* TBM instructions. */
27242 IX86_BUILTIN_BEXTRI32
,
27243 IX86_BUILTIN_BEXTRI64
,
27245 /* BMI2 instructions. */
27246 IX86_BUILTIN_BZHI32
,
27247 IX86_BUILTIN_BZHI64
,
27248 IX86_BUILTIN_PDEP32
,
27249 IX86_BUILTIN_PDEP64
,
27250 IX86_BUILTIN_PEXT32
,
27251 IX86_BUILTIN_PEXT64
,
27253 /* ADX instructions. */
27254 IX86_BUILTIN_ADDCARRYX32
,
27255 IX86_BUILTIN_ADDCARRYX64
,
27257 /* FSGSBASE instructions. */
27258 IX86_BUILTIN_RDFSBASE32
,
27259 IX86_BUILTIN_RDFSBASE64
,
27260 IX86_BUILTIN_RDGSBASE32
,
27261 IX86_BUILTIN_RDGSBASE64
,
27262 IX86_BUILTIN_WRFSBASE32
,
27263 IX86_BUILTIN_WRFSBASE64
,
27264 IX86_BUILTIN_WRGSBASE32
,
27265 IX86_BUILTIN_WRGSBASE64
,
27267 /* RDRND instructions. */
27268 IX86_BUILTIN_RDRAND16_STEP
,
27269 IX86_BUILTIN_RDRAND32_STEP
,
27270 IX86_BUILTIN_RDRAND64_STEP
,
27272 /* RDSEED instructions. */
27273 IX86_BUILTIN_RDSEED16_STEP
,
27274 IX86_BUILTIN_RDSEED32_STEP
,
27275 IX86_BUILTIN_RDSEED64_STEP
,
27277 /* F16C instructions. */
27278 IX86_BUILTIN_CVTPH2PS
,
27279 IX86_BUILTIN_CVTPH2PS256
,
27280 IX86_BUILTIN_CVTPS2PH
,
27281 IX86_BUILTIN_CVTPS2PH256
,
27283 /* CFString built-in for darwin */
27284 IX86_BUILTIN_CFSTRING
,
27286 /* Builtins to get CPU type and supported features. */
27287 IX86_BUILTIN_CPU_INIT
,
27288 IX86_BUILTIN_CPU_IS
,
27289 IX86_BUILTIN_CPU_SUPPORTS
,
/* GC-rooted (GTY) table mapping each IX86_BUILTIN_* enum value to its
   function decl.  An entry may be NULL_TREE while the builtin's decl
   creation is deferred (see def_builtin / ix86_add_new_builtins below).  */
27294 /* Table for the ix86 builtin decls. */
27295 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
27297 /* Table of all of the builtin functions that are possible with different ISA's
27298 but are waiting to be built until a function is declared to use that
27300 struct builtin_isa
{
27301 const char *name
; /* function name */
27302 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
27303 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
27304 bool const_p
; /* true if the declaration is constant */
27305 bool set_and_not_built_p
; /* true while the builtin is recorded in this table but its decl has
     not been created yet; cleared by def_builtin (immediate path) and
     by ix86_add_new_builtins (deferred path) once the decl is built.  */
/* NOTE(review): the struct's closing brace appears to have been dropped
   by extraction in this view; verify against the full source.  */
/* Per-builtin deferred-creation records, indexed by IX86_BUILTIN_*.  */
27308 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
27311 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
27312 of which isa_flags to use in the ix86_builtins_isa array. Stores the
27313 function decl in the ix86_builtins array. Returns the function decl or
27314 NULL_TREE, if the builtin was not added.
27316 If the front end has a special hook for builtin functions, delay adding
27317 builtin functions that aren't in the current ISA until the ISA is changed
27318 with function specific optimization. Doing so, can save about 300K for the
27319 default compiler. When the builtin is expanded, check at that time whether
27322 If the front end doesn't have a special hook, record all builtins, even if
27323 it isn't an instruction set in the current ISA in case the user uses
27324 function specific options for a different ISA, so that we don't get scope
27325 errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): the function's return-type line and its braces are not
   visible in this extraction; verify against the full source.  */
27328 def_builtin (HOST_WIDE_INT mask
, const char *name
,
27329 enum ix86_builtin_func_type tcode
,
27330 enum ix86_builtins code
)
/* DECL stays NULL_TREE when the builtin is skipped (64-bit-only builtin
   on a 32-bit target) or when creation is deferred.  */
27332 tree decl
= NULL_TREE
;
/* Skip builtins that require 64-bit mode unless TARGET_64BIT.  */
27334 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
/* Remember which ISA flags enable this builtin.  */
27336 ix86_builtins_isa
[(int) code
].isa
= mask
;
/* The 64-bit bit only gated availability above; drop it before the
   ISA-enabled test below.  */
27338 mask
&= ~OPTION_MASK_ISA_64BIT
;
/* Build the decl now when the builtin's ISA is currently enabled, or
   when the front end has no distinct extended-scope hook (deferral
   would then risk the scope errors described above).
   NOTE(review): the opening of this condition (original line 27339) is
   missing from this view.  */
27340 || (mask
& ix86_isa_flags
) != 0
27341 || (lang_hooks
.builtin_function
27342 == lang_hooks
.builtin_function_ext_scope
))
/* Immediate path: construct the function type and register the builtin
   with the front end now.  */
27345 tree type
= ix86_get_builtin_func_type (tcode
);
27346 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
27348 ix86_builtins
[(int) code
] = decl
;
27349 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
/* Deferred path: record everything needed so ix86_add_new_builtins can
   build the decl later; the decl-table entry stays NULL_TREE.  */
27353 ix86_builtins
[(int) code
] = NULL_TREE
;
27354 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
27355 ix86_builtins_isa
[(int) code
].name
= name
;
27356 ix86_builtins_isa
[(int) code
].const_p
= false;
27357 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
27364 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): return-type line and braces not visible in this
   extraction; verify against the full source.  */
27367 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
27368 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
/* Delegate creation (or deferral) to def_builtin.  */
27370 tree decl
= def_builtin (mask
, name
, tcode
, code
);
/* If the decl was built immediately, mark it readonly ("const").
   NOTE(review): the guard on DECL (original line 27371) is not visible
   in this extraction.  */
27372 TREE_READONLY (decl
) = 1;
/* Otherwise remember const-ness so ix86_add_new_builtins can apply it
   when the deferred decl is eventually built.  */
27374 ix86_builtins_isa
[(int) code
].const_p
= true;
27379 /* Add any new builtin functions for a given ISA that may not have been
27380 declared. This saves a bit of space compared to adding all of the
27381 declarations to the tree, even if we didn't use them. */
/* NOTE(review): return-type line, local declarations of I/TYPE/DECL and
   the function braces are not visible in this extraction; verify
   against the full source.  */
27384 ix86_add_new_builtins (HOST_WIDE_INT isa
)
/* Scan every builtin slot for entries deferred by def_builtin.  */
27388 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
/* Only build entries enabled by ISA that are still pending.  */
27390 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
27391 && ix86_builtins_isa
[i
].set_and_not_built_p
)
27395 /* Don't define the builtin again. */
27396 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
/* Rebuild the saved function type and register the builtin via the
   extended-scope hook (see the comment above def_builtin about
   avoiding scope errors).  */
27398 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
27399 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
27400 type
, i
, BUILT_IN_MD
, NULL
,
27403 ix86_builtins
[i
] = decl
;
/* Apply the const-ness recorded by def_builtin_const.  */
27404 if (ix86_builtins_isa
[i
].const_p
)
27405 TREE_READONLY (decl
) = 1;
27410 /* Bits for builtin_description.flag. */
27412 /* Set when we don't support the comparison natively, and should
27413 swap_comparison in order to support it. */
27414 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One table row describing a builtin: which ISA enables it, which insn
   pattern implements it, its user-visible name, its enum code, and the
   RTX comparison it performs (see the bdesc_comi rows below).
   NOTE(review): the FLAG field referenced by the comment above is not
   visible in this extraction; verify against the full source.  */
27416 struct builtin_description
27418 const HOST_WIDE_INT mask
; /* OPTION_MASK_ISA_* flags enabling this builtin.  */
27419 const enum insn_code icode
; /* Insn pattern (CODE_FOR_*) that implements it.  */
27420 const char *const name
; /* User-visible "__builtin_ia32_*" name.  */
27421 const enum ix86_builtins code
; /* Corresponding IX86_BUILTIN_* enum value.  */
27422 const enum rtx_code comparison
; /* Comparison code (e.g. UNEQ, GT) for comi-style builtins.  */
27426 static const struct builtin_description bdesc_comi
[] =
27428 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
27429 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
27430 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
27431 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
27432 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
27433 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
27434 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
27435 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
27436 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
27437 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
27438 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
27439 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
27440 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
27441 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
27442 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
27443 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
27444 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
27445 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
27446 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
27447 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
27448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
27449 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
27450 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
27451 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
27454 static const struct builtin_description bdesc_pcmpestr
[] =
27457 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
27458 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
27459 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
27460 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
27461 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
27462 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
27463 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
27466 static const struct builtin_description bdesc_pcmpistr
[] =
27469 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
27470 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
27471 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
27472 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
27473 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
27474 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
27475 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
27478 /* Special builtins with variable number of arguments. */
27479 static const struct builtin_description bdesc_special_args
[] =
27481 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27482 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
27483 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27486 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27489 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27491 /* FXSR, XSAVE and XSAVEOPT */
27492 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27493 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27494 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27495 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27496 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27498 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27499 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27500 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27501 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27502 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27505 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27506 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27507 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
27509 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27510 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27511 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27512 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27514 /* SSE or 3DNow!A */
27515 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27516 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
27519 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27520 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27521 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27522 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedquv16qi
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
27523 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27524 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
27525 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
27526 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
27527 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
27528 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddquv16qi
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
27530 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
27531 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
27534 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
27537 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
27540 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27541 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27544 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27545 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27547 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
27548 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
27549 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
27550 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
27551 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
27553 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
27554 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
27555 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
27556 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
27557 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddquv32qi
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
27558 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedquv32qi
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
27559 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
27561 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
27562 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
27563 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
27565 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
27566 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
27567 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
27568 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
27569 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
27570 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
27571 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
27572 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
27575 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
27576 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
27577 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
27578 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
27579 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
27580 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
27581 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
27582 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
27583 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
27585 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27586 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
27587 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
27588 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
27589 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
27590 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
27593 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
27594 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27595 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
27596 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27597 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
27598 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
27599 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
27600 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
27603 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
27604 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27605 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
27608 /* Builtins with variable number of arguments. */
27609 static const struct builtin_description bdesc_args
[] =
27611 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
27612 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
27613 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
27614 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
27615 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
27616 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
27617 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
27620 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27621 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27622 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27623 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27624 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27625 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27627 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27628 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27629 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27630 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27631 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27632 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27633 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27634 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27636 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27637 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27639 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27640 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27641 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27642 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27644 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27645 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27646 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27647 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27648 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27649 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27651 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27652 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27653 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27654 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27655 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27656 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27658 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27659 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
27660 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27662 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
27664 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27665 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27666 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27667 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27668 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27669 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27671 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27672 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27673 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27674 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27675 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27676 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27678 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27679 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27680 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27681 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27684 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27685 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27686 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27687 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27689 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27690 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27691 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27692 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27693 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27694 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27695 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27696 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27697 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27698 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27699 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27700 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27701 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27702 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27703 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27706 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27707 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27708 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27709 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27710 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27711 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27714 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27715 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27716 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27717 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27718 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27719 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27720 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27721 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27722 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27723 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27724 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27725 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27727 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27729 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27730 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27731 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27732 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27733 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27734 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27735 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27736 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27738 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27739 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27740 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27741 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27742 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27743 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27744 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27745 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27746 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27747 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27748 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27749 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27750 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27751 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27752 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27753 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27754 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27755 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27756 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27757 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27759 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27760 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27761 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27762 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27764 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27765 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27766 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27767 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27769 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27771 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27772 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27773 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27774 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27775 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27777 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
27778 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
27779 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
27781 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
27783 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27784 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27785 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27787 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
27788 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
27790 /* SSE MMX or 3Dnow!A */
27791 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27792 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27793 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27795 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27796 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27797 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27798 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27800 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
27801 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
27803 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
27806 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27808 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27809 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
27810 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27811 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
27812 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
27814 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27815 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27816 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
27817 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27818 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27820 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
27822 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27823 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27824 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27825 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27827 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_fix_notruncv4sfv4si
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27828 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
27829 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27831 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27832 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27833 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27834 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27835 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27836 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27837 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27838 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27840 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27841 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27842 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27843 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27844 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27845 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27846 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27847 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27848 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27849 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27850 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27851 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27852 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27853 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27854 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27855 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27856 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27857 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27858 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27859 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27861 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27862 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27863 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27864 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27866 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27867 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27868 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27869 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27871 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27873 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27874 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27875 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27877 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27879 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27880 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27881 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27882 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27883 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27884 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27885 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27886 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27888 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27889 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27890 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27891 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27892 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27893 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27894 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27895 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27897 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27898 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
27900 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27901 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27902 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27903 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27905 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27906 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27908 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27909 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27910 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27911 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27912 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27913 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27915 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27916 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27917 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27918 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27920 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27921 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27922 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27923 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27924 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27925 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27926 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27927 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27929 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27930 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27931 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27933 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27934 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
27936 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
27937 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27939 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
27941 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
27942 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
27943 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
27944 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
27946 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27947 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27948 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27949 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27950 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27951 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27952 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27954 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27955 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27956 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27957 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27958 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27959 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27960 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27962 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27963 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27964 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27965 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27967 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
27968 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27969 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27971 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
27973 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27976 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27977 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27980 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27981 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27983 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27984 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27985 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27986 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27987 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27988 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27991 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27992 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
27993 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27994 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
27995 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27996 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27998 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27999 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28000 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28001 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28002 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28003 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28004 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28005 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28006 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28007 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28008 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28009 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28010 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
28011 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
28012 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28013 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28014 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28015 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28016 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28017 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28018 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28019 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28020 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28021 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28024 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
28025 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
28028 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28029 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28030 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
28031 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
28032 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28033 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28034 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28035 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
28036 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
28037 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
28039 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28040 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28041 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28042 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28043 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28044 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28045 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28046 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28047 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28048 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28049 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28050 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28051 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28053 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28054 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28055 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28056 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28057 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28058 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28059 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28060 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28061 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28062 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28063 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28064 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28067 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28068 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28069 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28070 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28072 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28073 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
28074 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
28075 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28077 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28078 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28080 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
28081 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
28083 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28084 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
28085 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
28086 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28088 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
28089 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
28091 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28092 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28094 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28095 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28096 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28099 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28100 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
28101 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
28102 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28103 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28106 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
28107 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
28108 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
28109 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28112 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
28113 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28115 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28116 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28117 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28118 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28121 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
28124 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28125 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28126 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28127 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28128 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28129 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28130 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28131 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28132 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28133 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28134 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28135 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28136 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28137 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28138 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28139 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28140 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28141 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28142 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28143 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28144 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28145 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28146 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28147 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28148 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28149 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28151 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
28152 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
28153 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
28154 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28156 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28157 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28158 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
28159 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
28160 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28161 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28162 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28163 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28164 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28165 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28166 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28167 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28168 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28169 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
28170 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
28171 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
28172 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
28173 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
28174 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
28175 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_fix_notruncv8sfv8si
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28176 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
28177 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28178 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28179 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28180 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28181 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28182 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28183 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28184 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28185 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28186 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28187 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
28188 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
28189 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
28191 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28192 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28193 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28195 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28196 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28197 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28198 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28199 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28201 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28203 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28204 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28206 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28207 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
28208 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
28209 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28211 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28212 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28214 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28215 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28217 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28218 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
28219 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
28220 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28222 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
28223 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
28225 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28226 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28228 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28229 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28230 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28231 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28233 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28234 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28235 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28236 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
28237 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
28238 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
28240 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28241 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28242 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28243 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28244 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28245 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28246 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28247 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28248 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28249 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28250 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28251 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28252 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28253 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28254 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28256 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
28257 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
28259 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28260 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28262 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28265 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
28266 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
28267 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
28268 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
28269 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28270 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28271 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28272 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28273 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28274 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28275 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28276 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28277 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28278 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28279 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28280 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28281 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
28282 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28283 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28284 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28285 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28286 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
28287 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
28288 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28289 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28290 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28291 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28292 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28293 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28294 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28295 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28296 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28297 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28298 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28299 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28300 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28301 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28302 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28303 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
28304 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28305 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28306 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28307 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28308 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28309 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28310 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28311 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28312 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28313 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28314 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28315 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28316 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
28317 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28318 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28319 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28320 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28321 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28322 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28323 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28324 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28325 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28326 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28327 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28328 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28329 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28330 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28331 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28332 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28333 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28334 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28335 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28336 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28337 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28338 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28339 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
28340 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28341 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28342 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28343 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28344 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28345 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28346 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28347 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28348 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28349 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28350 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28351 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28352 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28353 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28354 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28355 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28356 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28357 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28358 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28359 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28360 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28361 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28362 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28363 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28364 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28365 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28366 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28367 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28368 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28369 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28370 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28371 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28372 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28373 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28374 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28375 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28376 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28377 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28378 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28379 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28380 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28381 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28382 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28383 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28384 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
28385 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28386 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
28387 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
28388 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28389 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28390 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
28391 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28392 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
28393 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28394 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28395 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28396 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28397 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
28398 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
28399 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
28400 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
28401 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28402 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28403 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28404 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28405 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28406 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28407 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28408 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28409 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28410 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28412 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
28415 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28416 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28417 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
28420 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28421 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28424 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
28425 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
28426 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
28427 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
28430 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28431 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28432 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28433 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28434 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28435 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */
/* Shorthand aliases mapping each multi-argument builtin signature class
   to the corresponding ix86_builtin_func_type enumerator.  Naming scheme:
   MULTI_ARG_<nargs>_<element type>[2 for 256-bit][_suffix], where the
   suffix encodes an extra operand kind (IMM immediate, CMP comparison,
   TF table-form condition, SI/DI/HI widened result, I/I1 select index).  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
28492 static const struct builtin_description bdesc_multi_arg
[] =
28494 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
28495 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
28496 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28497 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
28498 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
28499 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28501 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
28502 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
28503 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28504 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
28505 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
28506 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28508 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
28509 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
28510 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28511 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
28512 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
28513 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28514 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
28515 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
28516 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
28517 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
28518 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
28519 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
28521 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
28522 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
28523 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28524 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
28525 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
28526 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28527 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
28528 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
28529 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
28530 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
28531 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
28532 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
28534 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
28535 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
28536 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
28537 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
28538 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
28539 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
28540 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
28542 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
28543 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
28544 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
28545 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
28546 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
28547 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
28548 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
28550 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
28552 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
28553 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
28554 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28555 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28556 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
28557 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
28558 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28559 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28560 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28561 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28562 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28563 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28565 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
28566 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
28567 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
28568 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
28569 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
28570 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
28571 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
28572 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
28573 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
28574 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
28575 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
28576 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
28577 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
28578 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
28579 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
28580 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
28582 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
28583 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
28584 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
28585 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
28586 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
28587 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
28589 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
28590 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
28591 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
28592 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
28593 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
28594 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
28595 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
28596 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
28597 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
28598 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
28599 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
28600 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
28601 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
28602 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
28603 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
28605 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
28606 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28607 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28608 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
28609 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
28610 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
28611 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
28613 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
28614 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28615 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28616 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
28617 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
28618 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
28619 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
28621 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28622 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28623 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28624 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
28625 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
28626 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
28627 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
28629 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28630 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28631 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28632 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
28633 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
28634 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
28635 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
28637 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
28638 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28639 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28640 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
28641 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
28642 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
28643 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
28645 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
28646 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28647 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28648 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
28649 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
28650 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
28651 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
28653 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28654 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28655 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28656 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
28657 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
28658 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
28659 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
28661 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28662 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28663 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28664 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
28665 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
28666 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
28667 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
28669 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28670 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28671 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28672 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28673 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28674 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28675 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28676 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28678 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28679 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28680 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28681 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28682 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28683 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28684 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28685 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28687 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
28688 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
28689 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
28690 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
28694 /* TM vector builtins. */
28696 /* Reuse the existing x86-specific `struct builtin_description' cause
28697 we're lazy. Add casts to make them fit. */
28698 static const struct builtin_description bdesc_tm
[] =
28700 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28701 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28702 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28703 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28704 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28705 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28706 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28708 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28709 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28710 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28711 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28712 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28713 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28714 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28716 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28717 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28718 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28719 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28720 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28721 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28722 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28724 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28725 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28726 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28729 /* TM callbacks. */
28731 /* Return the builtin decl needed to load a vector of TYPE. */
28734 ix86_builtin_tm_load (tree type
)
28736 if (TREE_CODE (type
) == VECTOR_TYPE
)
28738 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28741 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
28743 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
28745 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
28751 /* Return the builtin decl needed to store a vector of TYPE. */
28754 ix86_builtin_tm_store (tree type
)
28756 if (TREE_CODE (type
) == VECTOR_TYPE
)
28758 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28761 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
28763 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
28765 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
28771 /* Initialize the transactional memory vector load/store builtins. */
28774 ix86_init_tm_builtins (void)
28776 enum ix86_builtin_func_type ftype
;
28777 const struct builtin_description
*d
;
28780 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
28781 tree attrs_log
, attrs_type_log
;
28786 /* If there are no builtins defined, we must be compiling in a
28787 language without trans-mem support. */
28788 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
28791 /* Use whatever attributes a normal TM load has. */
28792 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
28793 attrs_load
= DECL_ATTRIBUTES (decl
);
28794 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28795 /* Use whatever attributes a normal TM store has. */
28796 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
28797 attrs_store
= DECL_ATTRIBUTES (decl
);
28798 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28799 /* Use whatever attributes a normal TM log has. */
28800 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
28801 attrs_log
= DECL_ATTRIBUTES (decl
);
28802 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28804 for (i
= 0, d
= bdesc_tm
;
28805 i
< ARRAY_SIZE (bdesc_tm
);
28808 if ((d
->mask
& ix86_isa_flags
) != 0
28809 || (lang_hooks
.builtin_function
28810 == lang_hooks
.builtin_function_ext_scope
))
28812 tree type
, attrs
, attrs_type
;
28813 enum built_in_function code
= (enum built_in_function
) d
->code
;
28815 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28816 type
= ix86_get_builtin_func_type (ftype
);
28818 if (BUILTIN_TM_LOAD_P (code
))
28820 attrs
= attrs_load
;
28821 attrs_type
= attrs_type_load
;
28823 else if (BUILTIN_TM_STORE_P (code
))
28825 attrs
= attrs_store
;
28826 attrs_type
= attrs_type_store
;
28831 attrs_type
= attrs_type_log
;
28833 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
28834 /* The builtin without the prefix for
28835 calling it directly. */
28836 d
->name
+ strlen ("__builtin_"),
28838 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28839 set the TYPE_ATTRIBUTES. */
28840 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
28842 set_builtin_decl (code
, decl
, false);
28847 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28848 in the current target ISA to allow the user to compile particular modules
28849 with different target specific options that differ from the command line
28852 ix86_init_mmx_sse_builtins (void)
28854 const struct builtin_description
* d
;
28855 enum ix86_builtin_func_type ftype
;
28858 /* Add all special builtins with variable number of operands. */
28859 for (i
= 0, d
= bdesc_special_args
;
28860 i
< ARRAY_SIZE (bdesc_special_args
);
28866 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28867 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
28870 /* Add all builtins with variable number of operands. */
28871 for (i
= 0, d
= bdesc_args
;
28872 i
< ARRAY_SIZE (bdesc_args
);
28878 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28879 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28882 /* pcmpestr[im] insns. */
28883 for (i
= 0, d
= bdesc_pcmpestr
;
28884 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28887 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28888 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
28890 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
28891 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28894 /* pcmpistr[im] insns. */
28895 for (i
= 0, d
= bdesc_pcmpistr
;
28896 i
< ARRAY_SIZE (bdesc_pcmpistr
);
28899 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28900 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
28902 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
28903 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28906 /* comi/ucomi insns. */
28907 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
28909 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
28910 ftype
= INT_FTYPE_V2DF_V2DF
;
28912 ftype
= INT_FTYPE_V4SF_V4SF
;
28913 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28917 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
28918 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
28919 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
28920 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
28922 /* SSE or 3DNow!A */
28923 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28924 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
28925 IX86_BUILTIN_MASKMOVQ
);
28928 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
28929 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
28931 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
28932 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
28933 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
28934 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
28937 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
28938 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
28939 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
28940 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
28943 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
28944 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
28945 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
28946 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
28947 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
28948 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
28949 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
28950 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
28951 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
28952 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
28953 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
28954 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
28957 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
28958 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
28961 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
28962 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
28963 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
28964 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
28965 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
28966 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
28967 IX86_BUILTIN_RDRAND64_STEP
);
28970 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
28971 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
28972 IX86_BUILTIN_GATHERSIV2DF
);
28974 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
28975 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
28976 IX86_BUILTIN_GATHERSIV4DF
);
28978 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
28979 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
28980 IX86_BUILTIN_GATHERDIV2DF
);
28982 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
28983 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
28984 IX86_BUILTIN_GATHERDIV4DF
);
28986 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
28987 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
28988 IX86_BUILTIN_GATHERSIV4SF
);
28990 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
28991 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
28992 IX86_BUILTIN_GATHERSIV8SF
);
28994 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
28995 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
28996 IX86_BUILTIN_GATHERDIV4SF
);
28998 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
28999 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
29000 IX86_BUILTIN_GATHERDIV8SF
);
29002 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
29003 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
29004 IX86_BUILTIN_GATHERSIV2DI
);
29006 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
29007 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
29008 IX86_BUILTIN_GATHERSIV4DI
);
29010 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
29011 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
29012 IX86_BUILTIN_GATHERDIV2DI
);
29014 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
29015 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
29016 IX86_BUILTIN_GATHERDIV4DI
);
29018 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
29019 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
29020 IX86_BUILTIN_GATHERSIV4SI
);
29022 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
29023 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
29024 IX86_BUILTIN_GATHERSIV8SI
);
29026 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
29027 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
29028 IX86_BUILTIN_GATHERDIV4SI
);
29030 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
29031 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
29032 IX86_BUILTIN_GATHERDIV8SI
);
29034 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
29035 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
29036 IX86_BUILTIN_GATHERALTSIV4DF
);
29038 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
29039 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
29040 IX86_BUILTIN_GATHERALTDIV8SF
);
29042 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
29043 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
29044 IX86_BUILTIN_GATHERALTSIV4DI
);
29046 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
29047 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
29048 IX86_BUILTIN_GATHERALTDIV8SI
);
29051 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
29052 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
29054 /* MMX access to the vec_init patterns. */
29055 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
29056 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
29058 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
29059 V4HI_FTYPE_HI_HI_HI_HI
,
29060 IX86_BUILTIN_VEC_INIT_V4HI
);
29062 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
29063 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
29064 IX86_BUILTIN_VEC_INIT_V8QI
);
29066 /* Access to the vec_extract patterns. */
29067 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
29068 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
29069 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
29070 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
29071 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
29072 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
29073 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
29074 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
29075 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
29076 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
29078 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29079 "__builtin_ia32_vec_ext_v4hi",
29080 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
29082 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
29083 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
29085 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
29086 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
29088 /* Access to the vec_set patterns. */
29089 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
29090 "__builtin_ia32_vec_set_v2di",
29091 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
29093 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
29094 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
29096 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
29097 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
29099 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
29100 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
29102 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29103 "__builtin_ia32_vec_set_v4hi",
29104 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
29106 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
29107 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
29110 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
29111 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
29112 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
29113 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
29114 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
29115 "__builtin_ia32_rdseed_di_step",
29116 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
29119 def_builtin (0, "__builtin_ia32_addcarryx_u32",
29120 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
29121 def_builtin (OPTION_MASK_ISA_64BIT
,
29122 "__builtin_ia32_addcarryx_u64",
29123 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
29124 IX86_BUILTIN_ADDCARRYX64
);
29126 /* Add FMA4 multi-arg argument instructions */
29127 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
29132 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29133 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29137 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
29138 to return a pointer to VERSION_DECL if the outcome of the expression
29139 formed by PREDICATE_CHAIN is true. This function will be called during
29140 version dispatch to decide which function version to execute. It returns
29141 the basic block at the end, to which more conditions can be added. */
/* NOTE(review): this region is a mangled extraction -- the leading integers
   are original-file line numbers fused into the text, and gaps in that
   numbering (e.g. 29142-29143 return type, 29180-29183, 29212-29214,
   29225 onward: bb1/bb2/bb3 setup and the final pop_cfun/return) show
   that source lines are missing.  Only comments were added here; confirm
   the full body against upstream GCC before relying on it.  */
29144 add_condition_to_bb (tree function_decl, tree version_decl,
29145 tree predicate_chain, basic_block new_bb)
/* Locals: the generated GIMPLE statements and the CFG blocks produced
   by splitting NEW_BB.  */
29147 gimple return_stmt;
29148 tree convert_expr, result_var;
29149 gimple convert_stmt;
29150 gimple call_cond_stmt;
29151 gimple if_else_stmt;
29153 basic_block bb1, bb2, bb3;
29156 tree cond_var, and_expr_var = NULL_TREE;
29159 tree predicate_decl, predicate_arg;
29161 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
29163 gcc_assert (new_bb != NULL);
29164 gseq = bb_seq (new_bb);
/* Build "return (void *) &version_decl;" -- the value the resolver
   hands back when this version is selected.  */
29167 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
29168 build_fold_addr_expr (version_decl));
29169 result_var = create_tmp_var (ptr_type_node, NULL);
29170 convert_stmt = gimple_build_assign (result_var, convert_expr);
29171 return_stmt = gimple_build_return (result_var);
/* No predicate: this version is returned unconditionally.  */
29173 if (predicate_chain == NULL_TREE)
29175 gimple_seq_add_stmt (&gseq, convert_stmt);
29176 gimple_seq_add_stmt (&gseq, return_stmt);
29177 set_bb_seq (new_bb, gseq);
29178 gimple_set_bb (convert_stmt, new_bb);
29179 gimple_set_bb (return_stmt, new_bb);
/* Emit one predicate call per chain entry and AND the integer results
   together (via MIN_EXPR: any zero forces zero).  */
29184 while (predicate_chain != NULL)
29186 cond_var = create_tmp_var (integer_type_node, NULL);
29187 predicate_decl = TREE_PURPOSE (predicate_chain);
29188 predicate_arg = TREE_VALUE (predicate_chain);
29189 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
29190 gimple_call_set_lhs (call_cond_stmt, cond_var);
29192 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
29193 gimple_set_bb (call_cond_stmt, new_bb);
29194 gimple_seq_add_stmt (&gseq, call_cond_stmt);
29196 predicate_chain = TREE_CHAIN (predicate_chain);
29198 if (and_expr_var == NULL)
29199 and_expr_var = cond_var;
29202 gimple assign_stmt;
29203 /* Use MIN_EXPR to check if any integer is zero?.
29204 and_expr_var = min_expr <cond_var, and_expr_var> */
29205 assign_stmt = gimple_build_assign (and_expr_var,
29206 build2 (MIN_EXPR, integer_type_node,
29207 cond_var, and_expr_var));
29209 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
29210 gimple_set_bb (assign_stmt, new_bb);
29211 gimple_seq_add_stmt (&gseq, assign_stmt);
/* if (and_expr_var > 0) -- take this version; the GT comparison operand
   dropped by extraction at line 29216 -- confirm upstream.  */
29215 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
29217 NULL_TREE, NULL_TREE);
29218 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
29219 gimple_set_bb (if_else_stmt, new_bb);
29220 gimple_seq_add_stmt (&gseq, if_else_stmt);
29222 gimple_seq_add_stmt (&gseq, convert_stmt);
29223 gimple_seq_add_stmt (&gseq, return_stmt);
29224 set_bb_seq (new_bb, gseq);
/* Split after the condition: true edge falls into the return block,
   false edge will carry the next version's test.  */
29227 e12 = split_block (bb1, if_else_stmt);
29229 e12->flags &= ~EDGE_FALLTHRU;
29230 e12->flags |= EDGE_TRUE_VALUE;
29232 e23 = split_block (bb2, return_stmt);
29234 gimple_set_bb (convert_stmt, bb2);
29235 gimple_set_bb (return_stmt, bb2);
29238 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
29241 make_edge (bb2, EXIT_BLOCK_PTR, 0);
29248 /* This parses the attribute arguments to target in DECL and determines
29249 the right builtin to use to match the platform specification.
29250 It returns the priority value for this version decl. If PREDICATE_LIST
29251 is not NULL, it stores the list of cpu features that need to be checked
29252 before dispatching this function. */
/* NOTE(review): mangled extraction -- leading integers are fused original
   line numbers; jumps in them (29271-29290: the feature_priority
   enumerator list; 29302-29306 and 29311-29315: most of feature_list;
   several braces and return statements) show dropped source lines.
   Only comments added; confirm against upstream GCC.  */
29254 static unsigned int
29255 get_builtin_code_for_version (tree decl, tree *predicate_list)
29258 struct cl_target_option cur_target;
29260 struct cl_target_option *new_target;
29261 const char *arg_str = NULL;
29262 const char *attrs_str = NULL;
29263 char *tok_str = NULL;
29266 /* Priority of i386 features, greater value is higher priority. This is
29267 used to decide the order in which function dispatch must happen. For
29268 instance, a version specialized for SSE4.2 should be checked for dispatch
29269 before a version for SSE3, as SSE4.2 implies SSE3. */
29270 enum feature_priority
/* (enumerator list dropped by extraction here)  */
29291 enum feature_priority priority = P_ZERO;
29293 /* These are the target attribute strings for which a dispatcher is
29294 available, from fold_builtin_cpu. */
29296 static struct _feature_list
29298 const char *const name;
29299 const enum feature_priority priority;
29301 const feature_list[] =
/* (table truncated by extraction -- only four entries survived)  */
29307 {"ssse3", P_SSSE3},
29308 {"sse4.1", P_SSE4_1},
29309 {"sse4.2", P_SSE4_2},
29310 {"popcnt", P_POPCNT},
29316 static unsigned int NUM_FEATURES
29317 = sizeof (feature_list) / sizeof (struct _feature_list);
29321 tree predicate_chain = NULL_TREE;
29322 tree predicate_decl, predicate_arg;
/* Fetch the string argument of DECL's "target" attribute.  */
29324 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
29325 gcc_assert (attrs != NULL);
29327 attrs = TREE_VALUE (TREE_VALUE (attrs));
29329 gcc_assert (TREE_CODE (attrs) == STRING_CST);
29330 attrs_str = TREE_STRING_POINTER (attrs);
29332 /* Return priority zero for default function. */
29333 if (strcmp (attrs_str, "default") == 0)
29336 /* Handle arch= if specified. For priority, set it to be 1 more than
29337 the best instruction set the processor can handle. For instance, if
29338 there is a version for atom and a version for ssse3 (the highest ISA
29339 priority for atom), the atom version must be checked for dispatch
29340 before the ssse3 version. */
29341 if (strstr (attrs_str, "arch=") != NULL)
29343 cl_target_option_save (&cur_target, &global_options);
29344 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
29345 &global_options_set);
29347 gcc_assert (target_node);
29348 new_target = TREE_TARGET_OPTION (target_node);
29349 gcc_assert (new_target);
29351 if (new_target->arch_specified && new_target->arch > 0)
/* Map the named architecture to its __builtin_cpu_is string and
   dispatch priority (some arg_str assignments dropped by extraction).  */
29353 switch (new_target->arch)
29355 case PROCESSOR_CORE2:
29357 priority = P_PROC_SSSE3;
29359 case PROCESSOR_COREI7:
29360 arg_str = "corei7";
29361 priority = P_PROC_SSE4_2;
29363 case PROCESSOR_ATOM:
29365 priority = P_PROC_SSSE3;
29367 case PROCESSOR_AMDFAM10:
29368 arg_str = "amdfam10h";
29369 priority = P_PROC_SSE4_a;
29371 case PROCESSOR_BDVER1:
29372 arg_str = "bdver1";
29373 priority = P_PROC_FMA;
29375 case PROCESSOR_BDVER2:
29376 arg_str = "bdver2";
29377 priority = P_PROC_FMA;
29382 cl_target_option_restore (&global_options, &cur_target);
29384 if (predicate_list && arg_str == NULL)
29386 error_at (DECL_SOURCE_LOCATION (decl),
29387 "No dispatcher found for the versioning attributes");
29391 if (predicate_list)
/* Chain a __builtin_cpu_is ("<arch>") check onto the predicate list.  */
29393 predicate_decl = ix86_builtins[(int) IX86_BUILTIN_CPU_IS];
29394 /* For a C string literal the length includes the trailing NULL. */
29395 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
29396 predicate_chain = tree_cons (predicate_decl, predicate_arg,
29401 /* Process feature name. */
29402 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
29403 strcpy (tok_str, attrs_str);
29404 token = strtok (tok_str, ",");
29405 predicate_decl = ix86_builtins[(int) IX86_BUILTIN_CPU_SUPPORTS];
29407 while (token != NULL)
29409 /* Do not process "arch=" */
29410 if (strncmp (token, "arch=", 5) == 0)
29412 token = strtok (NULL, ",");
/* Look the token up in the dispatchable-feature table.  */
29415 for (i = 0; i < NUM_FEATURES; ++i)
29417 if (strcmp (token, feature_list[i].name) == 0)
29419 if (predicate_list)
29421 predicate_arg = build_string_literal (
29422 strlen (feature_list[i].name) + 1,
29423 feature_list[i].name);
29424 predicate_chain = tree_cons (predicate_decl, predicate_arg,
29427 /* Find the maximum priority feature. */
29428 if (feature_list[i].priority > priority)
29429 priority = feature_list[i].priority;
/* i == NUM_FEATURES means the loop above found no match.  */
29434 if (predicate_list && i == NUM_FEATURES)
29436 error_at (DECL_SOURCE_LOCATION (decl),
29437 "No dispatcher found for %s", token);
29440 token = strtok (NULL, ",");
29444 if (predicate_list && predicate_chain == NULL_TREE)
29446 error_at (DECL_SOURCE_LOCATION (decl),
29447 "No dispatcher found for the versioning attributes : %s",
29451 else if (predicate_list)
/* Hand the collected checks back in source order.  */
29453 predicate_chain = nreverse (predicate_chain);
29454 *predicate_list = predicate_chain;
29460 /* This compares the priority of target features in function DECL1
29461 and DECL2. It returns positive value if DECL1 is higher priority,
29462 negative value if DECL2 is higher priority and 0 if they are the
29466 ix86_compare_version_priority (tree decl1
, tree decl2
)
29468 unsigned int priority1
= get_builtin_code_for_version (decl1
, NULL
);
29469 unsigned int priority2
= get_builtin_code_for_version (decl2
, NULL
);
29471 return (int)priority1
- (int)priority2
;
29474 /* V1 and V2 point to function versions with different priorities
29475 based on the target ISA. This function compares their priorities. */
29478 feature_compare (const void *v1
, const void *v2
)
29480 typedef struct _function_version_info
29483 tree predicate_chain
;
29484 unsigned int dispatch_priority
;
29485 } function_version_info
;
29487 const function_version_info c1
= *(const function_version_info
*)v1
;
29488 const function_version_info c2
= *(const function_version_info
*)v2
;
29489 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
29492 /* This function generates the dispatch function for
29493 multi-versioned functions. DISPATCH_DECL is the function which will
29494 contain the dispatch logic. FNDECLS are the function choices for
29495 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
29496 in DISPATCH_DECL in which the dispatch code is generated. */
/* NOTE(review): mangled extraction -- leading integers are fused original
   line numbers; gaps (return type, the fndecls_p parameter line, braces,
   loop-variable declarations, several statement tails) mean source lines
   were dropped.  Comments only added; confirm against upstream GCC.  */
29499 dispatch_function_versions (tree dispatch_decl,
29501 basic_block *empty_bb)
29504 gimple ifunc_cpu_init_stmt;
29508 vec<tree> *fndecls;
29509 unsigned int num_versions = 0;
29510 unsigned int actual_versions = 0;
/* Per-version record: decl, its predicate checks, and its priority.  */
29513 struct _function_version_info
29516 tree predicate_chain;
29517 unsigned int dispatch_priority;
29518 }*function_version_info;
29520 gcc_assert (dispatch_decl != NULL
29521 && fndecls_p != NULL
29522 && empty_bb != NULL);
29524 /*fndecls_p is actually a vector. */
29525 fndecls = static_cast<vec<tree> *> (fndecls_p);
29527 /* At least one more version other than the default. */
29528 num_versions = fndecls->length ();
29529 gcc_assert (num_versions >= 2);
29531 function_version_info = (struct _function_version_info *)
29532 XNEWVEC (struct _function_version_info, (num_versions - 1));
29534 /* The first version in the vector is the default decl. */
29535 default_decl = (*fndecls)[0];
29537 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
29539 gseq = bb_seq (*empty_bb);
29540 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
29541 constructors, so explicity call __builtin_cpu_init here. */
29542 ifunc_cpu_init_stmt = gimple_build_call_vec (
29543 ix86_builtins[(int) IX86_BUILTIN_CPU_INIT], vNULL);
29544 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
29545 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
29546 set_bb_seq (*empty_bb, gseq);
/* Collect the non-default versions (slot 0 is the default).  */
29551 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
29553 tree version_decl = ele;
29554 tree predicate_chain = NULL_TREE;
29555 unsigned int priority;
29556 /* Get attribute string, parse it and find the right predicate decl.
29557 The predicate function could be a lengthy combination of many
29558 features, like arch-type and various isa-variants. */
29559 priority = get_builtin_code_for_version (version_decl,
29562 if (predicate_chain == NULL_TREE)
29565 function_version_info[actual_versions].version_decl = version_decl;
29566 function_version_info[actual_versions].predicate_chain
29568 function_version_info[actual_versions].dispatch_priority = priority;
29572 /* Sort the versions according to descending order of dispatch priority. The
29573 priority is based on the ISA. This is not a perfect solution. There
29574 could still be ambiguity. If more than one function version is suitable
29575 to execute, which one should be dispatched? In future, allow the user
29576 to specify a dispatch priority next to the version. */
29577 qsort (function_version_info, actual_versions,
29578 sizeof (struct _function_version_info), feature_compare);
/* Emit one conditional dispatch block per version, best first.  */
29580 for (i = 0; i < actual_versions; ++i)
29581 *empty_bb = add_condition_to_bb (dispatch_decl,
29582 function_version_info[i].version_decl,
29583 function_version_info[i].predicate_chain,
29586 /* dispatch default version at the end. */
29587 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
29590 free (function_version_info);
/* qsort () comparator for the table of "target" attribute specification
   strings built by sorted_attr_string: orders the two strings pointed to
   by V1 and V2 lexicographically via strcmp.  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *s1 = *(char *const *) v1;
  const char *s2 = *(char *const *) v2;

  return strcmp (s1, s2);
}
29605 /* ARGLIST is the argument to target attribute. This function tokenizes
29606 the comma separated arguments, sorts them and returns a string which
29607 is a unique identifier for the comma separated arguments. It also
29608 replaces non-identifier characters "=,-" with "_". */
/* NOTE(review): mangled extraction -- gaps in the fused line numbers
   (the argnum counting, the str_len_sum resets before each fill loop,
   the args[] population inside the strtok loop, and the final return of
   ret_str) show dropped source lines.  Comments only added.  */
29611 sorted_attr_string (tree arglist)
29614 size_t str_len_sum = 0;
29615 char **args = NULL;
29616 char *attr_str, *ret_str;
29618 unsigned int argnum = 1;
/* First pass over the attribute chain: accumulate the total length.  */
29621 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
29623 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
29624 size_t len = strlen (str);
29625 str_len_sum += len + 1;
29626 if (arg != arglist)
29628 for (i = 0; i < strlen (str); i++)
29633 attr_str = XNEWVEC (char, str_len_sum);
/* Second pass: concatenate all strings, comma-separated.  */
29635 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
29637 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
29638 size_t len = strlen (str);
29639 memcpy (attr_str + str_len_sum, str, len);
29640 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
29641 str_len_sum += len + 1;
29644 /* Replace "=,-" with "_". */
29645 for (i = 0; i < strlen (attr_str); i++)
29646 if (attr_str[i] == '=' || attr_str[i]== '-')
/* Tokenize on ',' and sort the tokens lexicographically.  */
29652 args = XNEWVEC (char *, argnum);
29655 attr = strtok (attr_str, ",");
29656 while (attr != NULL)
29660 attr = strtok (NULL, ",");
29663 qsort (args, argnum, sizeof (char *), attr_strcmp);
29665 ret_str = XNEWVEC (char, str_len_sum);
/* Rebuild the identifier: sorted tokens joined with '_'.  */
29667 for (i = 0; i < argnum; i++)
29669 size_t len = strlen (args[i]);
29670 memcpy (ret_str + str_len_sum, args[i], len);
29671 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
29672 str_len_sum += len + 1;
29676 XDELETEVEC (attr_str);
29680 /* This function changes the assembler name for functions that are
29681 versions. If DECL is a function version and has a "target"
29682 attribute, it appends the attribute string to its assembler name. */
/* NOTE(review): mangled extraction -- the return type, the version_attr
   declaration, the early return for the "default" version, and the final
   "return ret" were dropped.  Comments only added; confirm upstream.  */
29685 ix86_mangle_function_version_assembler_name (tree decl, tree id)
29688 const char *orig_name, *version_string;
29689 char *attr_str, *assembler_name;
/* gnu_inline versions would never get a body emitted -- reject them.  */
29691 if (DECL_DECLARED_INLINE_P (decl)
29692 && lookup_attribute ("gnu_inline",
29693 DECL_ATTRIBUTES (decl)))
29694 error_at (DECL_SOURCE_LOCATION (decl),
29695 "Function versions cannot be marked as gnu_inline,"
29696 " bodies have to be generated");
29698 if (DECL_VIRTUAL_P (decl)
29699 || DECL_VINDEX (decl))
29700 sorry ("Virtual function multiversioning not supported");
29702 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
29704 /* target attribute string cannot be NULL. */
29705 gcc_assert (version_attr != NULL_TREE);
29707 orig_name = IDENTIFIER_POINTER (id);
29709 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* The default version presumably keeps the plain name -- the branch body
   at line 29711 was dropped by extraction; confirm upstream.  */
29711 if (strcmp (version_string, "default") == 0)
/* Build "<origname>.<sorted-attrs>" (+1 for '.', +1 for NUL).  */
29714 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
29715 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
29717 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
29719 /* Allow assembler name to be modified if already set. */
29720 if (DECL_ASSEMBLER_NAME_SET_P (decl))
29721 SET_DECL_RTL (decl, NULL);
29723 tree ret = get_identifier (assembler_name);
29724 XDELETEVEC (attr_str);
29725 XDELETEVEC (assembler_name);
29729 /* This function returns true if FN1 and FN2 are versions of the same function,
29730 that is, the target strings of the function decls are different. This assumes
29731 that FN1 and FN2 have the same signature. */
/* NOTE(review): mangled extraction -- the return type, result variable,
   attr1/attr2 declarations and every return statement were dropped.
   Comments only added; confirm against upstream GCC.  */
29734 ix86_function_versions (tree fn1, tree fn2)
29737 char *target1, *target2;
/* Only function decls can be versions of each other.  */
29740 if (TREE_CODE (fn1) != FUNCTION_DECL
29741 || TREE_CODE (fn2) != FUNCTION_DECL)
29744 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
29745 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
29747 /* At least one function decl should have the target attribute specified. */
29748 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
29751 /* Diagnose missing target attribute if one of the decls is already
29752 multi-versioned. */
29753 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
29755 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
/* Presumably fn1/fn2 are normalized so the attributed one is fn1 -- the
   swap around line 29757 was dropped by extraction; confirm upstream.  */
29757 if (attr2 != NULL_TREE)
29764 error_at (DECL_SOURCE_LOCATION (fn2),
29765 "missing %<target%> attribute for multi-versioned %D",
29767 inform (DECL_SOURCE_LOCATION (fn1),
29768 "previous declaration of %D", fn1);
29769 /* Prevent diagnosing of the same error multiple times. */
29770 DECL_ATTRIBUTES (fn2)
29771 = tree_cons (get_identifier ("target"),
29772 copy_node (TREE_VALUE (attr1)),
29773 DECL_ATTRIBUTES (fn2));
29778 target1 = sorted_attr_string (TREE_VALUE (attr1));
29779 target2 = sorted_attr_string (TREE_VALUE (attr2));
29781 /* The sorted target strings must be different for fn1 and fn2
   to be versions (closing of this comment dropped by extraction).  */
29783 if (strcmp (target1, target2) == 0)
29788 XDELETEVEC (target1);
29789 XDELETEVEC (target2);
29795 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
29797 /* For function version, add the target suffix to the assembler name. */
29798 if (TREE_CODE (decl
) == FUNCTION_DECL
29799 && DECL_FUNCTION_VERSIONED (decl
))
29800 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
29801 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
29802 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
29808 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
29809 is true, append the full path name of the source file. */
/* NOTE(review): mangled extraction -- the return type, braces, the
   name/name_len declarations and the "if (make_unique)" guards that
   evidently select between the two snprintf forms (gaps at 29823,
   29827-29828, 29833, 29835-29836) were dropped.  Comments only.  */
29812 make_name (tree decl, const char *suffix, bool make_unique)
29814 char *global_var_name;
29817 const char *unique_name = NULL;
29819 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29821 /* Get a unique name that can be used globally without any chances
29822 of collision at link time. */
29824 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* +2: one separating '.' and the trailing NUL.  */
29826 name_len = strlen (name) + strlen (suffix) + 2;
29829 name_len += strlen (unique_name) + 1;
29830 global_var_name = XNEWVEC (char, name_len);
29832 /* Use '.' to concatenate names as it is demangler friendly. */
29834 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
29837 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
29839 return global_var_name;
29842 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
29844 /* Make a dispatcher declaration for the multi-versioned function DECL.
29845 Calls to DECL function will be replaced with calls to the dispatcher
29846 by the front-end. Return the decl created. */
/* NOTE(review): mangled extraction -- the return type, braces, the
   func_name/func_decl declarations, the body of the TREE_PUBLIC branch
   and the final return were dropped.  Comments only added.  */
29849 make_dispatcher_decl (const tree decl)
29853 tree fn_type, func_type;
29854 bool is_uniq = false;
/* Non-public decls presumably get a uniquified name -- the branch body
   at line 29856 was dropped by extraction; confirm upstream.  */
29856 if (TREE_PUBLIC (decl) == 0)
29859 func_name = make_name (decl, "ifunc", is_uniq);
/* The dispatcher shares DECL's full function type.  */
29861 fn_type = TREE_TYPE (decl);
29862 func_type = build_function_type (TREE_TYPE (fn_type),
29863 TYPE_ARG_TYPES (fn_type));
29865 func_decl = build_fn_decl (func_name, func_type);
29866 XDELETEVEC (func_name);
29867 TREE_USED (func_decl) = 1;
29868 DECL_CONTEXT (func_decl) = NULL_TREE;
29869 DECL_INITIAL (func_decl) = error_mark_node;
29870 DECL_ARTIFICIAL (func_decl) = 1;
29871 /* Mark this func as external, the resolver will flip it again if
29872 it gets generated. */
29873 DECL_EXTERNAL (func_decl) = 1;
29874 /* This will be of type IFUNCs have to be externally visible. */
29875 TREE_PUBLIC (func_decl) = 1;
29882 /* Returns true if decl is multi-versioned and DECL is the default function,
29883 that is it is not tagged with target specific optimization. */
/* NOTE(review): mangled extraction -- the return type, braces, the early
   "return false" for the first test, and a gcc_assert on attr were
   dropped.  Comments only added; confirm against upstream GCC.  */
29886 is_function_default_version (const tree decl)
29888 if (TREE_CODE (decl) != FUNCTION_DECL
29889 || !DECL_FUNCTION_VERSIONED (decl))
29891 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
29893 attr = TREE_VALUE (TREE_VALUE (attr));
/* The default version is tagged with the literal "default" string.  */
29894 return (TREE_CODE (attr) == STRING_CST
29895 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
29898 /* Make a dispatcher declaration for the multi-versioned function DECL.
29899 Calls to DECL function will be replaced with calls to the dispatcher
29900 by the front-end. Returns the decl of the dispatcher function. */
/* NOTE(review): mangled extraction -- braces, the initialization of
   first_v from node_v, several break/return statements, the loop advance
   in the ifunc branch and the #else/#endif tail were dropped.  Comments
   only added; confirm against upstream GCC.  */
29903 ix86_get_function_versions_dispatcher (void *decl)
29905 tree fn = (tree) decl;
29906 struct cgraph_node *node = NULL;
29907 struct cgraph_node *default_node = NULL;
29908 struct cgraph_function_version_info *node_v = NULL;
29909 struct cgraph_function_version_info *first_v = NULL;
29911 tree dispatch_decl = NULL;
29913 struct cgraph_function_version_info *default_version_info = NULL;
29915 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
29917 node = cgraph_get_node (fn);
29918 gcc_assert (node != NULL);
29920 node_v = get_cgraph_node_version (node);
29921 gcc_assert (node_v != NULL);
/* Reuse an already-created resolver.  */
29923 if (node_v->dispatcher_resolver != NULL)
29924 return node_v->dispatcher_resolver;
29926 /* Find the default version and make it the first node. */
29928 /* Go to the beginning of the chain. */
29929 while (first_v->prev != NULL)
29930 first_v = first_v->prev;
29931 default_version_info = first_v;
/* Walk forward until the decl tagged "default" is found.  */
29932 while (default_version_info != NULL)
29934 if (is_function_default_version
29935 (default_version_info->this_node->symbol.decl))
29937 default_version_info = default_version_info->next;
29940 /* If there is no default node, just return NULL. */
29941 if (default_version_info == NULL)
/* Unlink the default record and splice it onto the list head.  */
29944 /* Make default info the first node. */
29945 if (first_v != default_version_info)
29947 default_version_info->prev->next = default_version_info->next;
29948 if (default_version_info->next)
29949 default_version_info->next->prev = default_version_info->prev;
29950 first_v->prev = default_version_info;
29951 default_version_info->next = first_v;
29952 default_version_info->prev = NULL;
29955 default_node = default_version_info->this_node;
29957 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
29958 if (targetm.has_ifunc_p ())
29960 struct cgraph_function_version_info *it_v = NULL;
29961 struct cgraph_node *dispatcher_node = NULL;
29962 struct cgraph_function_version_info *dispatcher_version_info = NULL;
29964 /* Right now, the dispatching is done via ifunc. */
29965 dispatch_decl = make_dispatcher_decl (default_node->symbol.decl);
29967 dispatcher_node = cgraph_get_create_node (dispatch_decl);
29968 gcc_assert (dispatcher_node != NULL);
29969 dispatcher_node->dispatcher_function = 1;
29970 dispatcher_version_info
29971 = insert_new_cgraph_node_version (dispatcher_node);
29972 dispatcher_version_info->next = default_version_info;
29973 dispatcher_node->symbol.definition = 1;
29975 /* Set the dispatcher for all the versions. */
29976 it_v = default_version_info;
29977 while (it_v != NULL)
29979 it_v->dispatcher_resolver = dispatch_decl;
/* Without ifunc support, multiversion dispatch cannot be emitted.  */
29986 error_at (DECL_SOURCE_LOCATION (default_node->symbol.decl),
29987 "multiversioning needs ifunc which is not supported "
29991 return dispatch_decl;
29994 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
   it onto CHAIN (closing of this comment was dropped by extraction). */
/* NOTE(review): mangled extraction -- the return type, braces, the
   declarations of attr_name/attr_args/attr and the final "return attr"
   were dropped.  Comments only added; confirm against upstream GCC.  */
29998 make_attribute (const char *name, const char *arg_name, tree chain)
30001 tree attr_arg_name;
/* Build the single-string argument list, then cons the new attribute
   onto CHAIN.  */
30005 attr_name = get_identifier (name);
30006 attr_arg_name = build_string (strlen (arg_name), arg_name);
30007 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
30008 attr = tree_cons (attr_name, attr_args, chain);
30012 /* Make the resolver function decl to dispatch the versions of
30013 a multi-versioned function, DEFAULT_DECL. Create an
30014 empty basic block in the resolver and store the pointer in
30015 EMPTY_BB. Return the decl of the resolver function. */
30018 make_resolver_func (const tree default_decl
,
30019 const tree dispatch_decl
,
30020 basic_block
*empty_bb
)
30022 char *resolver_name
;
30023 tree decl
, type
, decl_name
, t
;
30024 bool is_uniq
= false;
30026 /* IFUNC's have to be globally visible. So, if the default_decl is
30027 not, then the name of the IFUNC should be made unique. */
30028 if (TREE_PUBLIC (default_decl
) == 0)
30031 /* Append the filename to the resolver function if the versions are
30032 not externally visible. This is because the resolver function has
30033 to be externally visible for the loader to find it. So, appending
30034 the filename will prevent conflicts with a resolver function from
30035 another module which is based on the same version name. */
30036 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
30038 /* The resolver function should return a (void *). */
30039 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
30041 decl
= build_fn_decl (resolver_name
, type
);
30042 decl_name
= get_identifier (resolver_name
);
30043 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
30045 DECL_NAME (decl
) = decl_name
;
30046 TREE_USED (decl
) = 1;
30047 DECL_ARTIFICIAL (decl
) = 1;
30048 DECL_IGNORED_P (decl
) = 0;
30049 /* IFUNC resolvers have to be externally visible. */
30050 TREE_PUBLIC (decl
) = 1;
30051 DECL_UNINLINABLE (decl
) = 1;
30053 /* Resolver is not external, body is generated. */
30054 DECL_EXTERNAL (decl
) = 0;
30055 DECL_EXTERNAL (dispatch_decl
) = 0;
30057 DECL_CONTEXT (decl
) = NULL_TREE
;
30058 DECL_INITIAL (decl
) = make_node (BLOCK
);
30059 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
30061 if (DECL_COMDAT_GROUP (default_decl
)
30062 || TREE_PUBLIC (default_decl
))
30064 /* In this case, each translation unit with a call to this
30065 versioned function will put out a resolver. Ensure it
30066 is comdat to keep just one copy. */
30067 DECL_COMDAT (decl
) = 1;
30068 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
30070 /* Build result decl and add to function_decl. */
30071 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
30072 DECL_ARTIFICIAL (t
) = 1;
30073 DECL_IGNORED_P (t
) = 1;
30074 DECL_RESULT (decl
) = t
;
30076 gimplify_function_tree (decl
);
30077 push_cfun (DECL_STRUCT_FUNCTION (decl
));
30078 *empty_bb
= init_lowered_empty_function (decl
, false);
30080 cgraph_add_new_function (decl
, true);
30081 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
30085 gcc_assert (dispatch_decl
!= NULL
);
30086 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
30087 DECL_ATTRIBUTES (dispatch_decl
)
30088 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
30090 /* Create the alias for dispatch to resolver here. */
30091 /*cgraph_create_function_alias (dispatch_decl, decl);*/
30092 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
30093 XDELETEVEC (resolver_name
);
30097 /* Generate the dispatching code body to dispatch multi-versioned function
30098 DECL. The target hook is called to process the "target" attributes and
30099 provide the code to dispatch the right function at run-time. NODE points
30100 to the dispatcher decl whose body will be created. */
30103 ix86_generate_version_dispatcher_body (void *node_p
)
30105 tree resolver_decl
;
30106 basic_block empty_bb
;
30107 vec
<tree
> fn_ver_vec
= vNULL
;
30108 tree default_ver_decl
;
30109 struct cgraph_node
*versn
;
30110 struct cgraph_node
*node
;
30112 struct cgraph_function_version_info
*node_version_info
= NULL
;
30113 struct cgraph_function_version_info
*versn_info
= NULL
;
30115 node
= (cgraph_node
*)node_p
;
30117 node_version_info
= get_cgraph_node_version (node
);
30118 gcc_assert (node
->dispatcher_function
30119 && node_version_info
!= NULL
);
30121 if (node_version_info
->dispatcher_resolver
)
30122 return node_version_info
->dispatcher_resolver
;
30124 /* The first version in the chain corresponds to the default version. */
30125 default_ver_decl
= node_version_info
->next
->this_node
->symbol
.decl
;
30127 /* node is going to be an alias, so remove the finalized bit. */
30128 node
->symbol
.definition
= false;
30130 resolver_decl
= make_resolver_func (default_ver_decl
,
30131 node
->symbol
.decl
, &empty_bb
);
30133 node_version_info
->dispatcher_resolver
= resolver_decl
;
30135 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
30137 fn_ver_vec
.create (2);
30139 for (versn_info
= node_version_info
->next
; versn_info
;
30140 versn_info
= versn_info
->next
)
30142 versn
= versn_info
->this_node
;
30143 /* Check for virtual functions here again, as by this time it should
30144 have been determined if this function needs a vtable index or
30145 not. This happens for methods in derived classes that override
30146 virtual methods in base classes but are not explicitly marked as
30148 if (DECL_VINDEX (versn
->symbol
.decl
))
30149 sorry ("Virtual function multiversioning not supported");
30151 fn_ver_vec
.safe_push (versn
->symbol
.decl
);
30154 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
30155 fn_ver_vec
.release ();
30156 rebuild_cgraph_edges ();
30158 return resolver_decl
;
30160 /* This builds the processor_model struct type defined in
30161 libgcc/config/i386/cpuinfo.c */
30164 build_processor_model_struct (void)
30166 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
30168 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
30170 tree type
= make_node (RECORD_TYPE
);
30172 /* The first 3 fields are unsigned int. */
30173 for (i
= 0; i
< 3; ++i
)
30175 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30176 get_identifier (field_name
[i
]), unsigned_type_node
);
30177 if (field_chain
!= NULL_TREE
)
30178 DECL_CHAIN (field
) = field_chain
;
30179 field_chain
= field
;
30182 /* The last field is an array of unsigned integers of size one. */
30183 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30184 get_identifier (field_name
[3]),
30185 build_array_type (unsigned_type_node
,
30186 build_index_type (size_one_node
)));
30187 if (field_chain
!= NULL_TREE
)
30188 DECL_CHAIN (field
) = field_chain
;
30189 field_chain
= field
;
30191 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
30195 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
30198 make_var_decl (tree type
, const char *name
)
30202 new_decl
= build_decl (UNKNOWN_LOCATION
,
30204 get_identifier(name
),
30207 DECL_EXTERNAL (new_decl
) = 1;
30208 TREE_STATIC (new_decl
) = 1;
30209 TREE_PUBLIC (new_decl
) = 1;
30210 DECL_INITIAL (new_decl
) = 0;
30211 DECL_ARTIFICIAL (new_decl
) = 0;
30212 DECL_PRESERVE_P (new_decl
) = 1;
30214 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
30215 assemble_variable (new_decl
, 0, 0, 0);
30220 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
30221 into an integer defined in libgcc/config/i386/cpuinfo.c */
30224 fold_builtin_cpu (tree fndecl
, tree
*args
)
30227 enum ix86_builtins fn_code
= (enum ix86_builtins
)
30228 DECL_FUNCTION_CODE (fndecl
);
30229 tree param_string_cst
= NULL
;
30231 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
30232 enum processor_features
30248 /* These are the values for vendor types and cpu types and subtypes
30249 in cpuinfo.c. Cpu types and subtypes should be subtracted by
30250 the corresponding start value. */
30251 enum processor_model
30262 M_CPU_SUBTYPE_START
,
30263 M_INTEL_COREI7_NEHALEM
,
30264 M_INTEL_COREI7_WESTMERE
,
30265 M_INTEL_COREI7_SANDYBRIDGE
,
30266 M_AMDFAM10H_BARCELONA
,
30267 M_AMDFAM10H_SHANGHAI
,
30268 M_AMDFAM10H_ISTANBUL
,
30269 M_AMDFAM15H_BDVER1
,
30270 M_AMDFAM15H_BDVER2
,
30274 static struct _arch_names_table
30276 const char *const name
;
30277 const enum processor_model model
;
30279 const arch_names_table
[] =
30282 {"intel", M_INTEL
},
30283 {"atom", M_INTEL_ATOM
},
30284 {"slm", M_INTEL_SLM
},
30285 {"core2", M_INTEL_CORE2
},
30286 {"corei7", M_INTEL_COREI7
},
30287 {"nehalem", M_INTEL_COREI7_NEHALEM
},
30288 {"westmere", M_INTEL_COREI7_WESTMERE
},
30289 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
30290 {"amdfam10h", M_AMDFAM10H
},
30291 {"barcelona", M_AMDFAM10H_BARCELONA
},
30292 {"shanghai", M_AMDFAM10H_SHANGHAI
},
30293 {"istanbul", M_AMDFAM10H_ISTANBUL
},
30294 {"amdfam15h", M_AMDFAM15H
},
30295 {"bdver1", M_AMDFAM15H_BDVER1
},
30296 {"bdver2", M_AMDFAM15H_BDVER2
},
30297 {"bdver3", M_AMDFAM15H_BDVER3
},
30300 static struct _isa_names_table
30302 const char *const name
;
30303 const enum processor_features feature
;
30305 const isa_names_table
[] =
30309 {"popcnt", F_POPCNT
},
30313 {"ssse3", F_SSSE3
},
30314 {"sse4.1", F_SSE4_1
},
30315 {"sse4.2", F_SSE4_2
},
30320 tree __processor_model_type
= build_processor_model_struct ();
30321 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
30325 varpool_add_new_variable (__cpu_model_var
);
30327 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
30329 param_string_cst
= *args
;
30330 while (param_string_cst
30331 && TREE_CODE (param_string_cst
) != STRING_CST
)
30333 /* *args must be a expr that can contain other EXPRS leading to a
30335 if (!EXPR_P (param_string_cst
))
30337 error ("Parameter to builtin must be a string constant or literal");
30338 return integer_zero_node
;
30340 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
30343 gcc_assert (param_string_cst
);
30345 if (fn_code
== IX86_BUILTIN_CPU_IS
)
30351 unsigned int field_val
= 0;
30352 unsigned int NUM_ARCH_NAMES
30353 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
30355 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
30356 if (strcmp (arch_names_table
[i
].name
,
30357 TREE_STRING_POINTER (param_string_cst
)) == 0)
30360 if (i
== NUM_ARCH_NAMES
)
30362 error ("Parameter to builtin not valid: %s",
30363 TREE_STRING_POINTER (param_string_cst
));
30364 return integer_zero_node
;
30367 field
= TYPE_FIELDS (__processor_model_type
);
30368 field_val
= arch_names_table
[i
].model
;
30370 /* CPU types are stored in the next field. */
30371 if (field_val
> M_CPU_TYPE_START
30372 && field_val
< M_CPU_SUBTYPE_START
)
30374 field
= DECL_CHAIN (field
);
30375 field_val
-= M_CPU_TYPE_START
;
30378 /* CPU subtypes are stored in the next field. */
30379 if (field_val
> M_CPU_SUBTYPE_START
)
30381 field
= DECL_CHAIN ( DECL_CHAIN (field
));
30382 field_val
-= M_CPU_SUBTYPE_START
;
30385 /* Get the appropriate field in __cpu_model. */
30386 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
30389 /* Check the value. */
30390 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
30391 build_int_cstu (unsigned_type_node
, field_val
));
30392 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
30394 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
30401 unsigned int field_val
= 0;
30402 unsigned int NUM_ISA_NAMES
30403 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
30405 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
30406 if (strcmp (isa_names_table
[i
].name
,
30407 TREE_STRING_POINTER (param_string_cst
)) == 0)
30410 if (i
== NUM_ISA_NAMES
)
30412 error ("Parameter to builtin not valid: %s",
30413 TREE_STRING_POINTER (param_string_cst
));
30414 return integer_zero_node
;
30417 field
= TYPE_FIELDS (__processor_model_type
);
30418 /* Get the last field, which is __cpu_features. */
30419 while (DECL_CHAIN (field
))
30420 field
= DECL_CHAIN (field
);
30422 /* Get the appropriate field: __cpu_model.__cpu_features */
30423 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
30426 /* Access the 0th element of __cpu_features array. */
30427 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
30428 integer_zero_node
, NULL_TREE
, NULL_TREE
);
30430 field_val
= (1 << isa_names_table
[i
].feature
);
30431 /* Return __cpu_model.__cpu_features[0] & field_val */
30432 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
30433 build_int_cstu (unsigned_type_node
, field_val
));
30434 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
30436 gcc_unreachable ();
30440 ix86_fold_builtin (tree fndecl
, int n_args
,
30441 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
30443 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
30445 enum ix86_builtins fn_code
= (enum ix86_builtins
)
30446 DECL_FUNCTION_CODE (fndecl
);
30447 if (fn_code
== IX86_BUILTIN_CPU_IS
30448 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
30450 gcc_assert (n_args
== 1);
30451 return fold_builtin_cpu (fndecl
, args
);
30455 #ifdef SUBTARGET_FOLD_BUILTIN
30456 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
30462 /* Make builtins to detect cpu type and features supported. NAME is
30463 the builtin name, CODE is the builtin code, and FTYPE is the function
30464 type of the builtin. */
30467 make_cpu_type_builtin (const char* name
, int code
,
30468 enum ix86_builtin_func_type ftype
, bool is_const
)
30473 type
= ix86_get_builtin_func_type (ftype
);
30474 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
30476 gcc_assert (decl
!= NULL_TREE
);
30477 ix86_builtins
[(int) code
] = decl
;
30478 TREE_READONLY (decl
) = is_const
;
30481 /* Make builtins to get CPU type and features supported. The created
30484 __builtin_cpu_init (), to detect cpu type and features,
30485 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
30486 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
30490 ix86_init_platform_type_builtins (void)
30492 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
30493 INT_FTYPE_VOID
, false);
30494 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
30495 INT_FTYPE_PCCHAR
, true);
30496 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
30497 INT_FTYPE_PCCHAR
, true);
30500 /* Internal method for ix86_init_builtins. */
30503 ix86_init_builtins_va_builtins_abi (void)
30505 tree ms_va_ref
, sysv_va_ref
;
30506 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
30507 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
30508 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
30509 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
30513 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
30514 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
30515 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
30517 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
30520 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
30521 fnvoid_va_start_ms
=
30522 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
30523 fnvoid_va_end_sysv
=
30524 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
30525 fnvoid_va_start_sysv
=
30526 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
30528 fnvoid_va_copy_ms
=
30529 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
30531 fnvoid_va_copy_sysv
=
30532 build_function_type_list (void_type_node
, sysv_va_ref
,
30533 sysv_va_ref
, NULL_TREE
);
30535 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
30536 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
30537 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
30538 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
30539 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
30540 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
30541 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
30542 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
30543 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
30544 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
30545 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
30546 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
30550 ix86_init_builtin_types (void)
30552 tree float128_type_node
, float80_type_node
;
30554 /* The __float80 type. */
30555 float80_type_node
= long_double_type_node
;
30556 if (TYPE_MODE (float80_type_node
) != XFmode
)
30558 /* The __float80 type. */
30559 float80_type_node
= make_node (REAL_TYPE
);
30561 TYPE_PRECISION (float80_type_node
) = 80;
30562 layout_type (float80_type_node
);
30564 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
30566 /* The __float128 type. */
30567 float128_type_node
= make_node (REAL_TYPE
);
30568 TYPE_PRECISION (float128_type_node
) = 128;
30569 layout_type (float128_type_node
);
30570 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
30572 /* This macro is built by i386-builtin-types.awk. */
30573 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
30577 ix86_init_builtins (void)
30581 ix86_init_builtin_types ();
30583 /* Builtins to get CPU type and features. */
30584 ix86_init_platform_type_builtins ();
30586 /* TFmode support builtins. */
30587 def_builtin_const (0, "__builtin_infq",
30588 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
30589 def_builtin_const (0, "__builtin_huge_valq",
30590 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
30592 /* We will expand them to normal call if SSE isn't available since
30593 they are used by libgcc. */
30594 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
30595 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
30596 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
30597 TREE_READONLY (t
) = 1;
30598 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
30600 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
30601 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
30602 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
30603 TREE_READONLY (t
) = 1;
30604 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
30606 ix86_init_tm_builtins ();
30607 ix86_init_mmx_sse_builtins ();
30610 ix86_init_builtins_va_builtins_abi ();
30612 #ifdef SUBTARGET_INIT_BUILTINS
30613 SUBTARGET_INIT_BUILTINS
;
30617 /* Return the ix86 builtin for CODE. */
30620 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
30622 if (code
>= IX86_BUILTIN_MAX
)
30623 return error_mark_node
;
30625 return ix86_builtins
[code
];
30628 /* Errors in the source file can cause expand_expr to return const0_rtx
30629 where we expect a vector. To avoid crashing, use one of the vector
30630 clear instructions. */
30632 safe_vector_operand (rtx x
, enum machine_mode mode
)
30634 if (x
== const0_rtx
)
30635 x
= CONST0_RTX (mode
);
30639 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
30642 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
30645 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30646 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30647 rtx op0
= expand_normal (arg0
);
30648 rtx op1
= expand_normal (arg1
);
30649 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30650 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30651 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
30653 if (VECTOR_MODE_P (mode0
))
30654 op0
= safe_vector_operand (op0
, mode0
);
30655 if (VECTOR_MODE_P (mode1
))
30656 op1
= safe_vector_operand (op1
, mode1
);
30658 if (optimize
|| !target
30659 || GET_MODE (target
) != tmode
30660 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30661 target
= gen_reg_rtx (tmode
);
30663 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
30665 rtx x
= gen_reg_rtx (V4SImode
);
30666 emit_insn (gen_sse2_loadd (x
, op1
));
30667 op1
= gen_lowpart (TImode
, x
);
30670 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30671 op0
= copy_to_mode_reg (mode0
, op0
);
30672 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
30673 op1
= copy_to_mode_reg (mode1
, op1
);
30675 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30684 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
30687 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
30688 enum ix86_builtin_func_type m_type
,
30689 enum rtx_code sub_code
)
30694 bool comparison_p
= false;
30696 bool last_arg_constant
= false;
30697 int num_memory
= 0;
30700 enum machine_mode mode
;
30703 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30707 case MULTI_ARG_4_DF2_DI_I
:
30708 case MULTI_ARG_4_DF2_DI_I1
:
30709 case MULTI_ARG_4_SF2_SI_I
:
30710 case MULTI_ARG_4_SF2_SI_I1
:
30712 last_arg_constant
= true;
30715 case MULTI_ARG_3_SF
:
30716 case MULTI_ARG_3_DF
:
30717 case MULTI_ARG_3_SF2
:
30718 case MULTI_ARG_3_DF2
:
30719 case MULTI_ARG_3_DI
:
30720 case MULTI_ARG_3_SI
:
30721 case MULTI_ARG_3_SI_DI
:
30722 case MULTI_ARG_3_HI
:
30723 case MULTI_ARG_3_HI_SI
:
30724 case MULTI_ARG_3_QI
:
30725 case MULTI_ARG_3_DI2
:
30726 case MULTI_ARG_3_SI2
:
30727 case MULTI_ARG_3_HI2
:
30728 case MULTI_ARG_3_QI2
:
30732 case MULTI_ARG_2_SF
:
30733 case MULTI_ARG_2_DF
:
30734 case MULTI_ARG_2_DI
:
30735 case MULTI_ARG_2_SI
:
30736 case MULTI_ARG_2_HI
:
30737 case MULTI_ARG_2_QI
:
30741 case MULTI_ARG_2_DI_IMM
:
30742 case MULTI_ARG_2_SI_IMM
:
30743 case MULTI_ARG_2_HI_IMM
:
30744 case MULTI_ARG_2_QI_IMM
:
30746 last_arg_constant
= true;
30749 case MULTI_ARG_1_SF
:
30750 case MULTI_ARG_1_DF
:
30751 case MULTI_ARG_1_SF2
:
30752 case MULTI_ARG_1_DF2
:
30753 case MULTI_ARG_1_DI
:
30754 case MULTI_ARG_1_SI
:
30755 case MULTI_ARG_1_HI
:
30756 case MULTI_ARG_1_QI
:
30757 case MULTI_ARG_1_SI_DI
:
30758 case MULTI_ARG_1_HI_DI
:
30759 case MULTI_ARG_1_HI_SI
:
30760 case MULTI_ARG_1_QI_DI
:
30761 case MULTI_ARG_1_QI_SI
:
30762 case MULTI_ARG_1_QI_HI
:
30766 case MULTI_ARG_2_DI_CMP
:
30767 case MULTI_ARG_2_SI_CMP
:
30768 case MULTI_ARG_2_HI_CMP
:
30769 case MULTI_ARG_2_QI_CMP
:
30771 comparison_p
= true;
30774 case MULTI_ARG_2_SF_TF
:
30775 case MULTI_ARG_2_DF_TF
:
30776 case MULTI_ARG_2_DI_TF
:
30777 case MULTI_ARG_2_SI_TF
:
30778 case MULTI_ARG_2_HI_TF
:
30779 case MULTI_ARG_2_QI_TF
:
30785 gcc_unreachable ();
30788 if (optimize
|| !target
30789 || GET_MODE (target
) != tmode
30790 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30791 target
= gen_reg_rtx (tmode
);
30793 gcc_assert (nargs
<= 4);
30795 for (i
= 0; i
< nargs
; i
++)
30797 tree arg
= CALL_EXPR_ARG (exp
, i
);
30798 rtx op
= expand_normal (arg
);
30799 int adjust
= (comparison_p
) ? 1 : 0;
30800 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
30802 if (last_arg_constant
&& i
== nargs
- 1)
30804 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
30806 enum insn_code new_icode
= icode
;
30809 case CODE_FOR_xop_vpermil2v2df3
:
30810 case CODE_FOR_xop_vpermil2v4sf3
:
30811 case CODE_FOR_xop_vpermil2v4df3
:
30812 case CODE_FOR_xop_vpermil2v8sf3
:
30813 error ("the last argument must be a 2-bit immediate");
30814 return gen_reg_rtx (tmode
);
30815 case CODE_FOR_xop_rotlv2di3
:
30816 new_icode
= CODE_FOR_rotlv2di3
;
30818 case CODE_FOR_xop_rotlv4si3
:
30819 new_icode
= CODE_FOR_rotlv4si3
;
30821 case CODE_FOR_xop_rotlv8hi3
:
30822 new_icode
= CODE_FOR_rotlv8hi3
;
30824 case CODE_FOR_xop_rotlv16qi3
:
30825 new_icode
= CODE_FOR_rotlv16qi3
;
30827 if (CONST_INT_P (op
))
30829 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
30830 op
= GEN_INT (INTVAL (op
) & mask
);
30831 gcc_checking_assert
30832 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
30836 gcc_checking_assert
30838 && insn_data
[new_icode
].operand
[0].mode
== tmode
30839 && insn_data
[new_icode
].operand
[1].mode
== tmode
30840 && insn_data
[new_icode
].operand
[2].mode
== mode
30841 && insn_data
[new_icode
].operand
[0].predicate
30842 == insn_data
[icode
].operand
[0].predicate
30843 && insn_data
[new_icode
].operand
[1].predicate
30844 == insn_data
[icode
].operand
[1].predicate
);
30850 gcc_unreachable ();
30857 if (VECTOR_MODE_P (mode
))
30858 op
= safe_vector_operand (op
, mode
);
30860 /* If we aren't optimizing, only allow one memory operand to be
30862 if (memory_operand (op
, mode
))
30865 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
30868 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
30870 op
= force_reg (mode
, op
);
30874 args
[i
].mode
= mode
;
30880 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30885 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
30886 GEN_INT ((int)sub_code
));
30887 else if (! comparison_p
)
30888 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30891 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
30895 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
30900 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
30904 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
30908 gcc_unreachable ();
30918 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30919 insns with vec_merge. */
30922 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
30926 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30927 rtx op1
, op0
= expand_normal (arg0
);
30928 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30929 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30931 if (optimize
|| !target
30932 || GET_MODE (target
) != tmode
30933 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30934 target
= gen_reg_rtx (tmode
);
30936 if (VECTOR_MODE_P (mode0
))
30937 op0
= safe_vector_operand (op0
, mode0
);
30939 if ((optimize
&& !register_operand (op0
, mode0
))
30940 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30941 op0
= copy_to_mode_reg (mode0
, op0
);
30944 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
30945 op1
= copy_to_mode_reg (mode0
, op1
);
30947 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30954 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30957 ix86_expand_sse_compare (const struct builtin_description
*d
,
30958 tree exp
, rtx target
, bool swap
)
30961 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30962 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30963 rtx op0
= expand_normal (arg0
);
30964 rtx op1
= expand_normal (arg1
);
30966 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30967 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30968 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30969 enum rtx_code comparison
= d
->comparison
;
30971 if (VECTOR_MODE_P (mode0
))
30972 op0
= safe_vector_operand (op0
, mode0
);
30973 if (VECTOR_MODE_P (mode1
))
30974 op1
= safe_vector_operand (op1
, mode1
);
30976 /* Swap operands if we have a comparison that isn't available in
30980 rtx tmp
= gen_reg_rtx (mode1
);
30981 emit_move_insn (tmp
, op1
);
30986 if (optimize
|| !target
30987 || GET_MODE (target
) != tmode
30988 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30989 target
= gen_reg_rtx (tmode
);
30991 if ((optimize
&& !register_operand (op0
, mode0
))
30992 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
30993 op0
= copy_to_mode_reg (mode0
, op0
);
30994 if ((optimize
&& !register_operand (op1
, mode1
))
30995 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
30996 op1
= copy_to_mode_reg (mode1
, op1
);
30998 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
30999 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31006 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
31009 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
31013 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31014 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31015 rtx op0
= expand_normal (arg0
);
31016 rtx op1
= expand_normal (arg1
);
31017 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31018 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31019 enum rtx_code comparison
= d
->comparison
;
31021 if (VECTOR_MODE_P (mode0
))
31022 op0
= safe_vector_operand (op0
, mode0
);
31023 if (VECTOR_MODE_P (mode1
))
31024 op1
= safe_vector_operand (op1
, mode1
);
31026 /* Swap operands if we have a comparison that isn't available in
31028 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
31035 target
= gen_reg_rtx (SImode
);
31036 emit_move_insn (target
, const0_rtx
);
31037 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31039 if ((optimize
&& !register_operand (op0
, mode0
))
31040 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31041 op0
= copy_to_mode_reg (mode0
, op0
);
31042 if ((optimize
&& !register_operand (op1
, mode1
))
31043 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31044 op1
= copy_to_mode_reg (mode1
, op1
);
31046 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31050 emit_insn (gen_rtx_SET (VOIDmode
,
31051 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31052 gen_rtx_fmt_ee (comparison
, QImode
,
31056 return SUBREG_REG (target
);
31059 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
31062 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
31066 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31067 rtx op1
, op0
= expand_normal (arg0
);
31068 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31069 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31071 if (optimize
|| target
== 0
31072 || GET_MODE (target
) != tmode
31073 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31074 target
= gen_reg_rtx (tmode
);
31076 if (VECTOR_MODE_P (mode0
))
31077 op0
= safe_vector_operand (op0
, mode0
);
31079 if ((optimize
&& !register_operand (op0
, mode0
))
31080 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31081 op0
= copy_to_mode_reg (mode0
, op0
);
31083 op1
= GEN_INT (d
->comparison
);
31085 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
31093 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
31094 tree exp
, rtx target
)
31097 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31098 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31099 rtx op0
= expand_normal (arg0
);
31100 rtx op1
= expand_normal (arg1
);
31102 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31103 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31104 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
31106 if (optimize
|| target
== 0
31107 || GET_MODE (target
) != tmode
31108 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31109 target
= gen_reg_rtx (tmode
);
31111 op0
= safe_vector_operand (op0
, mode0
);
31112 op1
= safe_vector_operand (op1
, mode1
);
31114 if ((optimize
&& !register_operand (op0
, mode0
))
31115 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31116 op0
= copy_to_mode_reg (mode0
, op0
);
31117 if ((optimize
&& !register_operand (op1
, mode1
))
31118 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31119 op1
= copy_to_mode_reg (mode1
, op1
);
31121 op2
= GEN_INT (d
->comparison
);
31123 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31130 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
31133 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
31137 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31138 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31139 rtx op0
= expand_normal (arg0
);
31140 rtx op1
= expand_normal (arg1
);
31141 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31142 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31143 enum rtx_code comparison
= d
->comparison
;
31145 if (VECTOR_MODE_P (mode0
))
31146 op0
= safe_vector_operand (op0
, mode0
);
31147 if (VECTOR_MODE_P (mode1
))
31148 op1
= safe_vector_operand (op1
, mode1
);
31150 target
= gen_reg_rtx (SImode
);
31151 emit_move_insn (target
, const0_rtx
);
31152 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31154 if ((optimize
&& !register_operand (op0
, mode0
))
31155 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31156 op0
= copy_to_mode_reg (mode0
, op0
);
31157 if ((optimize
&& !register_operand (op1
, mode1
))
31158 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31159 op1
= copy_to_mode_reg (mode1
, op1
);
31161 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31165 emit_insn (gen_rtx_SET (VOIDmode
,
31166 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31167 gen_rtx_fmt_ee (comparison
, QImode
,
31171 return SUBREG_REG (target
);
31174 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
31177 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
31178 tree exp
, rtx target
)
31181 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31182 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31183 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31184 tree arg3
= CALL_EXPR_ARG (exp
, 3);
31185 tree arg4
= CALL_EXPR_ARG (exp
, 4);
31186 rtx scratch0
, scratch1
;
31187 rtx op0
= expand_normal (arg0
);
31188 rtx op1
= expand_normal (arg1
);
31189 rtx op2
= expand_normal (arg2
);
31190 rtx op3
= expand_normal (arg3
);
31191 rtx op4
= expand_normal (arg4
);
31192 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
31194 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31195 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31196 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31197 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
31198 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
31199 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
31200 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
31202 if (VECTOR_MODE_P (modev2
))
31203 op0
= safe_vector_operand (op0
, modev2
);
31204 if (VECTOR_MODE_P (modev4
))
31205 op2
= safe_vector_operand (op2
, modev4
);
31207 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31208 op0
= copy_to_mode_reg (modev2
, op0
);
31209 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
31210 op1
= copy_to_mode_reg (modei3
, op1
);
31211 if ((optimize
&& !register_operand (op2
, modev4
))
31212 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
31213 op2
= copy_to_mode_reg (modev4
, op2
);
31214 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
31215 op3
= copy_to_mode_reg (modei5
, op3
);
31217 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
31219 error ("the fifth argument must be an 8-bit immediate");
31223 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
31225 if (optimize
|| !target
31226 || GET_MODE (target
) != tmode0
31227 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31228 target
= gen_reg_rtx (tmode0
);
31230 scratch1
= gen_reg_rtx (tmode1
);
31232 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31234 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
31236 if (optimize
|| !target
31237 || GET_MODE (target
) != tmode1
31238 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31239 target
= gen_reg_rtx (tmode1
);
31241 scratch0
= gen_reg_rtx (tmode0
);
31243 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
31247 gcc_assert (d
->flag
);
31249 scratch0
= gen_reg_rtx (tmode0
);
31250 scratch1
= gen_reg_rtx (tmode1
);
31252 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31262 target
= gen_reg_rtx (SImode
);
31263 emit_move_insn (target
, const0_rtx
);
31264 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31267 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31268 gen_rtx_fmt_ee (EQ
, QImode
,
31269 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31272 return SUBREG_REG (target
);
31279 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
31282 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
31283 tree exp
, rtx target
)
31286 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31287 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31288 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31289 rtx scratch0
, scratch1
;
31290 rtx op0
= expand_normal (arg0
);
31291 rtx op1
= expand_normal (arg1
);
31292 rtx op2
= expand_normal (arg2
);
31293 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
31295 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31296 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31297 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31298 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
31299 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
31301 if (VECTOR_MODE_P (modev2
))
31302 op0
= safe_vector_operand (op0
, modev2
);
31303 if (VECTOR_MODE_P (modev3
))
31304 op1
= safe_vector_operand (op1
, modev3
);
31306 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31307 op0
= copy_to_mode_reg (modev2
, op0
);
31308 if ((optimize
&& !register_operand (op1
, modev3
))
31309 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
31310 op1
= copy_to_mode_reg (modev3
, op1
);
31312 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
31314 error ("the third argument must be an 8-bit immediate");
31318 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
31320 if (optimize
|| !target
31321 || GET_MODE (target
) != tmode0
31322 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31323 target
= gen_reg_rtx (tmode0
);
31325 scratch1
= gen_reg_rtx (tmode1
);
31327 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
31329 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
31331 if (optimize
|| !target
31332 || GET_MODE (target
) != tmode1
31333 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31334 target
= gen_reg_rtx (tmode1
);
31336 scratch0
= gen_reg_rtx (tmode0
);
31338 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
31342 gcc_assert (d
->flag
);
31344 scratch0
= gen_reg_rtx (tmode0
);
31345 scratch1
= gen_reg_rtx (tmode1
);
31347 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
31357 target
= gen_reg_rtx (SImode
);
31358 emit_move_insn (target
, const0_rtx
);
31359 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31362 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31363 gen_rtx_fmt_ee (EQ
, QImode
,
31364 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31367 return SUBREG_REG (target
);
31373 /* Subroutine of ix86_expand_builtin to take care of insns with
31374 variable number of operands. */
31377 ix86_expand_args_builtin (const struct builtin_description
*d
,
31378 tree exp
, rtx target
)
31380 rtx pat
, real_target
;
31381 unsigned int i
, nargs
;
31382 unsigned int nargs_constant
= 0;
31383 int num_memory
= 0;
31387 enum machine_mode mode
;
31389 bool last_arg_count
= false;
31390 enum insn_code icode
= d
->icode
;
31391 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31392 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31393 enum machine_mode rmode
= VOIDmode
;
31395 enum rtx_code comparison
= d
->comparison
;
31397 switch ((enum ix86_builtin_func_type
) d
->flag
)
31399 case V2DF_FTYPE_V2DF_ROUND
:
31400 case V4DF_FTYPE_V4DF_ROUND
:
31401 case V4SF_FTYPE_V4SF_ROUND
:
31402 case V8SF_FTYPE_V8SF_ROUND
:
31403 case V4SI_FTYPE_V4SF_ROUND
:
31404 case V8SI_FTYPE_V8SF_ROUND
:
31405 return ix86_expand_sse_round (d
, exp
, target
);
31406 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
31407 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
31408 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
31409 case INT_FTYPE_V8SF_V8SF_PTEST
:
31410 case INT_FTYPE_V4DI_V4DI_PTEST
:
31411 case INT_FTYPE_V4DF_V4DF_PTEST
:
31412 case INT_FTYPE_V4SF_V4SF_PTEST
:
31413 case INT_FTYPE_V2DI_V2DI_PTEST
:
31414 case INT_FTYPE_V2DF_V2DF_PTEST
:
31415 return ix86_expand_sse_ptest (d
, exp
, target
);
31416 case FLOAT128_FTYPE_FLOAT128
:
31417 case FLOAT_FTYPE_FLOAT
:
31418 case INT_FTYPE_INT
:
31419 case UINT64_FTYPE_INT
:
31420 case UINT16_FTYPE_UINT16
:
31421 case INT64_FTYPE_INT64
:
31422 case INT64_FTYPE_V4SF
:
31423 case INT64_FTYPE_V2DF
:
31424 case INT_FTYPE_V16QI
:
31425 case INT_FTYPE_V8QI
:
31426 case INT_FTYPE_V8SF
:
31427 case INT_FTYPE_V4DF
:
31428 case INT_FTYPE_V4SF
:
31429 case INT_FTYPE_V2DF
:
31430 case INT_FTYPE_V32QI
:
31431 case V16QI_FTYPE_V16QI
:
31432 case V8SI_FTYPE_V8SF
:
31433 case V8SI_FTYPE_V4SI
:
31434 case V8HI_FTYPE_V8HI
:
31435 case V8HI_FTYPE_V16QI
:
31436 case V8QI_FTYPE_V8QI
:
31437 case V8SF_FTYPE_V8SF
:
31438 case V8SF_FTYPE_V8SI
:
31439 case V8SF_FTYPE_V4SF
:
31440 case V8SF_FTYPE_V8HI
:
31441 case V4SI_FTYPE_V4SI
:
31442 case V4SI_FTYPE_V16QI
:
31443 case V4SI_FTYPE_V4SF
:
31444 case V4SI_FTYPE_V8SI
:
31445 case V4SI_FTYPE_V8HI
:
31446 case V4SI_FTYPE_V4DF
:
31447 case V4SI_FTYPE_V2DF
:
31448 case V4HI_FTYPE_V4HI
:
31449 case V4DF_FTYPE_V4DF
:
31450 case V4DF_FTYPE_V4SI
:
31451 case V4DF_FTYPE_V4SF
:
31452 case V4DF_FTYPE_V2DF
:
31453 case V4SF_FTYPE_V4SF
:
31454 case V4SF_FTYPE_V4SI
:
31455 case V4SF_FTYPE_V8SF
:
31456 case V4SF_FTYPE_V4DF
:
31457 case V4SF_FTYPE_V8HI
:
31458 case V4SF_FTYPE_V2DF
:
31459 case V2DI_FTYPE_V2DI
:
31460 case V2DI_FTYPE_V16QI
:
31461 case V2DI_FTYPE_V8HI
:
31462 case V2DI_FTYPE_V4SI
:
31463 case V2DF_FTYPE_V2DF
:
31464 case V2DF_FTYPE_V4SI
:
31465 case V2DF_FTYPE_V4DF
:
31466 case V2DF_FTYPE_V4SF
:
31467 case V2DF_FTYPE_V2SI
:
31468 case V2SI_FTYPE_V2SI
:
31469 case V2SI_FTYPE_V4SF
:
31470 case V2SI_FTYPE_V2SF
:
31471 case V2SI_FTYPE_V2DF
:
31472 case V2SF_FTYPE_V2SF
:
31473 case V2SF_FTYPE_V2SI
:
31474 case V32QI_FTYPE_V32QI
:
31475 case V32QI_FTYPE_V16QI
:
31476 case V16HI_FTYPE_V16HI
:
31477 case V16HI_FTYPE_V8HI
:
31478 case V8SI_FTYPE_V8SI
:
31479 case V16HI_FTYPE_V16QI
:
31480 case V8SI_FTYPE_V16QI
:
31481 case V4DI_FTYPE_V16QI
:
31482 case V8SI_FTYPE_V8HI
:
31483 case V4DI_FTYPE_V8HI
:
31484 case V4DI_FTYPE_V4SI
:
31485 case V4DI_FTYPE_V2DI
:
31488 case V4SF_FTYPE_V4SF_VEC_MERGE
:
31489 case V2DF_FTYPE_V2DF_VEC_MERGE
:
31490 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
31491 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
31492 case V16QI_FTYPE_V16QI_V16QI
:
31493 case V16QI_FTYPE_V8HI_V8HI
:
31494 case V8QI_FTYPE_V8QI_V8QI
:
31495 case V8QI_FTYPE_V4HI_V4HI
:
31496 case V8HI_FTYPE_V8HI_V8HI
:
31497 case V8HI_FTYPE_V16QI_V16QI
:
31498 case V8HI_FTYPE_V4SI_V4SI
:
31499 case V8SF_FTYPE_V8SF_V8SF
:
31500 case V8SF_FTYPE_V8SF_V8SI
:
31501 case V4SI_FTYPE_V4SI_V4SI
:
31502 case V4SI_FTYPE_V8HI_V8HI
:
31503 case V4SI_FTYPE_V4SF_V4SF
:
31504 case V4SI_FTYPE_V2DF_V2DF
:
31505 case V4HI_FTYPE_V4HI_V4HI
:
31506 case V4HI_FTYPE_V8QI_V8QI
:
31507 case V4HI_FTYPE_V2SI_V2SI
:
31508 case V4DF_FTYPE_V4DF_V4DF
:
31509 case V4DF_FTYPE_V4DF_V4DI
:
31510 case V4SF_FTYPE_V4SF_V4SF
:
31511 case V4SF_FTYPE_V4SF_V4SI
:
31512 case V4SF_FTYPE_V4SF_V2SI
:
31513 case V4SF_FTYPE_V4SF_V2DF
:
31514 case V4SF_FTYPE_V4SF_DI
:
31515 case V4SF_FTYPE_V4SF_SI
:
31516 case V2DI_FTYPE_V2DI_V2DI
:
31517 case V2DI_FTYPE_V16QI_V16QI
:
31518 case V2DI_FTYPE_V4SI_V4SI
:
31519 case V2UDI_FTYPE_V4USI_V4USI
:
31520 case V2DI_FTYPE_V2DI_V16QI
:
31521 case V2DI_FTYPE_V2DF_V2DF
:
31522 case V2SI_FTYPE_V2SI_V2SI
:
31523 case V2SI_FTYPE_V4HI_V4HI
:
31524 case V2SI_FTYPE_V2SF_V2SF
:
31525 case V2DF_FTYPE_V2DF_V2DF
:
31526 case V2DF_FTYPE_V2DF_V4SF
:
31527 case V2DF_FTYPE_V2DF_V2DI
:
31528 case V2DF_FTYPE_V2DF_DI
:
31529 case V2DF_FTYPE_V2DF_SI
:
31530 case V2SF_FTYPE_V2SF_V2SF
:
31531 case V1DI_FTYPE_V1DI_V1DI
:
31532 case V1DI_FTYPE_V8QI_V8QI
:
31533 case V1DI_FTYPE_V2SI_V2SI
:
31534 case V32QI_FTYPE_V16HI_V16HI
:
31535 case V16HI_FTYPE_V8SI_V8SI
:
31536 case V32QI_FTYPE_V32QI_V32QI
:
31537 case V16HI_FTYPE_V32QI_V32QI
:
31538 case V16HI_FTYPE_V16HI_V16HI
:
31539 case V8SI_FTYPE_V4DF_V4DF
:
31540 case V8SI_FTYPE_V8SI_V8SI
:
31541 case V8SI_FTYPE_V16HI_V16HI
:
31542 case V4DI_FTYPE_V4DI_V4DI
:
31543 case V4DI_FTYPE_V8SI_V8SI
:
31544 case V4UDI_FTYPE_V8USI_V8USI
:
31545 if (comparison
== UNKNOWN
)
31546 return ix86_expand_binop_builtin (icode
, exp
, target
);
31549 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
31550 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
31551 gcc_assert (comparison
!= UNKNOWN
);
31555 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
31556 case V16HI_FTYPE_V16HI_SI_COUNT
:
31557 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
31558 case V8SI_FTYPE_V8SI_SI_COUNT
:
31559 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
31560 case V4DI_FTYPE_V4DI_INT_COUNT
:
31561 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
31562 case V8HI_FTYPE_V8HI_SI_COUNT
:
31563 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
31564 case V4SI_FTYPE_V4SI_SI_COUNT
:
31565 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
31566 case V4HI_FTYPE_V4HI_SI_COUNT
:
31567 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
31568 case V2DI_FTYPE_V2DI_SI_COUNT
:
31569 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
31570 case V2SI_FTYPE_V2SI_SI_COUNT
:
31571 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
31572 case V1DI_FTYPE_V1DI_SI_COUNT
:
31574 last_arg_count
= true;
31576 case UINT64_FTYPE_UINT64_UINT64
:
31577 case UINT_FTYPE_UINT_UINT
:
31578 case UINT_FTYPE_UINT_USHORT
:
31579 case UINT_FTYPE_UINT_UCHAR
:
31580 case UINT16_FTYPE_UINT16_INT
:
31581 case UINT8_FTYPE_UINT8_INT
:
31584 case V2DI_FTYPE_V2DI_INT_CONVERT
:
31587 nargs_constant
= 1;
31589 case V4DI_FTYPE_V4DI_INT_CONVERT
:
31592 nargs_constant
= 1;
31594 case V8HI_FTYPE_V8HI_INT
:
31595 case V8HI_FTYPE_V8SF_INT
:
31596 case V8HI_FTYPE_V4SF_INT
:
31597 case V8SF_FTYPE_V8SF_INT
:
31598 case V4SI_FTYPE_V4SI_INT
:
31599 case V4SI_FTYPE_V8SI_INT
:
31600 case V4HI_FTYPE_V4HI_INT
:
31601 case V4DF_FTYPE_V4DF_INT
:
31602 case V4SF_FTYPE_V4SF_INT
:
31603 case V4SF_FTYPE_V8SF_INT
:
31604 case V2DI_FTYPE_V2DI_INT
:
31605 case V2DF_FTYPE_V2DF_INT
:
31606 case V2DF_FTYPE_V4DF_INT
:
31607 case V16HI_FTYPE_V16HI_INT
:
31608 case V8SI_FTYPE_V8SI_INT
:
31609 case V4DI_FTYPE_V4DI_INT
:
31610 case V2DI_FTYPE_V4DI_INT
:
31612 nargs_constant
= 1;
31614 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
31615 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
31616 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
31617 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
31618 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
31619 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
31622 case V32QI_FTYPE_V32QI_V32QI_INT
:
31623 case V16HI_FTYPE_V16HI_V16HI_INT
:
31624 case V16QI_FTYPE_V16QI_V16QI_INT
:
31625 case V4DI_FTYPE_V4DI_V4DI_INT
:
31626 case V8HI_FTYPE_V8HI_V8HI_INT
:
31627 case V8SI_FTYPE_V8SI_V8SI_INT
:
31628 case V8SI_FTYPE_V8SI_V4SI_INT
:
31629 case V8SF_FTYPE_V8SF_V8SF_INT
:
31630 case V8SF_FTYPE_V8SF_V4SF_INT
:
31631 case V4SI_FTYPE_V4SI_V4SI_INT
:
31632 case V4DF_FTYPE_V4DF_V4DF_INT
:
31633 case V4DF_FTYPE_V4DF_V2DF_INT
:
31634 case V4SF_FTYPE_V4SF_V4SF_INT
:
31635 case V2DI_FTYPE_V2DI_V2DI_INT
:
31636 case V4DI_FTYPE_V4DI_V2DI_INT
:
31637 case V2DF_FTYPE_V2DF_V2DF_INT
:
31639 nargs_constant
= 1;
31641 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
31644 nargs_constant
= 1;
31646 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
31649 nargs_constant
= 1;
31651 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
31654 nargs_constant
= 1;
31656 case V2DI_FTYPE_V2DI_UINT_UINT
:
31658 nargs_constant
= 2;
31660 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
31661 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
31662 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
31663 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
31665 nargs_constant
= 1;
31667 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
31669 nargs_constant
= 2;
31671 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
31672 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
31676 gcc_unreachable ();
31679 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31681 if (comparison
!= UNKNOWN
)
31683 gcc_assert (nargs
== 2);
31684 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
31687 if (rmode
== VOIDmode
|| rmode
== tmode
)
31691 || GET_MODE (target
) != tmode
31692 || !insn_p
->operand
[0].predicate (target
, tmode
))
31693 target
= gen_reg_rtx (tmode
);
31694 real_target
= target
;
31698 target
= gen_reg_rtx (rmode
);
31699 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
31702 for (i
= 0; i
< nargs
; i
++)
31704 tree arg
= CALL_EXPR_ARG (exp
, i
);
31705 rtx op
= expand_normal (arg
);
31706 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31707 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31709 if (last_arg_count
&& (i
+ 1) == nargs
)
31711 /* SIMD shift insns take either an 8-bit immediate or
31712 register as count. But builtin functions take int as
31713 count. If count doesn't match, we put it in register. */
31716 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
31717 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
31718 op
= copy_to_reg (op
);
31721 else if ((nargs
- i
) <= nargs_constant
)
31726 case CODE_FOR_avx2_inserti128
:
31727 case CODE_FOR_avx2_extracti128
:
31728 error ("the last argument must be an 1-bit immediate");
31731 case CODE_FOR_sse4_1_roundsd
:
31732 case CODE_FOR_sse4_1_roundss
:
31734 case CODE_FOR_sse4_1_roundpd
:
31735 case CODE_FOR_sse4_1_roundps
:
31736 case CODE_FOR_avx_roundpd256
:
31737 case CODE_FOR_avx_roundps256
:
31739 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
31740 case CODE_FOR_sse4_1_roundps_sfix
:
31741 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
31742 case CODE_FOR_avx_roundps_sfix256
:
31744 case CODE_FOR_sse4_1_blendps
:
31745 case CODE_FOR_avx_blendpd256
:
31746 case CODE_FOR_avx_vpermilv4df
:
31747 error ("the last argument must be a 4-bit immediate");
31750 case CODE_FOR_sse4_1_blendpd
:
31751 case CODE_FOR_avx_vpermilv2df
:
31752 case CODE_FOR_xop_vpermil2v2df3
:
31753 case CODE_FOR_xop_vpermil2v4sf3
:
31754 case CODE_FOR_xop_vpermil2v4df3
:
31755 case CODE_FOR_xop_vpermil2v8sf3
:
31756 error ("the last argument must be a 2-bit immediate");
31759 case CODE_FOR_avx_vextractf128v4df
:
31760 case CODE_FOR_avx_vextractf128v8sf
:
31761 case CODE_FOR_avx_vextractf128v8si
:
31762 case CODE_FOR_avx_vinsertf128v4df
:
31763 case CODE_FOR_avx_vinsertf128v8sf
:
31764 case CODE_FOR_avx_vinsertf128v8si
:
31765 error ("the last argument must be a 1-bit immediate");
31768 case CODE_FOR_avx_vmcmpv2df3
:
31769 case CODE_FOR_avx_vmcmpv4sf3
:
31770 case CODE_FOR_avx_cmpv2df3
:
31771 case CODE_FOR_avx_cmpv4sf3
:
31772 case CODE_FOR_avx_cmpv4df3
:
31773 case CODE_FOR_avx_cmpv8sf3
:
31774 error ("the last argument must be a 5-bit immediate");
31778 switch (nargs_constant
)
31781 if ((nargs
- i
) == nargs_constant
)
31783 error ("the next to last argument must be an 8-bit immediate");
31787 error ("the last argument must be an 8-bit immediate");
31790 gcc_unreachable ();
31797 if (VECTOR_MODE_P (mode
))
31798 op
= safe_vector_operand (op
, mode
);
31800 /* If we aren't optimizing, only allow one memory operand to
31802 if (memory_operand (op
, mode
))
31805 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
31807 if (optimize
|| !match
|| num_memory
> 1)
31808 op
= copy_to_mode_reg (mode
, op
);
31812 op
= copy_to_reg (op
);
31813 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
31818 args
[i
].mode
= mode
;
31824 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
31827 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
31830 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31834 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31835 args
[2].op
, args
[3].op
);
31838 gcc_unreachable ();
31848 /* Subroutine of ix86_expand_builtin to take care of special insns
31849 with variable number of operands. */
31852 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
31853 tree exp
, rtx target
)
31857 unsigned int i
, nargs
, arg_adjust
, memory
;
31861 enum machine_mode mode
;
31863 enum insn_code icode
= d
->icode
;
31864 bool last_arg_constant
= false;
31865 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31866 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31867 enum { load
, store
} klass
;
31869 switch ((enum ix86_builtin_func_type
) d
->flag
)
31871 case VOID_FTYPE_VOID
:
31872 emit_insn (GEN_FCN (icode
) (target
));
31874 case VOID_FTYPE_UINT64
:
31875 case VOID_FTYPE_UNSIGNED
:
31881 case INT_FTYPE_VOID
:
31882 case UINT64_FTYPE_VOID
:
31883 case UNSIGNED_FTYPE_VOID
:
31888 case UINT64_FTYPE_PUNSIGNED
:
31889 case V2DI_FTYPE_PV2DI
:
31890 case V4DI_FTYPE_PV4DI
:
31891 case V32QI_FTYPE_PCCHAR
:
31892 case V16QI_FTYPE_PCCHAR
:
31893 case V8SF_FTYPE_PCV4SF
:
31894 case V8SF_FTYPE_PCFLOAT
:
31895 case V4SF_FTYPE_PCFLOAT
:
31896 case V4DF_FTYPE_PCV2DF
:
31897 case V4DF_FTYPE_PCDOUBLE
:
31898 case V2DF_FTYPE_PCDOUBLE
:
31899 case VOID_FTYPE_PVOID
:
31904 case VOID_FTYPE_PV2SF_V4SF
:
31905 case VOID_FTYPE_PV4DI_V4DI
:
31906 case VOID_FTYPE_PV2DI_V2DI
:
31907 case VOID_FTYPE_PCHAR_V32QI
:
31908 case VOID_FTYPE_PCHAR_V16QI
:
31909 case VOID_FTYPE_PFLOAT_V8SF
:
31910 case VOID_FTYPE_PFLOAT_V4SF
:
31911 case VOID_FTYPE_PDOUBLE_V4DF
:
31912 case VOID_FTYPE_PDOUBLE_V2DF
:
31913 case VOID_FTYPE_PLONGLONG_LONGLONG
:
31914 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
31915 case VOID_FTYPE_PINT_INT
:
31918 /* Reserve memory operand for target. */
31919 memory
= ARRAY_SIZE (args
);
31921 case V4SF_FTYPE_V4SF_PCV2SF
:
31922 case V2DF_FTYPE_V2DF_PCDOUBLE
:
31927 case V8SF_FTYPE_PCV8SF_V8SI
:
31928 case V4DF_FTYPE_PCV4DF_V4DI
:
31929 case V4SF_FTYPE_PCV4SF_V4SI
:
31930 case V2DF_FTYPE_PCV2DF_V2DI
:
31931 case V8SI_FTYPE_PCV8SI_V8SI
:
31932 case V4DI_FTYPE_PCV4DI_V4DI
:
31933 case V4SI_FTYPE_PCV4SI_V4SI
:
31934 case V2DI_FTYPE_PCV2DI_V2DI
:
31939 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
31940 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
31941 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
31942 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
31943 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
31944 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
31945 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
31946 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
31949 /* Reserve memory operand for target. */
31950 memory
= ARRAY_SIZE (args
);
31952 case VOID_FTYPE_UINT_UINT_UINT
:
31953 case VOID_FTYPE_UINT64_UINT_UINT
:
31954 case UCHAR_FTYPE_UINT_UINT_UINT
:
31955 case UCHAR_FTYPE_UINT64_UINT_UINT
:
31958 memory
= ARRAY_SIZE (args
);
31959 last_arg_constant
= true;
31962 gcc_unreachable ();
31965 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31967 if (klass
== store
)
31969 arg
= CALL_EXPR_ARG (exp
, 0);
31970 op
= expand_normal (arg
);
31971 gcc_assert (target
== 0);
31974 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31975 target
= gen_rtx_MEM (tmode
, op
);
31978 target
= force_reg (tmode
, op
);
31986 || !register_operand (target
, tmode
)
31987 || GET_MODE (target
) != tmode
)
31988 target
= gen_reg_rtx (tmode
);
31991 for (i
= 0; i
< nargs
; i
++)
31993 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31996 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
31997 op
= expand_normal (arg
);
31998 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
32000 if (last_arg_constant
&& (i
+ 1) == nargs
)
32004 if (icode
== CODE_FOR_lwp_lwpvalsi3
32005 || icode
== CODE_FOR_lwp_lwpinssi3
32006 || icode
== CODE_FOR_lwp_lwpvaldi3
32007 || icode
== CODE_FOR_lwp_lwpinsdi3
)
32008 error ("the last argument must be a 32-bit immediate");
32010 error ("the last argument must be an 8-bit immediate");
32018 /* This must be the memory operand. */
32019 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
32020 op
= gen_rtx_MEM (mode
, op
);
32021 gcc_assert (GET_MODE (op
) == mode
32022 || GET_MODE (op
) == VOIDmode
);
32026 /* This must be register. */
32027 if (VECTOR_MODE_P (mode
))
32028 op
= safe_vector_operand (op
, mode
);
32030 gcc_assert (GET_MODE (op
) == mode
32031 || GET_MODE (op
) == VOIDmode
);
32032 op
= copy_to_mode_reg (mode
, op
);
32037 args
[i
].mode
= mode
;
32043 pat
= GEN_FCN (icode
) (target
);
32046 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
32049 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
32052 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
32055 gcc_unreachable ();
32061 return klass
== store
? 0 : target
;
32064 /* Return the integer constant in ARG. Constrain it to be in the range
32065 of the subparts of VEC_TYPE; issue an error if not. */
32068 get_element_number (tree vec_type
, tree arg
)
32070 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
32072 if (!host_integerp (arg
, 1)
32073 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
32075 error ("selector must be an integer constant in the range 0..%wi", max
);
32082 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32083 ix86_expand_vector_init. We DO have language-level syntax for this, in
32084 the form of (type){ init-list }. Except that since we can't place emms
32085 instructions from inside the compiler, we can't allow the use of MMX
32086 registers unless the user explicitly asks for it. So we do *not* define
32087 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
32088 we have builtins invoked by mmintrin.h that gives us license to emit
32089 these sorts of instructions. */
32092 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
32094 enum machine_mode tmode
= TYPE_MODE (type
);
32095 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
32096 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
32097 rtvec v
= rtvec_alloc (n_elt
);
32099 gcc_assert (VECTOR_MODE_P (tmode
));
32100 gcc_assert (call_expr_nargs (exp
) == n_elt
);
32102 for (i
= 0; i
< n_elt
; ++i
)
32104 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
32105 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
32108 if (!target
|| !register_operand (target
, tmode
))
32109 target
= gen_reg_rtx (tmode
);
32111 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
32115 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32116 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
32117 had a language-level syntax for referencing vector elements. */
32120 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
32122 enum machine_mode tmode
, mode0
;
32127 arg0
= CALL_EXPR_ARG (exp
, 0);
32128 arg1
= CALL_EXPR_ARG (exp
, 1);
32130 op0
= expand_normal (arg0
);
32131 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
32133 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32134 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
32135 gcc_assert (VECTOR_MODE_P (mode0
));
32137 op0
= force_reg (mode0
, op0
);
32139 if (optimize
|| !target
|| !register_operand (target
, tmode
))
32140 target
= gen_reg_rtx (tmode
);
32142 ix86_expand_vector_extract (true, target
, op0
, elt
);
32147 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32148 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
32149 a language-level syntax for referencing vector elements. */
32152 ix86_expand_vec_set_builtin (tree exp
)
32154 enum machine_mode tmode
, mode1
;
32155 tree arg0
, arg1
, arg2
;
32157 rtx op0
, op1
, target
;
32159 arg0
= CALL_EXPR_ARG (exp
, 0);
32160 arg1
= CALL_EXPR_ARG (exp
, 1);
32161 arg2
= CALL_EXPR_ARG (exp
, 2);
32163 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
32164 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32165 gcc_assert (VECTOR_MODE_P (tmode
));
32167 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
32168 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
32169 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
32171 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
32172 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
32174 op0
= force_reg (tmode
, op0
);
32175 op1
= force_reg (mode1
, op1
);
32177 /* OP0 is the source of these builtin functions and shouldn't be
32178 modified. Create a copy, use it and return it as target. */
32179 target
= gen_reg_rtx (tmode
);
32180 emit_move_insn (target
, op0
);
32181 ix86_expand_vector_set (true, target
, op1
, elt
);
32186 /* Expand an expression EXP that calls a built-in function,
32187 with result going to TARGET if that's convenient
32188 (and in mode MODE if that's convenient).
32189 SUBTARGET may be used as the target for computing one of EXP's operands.
32190 IGNORE is nonzero if the value is to be ignored. */
32193 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget
,
32194 enum machine_mode mode
, int ignore
)
32196 const struct builtin_description
*d
;
32198 enum insn_code icode
;
32199 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
32200 tree arg0
, arg1
, arg2
, arg3
, arg4
;
32201 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
32202 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
32203 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
32205 /* For CPU builtins that can be folded, fold first and expand the fold. */
32208 case IX86_BUILTIN_CPU_INIT
:
32210 /* Make it call __cpu_indicator_init in libgcc. */
32211 tree call_expr
, fndecl
, type
;
32212 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
32213 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
32214 call_expr
= build_call_expr (fndecl
, 0);
32215 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
32217 case IX86_BUILTIN_CPU_IS
:
32218 case IX86_BUILTIN_CPU_SUPPORTS
:
32220 tree arg0
= CALL_EXPR_ARG (exp
, 0);
32221 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
32222 gcc_assert (fold_expr
!= NULL_TREE
);
32223 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
32227 /* Determine whether the builtin function is available under the current ISA.
32228 Originally the builtin was not created if it wasn't applicable to the
32229 current ISA based on the command line switches. With function specific
32230 options, we need to check in the context of the function making the call
32231 whether it is supported. */
32232 if (ix86_builtins_isa
[fcode
].isa
32233 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
32235 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
32236 NULL
, (enum fpmath_unit
) 0, false);
32239 error ("%qE needs unknown isa option", fndecl
);
32242 gcc_assert (opts
!= NULL
);
32243 error ("%qE needs isa option %s", fndecl
, opts
);
32251 case IX86_BUILTIN_MASKMOVQ
:
32252 case IX86_BUILTIN_MASKMOVDQU
:
32253 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
32254 ? CODE_FOR_mmx_maskmovq
32255 : CODE_FOR_sse2_maskmovdqu
);
32256 /* Note the arg order is different from the operand order. */
32257 arg1
= CALL_EXPR_ARG (exp
, 0);
32258 arg2
= CALL_EXPR_ARG (exp
, 1);
32259 arg0
= CALL_EXPR_ARG (exp
, 2);
32260 op0
= expand_normal (arg0
);
32261 op1
= expand_normal (arg1
);
32262 op2
= expand_normal (arg2
);
32263 mode0
= insn_data
[icode
].operand
[0].mode
;
32264 mode1
= insn_data
[icode
].operand
[1].mode
;
32265 mode2
= insn_data
[icode
].operand
[2].mode
;
32267 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32268 op0
= gen_rtx_MEM (mode1
, op0
);
32270 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32271 op0
= copy_to_mode_reg (mode0
, op0
);
32272 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
32273 op1
= copy_to_mode_reg (mode1
, op1
);
32274 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
32275 op2
= copy_to_mode_reg (mode2
, op2
);
32276 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
32282 case IX86_BUILTIN_LDMXCSR
:
32283 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
32284 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32285 emit_move_insn (target
, op0
);
32286 emit_insn (gen_sse_ldmxcsr (target
));
32289 case IX86_BUILTIN_STMXCSR
:
32290 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32291 emit_insn (gen_sse_stmxcsr (target
));
32292 return copy_to_mode_reg (SImode
, target
);
32294 case IX86_BUILTIN_CLFLUSH
:
32295 arg0
= CALL_EXPR_ARG (exp
, 0);
32296 op0
= expand_normal (arg0
);
32297 icode
= CODE_FOR_sse2_clflush
;
32298 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
32299 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32301 emit_insn (gen_sse2_clflush (op0
));
32304 case IX86_BUILTIN_MONITOR
:
32305 arg0
= CALL_EXPR_ARG (exp
, 0);
32306 arg1
= CALL_EXPR_ARG (exp
, 1);
32307 arg2
= CALL_EXPR_ARG (exp
, 2);
32308 op0
= expand_normal (arg0
);
32309 op1
= expand_normal (arg1
);
32310 op2
= expand_normal (arg2
);
32312 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32314 op1
= copy_to_mode_reg (SImode
, op1
);
32316 op2
= copy_to_mode_reg (SImode
, op2
);
32317 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
32320 case IX86_BUILTIN_MWAIT
:
32321 arg0
= CALL_EXPR_ARG (exp
, 0);
32322 arg1
= CALL_EXPR_ARG (exp
, 1);
32323 op0
= expand_normal (arg0
);
32324 op1
= expand_normal (arg1
);
32326 op0
= copy_to_mode_reg (SImode
, op0
);
32328 op1
= copy_to_mode_reg (SImode
, op1
);
32329 emit_insn (gen_sse3_mwait (op0
, op1
));
32332 case IX86_BUILTIN_VEC_INIT_V2SI
:
32333 case IX86_BUILTIN_VEC_INIT_V4HI
:
32334 case IX86_BUILTIN_VEC_INIT_V8QI
:
32335 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
32337 case IX86_BUILTIN_VEC_EXT_V2DF
:
32338 case IX86_BUILTIN_VEC_EXT_V2DI
:
32339 case IX86_BUILTIN_VEC_EXT_V4SF
:
32340 case IX86_BUILTIN_VEC_EXT_V4SI
:
32341 case IX86_BUILTIN_VEC_EXT_V8HI
:
32342 case IX86_BUILTIN_VEC_EXT_V2SI
:
32343 case IX86_BUILTIN_VEC_EXT_V4HI
:
32344 case IX86_BUILTIN_VEC_EXT_V16QI
:
32345 return ix86_expand_vec_ext_builtin (exp
, target
);
32347 case IX86_BUILTIN_VEC_SET_V2DI
:
32348 case IX86_BUILTIN_VEC_SET_V4SF
:
32349 case IX86_BUILTIN_VEC_SET_V4SI
:
32350 case IX86_BUILTIN_VEC_SET_V8HI
:
32351 case IX86_BUILTIN_VEC_SET_V4HI
:
32352 case IX86_BUILTIN_VEC_SET_V16QI
:
32353 return ix86_expand_vec_set_builtin (exp
);
32355 case IX86_BUILTIN_INFQ
:
32356 case IX86_BUILTIN_HUGE_VALQ
:
32358 REAL_VALUE_TYPE inf
;
32362 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
32364 tmp
= validize_mem (force_const_mem (mode
, tmp
));
32367 target
= gen_reg_rtx (mode
);
32369 emit_move_insn (target
, tmp
);
32373 case IX86_BUILTIN_RDPMC
:
32374 case IX86_BUILTIN_RDTSC
:
32375 case IX86_BUILTIN_RDTSCP
:
32377 op0
= gen_reg_rtx (DImode
);
32378 op1
= gen_reg_rtx (DImode
);
32380 if (fcode
== IX86_BUILTIN_RDPMC
)
32382 arg0
= CALL_EXPR_ARG (exp
, 0);
32383 op2
= expand_normal (arg0
);
32384 if (!register_operand (op2
, SImode
))
32385 op2
= copy_to_mode_reg (SImode
, op2
);
32387 insn
= (TARGET_64BIT
32388 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
32389 : gen_rdpmc (op0
, op2
));
32392 else if (fcode
== IX86_BUILTIN_RDTSC
)
32394 insn
= (TARGET_64BIT
32395 ? gen_rdtsc_rex64 (op0
, op1
)
32396 : gen_rdtsc (op0
));
32401 op2
= gen_reg_rtx (SImode
);
32403 insn
= (TARGET_64BIT
32404 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
32405 : gen_rdtscp (op0
, op2
));
32408 arg0
= CALL_EXPR_ARG (exp
, 0);
32409 op4
= expand_normal (arg0
);
32410 if (!address_operand (op4
, VOIDmode
))
32412 op4
= convert_memory_address (Pmode
, op4
);
32413 op4
= copy_addr_to_reg (op4
);
32415 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
32420 /* mode is VOIDmode if __builtin_rd* has been called
32422 if (mode
== VOIDmode
)
32424 target
= gen_reg_rtx (mode
);
32429 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
32430 op1
, 1, OPTAB_DIRECT
);
32431 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
32432 op0
, 1, OPTAB_DIRECT
);
32435 emit_move_insn (target
, op0
);
32438 case IX86_BUILTIN_FXSAVE
:
32439 case IX86_BUILTIN_FXRSTOR
:
32440 case IX86_BUILTIN_FXSAVE64
:
32441 case IX86_BUILTIN_FXRSTOR64
:
32444 case IX86_BUILTIN_FXSAVE
:
32445 icode
= CODE_FOR_fxsave
;
32447 case IX86_BUILTIN_FXRSTOR
:
32448 icode
= CODE_FOR_fxrstor
;
32450 case IX86_BUILTIN_FXSAVE64
:
32451 icode
= CODE_FOR_fxsave64
;
32453 case IX86_BUILTIN_FXRSTOR64
:
32454 icode
= CODE_FOR_fxrstor64
;
32457 gcc_unreachable ();
32460 arg0
= CALL_EXPR_ARG (exp
, 0);
32461 op0
= expand_normal (arg0
);
32463 if (!address_operand (op0
, VOIDmode
))
32465 op0
= convert_memory_address (Pmode
, op0
);
32466 op0
= copy_addr_to_reg (op0
);
32468 op0
= gen_rtx_MEM (BLKmode
, op0
);
32470 pat
= GEN_FCN (icode
) (op0
);
32475 case IX86_BUILTIN_XSAVE
:
32476 case IX86_BUILTIN_XRSTOR
:
32477 case IX86_BUILTIN_XSAVE64
:
32478 case IX86_BUILTIN_XRSTOR64
:
32479 case IX86_BUILTIN_XSAVEOPT
:
32480 case IX86_BUILTIN_XSAVEOPT64
:
32481 arg0
= CALL_EXPR_ARG (exp
, 0);
32482 arg1
= CALL_EXPR_ARG (exp
, 1);
32483 op0
= expand_normal (arg0
);
32484 op1
= expand_normal (arg1
);
32486 if (!address_operand (op0
, VOIDmode
))
32488 op0
= convert_memory_address (Pmode
, op0
);
32489 op0
= copy_addr_to_reg (op0
);
32491 op0
= gen_rtx_MEM (BLKmode
, op0
);
32493 op1
= force_reg (DImode
, op1
);
32497 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
32498 NULL
, 1, OPTAB_DIRECT
);
32501 case IX86_BUILTIN_XSAVE
:
32502 icode
= CODE_FOR_xsave_rex64
;
32504 case IX86_BUILTIN_XRSTOR
:
32505 icode
= CODE_FOR_xrstor_rex64
;
32507 case IX86_BUILTIN_XSAVE64
:
32508 icode
= CODE_FOR_xsave64
;
32510 case IX86_BUILTIN_XRSTOR64
:
32511 icode
= CODE_FOR_xrstor64
;
32513 case IX86_BUILTIN_XSAVEOPT
:
32514 icode
= CODE_FOR_xsaveopt_rex64
;
32516 case IX86_BUILTIN_XSAVEOPT64
:
32517 icode
= CODE_FOR_xsaveopt64
;
32520 gcc_unreachable ();
32523 op2
= gen_lowpart (SImode
, op2
);
32524 op1
= gen_lowpart (SImode
, op1
);
32525 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
32531 case IX86_BUILTIN_XSAVE
:
32532 icode
= CODE_FOR_xsave
;
32534 case IX86_BUILTIN_XRSTOR
:
32535 icode
= CODE_FOR_xrstor
;
32537 case IX86_BUILTIN_XSAVEOPT
:
32538 icode
= CODE_FOR_xsaveopt
;
32541 gcc_unreachable ();
32543 pat
= GEN_FCN (icode
) (op0
, op1
);
32550 case IX86_BUILTIN_LLWPCB
:
32551 arg0
= CALL_EXPR_ARG (exp
, 0);
32552 op0
= expand_normal (arg0
);
32553 icode
= CODE_FOR_lwp_llwpcb
;
32554 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
32555 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32556 emit_insn (gen_lwp_llwpcb (op0
));
32559 case IX86_BUILTIN_SLWPCB
:
32560 icode
= CODE_FOR_lwp_slwpcb
;
32562 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
32563 target
= gen_reg_rtx (Pmode
);
32564 emit_insn (gen_lwp_slwpcb (target
));
32567 case IX86_BUILTIN_BEXTRI32
:
32568 case IX86_BUILTIN_BEXTRI64
:
32569 arg0
= CALL_EXPR_ARG (exp
, 0);
32570 arg1
= CALL_EXPR_ARG (exp
, 1);
32571 op0
= expand_normal (arg0
);
32572 op1
= expand_normal (arg1
);
32573 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
32574 ? CODE_FOR_tbm_bextri_si
32575 : CODE_FOR_tbm_bextri_di
);
32576 if (!CONST_INT_P (op1
))
32578 error ("last argument must be an immediate");
32583 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
32584 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
32585 op1
= GEN_INT (length
);
32586 op2
= GEN_INT (lsb_index
);
32587 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
32593 case IX86_BUILTIN_RDRAND16_STEP
:
32594 icode
= CODE_FOR_rdrandhi_1
;
32598 case IX86_BUILTIN_RDRAND32_STEP
:
32599 icode
= CODE_FOR_rdrandsi_1
;
32603 case IX86_BUILTIN_RDRAND64_STEP
:
32604 icode
= CODE_FOR_rdranddi_1
;
32608 op0
= gen_reg_rtx (mode0
);
32609 emit_insn (GEN_FCN (icode
) (op0
));
32611 arg0
= CALL_EXPR_ARG (exp
, 0);
32612 op1
= expand_normal (arg0
);
32613 if (!address_operand (op1
, VOIDmode
))
32615 op1
= convert_memory_address (Pmode
, op1
);
32616 op1
= copy_addr_to_reg (op1
);
32618 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
32620 op1
= gen_reg_rtx (SImode
);
32621 emit_move_insn (op1
, CONST1_RTX (SImode
));
32623 /* Emit SImode conditional move. */
32624 if (mode0
== HImode
)
32626 op2
= gen_reg_rtx (SImode
);
32627 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
32629 else if (mode0
== SImode
)
32632 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
32635 target
= gen_reg_rtx (SImode
);
32637 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32639 emit_insn (gen_rtx_SET (VOIDmode
, target
,
32640 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
32643 case IX86_BUILTIN_RDSEED16_STEP
:
32644 icode
= CODE_FOR_rdseedhi_1
;
32648 case IX86_BUILTIN_RDSEED32_STEP
:
32649 icode
= CODE_FOR_rdseedsi_1
;
32653 case IX86_BUILTIN_RDSEED64_STEP
:
32654 icode
= CODE_FOR_rdseeddi_1
;
32658 op0
= gen_reg_rtx (mode0
);
32659 emit_insn (GEN_FCN (icode
) (op0
));
32661 arg0
= CALL_EXPR_ARG (exp
, 0);
32662 op1
= expand_normal (arg0
);
32663 if (!address_operand (op1
, VOIDmode
))
32665 op1
= convert_memory_address (Pmode
, op1
);
32666 op1
= copy_addr_to_reg (op1
);
32668 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
32670 op2
= gen_reg_rtx (QImode
);
32672 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32674 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
32677 target
= gen_reg_rtx (SImode
);
32679 emit_insn (gen_zero_extendqisi2 (target
, op2
));
32682 case IX86_BUILTIN_ADDCARRYX32
:
32683 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
32687 case IX86_BUILTIN_ADDCARRYX64
:
32688 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
32692 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
32693 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
32694 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
32695 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
32697 op0
= gen_reg_rtx (QImode
);
32699 /* Generate CF from input operand. */
32700 op1
= expand_normal (arg0
);
32701 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
32702 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
32704 /* Gen ADCX instruction to compute X+Y+CF. */
32705 op2
= expand_normal (arg1
);
32706 op3
= expand_normal (arg2
);
32709 op2
= copy_to_mode_reg (mode0
, op2
);
32711 op3
= copy_to_mode_reg (mode0
, op3
);
32713 op0
= gen_reg_rtx (mode0
);
32715 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
32716 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
32717 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
32719 /* Store the result. */
32720 op4
= expand_normal (arg3
);
32721 if (!address_operand (op4
, VOIDmode
))
32723 op4
= convert_memory_address (Pmode
, op4
);
32724 op4
= copy_addr_to_reg (op4
);
32726 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
32728 /* Return current CF value. */
32730 target
= gen_reg_rtx (QImode
);
32732 PUT_MODE (pat
, QImode
);
32733 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
32736 case IX86_BUILTIN_GATHERSIV2DF
:
32737 icode
= CODE_FOR_avx2_gathersiv2df
;
32739 case IX86_BUILTIN_GATHERSIV4DF
:
32740 icode
= CODE_FOR_avx2_gathersiv4df
;
32742 case IX86_BUILTIN_GATHERDIV2DF
:
32743 icode
= CODE_FOR_avx2_gatherdiv2df
;
32745 case IX86_BUILTIN_GATHERDIV4DF
:
32746 icode
= CODE_FOR_avx2_gatherdiv4df
;
32748 case IX86_BUILTIN_GATHERSIV4SF
:
32749 icode
= CODE_FOR_avx2_gathersiv4sf
;
32751 case IX86_BUILTIN_GATHERSIV8SF
:
32752 icode
= CODE_FOR_avx2_gathersiv8sf
;
32754 case IX86_BUILTIN_GATHERDIV4SF
:
32755 icode
= CODE_FOR_avx2_gatherdiv4sf
;
32757 case IX86_BUILTIN_GATHERDIV8SF
:
32758 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32760 case IX86_BUILTIN_GATHERSIV2DI
:
32761 icode
= CODE_FOR_avx2_gathersiv2di
;
32763 case IX86_BUILTIN_GATHERSIV4DI
:
32764 icode
= CODE_FOR_avx2_gathersiv4di
;
32766 case IX86_BUILTIN_GATHERDIV2DI
:
32767 icode
= CODE_FOR_avx2_gatherdiv2di
;
32769 case IX86_BUILTIN_GATHERDIV4DI
:
32770 icode
= CODE_FOR_avx2_gatherdiv4di
;
32772 case IX86_BUILTIN_GATHERSIV4SI
:
32773 icode
= CODE_FOR_avx2_gathersiv4si
;
32775 case IX86_BUILTIN_GATHERSIV8SI
:
32776 icode
= CODE_FOR_avx2_gathersiv8si
;
32778 case IX86_BUILTIN_GATHERDIV4SI
:
32779 icode
= CODE_FOR_avx2_gatherdiv4si
;
32781 case IX86_BUILTIN_GATHERDIV8SI
:
32782 icode
= CODE_FOR_avx2_gatherdiv8si
;
32784 case IX86_BUILTIN_GATHERALTSIV4DF
:
32785 icode
= CODE_FOR_avx2_gathersiv4df
;
32787 case IX86_BUILTIN_GATHERALTDIV8SF
:
32788 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32790 case IX86_BUILTIN_GATHERALTSIV4DI
:
32791 icode
= CODE_FOR_avx2_gathersiv4di
;
32793 case IX86_BUILTIN_GATHERALTDIV8SI
:
32794 icode
= CODE_FOR_avx2_gatherdiv8si
;
32798 arg0
= CALL_EXPR_ARG (exp
, 0);
32799 arg1
= CALL_EXPR_ARG (exp
, 1);
32800 arg2
= CALL_EXPR_ARG (exp
, 2);
32801 arg3
= CALL_EXPR_ARG (exp
, 3);
32802 arg4
= CALL_EXPR_ARG (exp
, 4);
32803 op0
= expand_normal (arg0
);
32804 op1
= expand_normal (arg1
);
32805 op2
= expand_normal (arg2
);
32806 op3
= expand_normal (arg3
);
32807 op4
= expand_normal (arg4
);
32808 /* Note the arg order is different from the operand order. */
32809 mode0
= insn_data
[icode
].operand
[1].mode
;
32810 mode2
= insn_data
[icode
].operand
[3].mode
;
32811 mode3
= insn_data
[icode
].operand
[4].mode
;
32812 mode4
= insn_data
[icode
].operand
[5].mode
;
32814 if (target
== NULL_RTX
32815 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
32816 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
32818 subtarget
= target
;
32820 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
32821 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
32823 rtx half
= gen_reg_rtx (V4SImode
);
32824 if (!nonimmediate_operand (op2
, V8SImode
))
32825 op2
= copy_to_mode_reg (V8SImode
, op2
);
32826 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
32829 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
32830 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
32832 rtx (*gen
) (rtx
, rtx
);
32833 rtx half
= gen_reg_rtx (mode0
);
32834 if (mode0
== V4SFmode
)
32835 gen
= gen_vec_extract_lo_v8sf
;
32837 gen
= gen_vec_extract_lo_v8si
;
32838 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
32839 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
32840 emit_insn (gen (half
, op0
));
32842 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
32843 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
32844 emit_insn (gen (half
, op3
));
32848 /* Force memory operand only with base register here. But we
32849 don't want to do it on memory operand for other builtin
32851 op1
= force_reg (Pmode
, convert_to_mode (Pmode
, op1
, 1));
32853 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
32854 op0
= copy_to_mode_reg (mode0
, op0
);
32855 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
32856 op1
= copy_to_mode_reg (Pmode
, op1
);
32857 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
32858 op2
= copy_to_mode_reg (mode2
, op2
);
32859 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
32860 op3
= copy_to_mode_reg (mode3
, op3
);
32861 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
32863 error ("last argument must be scale 1, 2, 4, 8");
32867 /* Optimize. If mask is known to have all high bits set,
32868 replace op0 with pc_rtx to signal that the instruction
32869 overwrites the whole destination and doesn't use its
32870 previous contents. */
32873 if (TREE_CODE (arg3
) == VECTOR_CST
)
32875 unsigned int negative
= 0;
32876 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
32878 tree cst
= VECTOR_CST_ELT (arg3
, i
);
32879 if (TREE_CODE (cst
) == INTEGER_CST
32880 && tree_int_cst_sign_bit (cst
))
32882 else if (TREE_CODE (cst
) == REAL_CST
32883 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
32886 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
32889 else if (TREE_CODE (arg3
) == SSA_NAME
)
32891 /* Recognize also when mask is like:
32892 __v2df src = _mm_setzero_pd ();
32893 __v2df mask = _mm_cmpeq_pd (src, src);
32895 __v8sf src = _mm256_setzero_ps ();
32896 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
32897 as that is a cheaper way to load all ones into
32898 a register than having to load a constant from
32900 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
32901 if (is_gimple_call (def_stmt
))
32903 tree fndecl
= gimple_call_fndecl (def_stmt
);
32905 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
32906 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
32908 case IX86_BUILTIN_CMPPD
:
32909 case IX86_BUILTIN_CMPPS
:
32910 case IX86_BUILTIN_CMPPD256
:
32911 case IX86_BUILTIN_CMPPS256
:
32912 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
32915 case IX86_BUILTIN_CMPEQPD
:
32916 case IX86_BUILTIN_CMPEQPS
:
32917 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
32918 && initializer_zerop (gimple_call_arg (def_stmt
,
32929 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
32934 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
32935 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
32937 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
32938 ? V4SFmode
: V4SImode
;
32939 if (target
== NULL_RTX
)
32940 target
= gen_reg_rtx (tmode
);
32941 if (tmode
== V4SFmode
)
32942 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
32944 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
32947 target
= subtarget
;
32951 case IX86_BUILTIN_XABORT
:
32952 icode
= CODE_FOR_xabort
;
32953 arg0
= CALL_EXPR_ARG (exp
, 0);
32954 op0
= expand_normal (arg0
);
32955 mode0
= insn_data
[icode
].operand
[0].mode
;
32956 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32958 error ("the xabort's argument must be an 8-bit immediate");
32961 emit_insn (gen_xabort (op0
));
32968 for (i
= 0, d
= bdesc_special_args
;
32969 i
< ARRAY_SIZE (bdesc_special_args
);
32971 if (d
->code
== fcode
)
32972 return ix86_expand_special_args_builtin (d
, exp
, target
);
32974 for (i
= 0, d
= bdesc_args
;
32975 i
< ARRAY_SIZE (bdesc_args
);
32977 if (d
->code
== fcode
)
32980 case IX86_BUILTIN_FABSQ
:
32981 case IX86_BUILTIN_COPYSIGNQ
:
32983 /* Emit a normal call if SSE isn't available. */
32984 return expand_call (exp
, target
, ignore
);
32986 return ix86_expand_args_builtin (d
, exp
, target
);
32989 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
32990 if (d
->code
== fcode
)
32991 return ix86_expand_sse_comi (d
, exp
, target
);
32993 for (i
= 0, d
= bdesc_pcmpestr
;
32994 i
< ARRAY_SIZE (bdesc_pcmpestr
);
32996 if (d
->code
== fcode
)
32997 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
32999 for (i
= 0, d
= bdesc_pcmpistr
;
33000 i
< ARRAY_SIZE (bdesc_pcmpistr
);
33002 if (d
->code
== fcode
)
33003 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
33005 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
33006 if (d
->code
== fcode
)
33007 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
33008 (enum ix86_builtin_func_type
)
33009 d
->flag
, d
->comparison
);
33011 gcc_unreachable ();
33014 /* Returns a function decl for a vectorized version of the builtin function
33015 with builtin function code FN and the result vector type TYPE, or NULL_TREE
33016 if it is not available. */
33019 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
33022 enum machine_mode in_mode
, out_mode
;
33024 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
33026 if (TREE_CODE (type_out
) != VECTOR_TYPE
33027 || TREE_CODE (type_in
) != VECTOR_TYPE
33028 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
33031 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33032 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
33033 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33034 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33038 case BUILT_IN_SQRT
:
33039 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33041 if (out_n
== 2 && in_n
== 2)
33042 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
33043 else if (out_n
== 4 && in_n
== 4)
33044 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
33048 case BUILT_IN_SQRTF
:
33049 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33051 if (out_n
== 4 && in_n
== 4)
33052 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
33053 else if (out_n
== 8 && in_n
== 8)
33054 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
33058 case BUILT_IN_IFLOOR
:
33059 case BUILT_IN_LFLOOR
:
33060 case BUILT_IN_LLFLOOR
:
33061 /* The round insn does not trap on denormals. */
33062 if (flag_trapping_math
|| !TARGET_ROUND
)
33065 if (out_mode
== SImode
&& in_mode
== DFmode
)
33067 if (out_n
== 4 && in_n
== 2)
33068 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
33069 else if (out_n
== 8 && in_n
== 4)
33070 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
33074 case BUILT_IN_IFLOORF
:
33075 case BUILT_IN_LFLOORF
:
33076 case BUILT_IN_LLFLOORF
:
33077 /* The round insn does not trap on denormals. */
33078 if (flag_trapping_math
|| !TARGET_ROUND
)
33081 if (out_mode
== SImode
&& in_mode
== SFmode
)
33083 if (out_n
== 4 && in_n
== 4)
33084 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
33085 else if (out_n
== 8 && in_n
== 8)
33086 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
33090 case BUILT_IN_ICEIL
:
33091 case BUILT_IN_LCEIL
:
33092 case BUILT_IN_LLCEIL
:
33093 /* The round insn does not trap on denormals. */
33094 if (flag_trapping_math
|| !TARGET_ROUND
)
33097 if (out_mode
== SImode
&& in_mode
== DFmode
)
33099 if (out_n
== 4 && in_n
== 2)
33100 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
33101 else if (out_n
== 8 && in_n
== 4)
33102 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
33106 case BUILT_IN_ICEILF
:
33107 case BUILT_IN_LCEILF
:
33108 case BUILT_IN_LLCEILF
:
33109 /* The round insn does not trap on denormals. */
33110 if (flag_trapping_math
|| !TARGET_ROUND
)
33113 if (out_mode
== SImode
&& in_mode
== SFmode
)
33115 if (out_n
== 4 && in_n
== 4)
33116 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
33117 else if (out_n
== 8 && in_n
== 8)
33118 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
33122 case BUILT_IN_IRINT
:
33123 case BUILT_IN_LRINT
:
33124 case BUILT_IN_LLRINT
:
33125 if (out_mode
== SImode
&& in_mode
== DFmode
)
33127 if (out_n
== 4 && in_n
== 2)
33128 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
33129 else if (out_n
== 8 && in_n
== 4)
33130 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
33134 case BUILT_IN_IRINTF
:
33135 case BUILT_IN_LRINTF
:
33136 case BUILT_IN_LLRINTF
:
33137 if (out_mode
== SImode
&& in_mode
== SFmode
)
33139 if (out_n
== 4 && in_n
== 4)
33140 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
33141 else if (out_n
== 8 && in_n
== 8)
33142 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
33146 case BUILT_IN_IROUND
:
33147 case BUILT_IN_LROUND
:
33148 case BUILT_IN_LLROUND
:
33149 /* The round insn does not trap on denormals. */
33150 if (flag_trapping_math
|| !TARGET_ROUND
)
33153 if (out_mode
== SImode
&& in_mode
== DFmode
)
33155 if (out_n
== 4 && in_n
== 2)
33156 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
33157 else if (out_n
== 8 && in_n
== 4)
33158 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
33162 case BUILT_IN_IROUNDF
:
33163 case BUILT_IN_LROUNDF
:
33164 case BUILT_IN_LLROUNDF
:
33165 /* The round insn does not trap on denormals. */
33166 if (flag_trapping_math
|| !TARGET_ROUND
)
33169 if (out_mode
== SImode
&& in_mode
== SFmode
)
33171 if (out_n
== 4 && in_n
== 4)
33172 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
33173 else if (out_n
== 8 && in_n
== 8)
33174 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
33178 case BUILT_IN_COPYSIGN
:
33179 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33181 if (out_n
== 2 && in_n
== 2)
33182 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
33183 else if (out_n
== 4 && in_n
== 4)
33184 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
33188 case BUILT_IN_COPYSIGNF
:
33189 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33191 if (out_n
== 4 && in_n
== 4)
33192 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
33193 else if (out_n
== 8 && in_n
== 8)
33194 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
33198 case BUILT_IN_FLOOR
:
33199 /* The round insn does not trap on denormals. */
33200 if (flag_trapping_math
|| !TARGET_ROUND
)
33203 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33205 if (out_n
== 2 && in_n
== 2)
33206 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
33207 else if (out_n
== 4 && in_n
== 4)
33208 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
33212 case BUILT_IN_FLOORF
:
33213 /* The round insn does not trap on denormals. */
33214 if (flag_trapping_math
|| !TARGET_ROUND
)
33217 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33219 if (out_n
== 4 && in_n
== 4)
33220 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
33221 else if (out_n
== 8 && in_n
== 8)
33222 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
33226 case BUILT_IN_CEIL
:
33227 /* The round insn does not trap on denormals. */
33228 if (flag_trapping_math
|| !TARGET_ROUND
)
33231 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33233 if (out_n
== 2 && in_n
== 2)
33234 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
33235 else if (out_n
== 4 && in_n
== 4)
33236 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
33240 case BUILT_IN_CEILF
:
33241 /* The round insn does not trap on denormals. */
33242 if (flag_trapping_math
|| !TARGET_ROUND
)
33245 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33247 if (out_n
== 4 && in_n
== 4)
33248 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
33249 else if (out_n
== 8 && in_n
== 8)
33250 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
33254 case BUILT_IN_TRUNC
:
33255 /* The round insn does not trap on denormals. */
33256 if (flag_trapping_math
|| !TARGET_ROUND
)
33259 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33261 if (out_n
== 2 && in_n
== 2)
33262 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
33263 else if (out_n
== 4 && in_n
== 4)
33264 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
33268 case BUILT_IN_TRUNCF
:
33269 /* The round insn does not trap on denormals. */
33270 if (flag_trapping_math
|| !TARGET_ROUND
)
33273 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33275 if (out_n
== 4 && in_n
== 4)
33276 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
33277 else if (out_n
== 8 && in_n
== 8)
33278 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
33282 case BUILT_IN_RINT
:
33283 /* The round insn does not trap on denormals. */
33284 if (flag_trapping_math
|| !TARGET_ROUND
)
33287 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33289 if (out_n
== 2 && in_n
== 2)
33290 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
33291 else if (out_n
== 4 && in_n
== 4)
33292 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
33296 case BUILT_IN_RINTF
:
33297 /* The round insn does not trap on denormals. */
33298 if (flag_trapping_math
|| !TARGET_ROUND
)
33301 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33303 if (out_n
== 4 && in_n
== 4)
33304 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
33305 else if (out_n
== 8 && in_n
== 8)
33306 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
33310 case BUILT_IN_ROUND
:
33311 /* The round insn does not trap on denormals. */
33312 if (flag_trapping_math
|| !TARGET_ROUND
)
33315 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33317 if (out_n
== 2 && in_n
== 2)
33318 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
33319 else if (out_n
== 4 && in_n
== 4)
33320 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
33324 case BUILT_IN_ROUNDF
:
33325 /* The round insn does not trap on denormals. */
33326 if (flag_trapping_math
|| !TARGET_ROUND
)
33329 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33331 if (out_n
== 4 && in_n
== 4)
33332 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
33333 else if (out_n
== 8 && in_n
== 8)
33334 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
33339 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33341 if (out_n
== 2 && in_n
== 2)
33342 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
33343 if (out_n
== 4 && in_n
== 4)
33344 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
33348 case BUILT_IN_FMAF
:
33349 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33351 if (out_n
== 4 && in_n
== 4)
33352 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
33353 if (out_n
== 8 && in_n
== 8)
33354 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
33362 /* Dispatch to a handler for a vectorization library. */
33363 if (ix86_veclib_handler
)
33364 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
33370 /* Handler for an SVML-style interface to
33371 a library with vectorized intrinsics. */
33374 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
33377 tree fntype
, new_fndecl
, args
;
33380 enum machine_mode el_mode
, in_mode
;
33383 /* The SVML is suitable for unsafe math only. */
33384 if (!flag_unsafe_math_optimizations
)
33387 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33388 n
= TYPE_VECTOR_SUBPARTS (type_out
);
33389 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33390 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33391 if (el_mode
!= in_mode
33399 case BUILT_IN_LOG10
:
33401 case BUILT_IN_TANH
:
33403 case BUILT_IN_ATAN
:
33404 case BUILT_IN_ATAN2
:
33405 case BUILT_IN_ATANH
:
33406 case BUILT_IN_CBRT
:
33407 case BUILT_IN_SINH
:
33409 case BUILT_IN_ASINH
:
33410 case BUILT_IN_ASIN
:
33411 case BUILT_IN_COSH
:
33413 case BUILT_IN_ACOSH
:
33414 case BUILT_IN_ACOS
:
33415 if (el_mode
!= DFmode
|| n
!= 2)
33419 case BUILT_IN_EXPF
:
33420 case BUILT_IN_LOGF
:
33421 case BUILT_IN_LOG10F
:
33422 case BUILT_IN_POWF
:
33423 case BUILT_IN_TANHF
:
33424 case BUILT_IN_TANF
:
33425 case BUILT_IN_ATANF
:
33426 case BUILT_IN_ATAN2F
:
33427 case BUILT_IN_ATANHF
:
33428 case BUILT_IN_CBRTF
:
33429 case BUILT_IN_SINHF
:
33430 case BUILT_IN_SINF
:
33431 case BUILT_IN_ASINHF
:
33432 case BUILT_IN_ASINF
:
33433 case BUILT_IN_COSHF
:
33434 case BUILT_IN_COSF
:
33435 case BUILT_IN_ACOSHF
:
33436 case BUILT_IN_ACOSF
:
33437 if (el_mode
!= SFmode
|| n
!= 4)
33445 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
33447 if (fn
== BUILT_IN_LOGF
)
33448 strcpy (name
, "vmlsLn4");
33449 else if (fn
== BUILT_IN_LOG
)
33450 strcpy (name
, "vmldLn2");
33453 sprintf (name
, "vmls%s", bname
+10);
33454 name
[strlen (name
)-1] = '4';
33457 sprintf (name
, "vmld%s2", bname
+10);
33459 /* Convert to uppercase. */
33463 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
33465 args
= TREE_CHAIN (args
))
33469 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
33471 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
33473 /* Build a function declaration for the vectorized function. */
33474 new_fndecl
= build_decl (BUILTINS_LOCATION
,
33475 FUNCTION_DECL
, get_identifier (name
), fntype
);
33476 TREE_PUBLIC (new_fndecl
) = 1;
33477 DECL_EXTERNAL (new_fndecl
) = 1;
33478 DECL_IS_NOVOPS (new_fndecl
) = 1;
33479 TREE_READONLY (new_fndecl
) = 1;
33484 /* Handler for an ACML-style interface to
33485 a library with vectorized intrinsics. */
33488 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
33490 char name
[20] = "__vr.._";
33491 tree fntype
, new_fndecl
, args
;
33494 enum machine_mode el_mode
, in_mode
;
33497 /* The ACML is 64bits only and suitable for unsafe math only as
33498 it does not correctly support parts of IEEE with the required
33499 precision such as denormals. */
33501 || !flag_unsafe_math_optimizations
)
33504 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33505 n
= TYPE_VECTOR_SUBPARTS (type_out
);
33506 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33507 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33508 if (el_mode
!= in_mode
33518 case BUILT_IN_LOG2
:
33519 case BUILT_IN_LOG10
:
33522 if (el_mode
!= DFmode
33527 case BUILT_IN_SINF
:
33528 case BUILT_IN_COSF
:
33529 case BUILT_IN_EXPF
:
33530 case BUILT_IN_POWF
:
33531 case BUILT_IN_LOGF
:
33532 case BUILT_IN_LOG2F
:
33533 case BUILT_IN_LOG10F
:
33536 if (el_mode
!= SFmode
33545 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
33546 sprintf (name
+ 7, "%s", bname
+10);
33549 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
33551 args
= TREE_CHAIN (args
))
33555 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
33557 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
33559 /* Build a function declaration for the vectorized function. */
33560 new_fndecl
= build_decl (BUILTINS_LOCATION
,
33561 FUNCTION_DECL
, get_identifier (name
), fntype
);
33562 TREE_PUBLIC (new_fndecl
) = 1;
33563 DECL_EXTERNAL (new_fndecl
) = 1;
33564 DECL_IS_NOVOPS (new_fndecl
) = 1;
33565 TREE_READONLY (new_fndecl
) = 1;
33570 /* Returns a decl of a function that implements gather load with
33571 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
33572 Return NULL_TREE if it is not available. */
33575 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
33576 const_tree index_type
, int scale
)
33579 enum ix86_builtins code
;
33584 if ((TREE_CODE (index_type
) != INTEGER_TYPE
33585 && !POINTER_TYPE_P (index_type
))
33586 || (TYPE_MODE (index_type
) != SImode
33587 && TYPE_MODE (index_type
) != DImode
))
33590 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
33593 /* v*gather* insn sign extends index to pointer mode. */
33594 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
33595 && TYPE_UNSIGNED (index_type
))
33600 || (scale
& (scale
- 1)) != 0)
33603 si
= TYPE_MODE (index_type
) == SImode
;
33604 switch (TYPE_MODE (mem_vectype
))
33607 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
33610 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
33613 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
33616 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
33619 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
33622 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
33625 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
33628 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
33634 return ix86_builtins
[code
];
33637 /* Returns a code for a target-specific builtin that implements
33638 reciprocal of the function, or NULL_TREE if not available. */
33641 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
33642 bool sqrt ATTRIBUTE_UNUSED
)
33644 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
33645 && flag_finite_math_only
&& !flag_trapping_math
33646 && flag_unsafe_math_optimizations
))
33650 /* Machine dependent builtins. */
33653 /* Vectorized version of sqrt to rsqrt conversion. */
33654 case IX86_BUILTIN_SQRTPS_NR
:
33655 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
33657 case IX86_BUILTIN_SQRTPS_NR256
:
33658 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
33664 /* Normal builtins. */
33667 /* Sqrt to rsqrt conversion. */
33668 case BUILT_IN_SQRTF
:
33669 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
33676 /* Helper for avx_vpermilps256_operand et al. This is also used by
33677 the expansion functions to turn the parallel back into a mask.
33678 The return value is 0 for no match and the imm8+1 for a match. */
33681 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
33683 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
33685 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33687 if (XVECLEN (par
, 0) != (int) nelt
)
33690 /* Validate that all of the elements are constants, and not totally
33691 out of range. Copy the data into an integral array to make the
33692 subsequent checks easier. */
33693 for (i
= 0; i
< nelt
; ++i
)
33695 rtx er
= XVECEXP (par
, 0, i
);
33696 unsigned HOST_WIDE_INT ei
;
33698 if (!CONST_INT_P (er
))
33709 /* In the 256-bit DFmode case, we can only move elements within
33711 for (i
= 0; i
< 2; ++i
)
33715 mask
|= ipar
[i
] << i
;
33717 for (i
= 2; i
< 4; ++i
)
33721 mask
|= (ipar
[i
] - 2) << i
;
33726 /* In the 256-bit SFmode case, we have full freedom of movement
33727 within the low 128-bit lane, but the high 128-bit lane must
33728 mirror the exact same pattern. */
33729 for (i
= 0; i
< 4; ++i
)
33730 if (ipar
[i
] + 4 != ipar
[i
+ 4])
33737 /* In the 128-bit case, we've full freedom in the placement of
33738 the elements from the source operand. */
33739 for (i
= 0; i
< nelt
; ++i
)
33740 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
33744 gcc_unreachable ();
33747 /* Make sure success has a non-zero value by adding one. */
33751 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
33752 the expansion functions to turn the parallel back into a mask.
33753 The return value is 0 for no match and the imm8+1 for a match. */
33756 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
33758 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
33760 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33762 if (XVECLEN (par
, 0) != (int) nelt
)
33765 /* Validate that all of the elements are constants, and not totally
33766 out of range. Copy the data into an integral array to make the
33767 subsequent checks easier. */
33768 for (i
= 0; i
< nelt
; ++i
)
33770 rtx er
= XVECEXP (par
, 0, i
);
33771 unsigned HOST_WIDE_INT ei
;
33773 if (!CONST_INT_P (er
))
33776 if (ei
>= 2 * nelt
)
33781 /* Validate that the halves of the permute are halves. */
33782 for (i
= 0; i
< nelt2
- 1; ++i
)
33783 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33785 for (i
= nelt2
; i
< nelt
- 1; ++i
)
33786 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33789 /* Reconstruct the mask. */
33790 for (i
= 0; i
< 2; ++i
)
33792 unsigned e
= ipar
[i
* nelt2
];
33796 mask
|= e
<< (i
* 4);
33799 /* Make sure success has a non-zero value by adding one. */
33803 /* Store OPERAND to the memory after reload is completed. This means
33804 that we can't easily use assign_stack_local. */
33806 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
33810 gcc_assert (reload_completed
);
33811 if (ix86_using_red_zone ())
33813 result
= gen_rtx_MEM (mode
,
33814 gen_rtx_PLUS (Pmode
,
33816 GEN_INT (-RED_ZONE_SIZE
)));
33817 emit_move_insn (result
, operand
);
33819 else if (TARGET_64BIT
)
33825 operand
= gen_lowpart (DImode
, operand
);
33829 gen_rtx_SET (VOIDmode
,
33830 gen_rtx_MEM (DImode
,
33831 gen_rtx_PRE_DEC (DImode
,
33832 stack_pointer_rtx
)),
33836 gcc_unreachable ();
33838 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33847 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
33849 gen_rtx_SET (VOIDmode
,
33850 gen_rtx_MEM (SImode
,
33851 gen_rtx_PRE_DEC (Pmode
,
33852 stack_pointer_rtx
)),
33855 gen_rtx_SET (VOIDmode
,
33856 gen_rtx_MEM (SImode
,
33857 gen_rtx_PRE_DEC (Pmode
,
33858 stack_pointer_rtx
)),
33863 /* Store HImodes as SImodes. */
33864 operand
= gen_lowpart (SImode
, operand
);
33868 gen_rtx_SET (VOIDmode
,
33869 gen_rtx_MEM (GET_MODE (operand
),
33870 gen_rtx_PRE_DEC (SImode
,
33871 stack_pointer_rtx
)),
33875 gcc_unreachable ();
33877 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33882 /* Free operand from the memory. */
33884 ix86_free_from_memory (enum machine_mode mode
)
33886 if (!ix86_using_red_zone ())
33890 if (mode
== DImode
|| TARGET_64BIT
)
33894 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33895 to pop or add instruction if registers are available. */
33896 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
33897 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
33902 /* Return a register priority for hard reg REGNO. */
33904 ix86_register_priority (int hard_regno
)
33906 /* ebp and r13 as the base always wants a displacement, r12 as the
33907 base always wants an index. So discourage their usage in an
33909 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
33911 if (hard_regno
== BP_REG
)
33913 /* New x86-64 int registers result in bigger code size. Discourage
33915 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
33917 /* New x86-64 SSE registers result in bigger code size. Discourage
33919 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
33921 /* Usage of AX register results in smaller code. Prefer it. */
33922 if (hard_regno
== 0)
33927 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
33929 Put float CONST_DOUBLE in the constant pool instead of fp regs.
33930 QImode must go into class Q_REGS.
33931 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
33932 movdf to do mem-to-mem moves through integer regs. */
33935 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
33937 enum machine_mode mode
= GET_MODE (x
);
33939 /* We're only allowed to return a subclass of CLASS. Many of the
33940 following checks fail for NO_REGS, so eliminate that early. */
33941 if (regclass
== NO_REGS
)
33944 /* All classes can load zeros. */
33945 if (x
== CONST0_RTX (mode
))
33948 /* Force constants into memory if we are loading a (nonzero) constant into
33949 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
33950 instructions to load from a constant. */
33952 && (MAYBE_MMX_CLASS_P (regclass
)
33953 || MAYBE_SSE_CLASS_P (regclass
)
33954 || MAYBE_MASK_CLASS_P (regclass
)))
33957 /* Prefer SSE regs only, if we can use them for math. */
33958 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
33959 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33961 /* Floating-point constants need more complex checks. */
33962 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
33964 /* General regs can load everything. */
33965 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
33968 /* Floats can load 0 and 1 plus some others. Note that we eliminated
33969 zero above. We only want to wind up preferring 80387 registers if
33970 we plan on doing computation with them. */
33972 && standard_80387_constant_p (x
) > 0)
33974 /* Limit class to non-sse. */
33975 if (regclass
== FLOAT_SSE_REGS
)
33977 if (regclass
== FP_TOP_SSE_REGS
)
33979 if (regclass
== FP_SECOND_SSE_REGS
)
33980 return FP_SECOND_REG
;
33981 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
33988 /* Generally when we see PLUS here, it's the function invariant
33989 (plus soft-fp const_int). Which can only be computed into general
33991 if (GET_CODE (x
) == PLUS
)
33992 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
33994 /* QImode constants are easy to load, but non-constant QImode data
33995 must go into Q_REGS. */
33996 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
33998 if (reg_class_subset_p (regclass
, Q_REGS
))
34000 if (reg_class_subset_p (Q_REGS
, regclass
))
34008 /* Discourage putting floating-point values in SSE registers unless
34009 SSE math is being used, and likewise for the 387 registers. */
34011 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
34013 enum machine_mode mode
= GET_MODE (x
);
34015 /* Restrict the output reload class to the register bank that we are doing
34016 math on. If we would like not to return a subset of CLASS, reject this
34017 alternative: if reload cannot do this, it will still use its choice. */
34018 mode
= GET_MODE (x
);
34019 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
34020 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
34022 if (X87_FLOAT_MODE_P (mode
))
34024 if (regclass
== FP_TOP_SSE_REGS
)
34026 else if (regclass
== FP_SECOND_SSE_REGS
)
34027 return FP_SECOND_REG
;
34029 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
34036 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
34037 enum machine_mode mode
, secondary_reload_info
*sri
)
34039 /* Double-word spills from general registers to non-offsettable memory
34040 references (zero-extended addresses) require special handling. */
34043 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
34044 && INTEGER_CLASS_P (rclass
)
34045 && !offsettable_memref_p (x
))
34048 ? CODE_FOR_reload_noff_load
34049 : CODE_FOR_reload_noff_store
);
34050 /* Add the cost of moving address to a temporary. */
34051 sri
->extra_cost
= 1;
34056 /* QImode spills from non-QI registers require
34057 intermediate register on 32bit targets. */
34059 && (MAYBE_MASK_CLASS_P (rclass
)
34060 || (!TARGET_64BIT
&& !in_p
34061 && INTEGER_CLASS_P (rclass
)
34062 && MAYBE_NON_Q_CLASS_P (rclass
))))
34071 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
34072 regno
= true_regnum (x
);
34074 /* Return Q_REGS if the operand is in memory. */
34079 /* This condition handles corner case where an expression involving
34080 pointers gets vectorized. We're trying to use the address of a
34081 stack slot as a vector initializer.
34083 (set (reg:V2DI 74 [ vect_cst_.2 ])
34084 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
34086 Eventually frame gets turned into sp+offset like this:
34088 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34089 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34090 (const_int 392 [0x188]))))
34092 That later gets turned into:
34094 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34095 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34096 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
34098 We'll have the following reload recorded:
34100 Reload 0: reload_in (DI) =
34101 (plus:DI (reg/f:DI 7 sp)
34102 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
34103 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34104 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
34105 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
34106 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34107 reload_reg_rtx: (reg:V2DI 22 xmm1)
34109 Which isn't going to work since SSE instructions can't handle scalar
34110 additions. Returning GENERAL_REGS forces the addition into integer
34111 register and reload can handle subsequent reloads without problems. */
34113 if (in_p
&& GET_CODE (x
) == PLUS
34114 && SSE_CLASS_P (rclass
)
34115 && SCALAR_INT_MODE_P (mode
))
34116 return GENERAL_REGS
;
34121 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
34124 ix86_class_likely_spilled_p (reg_class_t rclass
)
34135 case SSE_FIRST_REG
:
34137 case FP_SECOND_REG
:
34147 /* If we are copying between general and FP registers, we need a memory
34148 location. The same is true for SSE and MMX registers.
34150 To optimize register_move_cost performance, allow inline variant.
34152 The macro can't work reliably when one of the CLASSES is class containing
34153 registers from multiple units (SSE, MMX, integer). We avoid this by never
34154 combining those units in single alternative in the machine description.
34155 Ensure that this constraint holds to avoid unexpected surprises.
34157 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
34158 enforce these sanity checks. */
34161 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34162 enum machine_mode mode
, int strict
)
34164 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
34166 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
34167 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
34168 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
34169 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
34170 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
34171 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
34173 gcc_assert (!strict
|| lra_in_progress
);
34177 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
34180 /* ??? This is a lie. We do have moves between mmx/general, and for
34181 mmx/sse2. But by saying we need secondary memory we discourage the
34182 register allocator from using the mmx registers unless needed. */
34183 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
34186 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
34188 /* SSE1 doesn't have any direct moves from other classes. */
34192 /* If the target says that inter-unit moves are more expensive
34193 than moving through memory, then don't generate them. */
34194 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
34195 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
34198 /* Between SSE and general, we have moves no larger than word size. */
34199 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34207 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34208 enum machine_mode mode
, int strict
)
34210 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
34213 /* Implement the TARGET_CLASS_MAX_NREGS hook.
34215 On the 80386, this is the size of MODE in words,
34216 except in the FP regs, where a single reg is always enough. */
34218 static unsigned char
34219 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
34221 if (MAYBE_INTEGER_CLASS_P (rclass
))
34223 if (mode
== XFmode
)
34224 return (TARGET_64BIT
? 2 : 3);
34225 else if (mode
== XCmode
)
34226 return (TARGET_64BIT
? 4 : 6);
34228 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
34232 if (COMPLEX_MODE_P (mode
))
34239 /* Return true if the registers in CLASS cannot represent the change from
34240 modes FROM to TO. */
34243 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
34244 enum reg_class regclass
)
34249 /* x87 registers can't do subreg at all, as all values are reformatted
34250 to extended precision. */
34251 if (MAYBE_FLOAT_CLASS_P (regclass
))
34254 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
34256 /* Vector registers do not support QI or HImode loads. If we don't
34257 disallow a change to these modes, reload will assume it's ok to
34258 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
34259 the vec_dupv4hi pattern. */
34260 if (GET_MODE_SIZE (from
) < 4)
34263 /* Vector registers do not support subreg with nonzero offsets, which
34264 are otherwise valid for integer registers. Since we can't see
34265 whether we have a nonzero offset from here, prohibit all
34266 nonparadoxical subregs changing size. */
34267 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
34274 /* Return the cost of moving data of mode M between a
34275 register and memory. A value of 2 is the default; this cost is
34276 relative to those in `REGISTER_MOVE_COST'.
34278 This function is used extensively by register_move_cost that is used to
34279 build tables at startup. Make it inline in this case.
34280 When IN is 2, return maximum of in and out move cost.
34282 If moving between registers and memory is more expensive than
34283 between two registers, you should define this macro to express the
34286 Model also increased moving costs of QImode registers in non
34290 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
34294 if (FLOAT_CLASS_P (regclass
))
34312 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
34313 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
34315 if (SSE_CLASS_P (regclass
))
34318 switch (GET_MODE_SIZE (mode
))
34333 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
34334 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
34336 if (MMX_CLASS_P (regclass
))
34339 switch (GET_MODE_SIZE (mode
))
34351 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
34352 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
34354 switch (GET_MODE_SIZE (mode
))
34357 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
34360 return ix86_cost
->int_store
[0];
34361 if (TARGET_PARTIAL_REG_DEPENDENCY
34362 && optimize_function_for_speed_p (cfun
))
34363 cost
= ix86_cost
->movzbl_load
;
34365 cost
= ix86_cost
->int_load
[0];
34367 return MAX (cost
, ix86_cost
->int_store
[0]);
34373 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
34375 return ix86_cost
->movzbl_load
;
34377 return ix86_cost
->int_store
[0] + 4;
34382 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
34383 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
34385 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
34386 if (mode
== TFmode
)
34389 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
34391 cost
= ix86_cost
->int_load
[2];
34393 cost
= ix86_cost
->int_store
[2];
34394 return (cost
* (((int) GET_MODE_SIZE (mode
)
34395 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
34400 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
34403 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
34407 /* Return the cost of moving data from a register in class CLASS1 to
34408 one in class CLASS2.
34410 It is not required that the cost always equal 2 when FROM is the same as TO;
34411 on some machines it is expensive to move between registers if they are not
34412 general registers. */
34415 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
34416 reg_class_t class2_i
)
34418 enum reg_class class1
= (enum reg_class
) class1_i
;
34419 enum reg_class class2
= (enum reg_class
) class2_i
;
34421 /* In case we require secondary memory, compute cost of the store followed
34422 by load. In order to avoid bad register allocation choices, we need
34423 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
34425 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
34429 cost
+= inline_memory_move_cost (mode
, class1
, 2);
34430 cost
+= inline_memory_move_cost (mode
, class2
, 2);
34432 /* In case of copying from general_purpose_register we may emit multiple
34433 stores followed by single load causing memory size mismatch stall.
34434 Count this as arbitrarily high cost of 20. */
34435 if (targetm
.class_max_nregs (class1
, mode
)
34436 > targetm
.class_max_nregs (class2
, mode
))
34439 /* In the case of FP/MMX moves, the registers actually overlap, and we
34440 have to switch modes in order to treat them differently. */
34441 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
34442 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
34448 /* Moves between SSE/MMX and integer unit are expensive. */
34449 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
34450 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
34452 /* ??? By keeping returned value relatively high, we limit the number
34453 of moves between integer and MMX/SSE registers for all targets.
34454 Additionally, high value prevents problem with x86_modes_tieable_p(),
34455 where integer modes in MMX/SSE registers are not tieable
34456 because of missing QImode and HImode moves to, from or between
34457 MMX/SSE registers. */
34458 return MAX (8, ix86_cost
->mmxsse_to_integer
);
34460 if (MAYBE_FLOAT_CLASS_P (class1
))
34461 return ix86_cost
->fp_move
;
34462 if (MAYBE_SSE_CLASS_P (class1
))
34463 return ix86_cost
->sse_move
;
34464 if (MAYBE_MMX_CLASS_P (class1
))
34465 return ix86_cost
->mmx_move
;
34469 /* Return TRUE if hard register REGNO can hold a value of machine-mode
34473 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
34475 /* Flags and only flags can only hold CCmode values. */
34476 if (CC_REGNO_P (regno
))
34477 return GET_MODE_CLASS (mode
) == MODE_CC
;
34478 if (GET_MODE_CLASS (mode
) == MODE_CC
34479 || GET_MODE_CLASS (mode
) == MODE_RANDOM
34480 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
34482 if (STACK_REGNO_P (regno
))
34483 return VALID_FP_MODE_P (mode
);
34484 if (MASK_REGNO_P (regno
))
34485 return VALID_MASK_REG_MODE (mode
);
34486 if (SSE_REGNO_P (regno
))
34488 /* We implement the move patterns for all vector modes into and
34489 out of SSE registers, even when no operation instructions
34492 /* For AVX-512 we allow, regardless of regno:
34494 - any of 512-bit wide vector mode
34495 - any scalar mode. */
34498 || VALID_AVX512F_REG_MODE (mode
)
34499 || VALID_AVX512F_SCALAR_MODE (mode
)))
34502 /* xmm16-xmm31 are only available for AVX-512. */
34503 if (EXT_REX_SSE_REGNO_P (regno
))
34506 /* OImode move is available only when AVX is enabled. */
34507 return ((TARGET_AVX
&& mode
== OImode
)
34508 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
34509 || VALID_SSE_REG_MODE (mode
)
34510 || VALID_SSE2_REG_MODE (mode
)
34511 || VALID_MMX_REG_MODE (mode
)
34512 || VALID_MMX_REG_MODE_3DNOW (mode
));
34514 if (MMX_REGNO_P (regno
))
34516 /* We implement the move patterns for 3DNOW modes even in MMX mode,
34517 so if the register is available at all, then we can move data of
34518 the given mode into or out of it. */
34519 return (VALID_MMX_REG_MODE (mode
)
34520 || VALID_MMX_REG_MODE_3DNOW (mode
));
34523 if (mode
== QImode
)
34525 /* Take care for QImode values - they can be in non-QI regs,
34526 but then they do cause partial register stalls. */
34527 if (ANY_QI_REGNO_P (regno
))
34529 if (!TARGET_PARTIAL_REG_STALL
)
34531 /* LRA checks if the hard register is OK for the given mode.
34532 QImode values can live in non-QI regs, so we allow all
34534 if (lra_in_progress
)
34536 return !can_create_pseudo_p ();
34538 /* We handle both integer and floats in the general purpose registers. */
34539 else if (VALID_INT_MODE_P (mode
))
34541 else if (VALID_FP_MODE_P (mode
))
34543 else if (VALID_DFP_MODE_P (mode
))
34545 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
34546 on to use that value in smaller contexts, this can easily force a
34547 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
34548 supporting DImode, allow it. */
34549 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
34555 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
34556 tieable integer mode. */
34559 ix86_tieable_integer_mode_p (enum machine_mode mode
)
34568 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
34571 return TARGET_64BIT
;
34578 /* Return true if MODE1 is accessible in a register that can hold MODE2
34579 without copying. That is, all register classes that can hold MODE2
34580 can also hold MODE1. */
34583 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
34585 if (mode1
== mode2
)
34588 if (ix86_tieable_integer_mode_p (mode1
)
34589 && ix86_tieable_integer_mode_p (mode2
))
34592 /* MODE2 being XFmode implies fp stack or general regs, which means we
34593 can tie any smaller floating point modes to it. Note that we do not
34594 tie this with TFmode. */
34595 if (mode2
== XFmode
)
34596 return mode1
== SFmode
|| mode1
== DFmode
;
34598 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
34599 that we can tie it with SFmode. */
34600 if (mode2
== DFmode
)
34601 return mode1
== SFmode
;
34603 /* If MODE2 is only appropriate for an SSE register, then tie with
34604 any other mode acceptable to SSE registers. */
34605 if (GET_MODE_SIZE (mode2
) == 32
34606 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
34607 return (GET_MODE_SIZE (mode1
) == 32
34608 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
34609 if (GET_MODE_SIZE (mode2
) == 16
34610 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
34611 return (GET_MODE_SIZE (mode1
) == 16
34612 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
34614 /* If MODE2 is appropriate for an MMX register, then tie
34615 with any other mode acceptable to MMX registers. */
34616 if (GET_MODE_SIZE (mode2
) == 8
34617 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
34618 return (GET_MODE_SIZE (mode1
) == 8
34619 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
34624 /* Return the cost of moving between two registers of mode MODE. */
34627 ix86_set_reg_reg_cost (enum machine_mode mode
)
34629 unsigned int units
= UNITS_PER_WORD
;
34631 switch (GET_MODE_CLASS (mode
))
34637 units
= GET_MODE_SIZE (CCmode
);
34641 if ((TARGET_SSE
&& mode
== TFmode
)
34642 || (TARGET_80387
&& mode
== XFmode
)
34643 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
34644 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
34645 units
= GET_MODE_SIZE (mode
);
34648 case MODE_COMPLEX_FLOAT
:
34649 if ((TARGET_SSE
&& mode
== TCmode
)
34650 || (TARGET_80387
&& mode
== XCmode
)
34651 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
34652 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
34653 units
= GET_MODE_SIZE (mode
);
34656 case MODE_VECTOR_INT
:
34657 case MODE_VECTOR_FLOAT
:
34658 if ((TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
34659 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
34660 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
34661 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
34662 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
34663 units
= GET_MODE_SIZE (mode
);
34666 /* Return the cost of moving between two registers of mode MODE,
34667 assuming that the move will be in pieces of at most UNITS bytes. */
34668 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
34671 /* Compute a (partial) cost for rtx X. Return true if the complete
34672 cost has been computed, and false if subexpressions should be
34673 scanned. In either case, *TOTAL contains the cost result. */
34676 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
34679 enum rtx_code code
= (enum rtx_code
) code_i
;
34680 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
34681 enum machine_mode mode
= GET_MODE (x
);
34682 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
34687 if (register_operand (SET_DEST (x
), VOIDmode
)
34688 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
34690 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
34699 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
34701 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
34703 else if (flag_pic
&& SYMBOLIC_CONST (x
)
34705 || (!GET_CODE (x
) != LABEL_REF
34706 && (GET_CODE (x
) != SYMBOL_REF
34707 || !SYMBOL_REF_LOCAL_P (x
)))))
34714 if (mode
== VOIDmode
)
34719 switch (standard_80387_constant_p (x
))
34724 default: /* Other constants */
34731 if (SSE_FLOAT_MODE_P (mode
))
34734 switch (standard_sse_constant_p (x
))
34738 case 1: /* 0: xor eliminates false dependency */
34741 default: /* -1: cmp contains false dependency */
34746 /* Fall back to (MEM (SYMBOL_REF)), since that's where
34747 it'll probably end up. Add a penalty for size. */
34748 *total
= (COSTS_N_INSNS (1)
34749 + (flag_pic
!= 0 && !TARGET_64BIT
)
34750 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
34754 /* The zero extensions is often completely free on x86_64, so make
34755 it as cheap as possible. */
34756 if (TARGET_64BIT
&& mode
== DImode
34757 && GET_MODE (XEXP (x
, 0)) == SImode
)
34759 else if (TARGET_ZERO_EXTEND_WITH_AND
)
34760 *total
= cost
->add
;
34762 *total
= cost
->movzx
;
34766 *total
= cost
->movsx
;
34770 if (SCALAR_INT_MODE_P (mode
)
34771 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
34772 && CONST_INT_P (XEXP (x
, 1)))
34774 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34777 *total
= cost
->add
;
34780 if ((value
== 2 || value
== 3)
34781 && cost
->lea
<= cost
->shift_const
)
34783 *total
= cost
->lea
;
34793 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34795 /* ??? Should be SSE vector operation cost. */
34796 /* At least for published AMD latencies, this really is the same
34797 as the latency for a simple fpu operation like fabs. */
34798 /* V*QImode is emulated with 1-11 insns. */
34799 if (mode
== V16QImode
|| mode
== V32QImode
)
34802 if (TARGET_XOP
&& mode
== V16QImode
)
34804 /* For XOP we use vpshab, which requires a broadcast of the
34805 value to the variable shift insn. For constants this
34806 means a V16Q const in mem; even when we can perform the
34807 shift with one insn set the cost to prefer paddb. */
34808 if (CONSTANT_P (XEXP (x
, 1)))
34810 *total
= (cost
->fabs
34811 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
34812 + (speed
? 2 : COSTS_N_BYTES (16)));
34817 else if (TARGET_SSSE3
)
34819 *total
= cost
->fabs
* count
;
34822 *total
= cost
->fabs
;
34824 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34826 if (CONST_INT_P (XEXP (x
, 1)))
34828 if (INTVAL (XEXP (x
, 1)) > 32)
34829 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
34831 *total
= cost
->shift_const
* 2;
34835 if (GET_CODE (XEXP (x
, 1)) == AND
)
34836 *total
= cost
->shift_var
* 2;
34838 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
34843 if (CONST_INT_P (XEXP (x
, 1)))
34844 *total
= cost
->shift_const
;
34845 else if (GET_CODE (XEXP (x
, 1)) == SUBREG
34846 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
)
34848 /* Return the cost after shift-and truncation. */
34849 *total
= cost
->shift_var
;
34853 *total
= cost
->shift_var
;
34861 gcc_assert (FLOAT_MODE_P (mode
));
34862 gcc_assert (TARGET_FMA
|| TARGET_FMA4
|| TARGET_AVX512F
);
34864 /* ??? SSE scalar/vector cost should be used here. */
34865 /* ??? Bald assumption that fma has the same cost as fmul. */
34866 *total
= cost
->fmul
;
34867 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
34869 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34871 if (GET_CODE (sub
) == NEG
)
34872 sub
= XEXP (sub
, 0);
34873 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
34876 if (GET_CODE (sub
) == NEG
)
34877 sub
= XEXP (sub
, 0);
34878 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
34883 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34885 /* ??? SSE scalar cost should be used here. */
34886 *total
= cost
->fmul
;
34889 else if (X87_FLOAT_MODE_P (mode
))
34891 *total
= cost
->fmul
;
34894 else if (FLOAT_MODE_P (mode
))
34896 /* ??? SSE vector cost should be used here. */
34897 *total
= cost
->fmul
;
34900 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34902 /* V*QImode is emulated with 7-13 insns. */
34903 if (mode
== V16QImode
|| mode
== V32QImode
)
34906 if (TARGET_XOP
&& mode
== V16QImode
)
34908 else if (TARGET_SSSE3
)
34910 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
34912 /* V*DImode is emulated with 5-8 insns. */
34913 else if (mode
== V2DImode
|| mode
== V4DImode
)
34915 if (TARGET_XOP
&& mode
== V2DImode
)
34916 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
34918 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
34920 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
34921 insns, including two PMULUDQ. */
34922 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
34923 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
34925 *total
= cost
->fmul
;
34930 rtx op0
= XEXP (x
, 0);
34931 rtx op1
= XEXP (x
, 1);
34933 if (CONST_INT_P (XEXP (x
, 1)))
34935 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34936 for (nbits
= 0; value
!= 0; value
&= value
- 1)
34940 /* This is arbitrary. */
34943 /* Compute costs correctly for widening multiplication. */
34944 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
34945 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
34946 == GET_MODE_SIZE (mode
))
34948 int is_mulwiden
= 0;
34949 enum machine_mode inner_mode
= GET_MODE (op0
);
34951 if (GET_CODE (op0
) == GET_CODE (op1
))
34952 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
34953 else if (CONST_INT_P (op1
))
34955 if (GET_CODE (op0
) == SIGN_EXTEND
)
34956 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
34959 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
34963 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
34966 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
34967 + nbits
* cost
->mult_bit
34968 + rtx_cost (op0
, outer_code
, opno
, speed
)
34969 + rtx_cost (op1
, outer_code
, opno
, speed
));
34978 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34979 /* ??? SSE cost should be used here. */
34980 *total
= cost
->fdiv
;
34981 else if (X87_FLOAT_MODE_P (mode
))
34982 *total
= cost
->fdiv
;
34983 else if (FLOAT_MODE_P (mode
))
34984 /* ??? SSE vector cost should be used here. */
34985 *total
= cost
->fdiv
;
34987 *total
= cost
->divide
[MODE_INDEX (mode
)];
34991 if (GET_MODE_CLASS (mode
) == MODE_INT
34992 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
34994 if (GET_CODE (XEXP (x
, 0)) == PLUS
34995 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
34996 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
34997 && CONSTANT_P (XEXP (x
, 1)))
34999 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
35000 if (val
== 2 || val
== 4 || val
== 8)
35002 *total
= cost
->lea
;
35003 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35004 outer_code
, opno
, speed
);
35005 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
35006 outer_code
, opno
, speed
);
35007 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35011 else if (GET_CODE (XEXP (x
, 0)) == MULT
35012 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
35014 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
35015 if (val
== 2 || val
== 4 || val
== 8)
35017 *total
= cost
->lea
;
35018 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35019 outer_code
, opno
, speed
);
35020 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35024 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
35026 *total
= cost
->lea
;
35027 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35028 outer_code
, opno
, speed
);
35029 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35030 outer_code
, opno
, speed
);
35031 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35038 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35040 /* ??? SSE cost should be used here. */
35041 *total
= cost
->fadd
;
35044 else if (X87_FLOAT_MODE_P (mode
))
35046 *total
= cost
->fadd
;
35049 else if (FLOAT_MODE_P (mode
))
35051 /* ??? SSE vector cost should be used here. */
35052 *total
= cost
->fadd
;
35060 if (GET_MODE_CLASS (mode
) == MODE_INT
35061 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35063 *total
= (cost
->add
* 2
35064 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
35065 << (GET_MODE (XEXP (x
, 0)) != DImode
))
35066 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
35067 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
35073 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35075 /* ??? SSE cost should be used here. */
35076 *total
= cost
->fchs
;
35079 else if (X87_FLOAT_MODE_P (mode
))
35081 *total
= cost
->fchs
;
35084 else if (FLOAT_MODE_P (mode
))
35086 /* ??? SSE vector cost should be used here. */
35087 *total
= cost
->fchs
;
35093 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35095 /* ??? Should be SSE vector operation cost. */
35096 /* At least for published AMD latencies, this really is the same
35097 as the latency for a simple fpu operation like fabs. */
35098 *total
= cost
->fabs
;
35100 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35101 *total
= cost
->add
* 2;
35103 *total
= cost
->add
;
35107 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
35108 && XEXP (XEXP (x
, 0), 1) == const1_rtx
35109 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
35110 && XEXP (x
, 1) == const0_rtx
)
35112 /* This kind of construct is implemented using test[bwl].
35113 Treat it as if we had an AND. */
35114 *total
= (cost
->add
35115 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
35116 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
35122 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
35127 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35128 /* ??? SSE cost should be used here. */
35129 *total
= cost
->fabs
;
35130 else if (X87_FLOAT_MODE_P (mode
))
35131 *total
= cost
->fabs
;
35132 else if (FLOAT_MODE_P (mode
))
35133 /* ??? SSE vector cost should be used here. */
35134 *total
= cost
->fabs
;
35138 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35139 /* ??? SSE cost should be used here. */
35140 *total
= cost
->fsqrt
;
35141 else if (X87_FLOAT_MODE_P (mode
))
35142 *total
= cost
->fsqrt
;
35143 else if (FLOAT_MODE_P (mode
))
35144 /* ??? SSE vector cost should be used here. */
35145 *total
= cost
->fsqrt
;
35149 if (XINT (x
, 1) == UNSPEC_TP
)
35156 case VEC_DUPLICATE
:
35157 /* ??? Assume all of these vector manipulation patterns are
35158 recognizable. In which case they all pretty much have the
35160 *total
= cost
->fabs
;
35170 static int current_machopic_label_num
;
35172 /* Given a symbol name and its associated stub, write out the
35173 definition of the stub. */
35176 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
35178 unsigned int length
;
35179 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
35180 int label
= ++current_machopic_label_num
;
35182 /* For 64-bit we shouldn't get here. */
35183 gcc_assert (!TARGET_64BIT
);
35185 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
35186 symb
= targetm
.strip_name_encoding (symb
);
35188 length
= strlen (stub
);
35189 binder_name
= XALLOCAVEC (char, length
+ 32);
35190 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
35192 length
= strlen (symb
);
35193 symbol_name
= XALLOCAVEC (char, length
+ 32);
35194 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
35196 sprintf (lazy_ptr_name
, "L%d$lz", label
);
35198 if (MACHOPIC_ATT_STUB
)
35199 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
35200 else if (MACHOPIC_PURE
)
35201 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
35203 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
35205 fprintf (file
, "%s:\n", stub
);
35206 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35208 if (MACHOPIC_ATT_STUB
)
35210 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
35212 else if (MACHOPIC_PURE
)
35215 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35216 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
35217 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
35218 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
35219 label
, lazy_ptr_name
, label
);
35220 fprintf (file
, "\tjmp\t*%%ecx\n");
35223 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
35225 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
35226 it needs no stub-binding-helper. */
35227 if (MACHOPIC_ATT_STUB
)
35230 fprintf (file
, "%s:\n", binder_name
);
35234 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
35235 fprintf (file
, "\tpushl\t%%ecx\n");
35238 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
35240 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
35242 /* N.B. Keep the correspondence of these
35243 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
35244 old-pic/new-pic/non-pic stubs; altering this will break
35245 compatibility with existing dylibs. */
35248 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35249 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
35252 /* 16-byte -mdynamic-no-pic stub. */
35253 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
35255 fprintf (file
, "%s:\n", lazy_ptr_name
);
35256 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35257 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
35259 #endif /* TARGET_MACHO */
35261 /* Order the registers for register allocator. */
/* NOTE(review): re-extracted text with dropped lines (return type,
   declarations of 'i' and 'pos', braces); code bytes left untouched.
   Fills reg_alloc_order[] in preference order: caller-saved GPRs,
   callee-saved GPRs, optionally x87, then SSE/REX-SSE/EXT-REX-SSE,
   mask and MMX registers, zero-padding the remainder.  */
35264 x86_order_regs_for_local_alloc (void)
35269 /* First allocate the local general purpose registers. */
35270 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35271 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
35272 reg_alloc_order
[pos
++] = i
;
35274 /* Global general purpose registers. */
35275 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35276 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
35277 reg_alloc_order
[pos
++] = i
;
35279 /* x87 registers come first in case we are doing FP math
35281 if (!TARGET_SSE_MATH
)
35282 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35283 reg_alloc_order
[pos
++] = i
;
35285 /* SSE registers. */
35286 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
35287 reg_alloc_order
[pos
++] = i
;
35288 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
35289 reg_alloc_order
[pos
++] = i
;
35291 /* Extended REX SSE registers. */
35292 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
35293 reg_alloc_order
[pos
++] = i
;
35295 /* Mask register. */
35296 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
35297 reg_alloc_order
[pos
++] = i
;
/* With SSE math the x87 stack registers are least preferred.  */
35299 /* x87 registers. */
35300 if (TARGET_SSE_MATH
)
35301 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35302 reg_alloc_order
[pos
++] = i
;
35304 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
35305 reg_alloc_order
[pos
++] = i
;
35307 /* Initialize the rest of array as we do not allocate some registers
35309 while (pos
< FIRST_PSEUDO_REGISTER
)
35310 reg_alloc_order
[pos
++] = 0;
35313 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
35314 in struct attribute_spec handler. */
/* NOTE(review): extraction dropped lines here (return type, the 'tree args'
   parameter line, braces, and some condition lines such as the 32-bit
   check).  Rejects the attribute unless attached to a function-ish node
   and given a 0/1 integer constant argument, warning and setting
   *no_add_attrs otherwise.  */
35316 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
35318 int flags ATTRIBUTE_UNUSED
,
35319 bool *no_add_attrs
)
35321 if (TREE_CODE (*node
) != FUNCTION_TYPE
35322 && TREE_CODE (*node
) != METHOD_TYPE
35323 && TREE_CODE (*node
) != FIELD_DECL
35324 && TREE_CODE (*node
) != TYPE_DECL
)
35326 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35328 *no_add_attrs
= true;
35333 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
35335 *no_add_attrs
= true;
35338 if (is_attribute_p ("callee_pop_aggregate_return", name
))
35342 cst
= TREE_VALUE (args
);
35343 if (TREE_CODE (cst
) != INTEGER_CST
)
35345 warning (OPT_Wattributes
,
35346 "%qE attribute requires an integer constant argument",
35348 *no_add_attrs
= true;
/* Only the literal values 0 and 1 are meaningful for this attribute.  */
35350 else if (compare_tree_int (cst
, 0) != 0
35351 && compare_tree_int (cst
, 1) != 0)
35353 warning (OPT_Wattributes
,
35354 "argument to %qE attribute is neither zero, nor one",
35356 *no_add_attrs
= true;
35365 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
35366 struct attribute_spec.handler. */
/* NOTE(review): re-extracted; return type, braces, and the return
   statements were dropped.  Validates that the attribute is on a
   function-like node and that ms_abi/sysv_abi are not combined.  */
35368 ix86_handle_abi_attribute (tree
*node
, tree name
,
35369 tree args ATTRIBUTE_UNUSED
,
35370 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35372 if (TREE_CODE (*node
) != FUNCTION_TYPE
35373 && TREE_CODE (*node
) != METHOD_TYPE
35374 && TREE_CODE (*node
) != FIELD_DECL
35375 && TREE_CODE (*node
) != TYPE_DECL
)
35377 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35379 *no_add_attrs
= true;
35383 /* Can combine regparm with all attributes but fastcall. */
/* The two ABI attributes are mutually exclusive in either order.  */
35384 if (is_attribute_p ("ms_abi", name
))
35386 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
35388 error ("ms_abi and sysv_abi attributes are not compatible");
35393 else if (is_attribute_p ("sysv_abi", name
))
35395 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
35397 error ("ms_abi and sysv_abi attributes are not compatible");
35406 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35407 struct attribute_spec.handler. */
/* NOTE(review): re-extracted; the declaration of 'type', the else-branch
   assigning it, braces and returns were dropped.  Accepts the attribute
   only on record/union types, and rejects combining ms_struct with
   gcc_struct on the same type.  */
35409 ix86_handle_struct_attribute (tree
*node
, tree name
,
35410 tree args ATTRIBUTE_UNUSED
,
35411 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35414 if (DECL_P (*node
))
35416 if (TREE_CODE (*node
) == TYPE_DECL
)
35417 type
= &TREE_TYPE (*node
);
35422 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
35424 warning (OPT_Wattributes
, "%qE attribute ignored",
35426 *no_add_attrs
= true;
35429 else if ((is_attribute_p ("ms_struct", name
)
35430 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
35431 || ((is_attribute_p ("gcc_struct", name
)
35432 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
35434 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
35436 *no_add_attrs
= true;
/* Attribute-spec handler restricted to FUNCTION_DECL nodes: warn and
   drop the attribute for anything else.  NOTE(review): re-extracted;
   return type, braces and return statement were dropped.  */
35443 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
35444 tree args ATTRIBUTE_UNUSED
,
35445 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35447 if (TREE_CODE (*node
) != FUNCTION_DECL
)
35449 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35451 *no_add_attrs
= true;
/* Return whether RECORD_TYPE should use MS bitfield layout: either the
   target default says so and gcc_struct is absent, or ms_struct is
   present.  NOTE(review): re-extracted; the return-type line was
   dropped by extraction.  */
35457 ix86_ms_bitfield_layout_p (const_tree record_type
)
35459 return ((TARGET_MS_BITFIELD_LAYOUT
35460 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
35461 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
35464 /* Returns an expression indicating where the this parameter is
35465 located on entry to the FUNCTION. */
/* NOTE(review): re-extracted with dropped lines (return type, the
   TARGET_64BIT guard, 'nregs'/'regno' declarations, braces, some
   branches).  For 64-bit ABIs 'this' is in an integer parameter
   register (index shifted by one when the return value is an
   aggregate); for 32-bit it is in a register for fastcall/thiscall/
   regparm functions, otherwise on the stack.  */
35468 x86_this_parameter (tree function
)
35470 tree type
= TREE_TYPE (function
);
/* When the return value is an aggregate, a hidden return-slot pointer
   occupies the first parameter slot, pushing 'this' to the next one.  */
35471 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
35476 const int *parm_regs
;
35478 if (ix86_function_type_abi (type
) == MS_ABI
)
35479 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
35481 parm_regs
= x86_64_int_parameter_registers
;
35482 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
35485 nregs
= ix86_function_regparm (type
, function
);
35487 if (nregs
> 0 && !stdarg_p (type
))
35490 unsigned int ccvt
= ix86_get_callcvt (type
);
35492 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
35493 regno
= aggr
? DX_REG
: CX_REG
;
35494 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
35498 return gen_rtx_MEM (SImode
,
35499 plus_constant (Pmode
, stack_pointer_rtx
, 4));
35508 return gen_rtx_MEM (SImode
,
35509 plus_constant (Pmode
,
35510 stack_pointer_rtx
, 4));
35513 return gen_rtx_REG (SImode
, regno
);
/* Fallback: 'this' lives on the stack just above the return address.  */
35516 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
35520 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): re-extracted; return type, braces and the
   return-true/false lines were dropped.  64-bit always succeeds;
   32-bit needs a free register (regparm < 3, no vcall_offset
   conflict, no PIC GOT reference to a non-local function).  */
35523 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
35524 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
35525 HOST_WIDE_INT vcall_offset
, const_tree function
)
35527 /* 64-bit can handle anything. */
35531 /* For 32-bit, everything's fine if we have one free register. */
35532 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
35535 /* Need a free register for vcall_offset. */
35539 /* Need a free register for GOT references. */
35540 if (flag_pic
&& !targetm
.binds_local_p (function
))
35543 /* Otherwise ok. */
35547 /* Output the assembler code for a thunk function. THUNK_DECL is the
35548 declaration for the thunk function itself, FUNCTION is the decl for
35549 the target function. DELTA is an immediate constant offset to be
35550 added to THIS. If VCALL_OFFSET is nonzero, the word at
35551 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): re-extracted text; many control-flow lines (TARGET_64BIT
   guards, braces, else-arms) were dropped by extraction, so read this
   copy alongside upstream before changing anything.  Overall flow:
   pick a scratch register per calling convention, adjust 'this' by
   DELTA and optionally by *(*this + VCALL_OFFSET), then emit a sibcall
   (or indirect jump for memory targets) to FUNCTION.  */
35554 x86_output_mi_thunk (FILE *file
,
35555 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
35556 HOST_WIDE_INT vcall_offset
, tree function
)
35558 rtx this_param
= x86_this_parameter (function
);
35559 rtx this_reg
, tmp
, fnaddr
;
35560 unsigned int tmp_regno
;
35563 tmp_regno
= R10_REG
;
/* 32-bit: the scratch register must not clash with the register that
   carries 'this' under the function's calling convention.  */
35566 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
35567 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
35568 tmp_regno
= AX_REG
;
35569 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
35570 tmp_regno
= DX_REG
;
35572 tmp_regno
= CX_REG
;
35575 emit_note (NOTE_INSN_PROLOGUE_END
);
35577 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
35578 pull it in now and let DELTA benefit. */
35579 if (REG_P (this_param
))
35580 this_reg
= this_param
;
35581 else if (vcall_offset
)
35583 /* Put the this parameter into %eax. */
35584 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
35585 emit_move_insn (this_reg
, this_param
);
35588 this_reg
= NULL_RTX
;
35590 /* Adjust the this parameter by a fixed constant. */
35593 rtx delta_rtx
= GEN_INT (delta
);
35594 rtx delta_dst
= this_reg
? this_reg
: this_param
;
/* DELTA too wide for an immediate operand: materialize it first.  */
35598 if (!x86_64_general_operand (delta_rtx
, Pmode
))
35600 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
35601 emit_move_insn (tmp
, delta_rtx
);
35606 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
35609 /* Adjust the this parameter by a value stored in the vtable. */
35612 rtx vcall_addr
, vcall_mem
, this_mem
;
35614 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
35616 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
35617 if (Pmode
!= ptr_mode
)
35618 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
35619 emit_move_insn (tmp
, this_mem
);
35621 /* Adjust the this parameter. */
35622 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
35624 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
35626 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
35627 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
35628 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
35631 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
35632 if (Pmode
!= ptr_mode
)
35633 emit_insn (gen_addsi_1_zext (this_reg
,
35634 gen_rtx_REG (ptr_mode
,
35638 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
35641 /* If necessary, drop THIS back to its stack slot. */
35642 if (this_reg
&& this_reg
!= this_param
)
35643 emit_move_insn (this_param
, this_reg
);
35645 fnaddr
= XEXP (DECL_RTL (function
), 0);
35648 if (!flag_pic
|| targetm
.binds_local_p (function
)
/* PIC call to a non-local function: go through the GOT.  */
35653 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
35654 tmp
= gen_rtx_CONST (Pmode
, tmp
);
35655 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
35660 if (!flag_pic
|| targetm
.binds_local_p (function
))
35663 else if (TARGET_MACHO
)
35665 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
35666 fnaddr
= XEXP (fnaddr
, 0);
35668 #endif /* TARGET_MACHO */
35671 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
35672 output_set_got (tmp
, NULL_RTX
);
35674 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
35675 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
35676 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
35680 /* Our sibling call patterns do not allow memories, because we have no
35681 predicate that can distinguish between frame and non-frame memory.
35682 For our purposes here, we can get away with (ab)using a jump pattern,
35683 because we're going to do no optimization. */
35684 if (MEM_P (fnaddr
))
35685 emit_jump_insn (gen_indirect_jump (fnaddr
));
35688 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
35689 fnaddr
= legitimize_pic_address (fnaddr
,
35690 gen_rtx_REG (Pmode
, tmp_regno
));
35692 if (!sibcall_insn_operand (fnaddr
, word_mode
))
35694 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
35695 if (GET_MODE (fnaddr
) != word_mode
)
35696 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
35697 emit_move_insn (tmp
, fnaddr
);
35701 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
35702 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
35703 tmp
= emit_call_insn (tmp
);
35704 SIBLING_CALL_P (tmp
) = 1;
35708 /* Emit just enough of rest_of_compilation to get the insns emitted.
35709 Note that use_thunk calls assemble_start_function et al. */
35710 tmp
= get_insns ();
35711 shorten_branches (tmp
);
35712 final_start_function (tmp
, file
, 1);
35713 final (tmp
, file
, 1);
35714 final_end_function ();
/* Emit target-specific boilerplate at the top of the assembly file:
   optional .version/.global __fltused directives and Intel-syntax
   selection.  NOTE(review): re-extracted; return type, braces and the
   TARGET_MACHO guard around darwin_file_start were dropped.  */
35718 x86_file_start (void)
35720 default_file_start ();
35722 darwin_file_start ();
35724 if (X86_FILE_START_VERSION_DIRECTIVE
)
35725 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
35726 if (X86_FILE_START_FLTUSED
)
35727 fputs ("\t.global\t__fltused\n", asm_out_file
);
35728 if (ix86_asm_dialect
== ASM_INTEL
)
35729 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
/* Cap structure-field alignment at 32 bits for double/integer-class
   modes on 32-bit targets without -malign-double (i386 psABI layout).
   NOTE(review): re-extracted; the return type, the early return of
   'computed' for 64-bit/-malign-double, braces and the final return
   were dropped by extraction.  */
35733 x86_field_alignment (tree field
, int computed
)
35735 enum machine_mode mode
;
35736 tree type
= TREE_TYPE (field
);
35738 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
/* Arrays align like their element type.  */
35740 mode
= TYPE_MODE (strip_array_types (type
));
35741 if (mode
== DFmode
|| mode
== DCmode
35742 || GET_MODE_CLASS (mode
) == MODE_INT
35743 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
35744 return MIN (32, computed
);
35748 /* Output assembler code to FILE to increment profiler label # LABELNO
35749 for profiling a function entry. */
/* NOTE(review): re-extracted; the TARGET_64BIT / flag_pic branch
   structure, #endif lines and some fprintf argument lines were
   dropped.  Emits the mcount call (direct, @GOTPCREL or @GOT
   depending on target/PIC) plus optional profile-counter setup.  */
35751 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
35753 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
35758 #ifndef NO_PROFILE_COUNTERS
35759 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
35762 if (!TARGET_PECOFF
&& flag_pic
)
35763 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
35765 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35769 #ifndef NO_PROFILE_COUNTERS
35770 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
35773 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
35777 #ifndef NO_PROFILE_COUNTERS
35778 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
35781 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35785 /* We don't have exact information about the insn sizes, but we may assume
35786 quite safely that we are informed about all 1 byte insns and memory
35787 address sizes. This is enough to eliminate unnecessary padding in
/* NOTE(review): re-extracted; the return type, several 'return N;'
   lines, the CALL_P condition line and braces were dropped.  Returns
   a conservative lower bound on the byte size of INSN for the
   jump-padding heuristics below.  */
35791 min_insn_size (rtx insn
)
35795 if (!INSN_P (insn
) || !active_insn_p (insn
))
35798 /* Discard alignments we've emit and jump instructions. */
35799 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
35800 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
35803 /* Important case - calls are always 5 bytes.
35804 It is common to have many calls in the row. */
35806 && symbolic_reference_mentioned_p (PATTERN (insn
))
35807 && !SIBLING_CALL_P (insn
))
35809 len
= get_attr_length (insn
);
35813 /* For normal instructions we rely on get_attr_length being exact,
35814 with a few exceptions. */
35815 if (!JUMP_P (insn
))
35817 enum attr_type type
= get_attr_type (insn
);
35822 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
35823 || asm_noperands (PATTERN (insn
)) >= 0)
35830 /* Otherwise trust get_attr_length. */
35834 l
= get_attr_length_address (insn
);
35835 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
35844 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35846 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* NOTE(review): re-extracted; return type, 'isjump'/'min_size'
   declarations, several guard lines and the closing #endif were
   dropped.  Slides a window over the insn stream counting jumps and
   minimum byte sizes, and inserts a 'pad' insn when 4 jumps would
   land in one 16-byte window.  */
35850 ix86_avoid_jump_mispredicts (void)
35852 rtx insn
, start
= get_insns ();
35853 int nbytes
= 0, njumps
= 0;
35856 /* Look for all minimal intervals of instructions containing 4 jumps.
35857 The intervals are bounded by START and INSN. NBYTES is the total
35858 size of instructions in the interval including INSN and not including
35859 START. When the NBYTES is smaller than 16 bytes, it is possible
35860 that the end of START and INSN ends up in the same 16byte page.
35862 The smallest offset in the page INSN can start is the case where START
35863 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
35864 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
35866 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
35870 if (LABEL_P (insn
))
35872 int align
= label_to_alignment (insn
);
35873 int max_skip
= label_to_max_skip (insn
);
35877 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
35878 already in the current 16 byte page, because otherwise
35879 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
35880 bytes to reach 16 byte boundary. */
35882 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
35885 fprintf (dump_file
, "Label %i with max_skip %i\n",
35886 INSN_UID (insn
), max_skip
);
/* Shrink the window from the front until it fits the alignment gap.  */
35889 while (nbytes
+ max_skip
>= 16)
35891 start
= NEXT_INSN (start
);
35892 if (JUMP_P (start
) || CALL_P (start
))
35893 njumps
--, isjump
= 1;
35896 nbytes
-= min_insn_size (start
);
35902 min_size
= min_insn_size (insn
);
35903 nbytes
+= min_size
;
35905 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
35906 INSN_UID (insn
), min_size
);
35907 if (JUMP_P (insn
) || CALL_P (insn
))
35914 start
= NEXT_INSN (start
);
35915 if (JUMP_P (start
) || CALL_P (start
))
35916 njumps
--, isjump
= 1;
35919 nbytes
-= min_insn_size (start
);
35921 gcc_assert (njumps
>= 0);
35923 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
35924 INSN_UID (start
), INSN_UID (insn
), nbytes
);
35926 if (njumps
== 3 && isjump
&& nbytes
< 16)
35928 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
35931 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
35932 INSN_UID (insn
), padsize
);
35933 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
35939 /* AMD Athlon works faster
35940 when RET is not destination of conditional jump or directly preceded
35941 by other jump instruction. We avoid the penalty by inserting NOP just
35942 before the RET instructions in such cases. */
/* NOTE(review): re-extracted; return type, edge/edge_iterator
   declarations, 'replace = true;' lines, braces and the
   delete_insn(ret) tail were dropped.  Walks predecessors of the exit
   block and replaces penalized returns with the long-return pattern.  */
35944 ix86_pad_returns (void)
35949 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35951 basic_block bb
= e
->src
;
35952 rtx ret
= BB_END (bb
);
35954 bool replace
= false;
35956 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
35957 || optimize_bb_for_size_p (bb
))
/* Find the closest active insn or label before the return.  */
35959 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
35960 if (active_insn_p (prev
) || LABEL_P (prev
))
35962 if (prev
&& LABEL_P (prev
))
35967 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35968 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
35969 && !(e
->flags
& EDGE_FALLTHRU
))
35977 prev
= prev_active_insn (ret
);
35979 && ((JUMP_P (prev
) && any_condjump_p (prev
))
35982 /* Empty functions get branch mispredict even when
35983 the jump destination is not visible to us. */
35984 if (!prev
&& !optimize_function_for_size_p (cfun
))
35989 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
35995 /* Count the minimum number of instructions in BB. Return 4 if the
35996 number of instructions >= 4. */
/* NOTE(review): re-extracted; return type, 'rtx insn;' declaration,
   the JUMP_P condition line before ANY_RETURN_P, increment of
   insn_count, 'return 4;' and final return were dropped.  */
35999 ix86_count_insn_bb (basic_block bb
)
36002 int insn_count
= 0;
36004 /* Count number of instructions in this block. Return 4 if the number
36005 of instructions >= 4. */
36006 FOR_BB_INSNS (bb
, insn
)
36008 /* Only happen in exit blocks. */
36010 && ANY_RETURN_P (PATTERN (insn
)))
/* USE/CLOBBER patterns produce no code, so don't count them.  */
36013 if (NONDEBUG_INSN_P (insn
)
36014 && GET_CODE (PATTERN (insn
)) != USE
36015 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
36018 if (insn_count
>= 4)
36027 /* Count the minimum number of instructions in code path in BB.
36028 Return 4 if the number of instructions >= 4. */
/* NOTE(review): re-extracted; return type, edge/edge_iterator
   declarations and braces were dropped.  Computes the minimum insn
   count over paths entry -> (pred) -> BB, capped at 4.  */
36031 ix86_count_insn (basic_block bb
)
36035 int min_prev_count
;
36037 /* Only bother counting instructions along paths with no
36038 more than 2 basic blocks between entry and exit. Given
36039 that BB has an edge to exit, determine if a predecessor
36040 of BB has an edge from entry. If so, compute the number
36041 of instructions in the predecessor block. If there
36042 happen to be multiple such blocks, compute the minimum. */
36043 min_prev_count
= 4;
36044 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
36047 edge_iterator prev_ei
;
/* Direct edge from entry: no predecessor insns on this path.  */
36049 if (e
->src
== ENTRY_BLOCK_PTR
)
36051 min_prev_count
= 0;
36054 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
36056 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
36058 int count
= ix86_count_insn_bb (e
->src
);
36059 if (count
< min_prev_count
)
36060 min_prev_count
= count
;
36066 if (min_prev_count
< 4)
36067 min_prev_count
+= ix86_count_insn_bb (bb
);
36069 return min_prev_count
;
36072 /* Pad short function to 4 instructions. */
/* NOTE(review): re-extracted; return type, edge/edge_iterator/insn
   declarations, the loop header that searches backwards for the
   epilogue note, and braces were dropped.  Inserts NOPs before the
   epilogue of functions shorter than 4 insns
   (-mtune TARGET_PAD_SHORT_FUNCTION path).  */
36075 ix86_pad_short_function (void)
36080 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
36082 rtx ret
= BB_END (e
->src
);
36083 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
36085 int insn_count
= ix86_count_insn (e
->src
);
36087 /* Pad short function. */
36088 if (insn_count
< 4)
36092 /* Find epilogue. */
36095 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
36096 insn
= PREV_INSN (insn
);
36101 /* Two NOPs count as one instruction. */
36102 insn_count
= 2 * (4 - insn_count
);
36103 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
36109 /* Fix up a Windows system unwinder issue. If an EH region falls through into
36110 the epilogue, the Windows system unwinder will apply epilogue logic and
36111 produce incorrect offsets. This can be avoided by adding a nop between
36112 the last insn that can throw and the first insn of the epilogue. */
/* NOTE(review): re-extracted; return type, edge/iterator and
   insn/next declarations, the NOTE_P(next) guard line and braces were
   dropped.  */
36115 ix86_seh_fixup_eh_fallthru (void)
36120 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
36124 /* Find the beginning of the epilogue. */
36125 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
36126 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
36131 /* We only care about preceding insns that can throw. */
36132 insn
= prev_active_insn (insn
);
36133 if (insn
== NULL
|| !can_throw_internal (insn
))
36136 /* Do not separate calls from their debug information. */
36137 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
36139 && (NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
36140 || NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
))
/* Place the nop after any trailing location notes of the call.  */
36145 emit_insn_after (gen_nops (const1_rtx
), insn
);
36149 /* Implement machine specific optimizations. We implement padding of returns
36150 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): re-extracted; the function signature line itself was
   dropped by extraction (only the body survives) -- presumably this is
   the machine-dependent reorg hook; confirm against upstream.  */
36154 /* We are freeing block_for_insn in the toplev to keep compatibility
36155 with old MDEP_REORGS that are not CFG based. Recompute it now. */
36156 compute_bb_for_insn ();
36158 if (TARGET_SEH
&& current_function_has_exception_handlers ())
36159 ix86_seh_fixup_eh_fallthru ();
36161 if (optimize
&& optimize_function_for_speed_p (cfun
))
36163 if (TARGET_PAD_SHORT_FUNCTION
)
36164 ix86_pad_short_function ();
36165 else if (TARGET_PAD_RETURNS
)
36166 ix86_pad_returns ();
36167 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
36168 if (TARGET_FOUR_JUMP_LIMIT
)
36169 ix86_avoid_jump_mispredicts ();
36174 /* Return nonzero when QImode register that must be represented via REX prefix
/* NOTE(review): re-extracted; return type, 'int i;', the QImode mode
   check on operands and return statements were dropped.  Scans the
   cached operands of INSN for general registers outside the
   QI-addressable set.  */
36177 x86_extended_QIreg_mentioned_p (rtx insn
)
36180 extract_insn_cached (insn
);
36181 for (i
= 0; i
< recog_data
.n_operands
; i
++)
36182 if (GENERAL_REG_P (recog_data
.operand
[i
])
36183 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
36188 /* Return nonzero when P points to register encoded via REX prefix.
36189 Called via for_each_rtx. */
/* NOTE(review): re-extracted; return type, the REG_P(*p) guard and
   the 'return 0;' fallthrough were dropped.  */
36191 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
36193 unsigned int regno
;
36196 regno
= REGNO (*p
);
36197 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
36200 /* Return true when INSN mentions register that must be encoded using REX
/* Walk the pattern (or the rtx itself when INSN is not an insn) with
   the REX-register predicate above.  NOTE(review): return type line
   was dropped by extraction.  */
36203 x86_extended_reg_mentioned_p (rtx insn
)
36205 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
36206 extended_reg_mentioned_1
, NULL
);
36209 /* If profitable, negate (without causing overflow) integer constant
36210 of mode MODE at location LOC. Return true in this case. */
/* NOTE(review): re-extracted; return type, the 'val' declaration, the
   switch over MODE (with case labels and mask lines), part of the
   negation-profitability condition and return statements were
   dropped.  */
36212 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
36216 if (!CONST_INT_P (*loc
))
36222 /* DImode x86_64 constants must fit in 32 bits. */
36223 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
36234 gcc_unreachable ();
36237 /* Avoid overflows. */
/* Negating the sign-bit value of MODE would overflow.  */
36238 if (mode_signbit_p (mode
, *loc
))
36241 val
= INTVAL (*loc
);
36243 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
36244 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
36245 if ((val
< 0 && val
!= -128)
36248 *loc
= GEN_INT (-val
);
36255 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
36256 optabs would emit if we didn't have TFmode patterns. */
36259 x86_emit_floatuns (rtx operands
[2])
36261 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
36262 enum machine_mode mode
, inmode
;
36264 inmode
= GET_MODE (operands
[1]);
36265 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
36268 in
= force_reg (inmode
, operands
[1]);
36269 mode
= GET_MODE (out
);
36270 neglab
= gen_label_rtx ();
36271 donelab
= gen_label_rtx ();
36272 f0
= gen_reg_rtx (mode
);
36274 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
36276 expand_float (out
, in
, 0);
36278 emit_jump_insn (gen_jump (donelab
));
36281 emit_label (neglab
);
36283 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
36285 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
36287 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
36289 expand_float (f0
, i0
, 0);
36291 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
36293 emit_label (donelab
);
36296 /* AVX512F does support 64-byte integer vector operations,
36297 thus the longest vector we are faced with is V64QImode. */
36298 #define MAX_VECT_LEN 64
36300 struct expand_vec_perm_d
36302 rtx target
, op0
, op1
;
36303 unsigned char perm
[MAX_VECT_LEN
];
36304 enum machine_mode vmode
;
36305 unsigned char nelt
;
36306 bool one_operand_p
;
36310 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
36311 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
36312 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
36314 /* Get a vector mode of the same size as the original but with elements
36315 twice as wide. This is only guaranteed to apply to integral vectors. */
36317 static inline enum machine_mode
36318 get_mode_wider_vector (enum machine_mode o
)
36320 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
36321 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
36322 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
36323 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
36327 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36328 with all elements equal to VAR. Return true if successful. */
36331 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
36332 rtx target
, rtx val
)
36355 /* First attempt to recognize VAL as-is. */
36356 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36357 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
36358 if (recog_memoized (insn
) < 0)
36361 /* If that fails, force VAL into a register. */
36364 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
36365 seq
= get_insns ();
36368 emit_insn_before (seq
, insn
);
36370 ok
= recog_memoized (insn
) >= 0;
36379 if (TARGET_SSE
|| TARGET_3DNOW_A
)
36383 val
= gen_lowpart (SImode
, val
);
36384 x
= gen_rtx_TRUNCATE (HImode
, val
);
36385 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
36386 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
36399 struct expand_vec_perm_d dperm
;
36403 memset (&dperm
, 0, sizeof (dperm
));
36404 dperm
.target
= target
;
36405 dperm
.vmode
= mode
;
36406 dperm
.nelt
= GET_MODE_NUNITS (mode
);
36407 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
36408 dperm
.one_operand_p
= true;
36410 /* Extend to SImode using a paradoxical SUBREG. */
36411 tmp1
= gen_reg_rtx (SImode
);
36412 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
36414 /* Insert the SImode value as low element of a V4SImode vector. */
36415 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
36416 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
36418 ok
= (expand_vec_perm_1 (&dperm
)
36419 || expand_vec_perm_broadcast_1 (&dperm
));
36431 /* Replicate the value once into the next wider mode and recurse. */
36433 enum machine_mode smode
, wsmode
, wvmode
;
36436 smode
= GET_MODE_INNER (mode
);
36437 wvmode
= get_mode_wider_vector (mode
);
36438 wsmode
= GET_MODE_INNER (wvmode
);
36440 val
= convert_modes (wsmode
, smode
, val
, true);
36441 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
36442 GEN_INT (GET_MODE_BITSIZE (smode
)),
36443 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
36444 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
36446 x
= gen_lowpart (wvmode
, target
);
36447 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
36455 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
36456 rtx x
= gen_reg_rtx (hvmode
);
36458 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
36461 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
36462 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
36471 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36472 whose ONE_VAR element is VAR, and other elements are zero. Return true
36476 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
36477 rtx target
, rtx var
, int one_var
)
36479 enum machine_mode vsimode
;
36482 bool use_vector_set
= false;
36487 /* For SSE4.1, we normally use vector set. But if the second
36488 element is zero and inter-unit moves are OK, we use movq
36490 use_vector_set
= (TARGET_64BIT
&& TARGET_SSE4_1
36491 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
36497 use_vector_set
= TARGET_SSE4_1
;
36500 use_vector_set
= TARGET_SSE2
;
36503 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
36510 use_vector_set
= TARGET_AVX
;
36513 /* Use ix86_expand_vector_set in 64bit mode only. */
36514 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
36520 if (use_vector_set
)
36522 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
36523 var
= force_reg (GET_MODE_INNER (mode
), var
);
36524 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
36540 var
= force_reg (GET_MODE_INNER (mode
), var
);
36541 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
36542 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
36547 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
36548 new_target
= gen_reg_rtx (mode
);
36550 new_target
= target
;
36551 var
= force_reg (GET_MODE_INNER (mode
), var
);
36552 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
36553 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
36554 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
36557 /* We need to shuffle the value to the correct position, so
36558 create a new pseudo to store the intermediate result. */
36560 /* With SSE2, we can use the integer shuffle insns. */
36561 if (mode
!= V4SFmode
&& TARGET_SSE2
)
36563 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
36565 GEN_INT (one_var
== 1 ? 0 : 1),
36566 GEN_INT (one_var
== 2 ? 0 : 1),
36567 GEN_INT (one_var
== 3 ? 0 : 1)));
36568 if (target
!= new_target
)
36569 emit_move_insn (target
, new_target
);
36573 /* Otherwise convert the intermediate result to V4SFmode and
36574 use the SSE1 shuffle instructions. */
36575 if (mode
!= V4SFmode
)
36577 tmp
= gen_reg_rtx (V4SFmode
);
36578 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
36583 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
36585 GEN_INT (one_var
== 1 ? 0 : 1),
36586 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
36587 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
36589 if (mode
!= V4SFmode
)
36590 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
36591 else if (tmp
!= target
)
36592 emit_move_insn (target
, tmp
);
36594 else if (target
!= new_target
)
36595 emit_move_insn (target
, new_target
);
36600 vsimode
= V4SImode
;
36606 vsimode
= V2SImode
;
36612 /* Zero extend the variable element to SImode and recurse. */
36613 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
36615 x
= gen_reg_rtx (vsimode
);
36616 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
36618 gcc_unreachable ();
36620 emit_move_insn (target
, gen_lowpart (mode
, x
));
36628 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36629 consisting of the values in VALS. It is known that all elements
36630 except ONE_VAR are constants. Return true if successful. */
36633 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
36634 rtx target
, rtx vals
, int one_var
)
36636 rtx var
= XVECEXP (vals
, 0, one_var
);
36637 enum machine_mode wmode
;
36640 const_vec
= copy_rtx (vals
);
36641 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
36642 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
36650 /* For the two element vectors, it's just as easy to use
36651 the general case. */
36655 /* Use ix86_expand_vector_set in 64bit mode only. */
36678 /* There's no way to set one QImode entry easily. Combine
36679 the variable value with its adjacent constant value, and
36680 promote to an HImode set. */
36681 x
= XVECEXP (vals
, 0, one_var
^ 1);
36684 var
= convert_modes (HImode
, QImode
, var
, true);
36685 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
36686 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
36687 x
= GEN_INT (INTVAL (x
) & 0xff);
36691 var
= convert_modes (HImode
, QImode
, var
, true);
36692 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
36694 if (x
!= const0_rtx
)
36695 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
36696 1, OPTAB_LIB_WIDEN
);
36698 x
= gen_reg_rtx (wmode
);
36699 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
36700 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
36702 emit_move_insn (target
, gen_lowpart (mode
, x
));
36709 emit_move_insn (target
, const_vec
);
36710 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
36714 /* A subroutine of ix86_expand_vector_init_general. Use vector
36715 concatenate to handle the most general case: all values variable,
36716 and none identical. */
36719 ix86_expand_vector_init_concat (enum machine_mode mode
,
36720 rtx target
, rtx
*ops
, int n
)
36722 enum machine_mode cmode
, hmode
= VOIDmode
;
36723 rtx first
[8], second
[4];
36763 gcc_unreachable ();
36766 if (!register_operand (ops
[1], cmode
))
36767 ops
[1] = force_reg (cmode
, ops
[1]);
36768 if (!register_operand (ops
[0], cmode
))
36769 ops
[0] = force_reg (cmode
, ops
[0]);
36770 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36771 gen_rtx_VEC_CONCAT (mode
, ops
[0],
36791 gcc_unreachable ();
36807 gcc_unreachable ();
36812 /* FIXME: We process inputs backward to help RA. PR 36222. */
36815 for (; i
> 0; i
-= 2, j
--)
36817 first
[j
] = gen_reg_rtx (cmode
);
36818 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
36819 ix86_expand_vector_init (false, first
[j
],
36820 gen_rtx_PARALLEL (cmode
, v
));
36826 gcc_assert (hmode
!= VOIDmode
);
36827 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36829 second
[j
] = gen_reg_rtx (hmode
);
36830 ix86_expand_vector_init_concat (hmode
, second
[j
],
36834 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
36837 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
36841 gcc_unreachable ();
36845 /* A subroutine of ix86_expand_vector_init_general. Use vector
36846 interleave to handle the most general case: all values variable,
36847 and none identical. */
36850 ix86_expand_vector_init_interleave (enum machine_mode mode
,
36851 rtx target
, rtx
*ops
, int n
)
36853 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
36856 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
36857 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
36858 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
36863 gen_load_even
= gen_vec_setv8hi
;
36864 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
36865 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36866 inner_mode
= HImode
;
36867 first_imode
= V4SImode
;
36868 second_imode
= V2DImode
;
36869 third_imode
= VOIDmode
;
36872 gen_load_even
= gen_vec_setv16qi
;
36873 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
36874 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
36875 inner_mode
= QImode
;
36876 first_imode
= V8HImode
;
36877 second_imode
= V4SImode
;
36878 third_imode
= V2DImode
;
36881 gcc_unreachable ();
36884 for (i
= 0; i
< n
; i
++)
36886 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
36887 op0
= gen_reg_rtx (SImode
);
36888 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
36890 /* Insert the SImode value as low element of V4SImode vector. */
36891 op1
= gen_reg_rtx (V4SImode
);
36892 op0
= gen_rtx_VEC_MERGE (V4SImode
,
36893 gen_rtx_VEC_DUPLICATE (V4SImode
,
36895 CONST0_RTX (V4SImode
),
36897 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
36899 /* Cast the V4SImode vector back to a vector in orignal mode. */
36900 op0
= gen_reg_rtx (mode
);
36901 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
36903 /* Load even elements into the second position. */
36904 emit_insn (gen_load_even (op0
,
36905 force_reg (inner_mode
,
36909 /* Cast vector to FIRST_IMODE vector. */
36910 ops
[i
] = gen_reg_rtx (first_imode
);
36911 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
36914 /* Interleave low FIRST_IMODE vectors. */
36915 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36917 op0
= gen_reg_rtx (first_imode
);
36918 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
36920 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36921 ops
[j
] = gen_reg_rtx (second_imode
);
36922 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
36925 /* Interleave low SECOND_IMODE vectors. */
36926 switch (second_imode
)
36929 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
36931 op0
= gen_reg_rtx (second_imode
);
36932 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
36935 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
36937 ops
[j
] = gen_reg_rtx (third_imode
);
36938 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
36940 second_imode
= V2DImode
;
36941 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36945 op0
= gen_reg_rtx (second_imode
);
36946 emit_insn (gen_interleave_second_low (op0
, ops
[0],
36949 /* Cast the SECOND_IMODE vector back to a vector on original
36951 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36952 gen_lowpart (mode
, op0
)));
36956 gcc_unreachable ();
36960 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
36961 all values variable, and none identical. */
36964 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
36965 rtx target
, rtx vals
)
36967 rtx ops
[32], op0
, op1
;
36968 enum machine_mode half_mode
= VOIDmode
;
36975 if (!mmx_ok
&& !TARGET_SSE
)
36987 n
= GET_MODE_NUNITS (mode
);
36988 for (i
= 0; i
< n
; i
++)
36989 ops
[i
] = XVECEXP (vals
, 0, i
);
36990 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
36994 half_mode
= V16QImode
;
36998 half_mode
= V8HImode
;
37002 n
= GET_MODE_NUNITS (mode
);
37003 for (i
= 0; i
< n
; i
++)
37004 ops
[i
] = XVECEXP (vals
, 0, i
);
37005 op0
= gen_reg_rtx (half_mode
);
37006 op1
= gen_reg_rtx (half_mode
);
37007 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
37009 ix86_expand_vector_init_interleave (half_mode
, op1
,
37010 &ops
[n
>> 1], n
>> 2);
37011 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37012 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
37016 if (!TARGET_SSE4_1
)
37024 /* Don't use ix86_expand_vector_init_interleave if we can't
37025 move from GPR to SSE register directly. */
37026 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
37029 n
= GET_MODE_NUNITS (mode
);
37030 for (i
= 0; i
< n
; i
++)
37031 ops
[i
] = XVECEXP (vals
, 0, i
);
37032 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
37040 gcc_unreachable ();
37044 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
37045 enum machine_mode inner_mode
;
37046 rtx words
[4], shift
;
37048 inner_mode
= GET_MODE_INNER (mode
);
37049 n_elts
= GET_MODE_NUNITS (mode
);
37050 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
37051 n_elt_per_word
= n_elts
/ n_words
;
37052 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
37054 for (i
= 0; i
< n_words
; ++i
)
37056 rtx word
= NULL_RTX
;
37058 for (j
= 0; j
< n_elt_per_word
; ++j
)
37060 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
37061 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
37067 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
37068 word
, 1, OPTAB_LIB_WIDEN
);
37069 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
37070 word
, 1, OPTAB_LIB_WIDEN
);
37078 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
37079 else if (n_words
== 2)
37081 rtx tmp
= gen_reg_rtx (mode
);
37082 emit_clobber (tmp
);
37083 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
37084 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
37085 emit_move_insn (target
, tmp
);
37087 else if (n_words
== 4)
37089 rtx tmp
= gen_reg_rtx (V4SImode
);
37090 gcc_assert (word_mode
== SImode
);
37091 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
37092 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
37093 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
37096 gcc_unreachable ();
37100 /* Initialize vector TARGET via VALS. Suppress the use of MMX
37101 instructions unless MMX_OK is true. */
37104 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
37106 enum machine_mode mode
= GET_MODE (target
);
37107 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37108 int n_elts
= GET_MODE_NUNITS (mode
);
37109 int n_var
= 0, one_var
= -1;
37110 bool all_same
= true, all_const_zero
= true;
37114 for (i
= 0; i
< n_elts
; ++i
)
37116 x
= XVECEXP (vals
, 0, i
);
37117 if (!(CONST_INT_P (x
)
37118 || GET_CODE (x
) == CONST_DOUBLE
37119 || GET_CODE (x
) == CONST_FIXED
))
37120 n_var
++, one_var
= i
;
37121 else if (x
!= CONST0_RTX (inner_mode
))
37122 all_const_zero
= false;
37123 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
37127 /* Constants are best loaded from the constant pool. */
37130 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
37134 /* If all values are identical, broadcast the value. */
37136 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
37137 XVECEXP (vals
, 0, 0)))
37140 /* Values where only one field is non-constant are best loaded from
37141 the pool and overwritten via move later. */
37145 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
37146 XVECEXP (vals
, 0, one_var
),
37150 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
37154 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
37158 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
37160 enum machine_mode mode
= GET_MODE (target
);
37161 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37162 enum machine_mode half_mode
;
37163 bool use_vec_merge
= false;
37165 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
37167 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
37168 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
37169 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
37170 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
37171 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
37172 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
37174 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
37176 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
37177 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
37178 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
37179 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
37180 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
37181 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
37191 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37192 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
37194 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37196 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37197 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37203 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
37207 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37208 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
37210 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37212 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37213 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37220 /* For the two element vectors, we implement a VEC_CONCAT with
37221 the extraction of the other element. */
37223 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
37224 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
37227 op0
= val
, op1
= tmp
;
37229 op0
= tmp
, op1
= val
;
37231 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
37232 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37237 use_vec_merge
= TARGET_SSE4_1
;
37244 use_vec_merge
= true;
37248 /* tmp = target = A B C D */
37249 tmp
= copy_to_reg (target
);
37250 /* target = A A B B */
37251 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
37252 /* target = X A B B */
37253 ix86_expand_vector_set (false, target
, val
, 0);
37254 /* target = A X C D */
37255 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37256 const1_rtx
, const0_rtx
,
37257 GEN_INT (2+4), GEN_INT (3+4)));
37261 /* tmp = target = A B C D */
37262 tmp
= copy_to_reg (target
);
37263 /* tmp = X B C D */
37264 ix86_expand_vector_set (false, tmp
, val
, 0);
37265 /* target = A B X D */
37266 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37267 const0_rtx
, const1_rtx
,
37268 GEN_INT (0+4), GEN_INT (3+4)));
37272 /* tmp = target = A B C D */
37273 tmp
= copy_to_reg (target
);
37274 /* tmp = X B C D */
37275 ix86_expand_vector_set (false, tmp
, val
, 0);
37276 /* target = A B X D */
37277 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37278 const0_rtx
, const1_rtx
,
37279 GEN_INT (2+4), GEN_INT (0+4)));
37283 gcc_unreachable ();
37288 use_vec_merge
= TARGET_SSE4_1
;
37292 /* Element 0 handled by vec_merge below. */
37295 use_vec_merge
= true;
37301 /* With SSE2, use integer shuffles to swap element 0 and ELT,
37302 store into element 0, then shuffle them back. */
37306 order
[0] = GEN_INT (elt
);
37307 order
[1] = const1_rtx
;
37308 order
[2] = const2_rtx
;
37309 order
[3] = GEN_INT (3);
37310 order
[elt
] = const0_rtx
;
37312 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37313 order
[1], order
[2], order
[3]));
37315 ix86_expand_vector_set (false, target
, val
, 0);
37317 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37318 order
[1], order
[2], order
[3]));
37322 /* For SSE1, we have to reuse the V4SF code. */
37323 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
37324 gen_lowpart (SFmode
, val
), elt
);
37329 use_vec_merge
= TARGET_SSE2
;
37332 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
37336 use_vec_merge
= TARGET_SSE4_1
;
37343 half_mode
= V16QImode
;
37349 half_mode
= V8HImode
;
37355 half_mode
= V4SImode
;
37361 half_mode
= V2DImode
;
37367 half_mode
= V4SFmode
;
37373 half_mode
= V2DFmode
;
37379 /* Compute offset. */
37383 gcc_assert (i
<= 1);
37385 /* Extract the half. */
37386 tmp
= gen_reg_rtx (half_mode
);
37387 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
37389 /* Put val in tmp at elt. */
37390 ix86_expand_vector_set (false, tmp
, val
, elt
);
37393 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
37402 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
37403 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
37404 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37408 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
37410 emit_move_insn (mem
, target
);
37412 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
37413 emit_move_insn (tmp
, val
);
37415 emit_move_insn (target
, mem
);
37420 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
37422 enum machine_mode mode
= GET_MODE (vec
);
37423 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37424 bool use_vec_extr
= false;
37437 use_vec_extr
= true;
37441 use_vec_extr
= TARGET_SSE4_1
;
37453 tmp
= gen_reg_rtx (mode
);
37454 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
37455 GEN_INT (elt
), GEN_INT (elt
),
37456 GEN_INT (elt
+4), GEN_INT (elt
+4)));
37460 tmp
= gen_reg_rtx (mode
);
37461 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
37465 gcc_unreachable ();
37468 use_vec_extr
= true;
37473 use_vec_extr
= TARGET_SSE4_1
;
37487 tmp
= gen_reg_rtx (mode
);
37488 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
37489 GEN_INT (elt
), GEN_INT (elt
),
37490 GEN_INT (elt
), GEN_INT (elt
)));
37494 tmp
= gen_reg_rtx (mode
);
37495 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
37499 gcc_unreachable ();
37502 use_vec_extr
= true;
37507 /* For SSE1, we have to reuse the V4SF code. */
37508 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
37509 gen_lowpart (V4SFmode
, vec
), elt
);
37515 use_vec_extr
= TARGET_SSE2
;
37518 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
37522 use_vec_extr
= TARGET_SSE4_1
;
37528 tmp
= gen_reg_rtx (V4SFmode
);
37530 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
37532 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
37533 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
37541 tmp
= gen_reg_rtx (V2DFmode
);
37543 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
37545 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
37546 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
37554 tmp
= gen_reg_rtx (V16QImode
);
37556 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
37558 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
37559 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
37567 tmp
= gen_reg_rtx (V8HImode
);
37569 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
37571 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
37572 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
37580 tmp
= gen_reg_rtx (V4SImode
);
37582 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
37584 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
37585 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
37593 tmp
= gen_reg_rtx (V2DImode
);
37595 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
37597 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
37598 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
37604 /* ??? Could extract the appropriate HImode element and shift. */
37611 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
37612 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
37614 /* Let the rtl optimizers know about the zero extension performed. */
37615 if (inner_mode
== QImode
|| inner_mode
== HImode
)
37617 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
37618 target
= gen_lowpart (SImode
, target
);
37621 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37625 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
37627 emit_move_insn (mem
, vec
);
37629 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
37630 emit_move_insn (target
, tmp
);
37634 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
37635 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
37636 The upper bits of DEST are undefined, though they shouldn't cause
37637 exceptions (some bits from src or all zeros are ok). */
37640 emit_reduc_half (rtx dest
, rtx src
, int i
)
37643 switch (GET_MODE (src
))
37647 tem
= gen_sse_movhlps (dest
, src
, src
);
37649 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
37650 GEN_INT (1 + 4), GEN_INT (1 + 4));
37653 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
37659 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
37660 gen_lowpart (V1TImode
, src
),
37665 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
37667 tem
= gen_avx_shufps256 (dest
, src
, src
,
37668 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
37672 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
37674 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
37681 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
37682 gen_lowpart (V4DImode
, src
),
37683 gen_lowpart (V4DImode
, src
),
37686 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
37687 gen_lowpart (V2TImode
, src
),
37691 gcc_unreachable ();
37696 /* Expand a vector reduction. FN is the binary pattern to reduce;
37697 DEST is the destination; IN is the input vector. */
37700 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
37702 rtx half
, dst
, vec
= in
;
37703 enum machine_mode mode
= GET_MODE (in
);
37706 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
37708 && mode
== V8HImode
37709 && fn
== gen_uminv8hi3
)
37711 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
37715 for (i
= GET_MODE_BITSIZE (mode
);
37716 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
37719 half
= gen_reg_rtx (mode
);
37720 emit_reduc_half (half
, vec
, i
);
37721 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
37724 dst
= gen_reg_rtx (mode
);
37725 emit_insn (fn (dst
, half
, vec
));
37730 /* Target hook for scalar_mode_supported_p. */
37732 ix86_scalar_mode_supported_p (enum machine_mode mode
)
37734 if (DECIMAL_FLOAT_MODE_P (mode
))
37735 return default_decimal_float_supported_p ();
37736 else if (mode
== TFmode
)
37739 return default_scalar_mode_supported_p (mode
);
37742 /* Implements target hook vector_mode_supported_p. */
37744 ix86_vector_mode_supported_p (enum machine_mode mode
)
37746 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
37748 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
37750 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
37752 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
37754 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
37759 /* Target hook for c_mode_for_suffix. */
37760 static enum machine_mode
37761 ix86_c_mode_for_suffix (char suffix
)
37771 /* Worker function for TARGET_MD_ASM_CLOBBERS.
37773 We do this in the new i386 backend to maintain source compatibility
37774 with the old cc0-based compiler. */
37777 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
37778 tree inputs ATTRIBUTE_UNUSED
,
37781 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
37783 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
37788 /* Implements target vector targetm.asm.encode_section_info. */
37790 static void ATTRIBUTE_UNUSED
37791 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
37793 default_encode_section_info (decl
, rtl
, first
);
37795 if (TREE_CODE (decl
) == VAR_DECL
37796 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
37797 && ix86_in_large_data_p (decl
))
37798 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
37801 /* Worker function for REVERSE_CONDITION. */
37804 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
37806 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
37807 ? reverse_condition (code
)
37808 : reverse_condition_maybe_unordered (code
));
37811 /* Output code to perform an x87 FP register move, from OPERANDS[1]
37815 output_387_reg_move (rtx insn
, rtx
*operands
)
37817 if (REG_P (operands
[0]))
37819 if (REG_P (operands
[1])
37820 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37822 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
37823 return output_387_ffreep (operands
, 0);
37824 return "fstp\t%y0";
37826 if (STACK_TOP_P (operands
[0]))
37827 return "fld%Z1\t%y1";
37830 else if (MEM_P (operands
[0]))
37832 gcc_assert (REG_P (operands
[1]));
37833 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37834 return "fstp%Z0\t%y0";
37837 /* There is no non-popping store to memory for XFmode.
37838 So if we need one, follow the store with a load. */
37839 if (GET_MODE (operands
[0]) == XFmode
)
37840 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
37842 return "fst%Z0\t%y0";
/* NOTE(review): mangled extraction -- tokens kept verbatim.  The lines
   between 37865 and 37869 (presumably the else-branch separating the SAHF
   path from the TEST path) were dropped; confirm against the original.  */
/* Reads the x87 status word into a register with fnstsw, then tests the C2
   flag (0x04) either via sahf + an UNORDERED branch or via a direct test
   insn, and emits a conditional jump to LABEL predicted mostly not-taken
   (10%).  */
37849 /* Output code to perform a conditional jump to LABEL, if C2 flag in
37850 FP status register is set. */
37853 ix86_emit_fp_unordered_jump (rtx label
)
37855 rtx reg
= gen_reg_rtx (HImode
);
37858 emit_insn (gen_x86_fnstsw_1 (reg
));
37860 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
37862 emit_insn (gen_x86_sahf_1 (reg
));
37864 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
37865 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
37869 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
37871 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37872 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
37875 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
37876 gen_rtx_LABEL_REF (VOIDmode
, label
),
37878 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
37880 emit_jump_insn (temp
);
37881 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
/* NOTE(review): mangled extraction -- tokens kept verbatim; braces and a few
   lines are missing.  */
/* Computes log1p(op1) into OP0 in XFmode on the x87.  For |op1| below
   ~0.2929 (1 - sqrt(2)/2) it uses fyl2xp1 (accurate near zero); otherwise it
   adds 1.0 and uses fyl2x.  Both paths scale by ln(2) loaded via the fldln2
   constant.  */
37884 /* Output code to perform a log1p XFmode calculation. */
37886 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
37888 rtx label1
= gen_label_rtx ();
37889 rtx label2
= gen_label_rtx ();
37891 rtx tmp
= gen_reg_rtx (XFmode
);
37892 rtx tmp2
= gen_reg_rtx (XFmode
);
37895 emit_insn (gen_absxf2 (tmp
, op1
));
37896 test
= gen_rtx_GE (VOIDmode
, tmp
,
37897 CONST_DOUBLE_FROM_REAL_VALUE (
37898 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
37900 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
37902 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37903 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
37904 emit_jump (label2
);
37906 emit_label (label1
);
37907 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
37908 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
37909 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37910 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
37912 emit_label (label2
);
/* NOTE(review): mangled extraction -- tokens kept verbatim.  The switch
   statements selecting gen_abs/gen_neg and the outmode dispatch lost their
   `switch`/`case` label lines; only the assignments survive.  Confirm the
   dispatch structure against the original source.  */
/* Expands round(op1) -> op0 on the x87 as sgn(a) * floor(|a| + 0.5):
   fxam captures the sign, fabs+0.5 is floored via frndint/lfloor patterns
   (widening to XFmode first when the input is narrower), and the result is
   negated when the captured sign bit (0x02 of the fxam status) was set.  */
37915 /* Emit code for round calculation. */
37916 void ix86_emit_i387_round (rtx op0
, rtx op1
)
37918 enum machine_mode inmode
= GET_MODE (op1
);
37919 enum machine_mode outmode
= GET_MODE (op0
);
37920 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
37921 rtx scratch
= gen_reg_rtx (HImode
);
37922 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37923 rtx jump_label
= gen_label_rtx ();
37925 rtx (*gen_abs
) (rtx
, rtx
);
37926 rtx (*gen_neg
) (rtx
, rtx
);
/* fabs expander selected by input mode (SF/DF/XF); case labels elided.  */
37931 gen_abs
= gen_abssf2
;
37934 gen_abs
= gen_absdf2
;
37937 gen_abs
= gen_absxf2
;
37940 gcc_unreachable ();
/* negate expander selected by output mode (FP or HI/SI/DI integer).  */
37946 gen_neg
= gen_negsf2
;
37949 gen_neg
= gen_negdf2
;
37952 gen_neg
= gen_negxf2
;
37955 gen_neg
= gen_neghi2
;
37958 gen_neg
= gen_negsi2
;
37961 gen_neg
= gen_negdi2
;
37964 gcc_unreachable ();
37967 e1
= gen_reg_rtx (inmode
);
37968 e2
= gen_reg_rtx (inmode
);
37969 res
= gen_reg_rtx (outmode
);
37971 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
37973 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
37975 /* scratch = fxam(op1) */
37976 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
37977 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
37979 /* e1 = fabs(op1) */
37980 emit_insn (gen_abs (e1
, op1
));
37982 /* e2 = e1 + 0.5 */
37983 half
= force_reg (inmode
, half
);
37984 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37985 gen_rtx_PLUS (inmode
, e1
, half
)));
37987 /* res = floor(e2) */
37988 if (inmode
!= XFmode
)
37990 tmp1
= gen_reg_rtx (XFmode
);
37992 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
37993 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
/* outmode dispatch: SF/DF go through a floor + TRUNC_NOOP narrowing,
   XF uses frndint directly, HI/SI/DI use the lfloor patterns.  */
38003 rtx tmp0
= gen_reg_rtx (XFmode
);
38005 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
38007 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38008 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
38009 UNSPEC_TRUNC_NOOP
)));
38013 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
38016 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
38019 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
38022 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
38025 gcc_unreachable ();
38028 /* flags = signbit(a) */
38029 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
38031 /* if (flags) then res = -res */
38032 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
38033 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
38034 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
38036 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38037 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
38038 JUMP_LABEL (insn
) = jump_label
;
38040 emit_insn (gen_neg (res
, res
));
38042 emit_label (jump_label
);
38043 LABEL_NUSES (jump_label
) = 1;
38045 emit_move_insn (op0
, res
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; the UNSPEC code
   line after gen_rtvec (presumably UNSPEC_RCP) was dropped.  */
/* Expands a/b via one Newton-Raphson refinement of the hardware reciprocal
   estimate: x0 = rcp(b); res = a * (2*x0 - b*x0*x0).  Comments on each step
   are the original author's.  */
38048 /* Output code to perform a Newton-Rhapson approximation of a single precision
38049 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
38051 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
38053 rtx x0
, x1
, e0
, e1
;
38055 x0
= gen_reg_rtx (mode
);
38056 e0
= gen_reg_rtx (mode
);
38057 e1
= gen_reg_rtx (mode
);
38058 x1
= gen_reg_rtx (mode
);
38060 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
38062 b
= force_reg (mode
, b
);
38064 /* x0 = rcp(b) estimate */
38065 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38066 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
38069 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38070 gen_rtx_MULT (mode
, x0
, b
)));
38073 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38074 gen_rtx_MULT (mode
, x0
, e0
)));
38077 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38078 gen_rtx_PLUS (mode
, x0
, x0
)));
38081 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
38082 gen_rtx_MINUS (mode
, e1
, e0
)));
38085 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38086 gen_rtx_MULT (mode
, a
, x1
)));
/* NOTE(review): mangled extraction -- tokens kept verbatim.  The trailing
   parameter of the signature (presumably a bool selecting sqrt vs. rsqrt),
   local declarations of r/zero/mask, the UNSPEC_RSQRT code line, and the
   if/else separating the two "e3 = ..." variants were all dropped; confirm
   against the original source.  */
/* Expands sqrt or reciprocal-sqrt via one Newton-Raphson step on the
   hardware rsqrt estimate, with a zero-input mask to avoid NaN for
   sqrt(0.0).  Constants -3 and -0.5 are built (and broadcast for vector
   modes).  */
38089 /* Output code to perform a Newton-Rhapson approximation of a
38090 single precision floating point [reciprocal] square root. */
38092 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
38095 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
38098 x0
= gen_reg_rtx (mode
);
38099 e0
= gen_reg_rtx (mode
);
38100 e1
= gen_reg_rtx (mode
);
38101 e2
= gen_reg_rtx (mode
);
38102 e3
= gen_reg_rtx (mode
);
38104 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
38105 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38107 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
38108 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38110 if (VECTOR_MODE_P (mode
))
38112 mthree
= ix86_build_const_vector (mode
, true, mthree
);
38113 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
38116 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
38117 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
38119 a
= force_reg (mode
, a
);
38121 /* x0 = rsqrt(a) estimate */
38122 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38123 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
38126 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
38131 zero
= gen_reg_rtx (mode
);
38132 mask
= gen_reg_rtx (mode
);
38134 zero
= force_reg (mode
, CONST0_RTX(mode
));
38135 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38136 gen_rtx_NE (mode
, zero
, a
)));
38138 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38139 gen_rtx_AND (mode
, x0
, mask
)));
38143 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38144 gen_rtx_MULT (mode
, x0
, a
)));
38146 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38147 gen_rtx_MULT (mode
, e0
, x0
)));
38150 mthree
= force_reg (mode
, mthree
);
38151 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38152 gen_rtx_PLUS (mode
, e1
, mthree
)));
38154 mhalf
= force_reg (mode
, mhalf
);
/* The two e3 assignments below were presumably alternatives of an elided
   if/else (sqrt path scales x0*a, rsqrt path scales x0) -- verify.  */
38156 /* e3 = -.5 * x0 */
38157 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38158 gen_rtx_MULT (mode
, x0
, mhalf
)));
38160 /* e3 = -.5 * e0 */
38161 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38162 gen_rtx_MULT (mode
, e0
, mhalf
)));
38163 /* ret = e2 * e3 */
38164 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38165 gen_rtx_MULT (mode
, e2
, e3
)));
/* NOTE(review): mangled extraction -- tokens kept verbatim; the `decl'
   parameter line, braces and returns were dropped.  */
/* Solaris section emitter: writes ".eh_frame" with an explicit @unwind
   marker (Binutils 2.15 requirement), routes link-once sections through the
   COMDAT helper, and falls back to the generic ELF implementation.  */
38168 #ifdef TARGET_SOLARIS
38169 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
38172 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
38175 /* With Binutils 2.15, the "@unwind" marker must be specified on
38176 every occurrence of the ".eh_frame" section, not just the first
38179 && strcmp (name
, ".eh_frame") == 0)
38181 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
38182 flags
& SECTION_WRITE
? "aw" : "a");
38187 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
38189 solaris_elf_asm_comdat_section (name
, flags
, decl
);
38194 default_elf_asm_named_section (name
, flags
, decl
);
38196 #endif /* TARGET_SOLARIS */
/* NOTE(review): mangled extraction -- tokens kept verbatim; the case labels
   and return statements of the switch (returning "g" / "e" per the surviving
   comments) were dropped.  */
/* C++ ABI mangling hook: only void/bool/integer/real types are candidates;
   the main variant's machine mode selects the mangled letter.  */
38198 /* Return the mangling of TYPE if it is an extended fundamental type. */
38200 static const char *
38201 ix86_mangle_type (const_tree type
)
38203 type
= TYPE_MAIN_VARIANT (type
);
38205 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
38206 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
38209 switch (TYPE_MODE (type
))
38212 /* __float128 is "g". */
38215 /* "long double" or __float80 is "e". */
38222 /* For 32-bit code we can save PIC register setup by using
38223 __stack_chk_fail_local hidden function instead of calling
38224 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
38225 register, so it is better to call __stack_chk_fail directly. */
38227 static tree ATTRIBUTE_UNUSED
38228 ix86_stack_protect_fail (void)
38230 return TARGET_64BIT
38231 ? default_external_stack_protect_fail ()
38232 : default_hidden_stack_protect_fail ();
/* NOTE(review): mangled extraction -- tokens kept verbatim.  The return-type
   line, the enclosing `if' (presumably testing flag_pic) and the first
   condition line feeding the `||' chain were dropped; confirm against the
   original source.  */
/* EH pointer-encoding hook: PIC code prefers pc-relative sdata4/sdata8
   (indirect for global symbols); small/medium non-PIC models use udata4;
   everything else falls back to an absolute pointer.  */
38235 /* Select a format to encode pointers in exception handling data. CODE
38236 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
38237 true if the symbol may be affected by dynamic relocations.
38239 ??? All x86 object file formats are capable of representing this.
38240 After all, the relocation needed is the same as for the call insn.
38241 Whether or not a particular assembler allows us to enter such, I
38242 guess we'll have to see. */
38244 asm_preferred_eh_data_format (int code
, int global
)
38248 int type
= DW_EH_PE_sdata8
;
38250 || ix86_cmodel
== CM_SMALL_PIC
38251 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
38252 type
= DW_EH_PE_sdata4
;
38253 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
38255 if (ix86_cmodel
== CM_SMALL
38256 || (ix86_cmodel
== CM_MEDIUM
&& code
))
38257 return DW_EH_PE_udata4
;
38258 return DW_EH_PE_absptr
;
/* NOTE(review): mangled extraction -- tokens kept verbatim; vmode
   assignments for the SF/DF branches and several braces were dropped.  */
/* Emits RESULT = ABS_VALUE with SIGN's sign bit ORed in.  When MASK is null
   a sign-bit mask is built here (reduced to scalar via VEC_SELECT for scalar
   modes); the mask is inverted with NOT so AND extracts only the sign bit of
   SIGN before the final IOR.  */
38261 /* Expand copysign from SIGN to the positive value ABS_VALUE
38262 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
38265 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
38267 enum machine_mode mode
= GET_MODE (sign
);
38268 rtx sgn
= gen_reg_rtx (mode
);
38269 if (mask
== NULL_RTX
)
38271 enum machine_mode vmode
;
38273 if (mode
== SFmode
)
38275 else if (mode
== DFmode
)
38280 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
38281 if (!VECTOR_MODE_P (mode
))
38283 /* We need to generate a scalar mode mask in this case. */
38284 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38285 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38286 mask
= gen_reg_rtx (mode
);
38287 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38291 mask
= gen_rtx_NOT (mode
, mask
);
38292 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
38293 gen_rtx_AND (mode
, mask
, sign
)));
38294 emit_insn (gen_rtx_SET (VOIDmode
, result
,
38295 gen_rtx_IOR (mode
, abs_value
, sgn
)));
/* NOTE(review): mangled extraction -- tokens kept verbatim; vmode
   assignments, the xa/mask declarations, the *smask store and the return
   statement were dropped.  */
/* Computes |op0| by ANDing with an inverted sign-bit mask (built with the
   `invert' flag true), reducing the vector mask to scalar when needed.
   Presumably the mask is stored through SMASK and xa returned -- the lines
   are elided here.  */
38298 /* Expand fabs (OP0) and return a new rtx that holds the result. The
38299 mask for masking out the sign-bit is stored in *SMASK, if that is
38302 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
38304 enum machine_mode vmode
, mode
= GET_MODE (op0
);
38307 xa
= gen_reg_rtx (mode
);
38308 if (mode
== SFmode
)
38310 else if (mode
== DFmode
)
38314 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
38315 if (!VECTOR_MODE_P (mode
))
38317 /* We need to generate a scalar mode mask in this case. */
38318 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38319 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38320 mask
= gen_reg_rtx (mode
);
38321 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38323 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
38324 gen_rtx_AND (mode
, op0
, mask
)));
/* NOTE(review): mangled extraction -- tokens kept verbatim; the local
   declarations, the operand-swapping code implied by SWAP_OPERANDS, and the
   `return label;' line were dropped.  */
/* Builds an FP compare setting the flags register in the appropriate fpcmp
   mode, then a conditional forward jump to a fresh label; the label is the
   function's result (return line elided).  */
38332 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
38333 swapping the operands if SWAP_OPERANDS is true. The expanded
38334 code is a forward jump to a newly created label in case the
38335 comparison is true. The generated label rtx is returned. */
38337 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
38338 bool swap_operands
)
38340 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
38350 label
= gen_label_rtx ();
38351 tmp
= gen_rtx_REG (fpcmp_mode
, FLAGS_REG
);
38352 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38353 gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
)));
38354 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
38355 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
38356 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
38357 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38358 JUMP_LABEL (tmp
) = label
;
/* NOTE(review): mangled extraction -- tokens kept verbatim; the
   operand-swap code and `return mask;' were dropped.  */
/* Emits a setcc-style SSE compare producing an all-ones/all-zeros MASK via
   the DF or SF setcc pattern chosen by the operand mode.  */
38363 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
38364 using comparison code CODE. Operands are swapped for the comparison if
38365 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
38367 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
38368 bool swap_operands
)
38370 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
38371 enum machine_mode mode
= GET_MODE (op0
);
38372 rtx mask
= gen_reg_rtx (mode
);
38381 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
38383 emit_insn (insn (mask
, op0
, op1
,
38384 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
/* NOTE(review): mangled extraction -- tokens kept verbatim; the return-type
   line, the `rtx TWO52;' declaration and `return TWO52;' were dropped.  */
/* Materializes 2^52 (DFmode) or 2^23 (SFmode) -- the magic constant whose
   addition/subtraction rounds off the fraction bits -- into a register.  */
38388 /* Generate and return a rtx of mode MODE for 2**n where n is the number
38389 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
38391 ix86_gen_TWO52 (enum machine_mode mode
)
38393 REAL_VALUE_TYPE TWO52r
;
38396 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
38397 TWO52
= const_double_from_real_value (TWO52r
, mode
);
38398 TWO52
= force_reg (mode
, TWO52
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; the `rtx adj;'
   declaration and some comment/brace lines were dropped.  */
/* lround(op1) -> op0: adds copysign(nextafter(0.5, 0.0), op1) -- the value
   just below 0.5, so exact halfway cases round away from zero -- then
   converts to the integer mode with expand_fix.  */
38403 /* Expand SSE sequence for computing lround from OP1 storing
38406 ix86_expand_lround (rtx op0
, rtx op1
)
38408 /* C code for the stuff we're doing below:
38409 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
38412 enum machine_mode mode
= GET_MODE (op1
);
38413 const struct real_format
*fmt
;
38414 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38417 /* load nextafter (0.5, 0.0) */
38418 fmt
= REAL_MODE_FORMAT (mode
);
38419 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38420 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38422 /* adj = copysign (0.5, op1) */
38423 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38424 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
38426 /* adj = op1 + adj */
38427 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38429 /* op0 = (imode)adj */
38430 expand_fix (op0
, adj
, 0);
/* NOTE(review): mangled extraction -- tokens kept verbatim; braces were
   dropped.  */
/* lfloor/lceil: truncate op1 to an integer, convert back to FP, and adjust
   by +/-1 when the round-trip value overshot (compare-and-jump skips the
   adjustment when freg <= op1 for floor, with operands swapped for ceil).  */
38433 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
38436 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
38438 /* C code for the stuff we're doing below (for do_floor):
38440 xi -= (double)xi > op1 ? 1 : 0;
38443 enum machine_mode fmode
= GET_MODE (op1
);
38444 enum machine_mode imode
= GET_MODE (op0
);
38445 rtx ireg
, freg
, label
, tmp
;
38447 /* reg = (long)op1 */
38448 ireg
= gen_reg_rtx (imode
);
38449 expand_fix (ireg
, op1
, 0);
38451 /* freg = (double)reg */
38452 freg
= gen_reg_rtx (fmode
);
38453 expand_float (freg
, ireg
, 0);
38455 /* ireg = (freg > op1) ? ireg - 1 : ireg */
38456 label
= ix86_expand_sse_compare_and_jump (UNLE
,
38457 freg
, op1
, !do_floor
);
38458 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
38459 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
38460 emit_move_insn (ireg
, tmp
);
38462 emit_label (label
);
38463 LABEL_NUSES (label
) = 1;
38465 emit_move_insn (op0
, ireg
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; braces were
   dropped.  */
/* rint via the TWO52 trick: values with |x| >= 2^52 (2^23 for SF) are
   already integral and skip to the label; otherwise adding and subtracting
   TWO52 rounds to nearest in the current rounding mode, and the sign is
   restored with copysign to keep -0.0 correct.  */
38468 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
38469 result in OPERAND0. */
38471 ix86_expand_rint (rtx operand0
, rtx operand1
)
38473 /* C code for the stuff we're doing below:
38474 xa = fabs (operand1);
38475 if (!isless (xa, 2**52))
38477 xa = xa + 2**52 - 2**52;
38478 return copysign (xa, operand1);
38480 enum machine_mode mode
= GET_MODE (operand0
);
38481 rtx res
, xa
, label
, TWO52
, mask
;
38483 res
= gen_reg_rtx (mode
);
38484 emit_move_insn (res
, operand1
);
38486 /* xa = abs (operand1) */
38487 xa
= ix86_expand_sse_fabs (res
, &mask
);
38489 /* if (!isless (xa, TWO52)) goto label; */
38490 TWO52
= ix86_gen_TWO52 (mode
);
38491 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38493 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38494 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
38496 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
38498 emit_label (label
);
38499 LABEL_NUSES (label
) = 1;
38501 emit_move_insn (operand0
, res
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; braces and parts
   of the model-C-code comment were dropped.  */
/* floor/ceil without 64-bit cvttsd2siq: round to nearest via the TWO52
   trick, restore the sign, then subtract (floor) the 1.0/-1.0 correction
   masked by an UNGT compare.  Values >= TWO52 bypass everything.  */
38504 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
38507 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
38509 /* C code for the stuff we expand below.
38510 double xa = fabs (x), x2;
38511 if (!isless (xa, TWO52))
38513 xa = xa + TWO52 - TWO52;
38514 x2 = copysign (xa, x);
38523 enum machine_mode mode
= GET_MODE (operand0
);
38524 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
38526 TWO52
= ix86_gen_TWO52 (mode
);
38528 /* Temporary for holding the result, initialized to the input
38529 operand to ease control flow. */
38530 res
= gen_reg_rtx (mode
);
38531 emit_move_insn (res
, operand1
);
38533 /* xa = abs (operand1) */
38534 xa
= ix86_expand_sse_fabs (res
, &mask
);
38536 /* if (!isless (xa, TWO52)) goto label; */
38537 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38539 /* xa = xa + TWO52 - TWO52; */
38540 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38541 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
38543 /* xa = copysign (xa, operand1) */
38544 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
38546 /* generate 1.0 or -1.0 */
38547 one
= force_reg (mode
,
38548 const_double_from_real_value (do_floor
38549 ? dconst1
: dconstm1
, mode
));
38551 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38552 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
38553 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38554 gen_rtx_AND (mode
, one
, tmp
)));
38555 /* We always need to subtract here to preserve signed zero. */
38556 tmp
= expand_simple_binop (mode
, MINUS
,
38557 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38558 emit_move_insn (res
, tmp
);
38560 emit_label (label
);
38561 LABEL_NUSES (label
) = 1;
38563 emit_move_insn (operand0
, res
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; braces and parts
   of the model-C-code comment were dropped.  */
/* floor/ceil via integer round-trip: truncate to DImode/SImode, convert
   back, then adjust by the masked 1.0 correction (MINUS for floor, PLUS for
   ceil).  Signed zeros are restored with copysign when the mode honors
   them.  Values >= TWO52 bypass everything.  */
38566 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
38569 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
38571 /* C code for the stuff we expand below.
38572 double xa = fabs (x), x2;
38573 if (!isless (xa, TWO52))
38575 x2 = (double)(long)x;
38582 if (HONOR_SIGNED_ZEROS (mode))
38583 return copysign (x2, x);
38586 enum machine_mode mode
= GET_MODE (operand0
);
38587 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
38589 TWO52
= ix86_gen_TWO52 (mode
);
38591 /* Temporary for holding the result, initialized to the input
38592 operand to ease control flow. */
38593 res
= gen_reg_rtx (mode
);
38594 emit_move_insn (res
, operand1
);
38596 /* xa = abs (operand1) */
38597 xa
= ix86_expand_sse_fabs (res
, &mask
);
38599 /* if (!isless (xa, TWO52)) goto label; */
38600 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38602 /* xa = (double)(long)x */
38603 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38604 expand_fix (xi
, res
, 0);
38605 expand_float (xa
, xi
, 0);
38608 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
38610 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38611 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
38612 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38613 gen_rtx_AND (mode
, one
, tmp
)));
38614 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
38615 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38616 emit_move_insn (res
, tmp
);
38618 if (HONOR_SIGNED_ZEROS (mode
))
38619 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
38621 emit_label (label
);
38622 LABEL_NUSES (label
) = 1;
38624 emit_move_insn (operand0
, res
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; braces and parts
   of the model-C-code comment were dropped, as was the final argument of the
   mhalf expand_simple_binop call.  */
/* round without DImode truncation: round-to-nearest via TWO52, compute the
   rounding delta dxa, then correct halfway cases by subtracting 1 when
   dxa > 0.5 and adding 1 when dxa <= -0.5, so ties round away from zero.
   The sign is restored with copysign.  Values >= TWO52 bypass everything.  */
38627 /* Expand SSE sequence for computing round from OPERAND1 storing
38628 into OPERAND0. Sequence that works without relying on DImode truncation
38629 via cvttsd2siq that is only available on 64bit targets. */
38631 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
38633 /* C code for the stuff we expand below.
38634 double xa = fabs (x), xa2, x2;
38635 if (!isless (xa, TWO52))
38637 Using the absolute value and copying back sign makes
38638 -0.0 -> -0.0 correct.
38639 xa2 = xa + TWO52 - TWO52;
38644 else if (dxa > 0.5)
38646 x2 = copysign (xa2, x);
38649 enum machine_mode mode
= GET_MODE (operand0
);
38650 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
38652 TWO52
= ix86_gen_TWO52 (mode
);
38654 /* Temporary for holding the result, initialized to the input
38655 operand to ease control flow. */
38656 res
= gen_reg_rtx (mode
);
38657 emit_move_insn (res
, operand1
);
38659 /* xa = abs (operand1) */
38660 xa
= ix86_expand_sse_fabs (res
, &mask
);
38662 /* if (!isless (xa, TWO52)) goto label; */
38663 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38665 /* xa2 = xa + TWO52 - TWO52; */
38666 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38667 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
38669 /* dxa = xa2 - xa; */
38670 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
38672 /* generate 0.5, 1.0 and -0.5 */
38673 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
38674 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38675 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
38679 tmp
= gen_reg_rtx (mode
);
38680 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
38681 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
38682 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38683 gen_rtx_AND (mode
, one
, tmp
)));
38684 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38685 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
38686 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
38687 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38688 gen_rtx_AND (mode
, one
, tmp
)));
38689 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38691 /* res = copysign (xa2, operand1) */
38692 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
38694 emit_label (label
);
38695 LABEL_NUSES (label
) = 1;
38697 emit_move_insn (operand0
, res
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; braces were
   dropped.  */
/* trunc via integer round-trip: fix to DImode/SImode and float back, which
   discards the fraction toward zero; restore -0.0 with copysign when
   required.  Values >= TWO52 are already integral and bypass the work.  */
38700 /* Expand SSE sequence for computing trunc from OPERAND1 storing
38703 ix86_expand_trunc (rtx operand0
, rtx operand1
)
38705 /* C code for SSE variant we expand below.
38706 double xa = fabs (x), x2;
38707 if (!isless (xa, TWO52))
38709 x2 = (double)(long)x;
38710 if (HONOR_SIGNED_ZEROS (mode))
38711 return copysign (x2, x);
38714 enum machine_mode mode
= GET_MODE (operand0
);
38715 rtx xa
, xi
, TWO52
, label
, res
, mask
;
38717 TWO52
= ix86_gen_TWO52 (mode
);
38719 /* Temporary for holding the result, initialized to the input
38720 operand to ease control flow. */
38721 res
= gen_reg_rtx (mode
);
38722 emit_move_insn (res
, operand1
);
38724 /* xa = abs (operand1) */
38725 xa
= ix86_expand_sse_fabs (res
, &mask
);
38727 /* if (!isless (xa, TWO52)) goto label; */
38728 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38730 /* x = (double)(long)x */
38731 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38732 expand_fix (xi
, res
, 0);
38733 expand_float (res
, xi
, 0);
38735 if (HONOR_SIGNED_ZEROS (mode
))
38736 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
38738 emit_label (label
);
38739 LABEL_NUSES (label
) = 1;
38741 emit_move_insn (operand0
, res
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; braces and parts
   of the model-C-code comment were dropped.  */
/* trunc without DImode truncation: round |x| to nearest via TWO52, then
   subtract the masked 1.0 when the rounded value overshot (res > xa), and
   restore the sign with copysign.  Values >= TWO52 bypass everything.  */
38744 /* Expand SSE sequence for computing trunc from OPERAND1 storing
38747 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
38749 enum machine_mode mode
= GET_MODE (operand0
);
38750 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
38752 /* C code for SSE variant we expand below.
38753 double xa = fabs (x), x2;
38754 if (!isless (xa, TWO52))
38756 xa2 = xa + TWO52 - TWO52;
38760 x2 = copysign (xa2, x);
38764 TWO52
= ix86_gen_TWO52 (mode
);
38766 /* Temporary for holding the result, initialized to the input
38767 operand to ease control flow. */
38768 res
= gen_reg_rtx (mode
);
38769 emit_move_insn (res
, operand1
);
38771 /* xa = abs (operand1) */
38772 xa
= ix86_expand_sse_fabs (res
, &smask
);
38774 /* if (!isless (xa, TWO52)) goto label; */
38775 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38777 /* res = xa + TWO52 - TWO52; */
38778 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38779 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
38780 emit_move_insn (res
, tmp
);
38783 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
38785 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
38786 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
38787 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38788 gen_rtx_AND (mode
, mask
, one
)));
38789 tmp
= expand_simple_binop (mode
, MINUS
,
38790 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
38791 emit_move_insn (res
, tmp
);
38793 /* res = copysign (res, operand1) */
38794 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
38796 emit_label (label
);
38797 LABEL_NUSES (label
) = 1;
38799 emit_move_insn (operand0
, res
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; braces were
   dropped.  */
/* round via integer round-trip: add copysign-adjusted nextafter(0.5, 0.0)
   (so exact .5 cases round away from zero), fix to DImode/SImode, float
   back, and restore the sign.  Values >= TWO52 bypass everything.  */
38802 /* Expand SSE sequence for computing round from OPERAND1 storing
38805 ix86_expand_round (rtx operand0
, rtx operand1
)
38807 /* C code for the stuff we're doing below:
38808 double xa = fabs (x);
38809 if (!isless (xa, TWO52))
38811 xa = (double)(long)(xa + nextafter (0.5, 0.0));
38812 return copysign (xa, x);
38814 enum machine_mode mode
= GET_MODE (operand0
);
38815 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
38816 const struct real_format
*fmt
;
38817 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38819 /* Temporary for holding the result, initialized to the input
38820 operand to ease control flow. */
38821 res
= gen_reg_rtx (mode
);
38822 emit_move_insn (res
, operand1
);
38824 TWO52
= ix86_gen_TWO52 (mode
);
38825 xa
= ix86_expand_sse_fabs (res
, &mask
);
38826 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38828 /* load nextafter (0.5, 0.0) */
38829 fmt
= REAL_MODE_FORMAT (mode
);
38830 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38831 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38833 /* xa = xa + 0.5 */
38834 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38835 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38837 /* xa = (double)(int64_t)xa */
38838 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38839 expand_fix (xi
, xa
, 0);
38840 expand_float (xa
, xi
, 0);
38842 /* res = copysign (xa, operand1) */
38843 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
38845 emit_label (label
);
38846 LABEL_NUSES (label
) = 1;
38848 emit_move_insn (operand0
, res
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; the switch on
   the mode lost its `switch'/`case' label lines, and braces were dropped.  */
/* round using SSE4.1 ROUNDSS/ROUNDSD: add copysign(nextafter(0.5, 0.0), a)
   then truncate with the round insn (ROUND_TRUNC), so ties go away from
   zero.  Copysign/round expanders are picked per mode (SF/DF).  */
38851 /* Expand SSE sequence for computing round
38852 from OP1 storing into OP0 using sse4 round insn. */
38854 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
38856 enum machine_mode mode
= GET_MODE (op0
);
38857 rtx e1
, e2
, res
, half
;
38858 const struct real_format
*fmt
;
38859 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38860 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
38861 rtx (*gen_round
) (rtx
, rtx
, rtx
);
38866 gen_copysign
= gen_copysignsf3
;
38867 gen_round
= gen_sse4_1_roundsf2
;
38870 gen_copysign
= gen_copysigndf3
;
38871 gen_round
= gen_sse4_1_rounddf2
;
38874 gcc_unreachable ();
38877 /* round (a) = trunc (a + copysign (0.5, a)) */
38879 /* load nextafter (0.5, 0.0) */
38880 fmt
= REAL_MODE_FORMAT (mode
);
38881 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38882 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38883 half
= const_double_from_real_value (pred_half
, mode
);
38885 /* e1 = copysign (0.5, op1) */
38886 e1
= gen_reg_rtx (mode
);
38887 emit_insn (gen_copysign (e1
, half
, op1
));
38889 /* e2 = op1 + e1 */
38890 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38892 /* res = trunc (e2) */
38893 res
= gen_reg_rtx (mode
);
38894 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
38896 emit_move_insn (op0
, res
);
/* NOTE(review): mangled extraction -- tokens kept verbatim; several entries
   lost their trailing affects_type_identity field and closing brace line.  */
/* TARGET_ATTRIBUTE_TABLE: maps i386 attribute names to their argument
   counts, applicability flags and handler functions; NULL-terminated.  */
38900 /* Table of valid machine attributes. */
38901 static const struct attribute_spec ix86_attribute_table
[] =
38903 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38904 affects_type_identity } */
38905 /* Stdcall attribute says callee is responsible for popping arguments
38906 if they are not variable. */
38907 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38909 /* Fastcall attribute says callee is responsible for popping arguments
38910 if they are not variable. */
38911 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38913 /* Thiscall attribute says callee is responsible for popping arguments
38914 if they are not variable. */
38915 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38917 /* Cdecl attribute says the callee is a normal C declaration */
38918 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38920 /* Regparm attribute specifies how many integer arguments are to be
38921 passed in registers. */
38922 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
38924 /* Sseregparm attribute says we are using x86_64 calling conventions
38925 for FP arguments. */
38926 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38928 /* The transactional memory builtins are implicitly regparm or fastcall
38929 depending on the ABI. Override the generic do-nothing attribute that
38930 these builtins were declared with. */
38931 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
38933 /* force_align_arg_pointer says this function realigns the stack at entry. */
38934 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
38935 false, true, true, ix86_handle_cconv_attribute
, false },
38936 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38937 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
38938 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
38939 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
38942 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38944 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38946 #ifdef SUBTARGET_ATTRIBUTE_TABLE
38947 SUBTARGET_ATTRIBUTE_TABLE
,
38949 /* ms_abi and sysv_abi calling convention function attributes. */
38950 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38951 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38952 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
38954 { "callee_pop_aggregate_return", 1, 1, false, true, true,
38955 ix86_handle_callee_pop_aggregate_return
, true },
38957 { NULL
, 0, 0, false, false, false, NULL
, false }
/* NOTE(review): mangled extraction -- tokens kept verbatim; the return type,
   some parameter lines, early case labels and braces were dropped.  */
/* Vectorizer cost hook: maps each vect_cost_for_stmt kind to the
   corresponding field of the active ix86_cost table; vec_construct is
   estimated from the vector's subpart count.  */
38960 /* Implement targetm.vectorize.builtin_vectorization_cost. */
38962 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
38964 int misalign ATTRIBUTE_UNUSED
)
38968 switch (type_of_cost
)
38971 return ix86_cost
->scalar_stmt_cost
;
38974 return ix86_cost
->scalar_load_cost
;
38977 return ix86_cost
->scalar_store_cost
;
38980 return ix86_cost
->vec_stmt_cost
;
38983 return ix86_cost
->vec_align_load_cost
;
38986 return ix86_cost
->vec_store_cost
;
38988 case vec_to_scalar
:
38989 return ix86_cost
->vec_to_scalar_cost
;
38991 case scalar_to_vec
:
38992 return ix86_cost
->scalar_to_vec_cost
;
38994 case unaligned_load
:
38995 case unaligned_store
:
38996 return ix86_cost
->vec_unalign_load_cost
;
38998 case cond_branch_taken
:
38999 return ix86_cost
->cond_taken_branch_cost
;
39001 case cond_branch_not_taken
:
39002 return ix86_cost
->cond_not_taken_branch_cost
;
39005 case vec_promote_demote
:
39006 return ix86_cost
->vec_stmt_cost
;
39008 case vec_construct
:
39009 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
39010 return elements
/ 2 + 1;
39013 gcc_unreachable ();
39017 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
39018 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
39019 insn every time. */
39021 static GTY(()) rtx vselect_insn
;
39023 /* Initialize vselect_insn. */
39026 init_vselect_insn (void)
39031 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
39032 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
39033 XVECEXP (x
, 0, i
) = const0_rtx
;
39034 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
39036 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
39038 vselect_insn
= emit_insn (x
);
39042 /* Construct (set target (vec_select op0 (parallel perm))) and
39043 return true if that's a valid instruction in the active ISA. */
39046 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
39047 unsigned nelt
, bool testing_p
)
39050 rtx x
, save_vconcat
;
39053 if (vselect_insn
== NULL_RTX
)
39054 init_vselect_insn ();
39056 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
39057 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
39058 for (i
= 0; i
< nelt
; ++i
)
39059 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
39060 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39061 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
39062 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
39063 SET_DEST (PATTERN (vselect_insn
)) = target
;
39064 icode
= recog_memoized (vselect_insn
);
39066 if (icode
>= 0 && !testing_p
)
39067 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
39069 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
39070 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
39071 INSN_CODE (vselect_insn
) = -1;
39076 /* Similar, but generate a vec_concat from op0 and op1 as well. */
39079 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
39080 const unsigned char *perm
, unsigned nelt
,
39083 enum machine_mode v2mode
;
39087 if (vselect_insn
== NULL_RTX
)
39088 init_vselect_insn ();
39090 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
39091 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39092 PUT_MODE (x
, v2mode
);
39095 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
39096 XEXP (x
, 0) = const0_rtx
;
39097 XEXP (x
, 1) = const0_rtx
;
39101 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39102 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
39105 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
39107 enum machine_mode vmode
= d
->vmode
;
39108 unsigned i
, mask
, nelt
= d
->nelt
;
39109 rtx target
, op0
, op1
, x
;
39110 rtx rperm
[32], vperm
;
39112 if (d
->one_operand_p
)
39114 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
39116 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
39118 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
39123 /* This is a blend, not a permute. Elements must stay in their
39124 respective lanes. */
39125 for (i
= 0; i
< nelt
; ++i
)
39127 unsigned e
= d
->perm
[i
];
39128 if (!(e
== i
|| e
== i
+ nelt
))
39135 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
39136 decision should be extracted elsewhere, so that we only try that
39137 sequence once all budget==3 options have been tried. */
39138 target
= d
->target
;
39151 for (i
= 0; i
< nelt
; ++i
)
39152 mask
|= (d
->perm
[i
] >= nelt
) << i
;
39156 for (i
= 0; i
< 2; ++i
)
39157 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
39162 for (i
= 0; i
< 4; ++i
)
39163 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39168 /* See if bytes move in pairs so we can use pblendw with
39169 an immediate argument, rather than pblendvb with a vector
39171 for (i
= 0; i
< 16; i
+= 2)
39172 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39175 for (i
= 0; i
< nelt
; ++i
)
39176 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
39179 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
39180 vperm
= force_reg (vmode
, vperm
);
39182 if (GET_MODE_SIZE (vmode
) == 16)
39183 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
39185 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
39189 for (i
= 0; i
< 8; ++i
)
39190 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39195 target
= gen_lowpart (vmode
, target
);
39196 op0
= gen_lowpart (vmode
, op0
);
39197 op1
= gen_lowpart (vmode
, op1
);
39201 /* See if bytes move in pairs. If not, vpblendvb must be used. */
39202 for (i
= 0; i
< 32; i
+= 2)
39203 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39205 /* See if bytes move in quadruplets. If yes, vpblendd
39206 with immediate can be used. */
39207 for (i
= 0; i
< 32; i
+= 4)
39208 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
39212 /* See if bytes move the same in both lanes. If yes,
39213 vpblendw with immediate can be used. */
39214 for (i
= 0; i
< 16; i
+= 2)
39215 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
39218 /* Use vpblendw. */
39219 for (i
= 0; i
< 16; ++i
)
39220 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
39225 /* Use vpblendd. */
39226 for (i
= 0; i
< 8; ++i
)
39227 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
39232 /* See if words move in pairs. If yes, vpblendd can be used. */
39233 for (i
= 0; i
< 16; i
+= 2)
39234 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39238 /* See if words move the same in both lanes. If not,
39239 vpblendvb must be used. */
39240 for (i
= 0; i
< 8; i
++)
39241 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
39243 /* Use vpblendvb. */
39244 for (i
= 0; i
< 32; ++i
)
39245 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
39249 target
= gen_lowpart (vmode
, target
);
39250 op0
= gen_lowpart (vmode
, op0
);
39251 op1
= gen_lowpart (vmode
, op1
);
39252 goto finish_pblendvb
;
39255 /* Use vpblendw. */
39256 for (i
= 0; i
< 16; ++i
)
39257 mask
|= (d
->perm
[i
] >= 16) << i
;
39261 /* Use vpblendd. */
39262 for (i
= 0; i
< 8; ++i
)
39263 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39268 /* Use vpblendd. */
39269 for (i
= 0; i
< 4; ++i
)
39270 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39275 gcc_unreachable ();
39278 /* This matches five different patterns with the different modes. */
39279 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
39280 x
= gen_rtx_SET (VOIDmode
, target
, x
);
39286 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39287 in terms of the variable form of vpermilps.
39289 Note that we will have already failed the immediate input vpermilps,
39290 which requires that the high and low part shuffle be identical; the
39291 variable form doesn't require that. */
39294 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
39296 rtx rperm
[8], vperm
;
39299 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
39302 /* We can only permute within the 128-bit lane. */
39303 for (i
= 0; i
< 8; ++i
)
39305 unsigned e
= d
->perm
[i
];
39306 if (i
< 4 ? e
>= 4 : e
< 4)
39313 for (i
= 0; i
< 8; ++i
)
39315 unsigned e
= d
->perm
[i
];
39317 /* Within each 128-bit lane, the elements of op0 are numbered
39318 from 0 and the elements of op1 are numbered from 4. */
39324 rperm
[i
] = GEN_INT (e
);
39327 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
39328 vperm
= force_reg (V8SImode
, vperm
);
39329 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
39334 /* Return true if permutation D can be performed as VMODE permutation
39338 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
39340 unsigned int i
, j
, chunk
;
39342 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
39343 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
39344 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
39347 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
39350 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
39351 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
39352 if (d
->perm
[i
] & (chunk
- 1))
39355 for (j
= 1; j
< chunk
; ++j
)
39356 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
39362 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39363 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
39366 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
39368 unsigned i
, nelt
, eltsz
, mask
;
39369 unsigned char perm
[32];
39370 enum machine_mode vmode
= V16QImode
;
39371 rtx rperm
[32], vperm
, target
, op0
, op1
;
39375 if (!d
->one_operand_p
)
39377 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
39380 && valid_perm_using_mode_p (V2TImode
, d
))
39385 /* Use vperm2i128 insn. The pattern uses
39386 V4DImode instead of V2TImode. */
39387 target
= gen_lowpart (V4DImode
, d
->target
);
39388 op0
= gen_lowpart (V4DImode
, d
->op0
);
39389 op1
= gen_lowpart (V4DImode
, d
->op1
);
39391 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
39392 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
39393 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
39401 if (GET_MODE_SIZE (d
->vmode
) == 16)
39406 else if (GET_MODE_SIZE (d
->vmode
) == 32)
39411 /* V4DImode should be already handled through
39412 expand_vselect by vpermq instruction. */
39413 gcc_assert (d
->vmode
!= V4DImode
);
39416 if (d
->vmode
== V8SImode
39417 || d
->vmode
== V16HImode
39418 || d
->vmode
== V32QImode
)
39420 /* First see if vpermq can be used for
39421 V8SImode/V16HImode/V32QImode. */
39422 if (valid_perm_using_mode_p (V4DImode
, d
))
39424 for (i
= 0; i
< 4; i
++)
39425 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
39428 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
39429 gen_lowpart (V4DImode
, d
->op0
),
39433 /* Next see if vpermd can be used. */
39434 if (valid_perm_using_mode_p (V8SImode
, d
))
39437 /* Or if vpermps can be used. */
39438 else if (d
->vmode
== V8SFmode
)
39441 if (vmode
== V32QImode
)
39443 /* vpshufb only works intra lanes, it is not
39444 possible to shuffle bytes in between the lanes. */
39445 for (i
= 0; i
< nelt
; ++i
)
39446 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
39457 if (vmode
== V8SImode
)
39458 for (i
= 0; i
< 8; ++i
)
39459 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
39462 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39463 if (!d
->one_operand_p
)
39464 mask
= 2 * nelt
- 1;
39465 else if (vmode
== V16QImode
)
39468 mask
= nelt
/ 2 - 1;
39470 for (i
= 0; i
< nelt
; ++i
)
39472 unsigned j
, e
= d
->perm
[i
] & mask
;
39473 for (j
= 0; j
< eltsz
; ++j
)
39474 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
39478 vperm
= gen_rtx_CONST_VECTOR (vmode
,
39479 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
39480 vperm
= force_reg (vmode
, vperm
);
39482 target
= gen_lowpart (vmode
, d
->target
);
39483 op0
= gen_lowpart (vmode
, d
->op0
);
39484 if (d
->one_operand_p
)
39486 if (vmode
== V16QImode
)
39487 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
39488 else if (vmode
== V32QImode
)
39489 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
39490 else if (vmode
== V8SFmode
)
39491 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
39493 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
39497 op1
= gen_lowpart (vmode
, d
->op1
);
39498 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
39504 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
39505 in a single instruction. */
39508 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
39510 unsigned i
, nelt
= d
->nelt
;
39511 unsigned char perm2
[MAX_VECT_LEN
];
39513 /* Check plain VEC_SELECT first, because AVX has instructions that could
39514 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
39515 input where SEL+CONCAT may not. */
39516 if (d
->one_operand_p
)
39518 int mask
= nelt
- 1;
39519 bool identity_perm
= true;
39520 bool broadcast_perm
= true;
39522 for (i
= 0; i
< nelt
; i
++)
39524 perm2
[i
] = d
->perm
[i
] & mask
;
39526 identity_perm
= false;
39528 broadcast_perm
= false;
39534 emit_move_insn (d
->target
, d
->op0
);
39537 else if (broadcast_perm
&& TARGET_AVX2
)
39539 /* Use vpbroadcast{b,w,d}. */
39540 rtx (*gen
) (rtx
, rtx
) = NULL
;
39544 gen
= gen_avx2_pbroadcastv32qi_1
;
39547 gen
= gen_avx2_pbroadcastv16hi_1
;
39550 gen
= gen_avx2_pbroadcastv8si_1
;
39553 gen
= gen_avx2_pbroadcastv16qi
;
39556 gen
= gen_avx2_pbroadcastv8hi
;
39559 gen
= gen_avx2_vec_dupv8sf_1
;
39561 /* For other modes prefer other shuffles this function creates. */
39567 emit_insn (gen (d
->target
, d
->op0
));
39572 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
39575 /* There are plenty of patterns in sse.md that are written for
39576 SEL+CONCAT and are not replicated for a single op. Perhaps
39577 that should be changed, to avoid the nastiness here. */
39579 /* Recognize interleave style patterns, which means incrementing
39580 every other permutation operand. */
39581 for (i
= 0; i
< nelt
; i
+= 2)
39583 perm2
[i
] = d
->perm
[i
] & mask
;
39584 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
39586 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
39590 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
39593 for (i
= 0; i
< nelt
; i
+= 4)
39595 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
39596 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
39597 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
39598 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
39601 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
39607 /* Finally, try the fully general two operand permute. */
39608 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
39612 /* Recognize interleave style patterns with reversed operands. */
39613 if (!d
->one_operand_p
)
39615 for (i
= 0; i
< nelt
; ++i
)
39617 unsigned e
= d
->perm
[i
];
39625 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
39630 /* Try the SSE4.1 blend variable merge instructions. */
39631 if (expand_vec_perm_blend (d
))
39634 /* Try one of the AVX vpermil variable permutations. */
39635 if (expand_vec_perm_vpermil (d
))
39638 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
39639 vpshufb, vpermd, vpermps or vpermq variable permutation. */
39640 if (expand_vec_perm_pshufb (d
))
39646 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39647 in terms of a pair of pshuflw + pshufhw instructions. */
39650 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
39652 unsigned char perm2
[MAX_VECT_LEN
];
39656 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
39659 /* The two permutations only operate in 64-bit lanes. */
39660 for (i
= 0; i
< 4; ++i
)
39661 if (d
->perm
[i
] >= 4)
39663 for (i
= 4; i
< 8; ++i
)
39664 if (d
->perm
[i
] < 4)
39670 /* Emit the pshuflw. */
39671 memcpy (perm2
, d
->perm
, 4);
39672 for (i
= 4; i
< 8; ++i
)
39674 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
39677 /* Emit the pshufhw. */
39678 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
39679 for (i
= 0; i
< 4; ++i
)
39681 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
39687 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39688 the permutation using the SSSE3 palignr instruction. This succeeds
39689 when all of the elements in PERM fit within one vector and we merely
39690 need to shift them down so that a single vector permutation has a
39691 chance to succeed. */
39694 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
39696 unsigned i
, nelt
= d
->nelt
;
39701 /* Even with AVX, palignr only operates on 128-bit vectors. */
39702 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
39705 min
= nelt
, max
= 0;
39706 for (i
= 0; i
< nelt
; ++i
)
39708 unsigned e
= d
->perm
[i
];
39714 if (min
== 0 || max
- min
>= nelt
)
39717 /* Given that we have SSSE3, we know we'll be able to implement the
39718 single operand permutation after the palignr with pshufb. */
39722 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
39723 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
39724 gen_lowpart (TImode
, d
->op1
),
39725 gen_lowpart (TImode
, d
->op0
), shift
));
39727 d
->op0
= d
->op1
= d
->target
;
39728 d
->one_operand_p
= true;
39731 for (i
= 0; i
< nelt
; ++i
)
39733 unsigned e
= d
->perm
[i
] - min
;
39739 /* Test for the degenerate case where the alignment by itself
39740 produces the desired permutation. */
39744 ok
= expand_vec_perm_1 (d
);
39750 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
39752 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39753 a two vector permutation into a single vector permutation by using
39754 an interleave operation to merge the vectors. */
39757 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
39759 struct expand_vec_perm_d dremap
, dfinal
;
39760 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39761 unsigned HOST_WIDE_INT contents
;
39762 unsigned char remap
[2 * MAX_VECT_LEN
];
39764 bool ok
, same_halves
= false;
39766 if (GET_MODE_SIZE (d
->vmode
) == 16)
39768 if (d
->one_operand_p
)
39771 else if (GET_MODE_SIZE (d
->vmode
) == 32)
39775 /* For 32-byte modes allow even d->one_operand_p.
39776 The lack of cross-lane shuffling in some instructions
39777 might prevent a single insn shuffle. */
39779 dfinal
.testing_p
= true;
39780 /* If expand_vec_perm_interleave3 can expand this into
39781 a 3 insn sequence, give up and let it be expanded as
39782 3 insn sequence. While that is one insn longer,
39783 it doesn't need a memory operand and in the common
39784 case that both interleave low and high permutations
39785 with the same operands are adjacent needs 4 insns
39786 for both after CSE. */
39787 if (expand_vec_perm_interleave3 (&dfinal
))
39793 /* Examine from whence the elements come. */
39795 for (i
= 0; i
< nelt
; ++i
)
39796 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
39798 memset (remap
, 0xff, sizeof (remap
));
39801 if (GET_MODE_SIZE (d
->vmode
) == 16)
39803 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
39805 /* Split the two input vectors into 4 halves. */
39806 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
39811 /* If the elements from the low halves use interleave low, and similarly
39812 for interleave high. If the elements are from mis-matched halves, we
39813 can use shufps for V4SF/V4SI or do a DImode shuffle. */
39814 if ((contents
& (h1
| h3
)) == contents
)
39817 for (i
= 0; i
< nelt2
; ++i
)
39820 remap
[i
+ nelt
] = i
* 2 + 1;
39821 dremap
.perm
[i
* 2] = i
;
39822 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39824 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39825 dremap
.vmode
= V4SFmode
;
39827 else if ((contents
& (h2
| h4
)) == contents
)
39830 for (i
= 0; i
< nelt2
; ++i
)
39832 remap
[i
+ nelt2
] = i
* 2;
39833 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
39834 dremap
.perm
[i
* 2] = i
+ nelt2
;
39835 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
39837 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39838 dremap
.vmode
= V4SFmode
;
39840 else if ((contents
& (h1
| h4
)) == contents
)
39843 for (i
= 0; i
< nelt2
; ++i
)
39846 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
39847 dremap
.perm
[i
] = i
;
39848 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
39853 dremap
.vmode
= V2DImode
;
39855 dremap
.perm
[0] = 0;
39856 dremap
.perm
[1] = 3;
39859 else if ((contents
& (h2
| h3
)) == contents
)
39862 for (i
= 0; i
< nelt2
; ++i
)
39864 remap
[i
+ nelt2
] = i
;
39865 remap
[i
+ nelt
] = i
+ nelt2
;
39866 dremap
.perm
[i
] = i
+ nelt2
;
39867 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
39872 dremap
.vmode
= V2DImode
;
39874 dremap
.perm
[0] = 1;
39875 dremap
.perm
[1] = 2;
39883 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
39884 unsigned HOST_WIDE_INT q
[8];
39885 unsigned int nonzero_halves
[4];
39887 /* Split the two input vectors into 8 quarters. */
39888 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
39889 for (i
= 1; i
< 8; ++i
)
39890 q
[i
] = q
[0] << (nelt4
* i
);
39891 for (i
= 0; i
< 4; ++i
)
39892 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
39894 nonzero_halves
[nzcnt
] = i
;
39900 gcc_assert (d
->one_operand_p
);
39901 nonzero_halves
[1] = nonzero_halves
[0];
39902 same_halves
= true;
39904 else if (d
->one_operand_p
)
39906 gcc_assert (nonzero_halves
[0] == 0);
39907 gcc_assert (nonzero_halves
[1] == 1);
39912 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
39914 /* Attempt to increase the likelihood that dfinal
39915 shuffle will be intra-lane. */
39916 char tmph
= nonzero_halves
[0];
39917 nonzero_halves
[0] = nonzero_halves
[1];
39918 nonzero_halves
[1] = tmph
;
39921 /* vperm2f128 or vperm2i128. */
39922 for (i
= 0; i
< nelt2
; ++i
)
39924 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
39925 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
39926 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
39927 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
39930 if (d
->vmode
!= V8SFmode
39931 && d
->vmode
!= V4DFmode
39932 && d
->vmode
!= V8SImode
)
39934 dremap
.vmode
= V8SImode
;
39936 for (i
= 0; i
< 4; ++i
)
39938 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
39939 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
39943 else if (d
->one_operand_p
)
39945 else if (TARGET_AVX2
39946 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
39949 for (i
= 0; i
< nelt4
; ++i
)
39952 remap
[i
+ nelt
] = i
* 2 + 1;
39953 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
39954 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
39955 dremap
.perm
[i
* 2] = i
;
39956 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39957 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
39958 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
39961 else if (TARGET_AVX2
39962 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
39965 for (i
= 0; i
< nelt4
; ++i
)
39967 remap
[i
+ nelt4
] = i
* 2;
39968 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
39969 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
39970 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
39971 dremap
.perm
[i
* 2] = i
+ nelt4
;
39972 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
39973 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
39974 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
39981 /* Use the remapping array set up above to move the elements from their
39982 swizzled locations into their final destinations. */
39984 for (i
= 0; i
< nelt
; ++i
)
39986 unsigned e
= remap
[d
->perm
[i
]];
39987 gcc_assert (e
< nelt
);
39988 /* If same_halves is true, both halves of the remapped vector are the
39989 same. Avoid cross-lane accesses if possible. */
39990 if (same_halves
&& i
>= nelt2
)
39992 gcc_assert (e
< nelt2
);
39993 dfinal
.perm
[i
] = e
+ nelt2
;
39996 dfinal
.perm
[i
] = e
;
39998 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
39999 dfinal
.op1
= dfinal
.op0
;
40000 dfinal
.one_operand_p
= true;
40001 dremap
.target
= dfinal
.op0
;
40003 /* Test if the final remap can be done with a single insn. For V4SFmode or
40004 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
40006 ok
= expand_vec_perm_1 (&dfinal
);
40007 seq
= get_insns ();
40016 if (dremap
.vmode
!= dfinal
.vmode
)
40018 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
40019 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
40020 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
40023 ok
= expand_vec_perm_1 (&dremap
);
40030 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40031 a single vector cross-lane permutation into vpermq followed
40032 by any of the single insn permutations. */
40035 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
40037 struct expand_vec_perm_d dremap
, dfinal
;
40038 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
40039 unsigned contents
[2];
40043 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
40044 && d
->one_operand_p
))
40049 for (i
= 0; i
< nelt2
; ++i
)
40051 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
40052 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
40055 for (i
= 0; i
< 2; ++i
)
40057 unsigned int cnt
= 0;
40058 for (j
= 0; j
< 4; ++j
)
40059 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
40067 dremap
.vmode
= V4DImode
;
40069 dremap
.target
= gen_reg_rtx (V4DImode
);
40070 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
40071 dremap
.op1
= dremap
.op0
;
40072 dremap
.one_operand_p
= true;
40073 for (i
= 0; i
< 2; ++i
)
40075 unsigned int cnt
= 0;
40076 for (j
= 0; j
< 4; ++j
)
40077 if ((contents
[i
] & (1u << j
)) != 0)
40078 dremap
.perm
[2 * i
+ cnt
++] = j
;
40079 for (; cnt
< 2; ++cnt
)
40080 dremap
.perm
[2 * i
+ cnt
] = 0;
40084 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
40085 dfinal
.op1
= dfinal
.op0
;
40086 dfinal
.one_operand_p
= true;
40087 for (i
= 0, j
= 0; i
< nelt
; ++i
)
40091 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
40092 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
40094 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
40095 dfinal
.perm
[i
] |= nelt4
;
40097 gcc_unreachable ();
40100 ok
= expand_vec_perm_1 (&dremap
);
40103 ok
= expand_vec_perm_1 (&dfinal
);
40109 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
40110 a vector permutation using two instructions, vperm2f128 resp.
40111 vperm2i128 followed by any single in-lane permutation. */
40114 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
40116 struct expand_vec_perm_d dfirst
, dsecond
;
40117 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
40121 || GET_MODE_SIZE (d
->vmode
) != 32
40122 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
40126 dsecond
.one_operand_p
= false;
40127 dsecond
.testing_p
= true;
40129 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
40130 immediate. For perm < 16 the second permutation uses
40131 d->op0 as first operand, for perm >= 16 it uses d->op1
40132 as first operand. The second operand is the result of
40134 for (perm
= 0; perm
< 32; perm
++)
40136 /* Ignore permutations which do not move anything cross-lane. */
40139 /* The second shuffle for e.g. V4DFmode has
40140 0123 and ABCD operands.
40141 Ignore AB23, as 23 is already in the second lane
40142 of the first operand. */
40143 if ((perm
& 0xc) == (1 << 2)) continue;
40144 /* And 01CD, as 01 is in the first lane of the first
40146 if ((perm
& 3) == 0) continue;
40147 /* And 4567, as then the vperm2[fi]128 doesn't change
40148 anything on the original 4567 second operand. */
40149 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
40153 /* The second shuffle for e.g. V4DFmode has
40154 4567 and ABCD operands.
40155 Ignore AB67, as 67 is already in the second lane
40156 of the first operand. */
40157 if ((perm
& 0xc) == (3 << 2)) continue;
40158 /* And 45CD, as 45 is in the first lane of the first
40160 if ((perm
& 3) == 2) continue;
40161 /* And 0123, as then the vperm2[fi]128 doesn't change
40162 anything on the original 0123 first operand. */
40163 if ((perm
& 0xf) == (1 << 2)) continue;
40166 for (i
= 0; i
< nelt
; i
++)
40168 j
= d
->perm
[i
] / nelt2
;
40169 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
40170 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
40171 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
40172 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
40180 ok
= expand_vec_perm_1 (&dsecond
);
40191 /* Found a usable second shuffle. dfirst will be
40192 vperm2f128 on d->op0 and d->op1. */
40193 dsecond
.testing_p
= false;
40195 dfirst
.target
= gen_reg_rtx (d
->vmode
);
40196 for (i
= 0; i
< nelt
; i
++)
40197 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
40198 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
40200 ok
= expand_vec_perm_1 (&dfirst
);
40203 /* And dsecond is some single insn shuffle, taking
40204 d->op0 and result of vperm2f128 (if perm < 16) or
40205 d->op1 and result of vperm2f128 (otherwise). */
40206 dsecond
.op1
= dfirst
.target
;
40208 dsecond
.op0
= dfirst
.op1
;
40210 ok
= expand_vec_perm_1 (&dsecond
);
40216 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
40217 if (d
->one_operand_p
)
40224 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40225 a two vector permutation using 2 intra-lane interleave insns
40226 and cross-lane shuffle for 32-byte vectors. */
40229 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
40232 rtx (*gen
) (rtx
, rtx
, rtx
);
40234 if (d
->one_operand_p
)
40236 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
40238 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
40244 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
40246 for (i
= 0; i
< nelt
; i
+= 2)
40247 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
40248 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
40258 gen
= gen_vec_interleave_highv32qi
;
40260 gen
= gen_vec_interleave_lowv32qi
;
40264 gen
= gen_vec_interleave_highv16hi
;
40266 gen
= gen_vec_interleave_lowv16hi
;
40270 gen
= gen_vec_interleave_highv8si
;
40272 gen
= gen_vec_interleave_lowv8si
;
40276 gen
= gen_vec_interleave_highv4di
;
40278 gen
= gen_vec_interleave_lowv4di
;
40282 gen
= gen_vec_interleave_highv8sf
;
40284 gen
= gen_vec_interleave_lowv8sf
;
40288 gen
= gen_vec_interleave_highv4df
;
40290 gen
= gen_vec_interleave_lowv4df
;
40293 gcc_unreachable ();
40296 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
40300 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
40301 a single vector permutation using a single intra-lane vector
40302 permutation, vperm2f128 swapping the lanes and vblend* insn blending
40303 the non-swapped and swapped vectors together. */
40306 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
40308 struct expand_vec_perm_d dfirst
, dsecond
;
40309 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
40312 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
40316 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
40317 || !d
->one_operand_p
)
40321 for (i
= 0; i
< nelt
; i
++)
40322 dfirst
.perm
[i
] = 0xff;
40323 for (i
= 0, msk
= 0; i
< nelt
; i
++)
40325 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
40326 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
40328 dfirst
.perm
[j
] = d
->perm
[i
];
40332 for (i
= 0; i
< nelt
; i
++)
40333 if (dfirst
.perm
[i
] == 0xff)
40334 dfirst
.perm
[i
] = i
;
40337 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
40340 ok
= expand_vec_perm_1 (&dfirst
);
40341 seq
= get_insns ();
40353 dsecond
.op0
= dfirst
.target
;
40354 dsecond
.op1
= dfirst
.target
;
40355 dsecond
.one_operand_p
= true;
40356 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
40357 for (i
= 0; i
< nelt
; i
++)
40358 dsecond
.perm
[i
] = i
^ nelt2
;
40360 ok
= expand_vec_perm_1 (&dsecond
);
40363 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
40364 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
40368 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
40369 permutation using two vperm2f128, followed by a vshufpd insn blending
40370 the two vectors together. */
40373 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
40375 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
40378 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
40388 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
40389 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
40390 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
40391 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
40392 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
40393 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
40394 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
40395 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
40396 dthird
.perm
[0] = (d
->perm
[0] % 2);
40397 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
40398 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
40399 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
40401 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
40402 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
40403 dthird
.op0
= dfirst
.target
;
40404 dthird
.op1
= dsecond
.target
;
40405 dthird
.one_operand_p
= false;
40407 canonicalize_perm (&dfirst
);
40408 canonicalize_perm (&dsecond
);
40410 ok
= expand_vec_perm_1 (&dfirst
)
40411 && expand_vec_perm_1 (&dsecond
)
40412 && expand_vec_perm_1 (&dthird
);
40419 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
40420 permutation with two pshufb insns and an ior. We should have already
40421 failed all two instruction sequences. */
40424 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
40426 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
40427 unsigned int i
, nelt
, eltsz
;
40429 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
40431 gcc_assert (!d
->one_operand_p
);
40434 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40436 /* Generate two permutation masks. If the required element is within
40437 the given vector it is shuffled into the proper lane. If the required
40438 element is in the other vector, force a zero into the lane by setting
40439 bit 7 in the permutation mask. */
40440 m128
= GEN_INT (-128);
40441 for (i
= 0; i
< nelt
; ++i
)
40443 unsigned j
, e
= d
->perm
[i
];
40444 unsigned which
= (e
>= nelt
);
40448 for (j
= 0; j
< eltsz
; ++j
)
40450 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
40451 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
40455 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
40456 vperm
= force_reg (V16QImode
, vperm
);
40458 l
= gen_reg_rtx (V16QImode
);
40459 op
= gen_lowpart (V16QImode
, d
->op0
);
40460 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
40462 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
40463 vperm
= force_reg (V16QImode
, vperm
);
40465 h
= gen_reg_rtx (V16QImode
);
40466 op
= gen_lowpart (V16QImode
, d
->op1
);
40467 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
40469 op
= gen_lowpart (V16QImode
, d
->target
);
40470 emit_insn (gen_iorv16qi3 (op
, l
, h
));
40475 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
40476 with two vpshufb insns, vpermq and vpor. We should have already failed
40477 all two or three instruction sequences. */
40480 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
40482 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
40483 unsigned int i
, nelt
, eltsz
;
40486 || !d
->one_operand_p
40487 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40494 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40496 /* Generate two permutation masks. If the required element is within
40497 the same lane, it is shuffled in. If the required element from the
40498 other lane, force a zero by setting bit 7 in the permutation mask.
40499 In the other mask the mask has non-negative elements if element
40500 is requested from the other lane, but also moved to the other lane,
40501 so that the result of vpshufb can have the two V2TImode halves
40503 m128
= GEN_INT (-128);
40504 for (i
= 0; i
< nelt
; ++i
)
40506 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40507 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40509 for (j
= 0; j
< eltsz
; ++j
)
40511 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
40512 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
40516 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
40517 vperm
= force_reg (V32QImode
, vperm
);
40519 h
= gen_reg_rtx (V32QImode
);
40520 op
= gen_lowpart (V32QImode
, d
->op0
);
40521 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
40523 /* Swap the 128-byte lanes of h into hp. */
40524 hp
= gen_reg_rtx (V4DImode
);
40525 op
= gen_lowpart (V4DImode
, h
);
40526 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
40529 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
40530 vperm
= force_reg (V32QImode
, vperm
);
40532 l
= gen_reg_rtx (V32QImode
);
40533 op
= gen_lowpart (V32QImode
, d
->op0
);
40534 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
40536 op
= gen_lowpart (V32QImode
, d
->target
);
40537 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
40542 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
40543 and extract-odd permutations of two V32QImode and V16QImode operand
40544 with two vpshufb insns, vpor and vpermq. We should have already
40545 failed all two or three instruction sequences. */
40548 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
40550 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
40551 unsigned int i
, nelt
, eltsz
;
40554 || d
->one_operand_p
40555 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40558 for (i
= 0; i
< d
->nelt
; ++i
)
40559 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
40566 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40568 /* Generate two permutation masks. In the first permutation mask
40569 the first quarter will contain indexes for the first half
40570 of the op0, the second quarter will contain bit 7 set, third quarter
40571 will contain indexes for the second half of the op0 and the
40572 last quarter bit 7 set. In the second permutation mask
40573 the first quarter will contain bit 7 set, the second quarter
40574 indexes for the first half of the op1, the third quarter bit 7 set
40575 and last quarter indexes for the second half of the op1.
40576 I.e. the first mask e.g. for V32QImode extract even will be:
40577 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
40578 (all values masked with 0xf except for -128) and second mask
40579 for extract even will be
40580 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
40581 m128
= GEN_INT (-128);
40582 for (i
= 0; i
< nelt
; ++i
)
40584 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40585 unsigned which
= d
->perm
[i
] >= nelt
;
40586 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
40588 for (j
= 0; j
< eltsz
; ++j
)
40590 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
40591 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
40595 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
40596 vperm
= force_reg (V32QImode
, vperm
);
40598 l
= gen_reg_rtx (V32QImode
);
40599 op
= gen_lowpart (V32QImode
, d
->op0
);
40600 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
40602 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
40603 vperm
= force_reg (V32QImode
, vperm
);
40605 h
= gen_reg_rtx (V32QImode
);
40606 op
= gen_lowpart (V32QImode
, d
->op1
);
40607 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
40609 ior
= gen_reg_rtx (V32QImode
);
40610 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
40612 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
40613 op
= gen_lowpart (V4DImode
, d
->target
);
40614 ior
= gen_lowpart (V4DImode
, ior
);
40615 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
40616 const1_rtx
, GEN_INT (3)));
40621 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
40622 and extract-odd permutations. */
40625 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
40632 t1
= gen_reg_rtx (V4DFmode
);
40633 t2
= gen_reg_rtx (V4DFmode
);
40635 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40636 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
40637 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
40639 /* Now an unpck[lh]pd will produce the result required. */
40641 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
40643 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
40649 int mask
= odd
? 0xdd : 0x88;
40651 t1
= gen_reg_rtx (V8SFmode
);
40652 t2
= gen_reg_rtx (V8SFmode
);
40653 t3
= gen_reg_rtx (V8SFmode
);
40655 /* Shuffle within the 128-bit lanes to produce:
40656 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
40657 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
40660 /* Shuffle the lanes around to produce:
40661 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
40662 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
40665 /* Shuffle within the 128-bit lanes to produce:
40666 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
40667 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
40669 /* Shuffle within the 128-bit lanes to produce:
40670 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
40671 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
40673 /* Shuffle the lanes around to produce:
40674 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
40675 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
40684 /* These are always directly implementable by expand_vec_perm_1. */
40685 gcc_unreachable ();
40689 return expand_vec_perm_pshufb2 (d
);
40692 /* We need 2*log2(N)-1 operations to achieve odd/even
40693 with interleave. */
40694 t1
= gen_reg_rtx (V8HImode
);
40695 t2
= gen_reg_rtx (V8HImode
);
40696 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
40697 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
40698 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
40699 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
40701 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
40703 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
40710 return expand_vec_perm_pshufb2 (d
);
40713 t1
= gen_reg_rtx (V16QImode
);
40714 t2
= gen_reg_rtx (V16QImode
);
40715 t3
= gen_reg_rtx (V16QImode
);
40716 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
40717 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
40718 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
40719 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
40720 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
40721 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
40723 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
40725 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
40732 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
40737 struct expand_vec_perm_d d_copy
= *d
;
40738 d_copy
.vmode
= V4DFmode
;
40739 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
40740 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
40741 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
40742 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40745 t1
= gen_reg_rtx (V4DImode
);
40746 t2
= gen_reg_rtx (V4DImode
);
40748 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40749 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
40750 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
40752 /* Now an vpunpck[lh]qdq will produce the result required. */
40754 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
40756 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
40763 struct expand_vec_perm_d d_copy
= *d
;
40764 d_copy
.vmode
= V8SFmode
;
40765 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
40766 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
40767 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
40768 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40771 t1
= gen_reg_rtx (V8SImode
);
40772 t2
= gen_reg_rtx (V8SImode
);
40774 /* Shuffle the lanes around into
40775 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
40776 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
40777 gen_lowpart (V4DImode
, d
->op0
),
40778 gen_lowpart (V4DImode
, d
->op1
),
40780 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
40781 gen_lowpart (V4DImode
, d
->op0
),
40782 gen_lowpart (V4DImode
, d
->op1
),
40785 /* Swap the 2nd and 3rd position in each lane into
40786 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
40787 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
40788 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40789 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
40790 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40792 /* Now an vpunpck[lh]qdq will produce
40793 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
40795 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
40796 gen_lowpart (V4DImode
, t1
),
40797 gen_lowpart (V4DImode
, t2
));
40799 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
40800 gen_lowpart (V4DImode
, t1
),
40801 gen_lowpart (V4DImode
, t2
));
40806 gcc_unreachable ();
40812 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40813 extract-even and extract-odd permutations. */
40816 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
40818 unsigned i
, odd
, nelt
= d
->nelt
;
40821 if (odd
!= 0 && odd
!= 1)
40824 for (i
= 1; i
< nelt
; ++i
)
40825 if (d
->perm
[i
] != 2 * i
+ odd
)
40828 return expand_vec_perm_even_odd_1 (d
, odd
);
40831 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
40832 permutations. We assume that expand_vec_perm_1 has already failed. */
40835 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
40837 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
40838 enum machine_mode vmode
= d
->vmode
;
40839 unsigned char perm2
[4];
40847 /* These are special-cased in sse.md so that we can optionally
40848 use the vbroadcast instruction. They expand to two insns
40849 if the input happens to be in a register. */
40850 gcc_unreachable ();
40856 /* These are always implementable using standard shuffle patterns. */
40857 gcc_unreachable ();
40861 /* These can be implemented via interleave. We save one insn by
40862 stopping once we have promoted to V4SImode and then use pshufd. */
40866 rtx (*gen
) (rtx
, rtx
, rtx
)
40867 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
40868 : gen_vec_interleave_lowv8hi
;
40872 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
40873 : gen_vec_interleave_highv8hi
;
40878 dest
= gen_reg_rtx (vmode
);
40879 emit_insn (gen (dest
, op0
, op0
));
40880 vmode
= get_mode_wider_vector (vmode
);
40881 op0
= gen_lowpart (vmode
, dest
);
40883 while (vmode
!= V4SImode
);
40885 memset (perm2
, elt
, 4);
40886 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
40895 /* For AVX2 broadcasts of the first element vpbroadcast* or
40896 vpermq should be used by expand_vec_perm_1. */
40897 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
40901 gcc_unreachable ();
40905 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40906 broadcast permutations. */
40909 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
40911 unsigned i
, elt
, nelt
= d
->nelt
;
40913 if (!d
->one_operand_p
)
40917 for (i
= 1; i
< nelt
; ++i
)
40918 if (d
->perm
[i
] != elt
)
40921 return expand_vec_perm_broadcast_1 (d
);
40924 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
40925 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
40926 all the shorter instruction sequences. */
40929 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
40931 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
40932 unsigned int i
, nelt
, eltsz
;
40936 || d
->one_operand_p
40937 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40944 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40946 /* Generate 4 permutation masks. If the required element is within
40947 the same lane, it is shuffled in. If the required element from the
40948 other lane, force a zero by setting bit 7 in the permutation mask.
40949 In the other mask the mask has non-negative elements if element
40950 is requested from the other lane, but also moved to the other lane,
40951 so that the result of vpshufb can have the two V2TImode halves
40953 m128
= GEN_INT (-128);
40954 for (i
= 0; i
< 32; ++i
)
40956 rperm
[0][i
] = m128
;
40957 rperm
[1][i
] = m128
;
40958 rperm
[2][i
] = m128
;
40959 rperm
[3][i
] = m128
;
40965 for (i
= 0; i
< nelt
; ++i
)
40967 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40968 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40969 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
40971 for (j
= 0; j
< eltsz
; ++j
)
40972 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
40973 used
[which
] = true;
40976 for (i
= 0; i
< 2; ++i
)
40978 if (!used
[2 * i
+ 1])
40983 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
40984 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
40985 vperm
= force_reg (V32QImode
, vperm
);
40986 h
[i
] = gen_reg_rtx (V32QImode
);
40987 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40988 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
40991 /* Swap the 128-byte lanes of h[X]. */
40992 for (i
= 0; i
< 2; ++i
)
40994 if (h
[i
] == NULL_RTX
)
40996 op
= gen_reg_rtx (V4DImode
);
40997 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
40998 const2_rtx
, GEN_INT (3), const0_rtx
,
41000 h
[i
] = gen_lowpart (V32QImode
, op
);
41003 for (i
= 0; i
< 2; ++i
)
41010 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
41011 vperm
= force_reg (V32QImode
, vperm
);
41012 l
[i
] = gen_reg_rtx (V32QImode
);
41013 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
41014 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
41017 for (i
= 0; i
< 2; ++i
)
41021 op
= gen_reg_rtx (V32QImode
);
41022 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
41029 gcc_assert (l
[0] && l
[1]);
41030 op
= gen_lowpart (V32QImode
, d
->target
);
41031 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
41035 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
41036 With all of the interface bits taken care of, perform the expansion
41037 in D and return true on success. */
41040 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
41042 /* Try a single instruction expansion. */
41043 if (expand_vec_perm_1 (d
))
41046 /* Try sequences of two instructions. */
41048 if (expand_vec_perm_pshuflw_pshufhw (d
))
41051 if (expand_vec_perm_palignr (d
))
41054 if (expand_vec_perm_interleave2 (d
))
41057 if (expand_vec_perm_broadcast (d
))
41060 if (expand_vec_perm_vpermq_perm_1 (d
))
41063 if (expand_vec_perm_vperm2f128 (d
))
41066 /* Try sequences of three instructions. */
41068 if (expand_vec_perm_2vperm2f128_vshuf (d
))
41071 if (expand_vec_perm_pshufb2 (d
))
41074 if (expand_vec_perm_interleave3 (d
))
41077 if (expand_vec_perm_vperm2f128_vblend (d
))
41080 /* Try sequences of four instructions. */
41082 if (expand_vec_perm_vpshufb2_vpermq (d
))
41085 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
41088 /* ??? Look for narrow permutations whose element orderings would
41089 allow the promotion to a wider mode. */
41091 /* ??? Look for sequences of interleave or a wider permute that place
41092 the data into the correct lanes for a half-vector shuffle like
41093 pshuf[lh]w or vpermilps. */
41095 /* ??? Look for sequences of interleave that produce the desired results.
41096 The combinatorics of punpck[lh] get pretty ugly... */
41098 if (expand_vec_perm_even_odd (d
))
41101 /* Even longer sequences. */
41102 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
41108 /* If a permutation only uses one operand, make it clear. Returns true
41109 if the permutation references both operands. */
41112 canonicalize_perm (struct expand_vec_perm_d
*d
)
41114 int i
, which
, nelt
= d
->nelt
;
41116 for (i
= which
= 0; i
< nelt
; ++i
)
41117 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
41119 d
->one_operand_p
= true;
41126 if (!rtx_equal_p (d
->op0
, d
->op1
))
41128 d
->one_operand_p
= false;
41131 /* The elements of PERM do not suggest that only the first operand
41132 is used, but both operands are identical. Allow easier matching
41133 of the permutation by folding the permutation into the single
41138 for (i
= 0; i
< nelt
; ++i
)
41139 d
->perm
[i
] &= nelt
- 1;
41148 return (which
== 3);
41152 ix86_expand_vec_perm_const (rtx operands
[4])
41154 struct expand_vec_perm_d d
;
41155 unsigned char perm
[MAX_VECT_LEN
];
41160 d
.target
= operands
[0];
41161 d
.op0
= operands
[1];
41162 d
.op1
= operands
[2];
41165 d
.vmode
= GET_MODE (d
.target
);
41166 gcc_assert (VECTOR_MODE_P (d
.vmode
));
41167 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41168 d
.testing_p
= false;
41170 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
41171 gcc_assert (XVECLEN (sel
, 0) == nelt
);
41172 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
41174 for (i
= 0; i
< nelt
; ++i
)
41176 rtx e
= XVECEXP (sel
, 0, i
);
41177 int ei
= INTVAL (e
) & (2 * nelt
- 1);
41182 two_args
= canonicalize_perm (&d
);
41184 if (ix86_expand_vec_perm_const_1 (&d
))
41187 /* If the selector says both arguments are needed, but the operands are the
41188 same, the above tried to expand with one_operand_p and flattened selector.
41189 If that didn't work, retry without one_operand_p; we succeeded with that
41191 if (two_args
&& d
.one_operand_p
)
41193 d
.one_operand_p
= false;
41194 memcpy (d
.perm
, perm
, sizeof (perm
));
41195 return ix86_expand_vec_perm_const_1 (&d
);
41201 /* Implement targetm.vectorize.vec_perm_const_ok. */
41204 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
41205 const unsigned char *sel
)
41207 struct expand_vec_perm_d d
;
41208 unsigned int i
, nelt
, which
;
41212 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41213 d
.testing_p
= true;
41215 /* Given sufficient ISA support we can just return true here
41216 for selected vector modes. */
41217 if (GET_MODE_SIZE (d
.vmode
) == 16)
41219 /* All implementable with a single vpperm insn. */
41222 /* All implementable with 2 pshufb + 1 ior. */
41225 /* All implementable with shufpd or unpck[lh]pd. */
41230 /* Extract the values from the vector CST into the permutation
41232 memcpy (d
.perm
, sel
, nelt
);
41233 for (i
= which
= 0; i
< nelt
; ++i
)
41235 unsigned char e
= d
.perm
[i
];
41236 gcc_assert (e
< 2 * nelt
);
41237 which
|= (e
< nelt
? 1 : 2);
41240 /* For all elements from second vector, fold the elements to first. */
41242 for (i
= 0; i
< nelt
; ++i
)
41245 /* Check whether the mask can be applied to the vector type. */
41246 d
.one_operand_p
= (which
!= 3);
41248 /* Implementable with shufps or pshufd. */
41249 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
41252 /* Otherwise we have to go through the motions and see if we can
41253 figure out how to generate the requested permutation. */
41254 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
41255 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
41256 if (!d
.one_operand_p
)
41257 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
41260 ret
= ix86_expand_vec_perm_const_1 (&d
);
41267 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
41269 struct expand_vec_perm_d d
;
41275 d
.vmode
= GET_MODE (targ
);
41276 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41277 d
.one_operand_p
= false;
41278 d
.testing_p
= false;
41280 for (i
= 0; i
< nelt
; ++i
)
41281 d
.perm
[i
] = i
* 2 + odd
;
41283 /* We'll either be able to implement the permutation directly... */
41284 if (expand_vec_perm_1 (&d
))
41287 /* ... or we use the special-case patterns. */
41288 expand_vec_perm_even_odd_1 (&d
, odd
);
41292 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
41294 struct expand_vec_perm_d d
;
41295 unsigned i
, nelt
, base
;
41301 d
.vmode
= GET_MODE (targ
);
41302 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41303 d
.one_operand_p
= false;
41304 d
.testing_p
= false;
41306 base
= high_p
? nelt
/ 2 : 0;
41307 for (i
= 0; i
< nelt
/ 2; ++i
)
41309 d
.perm
[i
* 2] = i
+ base
;
41310 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
41313 /* Note that for AVX this isn't one instruction. */
41314 ok
= ix86_expand_vec_perm_const_1 (&d
);
41319 /* Expand a vector operation CODE for a V*QImode in terms of the
41320 same operation on V*HImode. */
41323 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
41325 enum machine_mode qimode
= GET_MODE (dest
);
41326 enum machine_mode himode
;
41327 rtx (*gen_il
) (rtx
, rtx
, rtx
);
41328 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
41329 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
41330 struct expand_vec_perm_d d
;
41331 bool ok
, full_interleave
;
41332 bool uns_p
= false;
41339 gen_il
= gen_vec_interleave_lowv16qi
;
41340 gen_ih
= gen_vec_interleave_highv16qi
;
41343 himode
= V16HImode
;
41344 gen_il
= gen_avx2_interleave_lowv32qi
;
41345 gen_ih
= gen_avx2_interleave_highv32qi
;
41348 gcc_unreachable ();
41351 op2_l
= op2_h
= op2
;
41355 /* Unpack data such that we've got a source byte in each low byte of
41356 each word. We don't care what goes into the high byte of each word.
41357 Rather than trying to get zero in there, most convenient is to let
41358 it be a copy of the low byte. */
41359 op2_l
= gen_reg_rtx (qimode
);
41360 op2_h
= gen_reg_rtx (qimode
);
41361 emit_insn (gen_il (op2_l
, op2
, op2
));
41362 emit_insn (gen_ih (op2_h
, op2
, op2
));
41365 op1_l
= gen_reg_rtx (qimode
);
41366 op1_h
= gen_reg_rtx (qimode
);
41367 emit_insn (gen_il (op1_l
, op1
, op1
));
41368 emit_insn (gen_ih (op1_h
, op1
, op1
));
41369 full_interleave
= qimode
== V16QImode
;
41377 op1_l
= gen_reg_rtx (himode
);
41378 op1_h
= gen_reg_rtx (himode
);
41379 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
41380 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
41381 full_interleave
= true;
41384 gcc_unreachable ();
41387 /* Perform the operation. */
41388 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
41390 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
41392 gcc_assert (res_l
&& res_h
);
41394 /* Merge the data back into the right place. */
41396 d
.op0
= gen_lowpart (qimode
, res_l
);
41397 d
.op1
= gen_lowpart (qimode
, res_h
);
41399 d
.nelt
= GET_MODE_NUNITS (qimode
);
41400 d
.one_operand_p
= false;
41401 d
.testing_p
= false;
41403 if (full_interleave
)
41405 /* For SSE2, we used an full interleave, so the desired
41406 results are in the even elements. */
41407 for (i
= 0; i
< 32; ++i
)
41412 /* For AVX, the interleave used above was not cross-lane. So the
41413 extraction is evens but with the second and third quarter swapped.
41414 Happily, that is even one insn shorter than even extraction. */
41415 for (i
= 0; i
< 32; ++i
)
41416 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
41419 ok
= ix86_expand_vec_perm_const_1 (&d
);
41422 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
41423 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
41426 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
41427 if op is CONST_VECTOR with all odd elements equal to their
41428 preceding element. */
41431 const_vector_equal_evenodd_p (rtx op
)
41433 enum machine_mode mode
= GET_MODE (op
);
41434 int i
, nunits
= GET_MODE_NUNITS (mode
);
41435 if (GET_CODE (op
) != CONST_VECTOR
41436 || nunits
!= CONST_VECTOR_NUNITS (op
))
41438 for (i
= 0; i
< nunits
; i
+= 2)
41439 if (CONST_VECTOR_ELT (op
, i
) != CONST_VECTOR_ELT (op
, i
+ 1))
41445 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
41446 bool uns_p
, bool odd_p
)
41448 enum machine_mode mode
= GET_MODE (op1
);
41449 enum machine_mode wmode
= GET_MODE (dest
);
41451 rtx orig_op1
= op1
, orig_op2
= op2
;
41453 if (!nonimmediate_operand (op1
, mode
))
41454 op1
= force_reg (mode
, op1
);
41455 if (!nonimmediate_operand (op2
, mode
))
41456 op2
= force_reg (mode
, op2
);
41458 /* We only play even/odd games with vectors of SImode. */
41459 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
41461 /* If we're looking for the odd results, shift those members down to
41462 the even slots. For some cpus this is faster than a PSHUFD. */
41465 /* For XOP use vpmacsdqh, but only for smult, as it is only
41467 if (TARGET_XOP
&& mode
== V4SImode
&& !uns_p
)
41469 x
= force_reg (wmode
, CONST0_RTX (wmode
));
41470 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
41474 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
41475 if (!const_vector_equal_evenodd_p (orig_op1
))
41476 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
41477 x
, NULL
, 1, OPTAB_DIRECT
);
41478 if (!const_vector_equal_evenodd_p (orig_op2
))
41479 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
41480 x
, NULL
, 1, OPTAB_DIRECT
);
41481 op1
= gen_lowpart (mode
, op1
);
41482 op2
= gen_lowpart (mode
, op2
);
41485 if (mode
== V8SImode
)
41488 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
41490 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
41493 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
41494 else if (TARGET_SSE4_1
)
41495 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
41498 rtx s1
, s2
, t0
, t1
, t2
;
41500 /* The easiest way to implement this without PMULDQ is to go through
41501 the motions as if we are performing a full 64-bit multiply. With
41502 the exception that we need to do less shuffling of the elements. */
41504 /* Compute the sign-extension, aka highparts, of the two operands. */
41505 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
41506 op1
, pc_rtx
, pc_rtx
);
41507 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
41508 op2
, pc_rtx
, pc_rtx
);
41510 /* Multiply LO(A) * HI(B), and vice-versa. */
41511 t1
= gen_reg_rtx (wmode
);
41512 t2
= gen_reg_rtx (wmode
);
41513 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
41514 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
41516 /* Multiply LO(A) * LO(B). */
41517 t0
= gen_reg_rtx (wmode
);
41518 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
41520 /* Combine and shift the highparts into place. */
41521 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
41522 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
41525 /* Combine high and low parts. */
41526 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
41533 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
41534 bool uns_p
, bool high_p
)
41536 enum machine_mode wmode
= GET_MODE (dest
);
41537 enum machine_mode mode
= GET_MODE (op1
);
41538 rtx t1
, t2
, t3
, t4
, mask
;
41543 t1
= gen_reg_rtx (mode
);
41544 t2
= gen_reg_rtx (mode
);
41545 if (TARGET_XOP
&& !uns_p
)
41547 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
41548 shuffle the elements once so that all elements are in the right
41549 place for immediate use: { A C B D }. */
41550 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
41551 const1_rtx
, GEN_INT (3)));
41552 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
41553 const1_rtx
, GEN_INT (3)));
41557 /* Put the elements into place for the multiply. */
41558 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
41559 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
41562 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
41566 /* Shuffle the elements between the lanes. After this we
41567 have { A B E F | C D G H } for each operand. */
41568 t1
= gen_reg_rtx (V4DImode
);
41569 t2
= gen_reg_rtx (V4DImode
);
41570 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
41571 const0_rtx
, const2_rtx
,
41572 const1_rtx
, GEN_INT (3)));
41573 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
41574 const0_rtx
, const2_rtx
,
41575 const1_rtx
, GEN_INT (3)));
41577 /* Shuffle the elements within the lanes. After this we
41578 have { A A B B | C C D D } or { E E F F | G G H H }. */
41579 t3
= gen_reg_rtx (V8SImode
);
41580 t4
= gen_reg_rtx (V8SImode
);
41581 mask
= GEN_INT (high_p
41582 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
41583 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
41584 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
41585 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
41587 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
41592 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
41593 uns_p
, OPTAB_DIRECT
);
41594 t2
= expand_binop (mode
,
41595 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
41596 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
41597 gcc_assert (t1
&& t2
);
41599 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
41604 t1
= gen_reg_rtx (wmode
);
41605 t2
= gen_reg_rtx (wmode
);
41606 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
41607 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
41609 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
41613 gcc_unreachable ();
41618 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
41622 res_1
= gen_reg_rtx (V4SImode
);
41623 res_2
= gen_reg_rtx (V4SImode
);
41624 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
41625 op1
, op2
, true, false);
41626 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
41627 op1
, op2
, true, true);
41629 /* Move the results in element 2 down to element 1; we don't care
41630 what goes in elements 2 and 3. Then we can merge the parts
41631 back together with an interleave.
41633 Note that two other sequences were tried:
41634 (1) Use interleaves at the start instead of psrldq, which allows
41635 us to use a single shufps to merge things back at the end.
41636 (2) Use shufps here to combine the two vectors, then pshufd to
41637 put the elements in the correct order.
41638 In both cases the cost of the reformatting stall was too high
41639 and the overall sequence slower. */
41641 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
41642 const0_rtx
, const0_rtx
));
41643 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
41644 const0_rtx
, const0_rtx
));
41645 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
41647 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
41651 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
41653 enum machine_mode mode
= GET_MODE (op0
);
41654 rtx t1
, t2
, t3
, t4
, t5
, t6
;
41656 if (TARGET_XOP
&& mode
== V2DImode
)
41658 /* op1: A,B,C,D, op2: E,F,G,H */
41659 op1
= gen_lowpart (V4SImode
, op1
);
41660 op2
= gen_lowpart (V4SImode
, op2
);
41662 t1
= gen_reg_rtx (V4SImode
);
41663 t2
= gen_reg_rtx (V4SImode
);
41664 t3
= gen_reg_rtx (V2DImode
);
41665 t4
= gen_reg_rtx (V2DImode
);
41668 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
41674 /* t2: (B*E),(A*F),(D*G),(C*H) */
41675 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
41677 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
41678 emit_insn (gen_xop_phadddq (t3
, t2
));
41680 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
41681 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
41683 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
41684 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
41688 enum machine_mode nmode
;
41689 rtx (*umul
) (rtx
, rtx
, rtx
);
41691 if (mode
== V2DImode
)
41693 umul
= gen_vec_widen_umult_even_v4si
;
41696 else if (mode
== V4DImode
)
41698 umul
= gen_vec_widen_umult_even_v8si
;
41702 gcc_unreachable ();
41705 /* Multiply low parts. */
41706 t1
= gen_reg_rtx (mode
);
41707 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
41709 /* Shift input vectors right 32 bits so we can multiply high parts. */
41711 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
41712 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
41714 /* Multiply high parts by low parts. */
41715 t4
= gen_reg_rtx (mode
);
41716 t5
= gen_reg_rtx (mode
);
41717 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
41718 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
41720 /* Combine and shift the highparts back. */
41721 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
41722 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
41724 /* Combine high and low parts. */
41725 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
41728 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
41729 gen_rtx_MULT (mode
, op1
, op2
));
41732 /* Expand an insert into a vector register through pinsr insn.
41733 Return true if successful. */
41736 ix86_expand_pinsr (rtx
*operands
)
41738 rtx dst
= operands
[0];
41739 rtx src
= operands
[3];
41741 unsigned int size
= INTVAL (operands
[1]);
41742 unsigned int pos
= INTVAL (operands
[2]);
41744 if (GET_CODE (dst
) == SUBREG
)
41746 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
41747 dst
= SUBREG_REG (dst
);
41750 if (GET_CODE (src
) == SUBREG
)
41751 src
= SUBREG_REG (src
);
41753 switch (GET_MODE (dst
))
41760 enum machine_mode srcmode
, dstmode
;
41761 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
41763 srcmode
= mode_for_size (size
, MODE_INT
, 0);
41768 if (!TARGET_SSE4_1
)
41770 dstmode
= V16QImode
;
41771 pinsr
= gen_sse4_1_pinsrb
;
41777 dstmode
= V8HImode
;
41778 pinsr
= gen_sse2_pinsrw
;
41782 if (!TARGET_SSE4_1
)
41784 dstmode
= V4SImode
;
41785 pinsr
= gen_sse4_1_pinsrd
;
41789 gcc_assert (TARGET_64BIT
);
41790 if (!TARGET_SSE4_1
)
41792 dstmode
= V2DImode
;
41793 pinsr
= gen_sse4_1_pinsrq
;
41800 dst
= gen_lowpart (dstmode
, dst
);
41801 src
= gen_lowpart (srcmode
, src
);
41805 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
41814 /* This function returns the calling abi specific va_list type node.
41815 It returns the FNDECL specific va_list type. */
41818 ix86_fn_abi_va_list (tree fndecl
)
41821 return va_list_type_node
;
41822 gcc_assert (fndecl
!= NULL_TREE
);
41824 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
41825 return ms_va_list_type_node
;
41827 return sysv_va_list_type_node
;
41830 /* Returns the canonical va_list type specified by TYPE. If there
41831 is no valid TYPE provided, it return NULL_TREE. */
41834 ix86_canonical_va_list_type (tree type
)
41838 /* Resolve references and pointers to va_list type. */
41839 if (TREE_CODE (type
) == MEM_REF
)
41840 type
= TREE_TYPE (type
);
41841 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
41842 type
= TREE_TYPE (type
);
41843 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
41844 type
= TREE_TYPE (type
);
41846 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
41848 wtype
= va_list_type_node
;
41849 gcc_assert (wtype
!= NULL_TREE
);
41851 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41853 /* If va_list is an array type, the argument may have decayed
41854 to a pointer type, e.g. by being passed to another function.
41855 In that case, unwrap both types so that we can compare the
41856 underlying records. */
41857 if (TREE_CODE (htype
) == ARRAY_TYPE
41858 || POINTER_TYPE_P (htype
))
41860 wtype
= TREE_TYPE (wtype
);
41861 htype
= TREE_TYPE (htype
);
41864 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41865 return va_list_type_node
;
41866 wtype
= sysv_va_list_type_node
;
41867 gcc_assert (wtype
!= NULL_TREE
);
41869 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41871 /* If va_list is an array type, the argument may have decayed
41872 to a pointer type, e.g. by being passed to another function.
41873 In that case, unwrap both types so that we can compare the
41874 underlying records. */
41875 if (TREE_CODE (htype
) == ARRAY_TYPE
41876 || POINTER_TYPE_P (htype
))
41878 wtype
= TREE_TYPE (wtype
);
41879 htype
= TREE_TYPE (htype
);
41882 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41883 return sysv_va_list_type_node
;
41884 wtype
= ms_va_list_type_node
;
41885 gcc_assert (wtype
!= NULL_TREE
);
41887 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41889 /* If va_list is an array type, the argument may have decayed
41890 to a pointer type, e.g. by being passed to another function.
41891 In that case, unwrap both types so that we can compare the
41892 underlying records. */
41893 if (TREE_CODE (htype
) == ARRAY_TYPE
41894 || POINTER_TYPE_P (htype
))
41896 wtype
= TREE_TYPE (wtype
);
41897 htype
= TREE_TYPE (htype
);
41900 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41901 return ms_va_list_type_node
;
41904 return std_canonical_va_list_type (type
);
41907 /* Iterate through the target-specific builtin types for va_list.
41908 IDX denotes the iterator, *PTREE is set to the result type of
41909 the va_list builtin, and *PNAME to its internal type.
41910 Returns zero if there is no element for this index, otherwise
41911 IDX should be increased upon the next call.
41912 Note, do not iterate a base builtin's name like __builtin_va_list.
41913 Used from c_common_nodes_and_builtins. */
41916 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
41926 *ptree
= ms_va_list_type_node
;
41927 *pname
= "__builtin_ms_va_list";
41931 *ptree
= sysv_va_list_type_node
;
41932 *pname
= "__builtin_sysv_va_list";
41940 #undef TARGET_SCHED_DISPATCH
41941 #define TARGET_SCHED_DISPATCH has_dispatch
41942 #undef TARGET_SCHED_DISPATCH_DO
41943 #define TARGET_SCHED_DISPATCH_DO do_dispatch
41944 #undef TARGET_SCHED_REASSOCIATION_WIDTH
41945 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
41946 #undef TARGET_SCHED_REORDER
41947 #define TARGET_SCHED_REORDER ix86_sched_reorder
41948 #undef TARGET_SCHED_ADJUST_PRIORITY
41949 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
41950 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
41951 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
41952 ix86_dependencies_evaluation_hook
41954 /* The size of the dispatch window is the total number of bytes of
41955 object code allowed in a window. */
41956 #define DISPATCH_WINDOW_SIZE 16
41958 /* Number of dispatch windows considered for scheduling. */
41959 #define MAX_DISPATCH_WINDOWS 3
41961 /* Maximum number of instructions in a window. */
41964 /* Maximum number of immediate operands in a window. */
41967 /* Maximum number of immediate bits allowed in a window. */
41968 #define MAX_IMM_SIZE 128
41970 /* Maximum number of 32 bit immediates allowed in a window. */
41971 #define MAX_IMM_32 4
41973 /* Maximum number of 64 bit immediates allowed in a window. */
41974 #define MAX_IMM_64 2
41976 /* Maximum total of loads or prefetches allowed in a window. */
41979 /* Maximum total of stores allowed in a window. */
41980 #define MAX_STORE 1
41986 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
41987 enum dispatch_group
{
42002 /* Number of allowable groups in a dispatch window. It is an array
42003 indexed by dispatch_group enum. 100 is used as a big number,
42004 because the number of these kind of operations does not have any
42005 effect in dispatch window, but we need them for other reasons in
42007 static unsigned int num_allowable_groups
[disp_last
] = {
42008 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
42011 char group_name
[disp_last
+ 1][16] = {
42012 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
42013 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
42014 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
42017 /* Instruction path. */
42020 path_single
, /* Single micro op. */
42021 path_double
, /* Double micro op. */
42022 path_multi
, /* Instructions with more than 2 micro op.. */
42026 /* sched_insn_info defines a window to the instructions scheduled in
42027 the basic block. It contains a pointer to the insn_info table and
42028 the instruction scheduled.
42030 Windows are allocated for each basic block and are linked
42032 typedef struct sched_insn_info_s
{
42034 enum dispatch_group group
;
42035 enum insn_path path
;
42040 /* Linked list of dispatch windows. This is a two way list of
42041 dispatch windows of a basic block. It contains information about
42042 the number of uops in the window and the total number of
42043 instructions and of bytes in the object code for this dispatch
42045 typedef struct dispatch_windows_s
{
42046 int num_insn
; /* Number of insn in the window. */
42047 int num_uops
; /* Number of uops in the window. */
42048 int window_size
; /* Number of bytes in the window. */
42049 int window_num
; /* Window number between 0 or 1. */
42050 int num_imm
; /* Number of immediates in an insn. */
42051 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
42052 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
42053 int imm_size
; /* Total immediates in the window. */
42054 int num_loads
; /* Total memory loads in the window. */
42055 int num_stores
; /* Total memory stores in the window. */
42056 int violation
; /* Violation exists in window. */
42057 sched_insn_info
*window
; /* Pointer to the window. */
42058 struct dispatch_windows_s
*next
;
42059 struct dispatch_windows_s
*prev
;
42060 } dispatch_windows
;
42062 /* Immediate values used in an insn. */
42063 typedef struct imm_info_s
42070 static dispatch_windows
*dispatch_window_list
;
42071 static dispatch_windows
*dispatch_window_list1
;
42073 /* Get dispatch group of insn. */
42075 static enum dispatch_group
42076 get_mem_group (rtx insn
)
42078 enum attr_memory memory
;
42080 if (INSN_CODE (insn
) < 0)
42081 return disp_no_group
;
42082 memory
= get_attr_memory (insn
);
42083 if (memory
== MEMORY_STORE
)
42086 if (memory
== MEMORY_LOAD
)
42089 if (memory
== MEMORY_BOTH
)
42090 return disp_load_store
;
42092 return disp_no_group
;
42095 /* Return true if insn is a compare instruction. */
42100 enum attr_type type
;
42102 type
= get_attr_type (insn
);
42103 return (type
== TYPE_TEST
42104 || type
== TYPE_ICMP
42105 || type
== TYPE_FCMP
42106 || GET_CODE (PATTERN (insn
)) == COMPARE
);
42109 /* Return true if a dispatch violation encountered. */
42112 dispatch_violation (void)
42114 if (dispatch_window_list
->next
)
42115 return dispatch_window_list
->next
->violation
;
42116 return dispatch_window_list
->violation
;
42119 /* Return true if insn is a branch instruction. */
42122 is_branch (rtx insn
)
42124 return (CALL_P (insn
) || JUMP_P (insn
));
42127 /* Return true if insn is a prefetch instruction. */
42130 is_prefetch (rtx insn
)
42132 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
42135 /* This function initializes a dispatch window and the list container holding a
42136 pointer to the window. */
42139 init_window (int window_num
)
42142 dispatch_windows
*new_list
;
42144 if (window_num
== 0)
42145 new_list
= dispatch_window_list
;
42147 new_list
= dispatch_window_list1
;
42149 new_list
->num_insn
= 0;
42150 new_list
->num_uops
= 0;
42151 new_list
->window_size
= 0;
42152 new_list
->next
= NULL
;
42153 new_list
->prev
= NULL
;
42154 new_list
->window_num
= window_num
;
42155 new_list
->num_imm
= 0;
42156 new_list
->num_imm_32
= 0;
42157 new_list
->num_imm_64
= 0;
42158 new_list
->imm_size
= 0;
42159 new_list
->num_loads
= 0;
42160 new_list
->num_stores
= 0;
42161 new_list
->violation
= false;
42163 for (i
= 0; i
< MAX_INSN
; i
++)
42165 new_list
->window
[i
].insn
= NULL
;
42166 new_list
->window
[i
].group
= disp_no_group
;
42167 new_list
->window
[i
].path
= no_path
;
42168 new_list
->window
[i
].byte_len
= 0;
42169 new_list
->window
[i
].imm_bytes
= 0;
42174 /* This function allocates and initializes a dispatch window and the
42175 list container holding a pointer to the window. */
42177 static dispatch_windows
*
42178 allocate_window (void)
42180 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
42181 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
42186 /* This routine initializes the dispatch scheduling information. It
42187 initiates building dispatch scheduler tables and constructs the
42188 first dispatch window. */
42191 init_dispatch_sched (void)
42193 /* Allocate a dispatch list and a window. */
42194 dispatch_window_list
= allocate_window ();
42195 dispatch_window_list1
= allocate_window ();
42200 /* This function returns true if a branch is detected. End of a basic block
42201 does not have to be a branch, but here we assume only branches end a
42205 is_end_basic_block (enum dispatch_group group
)
42207 return group
== disp_branch
;
42210 /* This function is called when the end of a window processing is reached. */
42213 process_end_window (void)
42215 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
42216 if (dispatch_window_list
->next
)
42218 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
42219 gcc_assert (dispatch_window_list
->window_size
42220 + dispatch_window_list1
->window_size
<= 48);
42226 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
42227 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
42228 for 48 bytes of instructions. Note that these windows are not dispatch
42229 windows that their sizes are DISPATCH_WINDOW_SIZE. */
42231 static dispatch_windows
*
42232 allocate_next_window (int window_num
)
42234 if (window_num
== 0)
42236 if (dispatch_window_list
->next
)
42239 return dispatch_window_list
;
42242 dispatch_window_list
->next
= dispatch_window_list1
;
42243 dispatch_window_list1
->prev
= dispatch_window_list
;
42245 return dispatch_window_list1
;
42248 /* Increment the number of immediate operands of an instruction. */
42251 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
42256 switch ( GET_CODE (*in_rtx
))
42261 (imm_values
->imm
)++;
42262 if (x86_64_immediate_operand (*in_rtx
, SImode
))
42263 (imm_values
->imm32
)++;
42265 (imm_values
->imm64
)++;
42269 (imm_values
->imm
)++;
42270 (imm_values
->imm64
)++;
42274 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
42276 (imm_values
->imm
)++;
42277 (imm_values
->imm32
)++;
42288 /* Compute number of immediate operands of an instruction. */
42291 find_constant (rtx in_rtx
, imm_info
*imm_values
)
42293 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
42294 (rtx_function
) find_constant_1
, (void *) imm_values
);
42297 /* Return total size of immediate operands of an instruction along with number
42298 of corresponding immediate-operands. It initializes its parameters to zero
42299 befor calling FIND_CONSTANT.
42300 INSN is the input instruction. IMM is the total of immediates.
42301 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
42305 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
42307 imm_info imm_values
= {0, 0, 0};
42309 find_constant (insn
, &imm_values
);
42310 *imm
= imm_values
.imm
;
42311 *imm32
= imm_values
.imm32
;
42312 *imm64
= imm_values
.imm64
;
42313 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
42316 /* This function indicates if an operand of an instruction is an
42320 has_immediate (rtx insn
)
42322 int num_imm_operand
;
42323 int num_imm32_operand
;
42324 int num_imm64_operand
;
42327 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42328 &num_imm64_operand
);
42332 /* Return single or double path for instructions. */
42334 static enum insn_path
42335 get_insn_path (rtx insn
)
42337 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
42339 if ((int)path
== 0)
42340 return path_single
;
42342 if ((int)path
== 1)
42343 return path_double
;
42348 /* Return insn dispatch group. */
42350 static enum dispatch_group
42351 get_insn_group (rtx insn
)
42353 enum dispatch_group group
= get_mem_group (insn
);
42357 if (is_branch (insn
))
42358 return disp_branch
;
42363 if (has_immediate (insn
))
42366 if (is_prefetch (insn
))
42367 return disp_prefetch
;
42369 return disp_no_group
;
42372 /* Count number of GROUP restricted instructions in a dispatch
42373 window WINDOW_LIST. */
42376 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
42378 enum dispatch_group group
= get_insn_group (insn
);
42380 int num_imm_operand
;
42381 int num_imm32_operand
;
42382 int num_imm64_operand
;
42384 if (group
== disp_no_group
)
42387 if (group
== disp_imm
)
42389 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42390 &num_imm64_operand
);
42391 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
42392 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
42393 || (num_imm32_operand
> 0
42394 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
42395 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
42396 || (num_imm64_operand
> 0
42397 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
42398 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
42399 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
42400 && num_imm64_operand
> 0
42401 && ((window_list
->num_imm_64
> 0
42402 && window_list
->num_insn
>= 2)
42403 || window_list
->num_insn
>= 3)))
42409 if ((group
== disp_load_store
42410 && (window_list
->num_loads
>= MAX_LOAD
42411 || window_list
->num_stores
>= MAX_STORE
))
42412 || ((group
== disp_load
42413 || group
== disp_prefetch
)
42414 && window_list
->num_loads
>= MAX_LOAD
)
42415 || (group
== disp_store
42416 && window_list
->num_stores
>= MAX_STORE
))
42422 /* This function returns true if insn satisfies dispatch rules on the
42423 last window scheduled. */
42426 fits_dispatch_window (rtx insn
)
42428 dispatch_windows
*window_list
= dispatch_window_list
;
42429 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
42430 unsigned int num_restrict
;
42431 enum dispatch_group group
= get_insn_group (insn
);
42432 enum insn_path path
= get_insn_path (insn
);
42435 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
42436 instructions should be given the lowest priority in the
42437 scheduling process in Haifa scheduler to make sure they will be
42438 scheduled in the same dispatch window as the reference to them. */
42439 if (group
== disp_jcc
|| group
== disp_cmp
)
42442 /* Check nonrestricted. */
42443 if (group
== disp_no_group
|| group
== disp_branch
)
42446 /* Get last dispatch window. */
42447 if (window_list_next
)
42448 window_list
= window_list_next
;
42450 if (window_list
->window_num
== 1)
42452 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
42455 || (min_insn_size (insn
) + sum
) >= 48)
42456 /* Window 1 is full. Go for next window. */
42460 num_restrict
= count_num_restricted (insn
, window_list
);
42462 if (num_restrict
> num_allowable_groups
[group
])
42465 /* See if it fits in the first window. */
42466 if (window_list
->window_num
== 0)
42468 /* The first widow should have only single and double path
42470 if (path
== path_double
42471 && (window_list
->num_uops
+ 2) > MAX_INSN
)
42473 else if (path
!= path_single
)
42479 /* Add an instruction INSN with NUM_UOPS micro-operations to the
42480 dispatch window WINDOW_LIST. */
42483 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
42485 int byte_len
= min_insn_size (insn
);
42486 int num_insn
= window_list
->num_insn
;
42488 sched_insn_info
*window
= window_list
->window
;
42489 enum dispatch_group group
= get_insn_group (insn
);
42490 enum insn_path path
= get_insn_path (insn
);
42491 int num_imm_operand
;
42492 int num_imm32_operand
;
42493 int num_imm64_operand
;
42495 if (!window_list
->violation
&& group
!= disp_cmp
42496 && !fits_dispatch_window (insn
))
42497 window_list
->violation
= true;
42499 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42500 &num_imm64_operand
);
42502 /* Initialize window with new instruction. */
42503 window
[num_insn
].insn
= insn
;
42504 window
[num_insn
].byte_len
= byte_len
;
42505 window
[num_insn
].group
= group
;
42506 window
[num_insn
].path
= path
;
42507 window
[num_insn
].imm_bytes
= imm_size
;
42509 window_list
->window_size
+= byte_len
;
42510 window_list
->num_insn
= num_insn
+ 1;
42511 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
42512 window_list
->imm_size
+= imm_size
;
42513 window_list
->num_imm
+= num_imm_operand
;
42514 window_list
->num_imm_32
+= num_imm32_operand
;
42515 window_list
->num_imm_64
+= num_imm64_operand
;
42517 if (group
== disp_store
)
42518 window_list
->num_stores
+= 1;
42519 else if (group
== disp_load
42520 || group
== disp_prefetch
)
42521 window_list
->num_loads
+= 1;
42522 else if (group
== disp_load_store
)
42524 window_list
->num_stores
+= 1;
42525 window_list
->num_loads
+= 1;
42529 /* Adds a scheduled instruction, INSN, to the current dispatch window.
42530 If the total bytes of instructions or the number of instructions in
42531 the window exceed allowable, it allocates a new window. */
42534 add_to_dispatch_window (rtx insn
)
42537 dispatch_windows
*window_list
;
42538 dispatch_windows
*next_list
;
42539 dispatch_windows
*window0_list
;
42540 enum insn_path path
;
42541 enum dispatch_group insn_group
;
42549 if (INSN_CODE (insn
) < 0)
42552 byte_len
= min_insn_size (insn
);
42553 window_list
= dispatch_window_list
;
42554 next_list
= window_list
->next
;
42555 path
= get_insn_path (insn
);
42556 insn_group
= get_insn_group (insn
);
42558 /* Get the last dispatch window. */
42560 window_list
= dispatch_window_list
->next
;
42562 if (path
== path_single
)
42564 else if (path
== path_double
)
42567 insn_num_uops
= (int) path
;
42569 /* If current window is full, get a new window.
42570 Window number zero is full, if MAX_INSN uops are scheduled in it.
42571 Window number one is full, if window zero's bytes plus window
42572 one's bytes is 32, or if the bytes of the new instruction added
42573 to the total makes it greater than 48, or it has already MAX_INSN
42574 instructions in it. */
42575 num_insn
= window_list
->num_insn
;
42576 num_uops
= window_list
->num_uops
;
42577 window_num
= window_list
->window_num
;
42578 insn_fits
= fits_dispatch_window (insn
);
42580 if (num_insn
>= MAX_INSN
42581 || num_uops
+ insn_num_uops
> MAX_INSN
42584 window_num
= ~window_num
& 1;
42585 window_list
= allocate_next_window (window_num
);
42588 if (window_num
== 0)
42590 add_insn_window (insn
, window_list
, insn_num_uops
);
42591 if (window_list
->num_insn
>= MAX_INSN
42592 && insn_group
== disp_branch
)
42594 process_end_window ();
42598 else if (window_num
== 1)
42600 window0_list
= window_list
->prev
;
42601 sum
= window0_list
->window_size
+ window_list
->window_size
;
42603 || (byte_len
+ sum
) >= 48)
42605 process_end_window ();
42606 window_list
= dispatch_window_list
;
42609 add_insn_window (insn
, window_list
, insn_num_uops
);
42612 gcc_unreachable ();
42614 if (is_end_basic_block (insn_group
))
42616 /* End of basic block is reached do end-basic-block process. */
42617 process_end_window ();
42622 /* Print the dispatch window, WINDOW_NUM, to FILE. */
42624 DEBUG_FUNCTION
static void
42625 debug_dispatch_window_file (FILE *file
, int window_num
)
42627 dispatch_windows
*list
;
42630 if (window_num
== 0)
42631 list
= dispatch_window_list
;
42633 list
= dispatch_window_list1
;
42635 fprintf (file
, "Window #%d:\n", list
->window_num
);
42636 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
42637 list
->num_insn
, list
->num_uops
, list
->window_size
);
42638 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
42639 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
42641 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
42643 fprintf (file
, " insn info:\n");
42645 for (i
= 0; i
< MAX_INSN
; i
++)
42647 if (!list
->window
[i
].insn
)
42649 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
42650 i
, group_name
[list
->window
[i
].group
],
42651 i
, (void *)list
->window
[i
].insn
,
42652 i
, list
->window
[i
].path
,
42653 i
, list
->window
[i
].byte_len
,
42654 i
, list
->window
[i
].imm_bytes
);
42658 /* Print to stdout a dispatch window. */
42660 DEBUG_FUNCTION
void
42661 debug_dispatch_window (int window_num
)
42663 debug_dispatch_window_file (stdout
, window_num
);
42666 /* Print INSN dispatch information to FILE. */
42668 DEBUG_FUNCTION
static void
42669 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
42672 enum insn_path path
;
42673 enum dispatch_group group
;
42675 int num_imm_operand
;
42676 int num_imm32_operand
;
42677 int num_imm64_operand
;
42679 if (INSN_CODE (insn
) < 0)
42682 byte_len
= min_insn_size (insn
);
42683 path
= get_insn_path (insn
);
42684 group
= get_insn_group (insn
);
42685 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42686 &num_imm64_operand
);
42688 fprintf (file
, " insn info:\n");
42689 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
42690 group_name
[group
], path
, byte_len
);
42691 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
42692 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
42695 /* Print to STDERR the status of the ready list with respect to
42696 dispatch windows. */
42698 DEBUG_FUNCTION
void
42699 debug_ready_dispatch (void)
42702 int no_ready
= number_in_ready ();
42704 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
42706 for (i
= 0; i
< no_ready
; i
++)
42707 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
42710 /* This routine is the driver of the dispatch scheduler. */
42713 do_dispatch (rtx insn
, int mode
)
42715 if (mode
== DISPATCH_INIT
)
42716 init_dispatch_sched ();
42717 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
42718 add_to_dispatch_window (insn
);
42721 /* Return TRUE if Dispatch Scheduling is supported. */
42724 has_dispatch (rtx insn
, int action
)
42726 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
)
42727 && flag_dispatch_scheduler
)
42733 case IS_DISPATCH_ON
:
42738 return is_cmp (insn
);
42740 case DISPATCH_VIOLATION
:
42741 return dispatch_violation ();
42743 case FITS_DISPATCH_WINDOW
:
42744 return fits_dispatch_window (insn
);
42750 /* Implementation of reassociation_width target hook used by
42751 reassoc phase to identify parallelism level in reassociated
42752 tree. Statements tree_code is passed in OPC. Arguments type
42755 Currently parallel reassociation is enabled for Atom
42756 processors only and we set reassociation width to be 2
42757 because Atom may issue up to 2 instructions per cycle.
42759 Return value should be fixed if parallel reassociation is
42760 enabled for other processors. */
42763 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
42764 enum machine_mode mode
)
42768 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
42770 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
42776 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
42777 place emms and femms instructions. */
42779 static enum machine_mode
42780 ix86_preferred_simd_mode (enum machine_mode mode
)
42788 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
42790 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
42792 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
42794 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
42797 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42803 if (!TARGET_VECTORIZE_DOUBLE
)
42805 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42807 else if (TARGET_SSE2
)
42816 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
42819 static unsigned int
42820 ix86_autovectorize_vector_sizes (void)
42822 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
42827 /* Return class of registers which could be used for pseudo of MODE
42828 and of class RCLASS for spilling instead of memory. Return NO_REGS
42829 if it is not possible or non-profitable. */
42831 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
42833 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
42834 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
42835 && INTEGER_CLASS_P (rclass
))
42836 return ALL_SSE_REGS
;
42840 /* Implement targetm.vectorize.init_cost. */
42843 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
42845 unsigned *cost
= XNEWVEC (unsigned, 3);
42846 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
42850 /* Implement targetm.vectorize.add_stmt_cost. */
42853 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
42854 struct _stmt_vec_info
*stmt_info
, int misalign
,
42855 enum vect_cost_model_location where
)
42857 unsigned *cost
= (unsigned *) data
;
42858 unsigned retval
= 0;
42860 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
42861 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
42863 /* Statements in an inner loop relative to the loop being
42864 vectorized are weighted more heavily. The value here is
42865 arbitrary and could potentially be improved with analysis. */
42866 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
42867 count
*= 50; /* FIXME. */
42869 retval
= (unsigned) (count
* stmt_cost
);
42870 cost
[where
] += retval
;
42875 /* Implement targetm.vectorize.finish_cost. */
42878 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
42879 unsigned *body_cost
, unsigned *epilogue_cost
)
42881 unsigned *cost
= (unsigned *) data
;
42882 *prologue_cost
= cost
[vect_prologue
];
42883 *body_cost
= cost
[vect_body
];
42884 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.
   Release the cost accumulator allocated by ix86_init_cost.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
42895 /* Validate target specific memory model bits in VAL. */
42897 static unsigned HOST_WIDE_INT
42898 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
42900 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
42903 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
42905 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
42907 warning (OPT_Winvalid_memory_model
,
42908 "Unknown architecture specific memory model");
42909 return MEMMODEL_SEQ_CST
;
42911 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
42912 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
42914 warning (OPT_Winvalid_memory_model
,
42915 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42916 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42918 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
42920 warning (OPT_Winvalid_memory_model
,
42921 "HLE_RELEASE not used with RELEASE or stronger memory model");
42922 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
42927 /* Initialize the GCC target structure. */
42928 #undef TARGET_RETURN_IN_MEMORY
42929 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42931 #undef TARGET_LEGITIMIZE_ADDRESS
42932 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42934 #undef TARGET_ATTRIBUTE_TABLE
42935 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42936 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
42937 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
42938 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42939 # undef TARGET_MERGE_DECL_ATTRIBUTES
42940 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42943 #undef TARGET_COMP_TYPE_ATTRIBUTES
42944 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42946 #undef TARGET_INIT_BUILTINS
42947 #define TARGET_INIT_BUILTINS ix86_init_builtins
42948 #undef TARGET_BUILTIN_DECL
42949 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42950 #undef TARGET_EXPAND_BUILTIN
42951 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42953 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42954 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42955 ix86_builtin_vectorized_function
42957 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42958 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42960 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42961 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42963 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42964 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42966 #undef TARGET_BUILTIN_RECIPROCAL
42967 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42969 #undef TARGET_ASM_FUNCTION_EPILOGUE
42970 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42972 #undef TARGET_ENCODE_SECTION_INFO
42973 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42974 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42976 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42979 #undef TARGET_ASM_OPEN_PAREN
42980 #define TARGET_ASM_OPEN_PAREN ""
42981 #undef TARGET_ASM_CLOSE_PAREN
42982 #define TARGET_ASM_CLOSE_PAREN ""
42984 #undef TARGET_ASM_BYTE_OP
42985 #define TARGET_ASM_BYTE_OP ASM_BYTE
42987 #undef TARGET_ASM_ALIGNED_HI_OP
42988 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42989 #undef TARGET_ASM_ALIGNED_SI_OP
42990 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42992 #undef TARGET_ASM_ALIGNED_DI_OP
42993 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42996 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42997 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42999 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
43000 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
43002 #undef TARGET_ASM_UNALIGNED_HI_OP
43003 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
43004 #undef TARGET_ASM_UNALIGNED_SI_OP
43005 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
43006 #undef TARGET_ASM_UNALIGNED_DI_OP
43007 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
43009 #undef TARGET_PRINT_OPERAND
43010 #define TARGET_PRINT_OPERAND ix86_print_operand
43011 #undef TARGET_PRINT_OPERAND_ADDRESS
43012 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
43013 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
43014 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
43015 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
43016 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
43018 #undef TARGET_SCHED_INIT_GLOBAL
43019 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
43020 #undef TARGET_SCHED_ADJUST_COST
43021 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
43022 #undef TARGET_SCHED_ISSUE_RATE
43023 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
43024 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
43025 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
43026 ia32_multipass_dfa_lookahead
43028 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
43029 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
43031 #undef TARGET_MEMMODEL_CHECK
43032 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
43035 #undef TARGET_HAVE_TLS
43036 #define TARGET_HAVE_TLS true
43038 #undef TARGET_CANNOT_FORCE_CONST_MEM
43039 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
43040 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
43041 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
43043 #undef TARGET_DELEGITIMIZE_ADDRESS
43044 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
43046 #undef TARGET_MS_BITFIELD_LAYOUT_P
43047 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
43050 #undef TARGET_BINDS_LOCAL_P
43051 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
43053 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
43054 #undef TARGET_BINDS_LOCAL_P
43055 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
43058 #undef TARGET_ASM_OUTPUT_MI_THUNK
43059 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
43060 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
43061 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
43063 #undef TARGET_ASM_FILE_START
43064 #define TARGET_ASM_FILE_START x86_file_start
43066 #undef TARGET_OPTION_OVERRIDE
43067 #define TARGET_OPTION_OVERRIDE ix86_option_override
43069 #undef TARGET_REGISTER_MOVE_COST
43070 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
43071 #undef TARGET_MEMORY_MOVE_COST
43072 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
43073 #undef TARGET_RTX_COSTS
43074 #define TARGET_RTX_COSTS ix86_rtx_costs
43075 #undef TARGET_ADDRESS_COST
43076 #define TARGET_ADDRESS_COST ix86_address_cost
43078 #undef TARGET_FIXED_CONDITION_CODE_REGS
43079 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
43080 #undef TARGET_CC_MODES_COMPATIBLE
43081 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
43083 #undef TARGET_MACHINE_DEPENDENT_REORG
43084 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
43086 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
43087 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
43089 #undef TARGET_BUILD_BUILTIN_VA_LIST
43090 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
43092 #undef TARGET_FOLD_BUILTIN
43093 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
43095 #undef TARGET_COMPARE_VERSION_PRIORITY
43096 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
43098 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
43099 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
43100 ix86_generate_version_dispatcher_body
43102 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
43103 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
43104 ix86_get_function_versions_dispatcher
43106 #undef TARGET_ENUM_VA_LIST_P
43107 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
43109 #undef TARGET_FN_ABI_VA_LIST
43110 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
43112 #undef TARGET_CANONICAL_VA_LIST_TYPE
43113 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
43115 #undef TARGET_EXPAND_BUILTIN_VA_START
43116 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
43118 #undef TARGET_MD_ASM_CLOBBERS
43119 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
43121 #undef TARGET_PROMOTE_PROTOTYPES
43122 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
43123 #undef TARGET_STRUCT_VALUE_RTX
43124 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
43125 #undef TARGET_SETUP_INCOMING_VARARGS
43126 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
43127 #undef TARGET_MUST_PASS_IN_STACK
43128 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
43129 #undef TARGET_FUNCTION_ARG_ADVANCE
43130 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
43131 #undef TARGET_FUNCTION_ARG
43132 #define TARGET_FUNCTION_ARG ix86_function_arg
43133 #undef TARGET_FUNCTION_ARG_BOUNDARY
43134 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
43135 #undef TARGET_PASS_BY_REFERENCE
43136 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
43137 #undef TARGET_INTERNAL_ARG_POINTER
43138 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
43139 #undef TARGET_UPDATE_STACK_BOUNDARY
43140 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
43141 #undef TARGET_GET_DRAP_RTX
43142 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
43143 #undef TARGET_STRICT_ARGUMENT_NAMING
43144 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
43145 #undef TARGET_STATIC_CHAIN
43146 #define TARGET_STATIC_CHAIN ix86_static_chain
43147 #undef TARGET_TRAMPOLINE_INIT
43148 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
43149 #undef TARGET_RETURN_POPS_ARGS
43150 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
43152 #undef TARGET_LEGITIMATE_COMBINED_INSN
43153 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
43155 #undef TARGET_ASAN_SHADOW_OFFSET
43156 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
43158 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
43159 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
43161 #undef TARGET_SCALAR_MODE_SUPPORTED_P
43162 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
43164 #undef TARGET_VECTOR_MODE_SUPPORTED_P
43165 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
43167 #undef TARGET_C_MODE_FOR_SUFFIX
43168 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
43171 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
43172 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
43175 #ifdef SUBTARGET_INSERT_ATTRIBUTES
43176 #undef TARGET_INSERT_ATTRIBUTES
43177 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
43180 #undef TARGET_MANGLE_TYPE
43181 #define TARGET_MANGLE_TYPE ix86_mangle_type
43184 #undef TARGET_STACK_PROTECT_FAIL
43185 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
43188 #undef TARGET_FUNCTION_VALUE
43189 #define TARGET_FUNCTION_VALUE ix86_function_value
43191 #undef TARGET_FUNCTION_VALUE_REGNO_P
43192 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
43194 #undef TARGET_PROMOTE_FUNCTION_MODE
43195 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
43197 #undef TARGET_MEMBER_TYPE_FORCES_BLK
43198 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
43200 #undef TARGET_INSTANTIATE_DECLS
43201 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
43203 #undef TARGET_SECONDARY_RELOAD
43204 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
43206 #undef TARGET_CLASS_MAX_NREGS
43207 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
43209 #undef TARGET_PREFERRED_RELOAD_CLASS
43210 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
43211 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
43212 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
43213 #undef TARGET_CLASS_LIKELY_SPILLED_P
43214 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
43216 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
43217 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
43218 ix86_builtin_vectorization_cost
43219 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
43220 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
43221 ix86_vectorize_vec_perm_const_ok
43222 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
43223 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
43224 ix86_preferred_simd_mode
43225 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
43226 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
43227 ix86_autovectorize_vector_sizes
43228 #undef TARGET_VECTORIZE_INIT_COST
43229 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
43230 #undef TARGET_VECTORIZE_ADD_STMT_COST
43231 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
43232 #undef TARGET_VECTORIZE_FINISH_COST
43233 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
43234 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
43235 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
43237 #undef TARGET_SET_CURRENT_FUNCTION
43238 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
43240 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
43241 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
43243 #undef TARGET_OPTION_SAVE
43244 #define TARGET_OPTION_SAVE ix86_function_specific_save
43246 #undef TARGET_OPTION_RESTORE
43247 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
43249 #undef TARGET_OPTION_PRINT
43250 #define TARGET_OPTION_PRINT ix86_function_specific_print
43252 #undef TARGET_OPTION_FUNCTION_VERSIONS
43253 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
43255 #undef TARGET_CAN_INLINE_P
43256 #define TARGET_CAN_INLINE_P ix86_can_inline_p
43258 #undef TARGET_EXPAND_TO_RTL_HOOK
43259 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
43261 #undef TARGET_LEGITIMATE_ADDRESS_P
43262 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
43264 #undef TARGET_LRA_P
43265 #define TARGET_LRA_P hook_bool_void_true
43267 #undef TARGET_REGISTER_PRIORITY
43268 #define TARGET_REGISTER_PRIORITY ix86_register_priority
43270 #undef TARGET_REGISTER_USAGE_LEVELING_P
43271 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
43273 #undef TARGET_LEGITIMATE_CONSTANT_P
43274 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
43276 #undef TARGET_FRAME_POINTER_REQUIRED
43277 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
43279 #undef TARGET_CAN_ELIMINATE
43280 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
43282 #undef TARGET_EXTRA_LIVE_ON_ENTRY
43283 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
43285 #undef TARGET_ASM_CODE_END
43286 #define TARGET_ASM_CODE_END ix86_code_end
43288 #undef TARGET_CONDITIONAL_REGISTER_USAGE
43289 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
43292 #undef TARGET_INIT_LIBFUNCS
43293 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
43296 #undef TARGET_SPILL_CLASS
43297 #define TARGET_SPILL_CLASS ix86_spill_class
43299 struct gcc_target targetm
= TARGET_INITIALIZER
;
43301 #include "gt-i386.h"