/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
#include "tree-pass.h"
#include "tree-flow.h"
#include "pass_manager.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
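/* Illustrative use of MODE_INDEX (a sketch, not new functionality): the
   five-entry mult/divide arrays in the cost tables below are laid out as
   {QI, HI, SI, DI, other}, so code in the spirit of ix86_rtx_costs fetches
   the SImode multiply startup cost as
     cost->mult_init[MODE_INDEX (SImode)]
   i.e. index 2, with any other mode falling through to "other", index 4.  */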
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
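/* Worked example: with COSTS_N_INSNS (N) defined as (N) * 4, an add costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the byte-based size costs
   below live on the same scale as the cycle-based speed costs: a 2-byte
   instruction weighs as much as one "average" instruction.  */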
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
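/* How these tables are read (an illustrative sketch, not GCC's actual
   decision logic -- see decide_alg): element [0] of each pair is used for
   32-bit code, element [1] for 64-bit code.  Within one stringop_algs the
   leading algorithm handles blocks of unknown size, and each
   {max, alg, noalign} triple covers known sizes up to MAX bytes, with
   max == -1 as the catch-all.  A lookup in this spirit, assuming the
   stringop_algs layout from i386.h:  */
#if 0 /* Example only; not compiled.  */
static enum stringop_alg
example_lookup_alg (const struct stringop_algs *algs, HOST_WIDE_INT size)
{
  for (int i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || size <= algs->size[i].max)
      return algs->size[i].alg;     /* first bucket large enough wins */
  return libcall;                   /* fall back to a library call */
}
#endif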
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  i386_memcpy,
  i386_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  i486_memcpy,
  i486_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks an inline loop is still a
   noticeable win; for bigger blocks either rep movsl or rep movsb is the
   way to go.  Rep movsb apparently has a more expensive startup time in the
   CPU, but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
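/* Worked example of reading pentiumpro_memcpy (illustrative): for 32-bit
   code, a known 512-byte copy falls in the {1024, unrolled_loop} bucket,
   a 4 kB copy uses rep movsl (rep_prefix_4_byte), and anything past 8 kB
   drops to rep movsb (rep_prefix_1_byte), matching the observation above
   that movsb's startup cost is lost in the noise after about 4K.  */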
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  geode_memcpy,
  geode_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  k6_memcpy,
  k6_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* For some reason, Athlon deals better with the REP prefix (relative to
   loops) than K8 does.  Alignment becomes important after 8 bytes for
   memcpy and 128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  athlon_memcpy,
  athlon_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* K8 has an optimized REP instruction for medium-sized blocks, but for very
   small blocks it is better to use a loop.  For large blocks, a libcall can
   do nontemporal accesses and beat inline code considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
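/* Worked example of reading k8_memcpy (illustrative): in 64-bit code a
   block of at most 16 bytes is copied with a simple loop, anything up to
   8 kB uses rep movsq (rep_prefix_8_byte), and larger or unknown sizes go
   to the libcall, which as noted above can use nontemporal accesses.  */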
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  k8_memcpy,
  k8_memset,
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
   for very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  amdfam10_memcpy,
  amdfam10_memset,
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */
  bdver1_memcpy,
  bdver1_memset,
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */
  bdver2_memcpy,
  bdver2_memset,
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */
  bdver3_memcpy,
  bdver3_memset,
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  btver1_memcpy,
  btver1_memset,
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs btver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  32,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  btver2_memcpy,
  btver2_memset,
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs pentium4_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium4_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  pentium4_memcpy,
  pentium4_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs nocona_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
             {100000, unrolled_loop, false}, {-1, libcall, false}}}};
static stringop_algs nocona_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {64, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  nocona_memcpy,
  nocona_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs atom_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};

static stringop_algs atom_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  17,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  32,			/* size of l1 cache.  */
  256,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  3,			/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  atom_memcpy,
  atom_memset,
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar_load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static stringop_algs slm_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};

static stringop_algs slm_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs slm_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  17,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  32,			/* size of l1 cache.  */
  256,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  3,			/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  slm_memcpy,
  slm_memset,
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar_load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static stringop_algs generic64_memcpy[2] = {
  DUMMY_STRINGOP_ALGS,
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs generic64_memset[2] = {
  DUMMY_STRINGOP_ALGS,
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  17,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  32,			/* size of l1 cache.  */
  512,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3,			/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  generic64_memcpy,
  generic64_memset,
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar_load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
/* core_cost should produce code tuned for Core family of CPUs.  */

static stringop_algs core_memcpy[2] = {
  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
static stringop_algs core_memset[2] = {
  {libcall, {{6, loop_1_byte, true},
             {24, loop, true},
             {8192, rep_prefix_4_byte, true},
             {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
static const
struct processor_costs core_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  17,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of l1 cache.  */
  512,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  /* FIXME perhaps more appropriate value is 5.  */
  3,			/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  core_memcpy,
  core_memset,
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar_load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */

static stringop_algs generic32_memcpy[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs generic32_memset[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  17,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  32,			/* size of l1 cache.  */
  256,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  3,			/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  generic32_memcpy,
  generic32_memset,
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar_load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_COREI7 (1<<PROCESSOR_COREI7)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)
#define m_SLM (1<<PROCESSOR_SLM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER	(m_BDVER1 | m_BDVER2 | m_BDVER3)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
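/* Illustration only: each m_* macro above is a single processor bit, and
   each selector in x86-tune.def is an OR of those bits.  A tuning feature
   is on for the processor chosen by -mtune exactly when that processor's
   bit appears in the selector, which is the test set_ix86_tune_features
   performs later in this file:

     ix86_tune_features[i]
       = !!(initial_ix86_tune_features[i] & (1u << ix86_tune));  */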
const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"
#undef DEF_TUNE
};

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) selector,
#include "x86-tune.def"
#undef DEF_TUNE
};
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM
    | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
2032 /* The "default" register map used in 32bit mode. */
2034 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2036 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2037 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2038 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2039 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2040 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2041 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2042 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2045 /* The "default" register map used in 64bit mode. */
2047 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2049 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2050 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2051 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2052 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2053 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2054 8,9,10,11,12,13,14,15, /* extended integer registers */
2055 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to understand
   that it should say that a variable lives in %st(0) (when asked via an
   `=' command) if we said it was in DWARF regno 11, but SDB still
   prints garbage when asked for the value of the variable in question
   (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
 */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
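/* Worked example of the mapping above: %st(1) is gcc regno 9, the second
   fp register, so the fp-regs row of svr4_dbx_register_map reads
   11, 12, ..., 18 and assigns it DWARF register number 12, matching the
   stack-top-relative list in the preceding comment.  */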
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};

/* Additional registers that are clobbered by SYSV calls.  */

int const x86_64_ms_sysv_extra_clobbered_registers[12] =
{
  SI_REG, DI_REG,
  XMM6_REG, XMM7_REG,
  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   saved static chain			if ix86_static_chain_on_stack

   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [saved SSE regs]
					<- sse_regs_save_offset
   [va_arg registers]			|
   [frame]				|
   [padding2]				| = to_allocate
					<- stack_pointer_offset  */

struct ix86_frame
{
  int nsseregs;
  int nregs;
  int va_arg_size;
  int red_zone_size;
  int outgoing_arguments_size;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
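/* Aside: these are function pointers rather than direct calls so that
   prologue/epilogue expansion can emit a pattern of the right word size
   without testing TARGET_64BIT at every call site; the option override
   code is expected to aim each hook at the SImode or DImode generator
   (e.g. ix86_gen_add3 at gen_addsi3 vs. gen_adddi3).  */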
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4
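/* Worked example of the classification the comment above describes: a
   struct { double d; int i; } argument occupies two eightbytes.  The
   first classifies as X86_64_SSEDF_CLASS (a lone double, moved in
   DFmode) and the second as X86_64_INTEGERSI_CLASS (only the low 32
   bits are meaningful), so the value travels in one SSE register and
   one integer register.  */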
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  {&core_cost, 16, 10, 16, 10, 16},
  {&core_cost, 16, 10, 16, 10, 16},
  {&core_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 16, 10, 16, 7, 11},
  {&bdver2_cost, 16, 10, 16, 7, 11},
  {&bdver3_cost, 16, 10, 16, 7, 11},
  {&btver1_cost, 16, 10, 16, 7, 11},
  {&btver2_cost, 16, 10, 16, 7, 11},
  {&atom_cost, 16, 15, 16, 7, 16},
  {&slm_cost, 16, 15, 16, 7, 16}
};

static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
{
  /* ... CPU name strings, one per TARGET_CPU_DEFAULT_* value ...  */
};
/* Return true if the vzeroupper insertion pass should run.  */

static bool
gate_insert_vzeroupper (void)
{
  return TARGET_AVX && TARGET_VZEROUPPER;
}

static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  int i;

  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
  return 0;
}
namespace {

const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
};

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_insert_vzeroupper (); }
  unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }

}; // class pass_insert_vzeroupper

} // anon namespace

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };
  /* This table is ordered so that options like -msse4.2, which imply
     preceding options, match first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
    { "-mfma",		OPTION_MASK_ISA_FMA },
    { "-mxop",		OPTION_MASK_ISA_XOP },
    { "-mlwp",		OPTION_MASK_ISA_LWP },
    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
    { "-msse3",		OPTION_MASK_ISA_SSE3 },
    { "-msse2",		OPTION_MASK_ISA_SSE2 },
    { "-msse",		OPTION_MASK_ISA_SSE },
    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",		OPTION_MASK_ISA_MMX },
    { "-mabm",		OPTION_MASK_ISA_ABM },
    { "-mbmi",		OPTION_MASK_ISA_BMI },
    { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
    { "-mhle",		OPTION_MASK_ISA_HLE },
    { "-mfxsr",		OPTION_MASK_ISA_FXSR },
    { "-mrdseed",	OPTION_MASK_ISA_RDSEED },
    { "-mprfchw",	OPTION_MASK_ISA_PRFCHW },
    { "-madx",		OPTION_MASK_ISA_ADX },
    { "-mtbm",		OPTION_MASK_ISA_TBM },
    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
    { "-maes",		OPTION_MASK_ISA_AES },
    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
    { "-mf16c",		OPTION_MASK_ISA_F16C },
    { "-mrtm",		OPTION_MASK_ISA_RTM },
    { "-mxsave",	OPTION_MASK_ISA_XSAVE },
    { "-mxsaveopt",	OPTION_MASK_ISA_XSAVEOPT },
  };
  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
    { "-mlong-double-64",		MASK_LONG_DOUBLE_64 },
    { "-m80387",			MASK_80387 },
    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",			MASK_ALIGN_DOUBLE },
    { "-mcld",				MASK_CLD },
    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
    { "-mieee-fp",			MASK_IEEE_FP },
    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",			MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",			MASK_RECIP },
    { "-mrtd",				MASK_RTD },
    { "-msseregparm",			MASK_SSEREGPARM },
    { "-mstack-arg-probe",		MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",			MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
  };
  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
  const char *abi;

  memset (opts, '\0', sizeof (opts));
  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Add -m32/-m64/-mx32.  */
  if ((isa & OPTION_MASK_ISA_64BIT) != 0)
    {
      if ((isa & OPTION_MASK_ABI_64) != 0)
	abi = "-m64";
      else
	abi = "-mx32";
      isa &= ~ (OPTION_MASK_ISA_64BIT
		| OPTION_MASK_ABI_64
		| OPTION_MASK_ABI_X32);
    }
  else
    abi = "-m32";
  opts[num++][0] = abi;
  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }
  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }
  /* Add -mfpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Any options?  */
  if (num == 0)
    return NULL;

  gcc_assert (num < ARRAY_SIZE (opts));
  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;

	  /* Wrap long lines when a newline separator was requested.  */
	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	    {
	      *ptr++ = '\\';
	      *ptr++ = '\n';
	      line_len = 0;
	    }
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
/* Return true, if profiling code should be emitted before
   prologue.  Otherwise it returns false.
   Note: For x86 with "hotfix" this is sorried, i.e. not supported.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}

/* Function that is callable from the debugger to print the current
   options.  */
void ATTRIBUTE_UNUSED
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);

  return;
}
static const char *stringop_alg_names[] = {
#define DEF_ENUM
#define DEF_ALG(alg, name) #name,
#include "stringop.def"
#undef DEF_ENUM
#undef DEF_ALG
};
/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
   The string is of the following form (or comma separated list of it):

     strategy_alg:max_size:[align|noalign]

   where the full size range for the strategy is either [0, max_size] or
   [min_size, max_size], in which min_size is the max_size + 1 of the
   preceding range.  The last size range must have max_size == -1.

   Examples:

    1.
       -mmemcpy-strategy=libcall:-1:noalign

      this is equivalent to (for known size memcpy) -mstringop-strategy=libcall

    2.
       -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign

      This is to tell the compiler to use the following strategy for memset
      1) when the expected size is between [1, 16], use rep_8byte strategy;
      2) when the size is between [17, 2048], use vector_loop;
      3) when the size is > 2048, use libcall.  */
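/* For instance, under the rules above the memset example string

     rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign

   parses into three stringop_size_range entries:

     { max = 16,   alg = rep_8byte,   noalign = true  }
     { max = 2048, alg = vector_loop, noalign = false }
     { max = -1,   alg = libcall,     noalign = true  }

   which ix86_parse_stringop_strategy_string below copies over the
   default algorithm table for the current -mtune costs.  */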
struct stringop_size_range
{
  int max;
  stringop_alg alg;
  bool noalign;
};

static void
ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
{
  const struct stringop_algs *default_algs;
  stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
  char *curr_range_str, *next_range_str;
  int i = 0, n = 0;

  if (is_memset)
    default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
  else
    default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];

  curr_range_str = strategy_str;

  do
    {
      int maxs;
      stringop_alg alg;
      char alg_name[128];
      char align[16];

      next_range_str = strchr (curr_range_str, ',');
      if (next_range_str)
	*next_range_str++ = '\0';

      if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
		       alg_name, &maxs, align))
	{
	  error ("wrong arg %s to option %s", curr_range_str,
		 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}

      if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
	{
	  error ("size ranges of option %s should be increasing",
		 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}

      for (i = 0; i < last_alg; i++)
	{
	  if (!strcmp (alg_name, stringop_alg_names[i]))
	    {
	      alg = (stringop_alg) i;
	      break;
	    }
	}

      if (i == last_alg)
	{
	  error ("wrong stringop strategy name %s specified for option %s",
		 alg_name,
		 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}

      input_ranges[n].max = maxs;
      input_ranges[n].alg = alg;
      if (!strcmp (align, "align"))
	input_ranges[n].noalign = false;
      else if (!strcmp (align, "noalign"))
	input_ranges[n].noalign = true;
      else
	{
	  error ("unknown alignment %s specified for option %s",
		 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}
      n++;
      curr_range_str = next_range_str;
    }
  while (curr_range_str);

  if (input_ranges[n - 1].max != -1)
    {
      error ("the max value for the last size range should be -1"
	     " for option %s",
	     is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
      return;
    }

  if (n > MAX_STRINGOP_ALGS)
    {
      error ("too many size ranges specified in option %s",
	     is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
      return;
    }

  /* Now override the default algs array.  */
  for (i = 0; i < n; i++)
    {
      *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
      *const_cast<stringop_alg *>(&default_algs->size[i].alg)
	= input_ranges[i].alg;
      *const_cast<int *>(&default_algs->size[i].noalign)
	= input_ranges[i].noalign;
    }
}
/* Parse the -mtune-ctrl= option.  When DUMP is true,
   print the features that are explicitly set.  */

static void
parse_mtune_ctrl_str (bool dump)
{
  if (!ix86_tune_ctrl_string)
    return;

  char *next_feature_string = NULL;
  char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
  char *orig = curr_feature_string;
  int i;
  do
    {
      bool clear = false;

      next_feature_string = strchr (curr_feature_string, ',');
      if (next_feature_string)
	*next_feature_string++ = '\0';
      if (*curr_feature_string == '^')
	{
	  curr_feature_string++;
	  clear = true;
	}
      for (i = 0; i < X86_TUNE_LAST; i++)
	{
	  if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
	    {
	      ix86_tune_features[i] = !clear;
	      if (dump)
		fprintf (stderr, "Explicitly %s feature %s\n",
			 clear ? "clear" : "set", ix86_tune_feature_names[i]);
	      break;
	    }
	}
      if (i == X86_TUNE_LAST)
	error ("Unknown parameter to option -mtune-ctrl: %s",
	       clear ? curr_feature_string - 1 : curr_feature_string);
      curr_feature_string = next_feature_string;
    }
  while (curr_feature_string);
  free (orig);
}
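/* Usage sketch (feature names here are hypothetical; the real ones come
   from x86-tune.def): a command line containing

     -mtune-ctrl=feature_a,^feature_b

   makes the loop above set ix86_tune_features[] to 1 for feature_a and,
   because of the leading '^', to 0 for feature_b, overriding whatever
   the -mtune selector chose.  */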
/* Helper function to set ix86_tune_features.  IX86_TUNE is the
   processor type.  */

static void
set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
{
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  int i;

  for (i = 0; i < X86_TUNE_LAST; ++i)
    {
      if (ix86_tune_no_default)
	ix86_tune_features[i] = 0;
      else
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }

  if (dump)
    {
      fprintf (stderr, "List of x86 specific tuning parameter names:\n");
      for (i = 0; i < X86_TUNE_LAST; i++)
	fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
		 ix86_tune_features[i] ? "on" : "off");
    }

  parse_mtune_ctrl_str (dump);
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
#define PTA_HLE			(HOST_WIDE_INT_1 << 33)
#define PTA_PRFCHW		(HOST_WIDE_INT_1 << 34)
#define PTA_RDSEED		(HOST_WIDE_INT_1 << 35)
#define PTA_ADX			(HOST_WIDE_INT_1 << 36)
#define PTA_FXSR		(HOST_WIDE_INT_1 << 37)
#define PTA_XSAVE		(HOST_WIDE_INT_1 << 38)
#define PTA_XSAVEOPT		(HOST_WIDE_INT_1 << 39)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)

/* if this reaches 64, need to widen struct pta flags below */
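/* Illustration only: the option-override code further below turns the
   PTA_* bits of the matched processor_alias_table entry into ISA flags
   with tests of this shape (a sketch, not a verbatim excerpt):

     if (processor_alias_table[i].flags & PTA_SSE2
	 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
       ix86_isa_flags |= OPTION_MASK_ISA_SSE2;  */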
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
3041 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3042 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3043 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3044 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3045 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3046 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3047 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3048 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3049 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3050 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3051 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3052 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3053 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
3054 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3055 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3056 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3057 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3058 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3059 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3060 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3061 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3062 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3063 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3064 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3065 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3066 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3067 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3068 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3069 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
3070 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3071 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
3072 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
3073 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3074 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_FXSR
},
3075 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
3076 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3077 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3078 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
3079 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3080 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
3081 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3082 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3083 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3084 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3085 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
3086 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3087 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3088 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3089 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3090 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
3092 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3093 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3094 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
3095 {"slm", PROCESSOR_SLM
, CPU_SLM
,
3096 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3097 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_MOVBE
3099 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3100 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3101 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3102 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3103 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3104 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3105 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3106 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3107 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3108 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3109 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3110 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3111 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3112 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3113 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3114 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3115 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
| PTA_FXSR
},
3116 {"k8", PROCESSOR_K8
, CPU_K8
,
3117 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3118 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3119 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3120 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3121 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3122 {"opteron", PROCESSOR_K8
, CPU_K8
,
3123 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3124 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3125 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3126 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3127 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3128 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3129 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3130 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3131 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3132 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3133 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3134 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3135 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3136 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3137 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3138 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3139 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3140 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3141 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3142 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3143 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3144 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3145 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3146 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3147 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3148 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3149 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3150 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3151 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3152 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3153 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3154 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
3155 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3156 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3157 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3158 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3159 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
3160 | PTA_XSAVEOPT
| PTA_FSGSBASE
},
3161 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3162 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3163 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
3164 | PTA_FXSR
| PTA_XSAVE
},
3165 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
3166 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3167 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3168 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3169 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
3170 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3172 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3173 PTA_HLE
/* flags are only used for -march switch. */ },
3174 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3176 | PTA_HLE
/* flags are only used for -march switch. */ },
  /* -mrecip options.  */
  static struct
    {
      const char *string;	/* option name */
      unsigned int mask;	/* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }
3212 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3213 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3214 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3215 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3216 #ifdef TARGET_BI_ARCH
3219 #if TARGET_BI_ARCH == 1
3220 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3221 is on and OPTION_MASK_ABI_X32 is off. We turn off
3222 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3225 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3227 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3228 on and OPTION_MASK_ABI_64 is off. We turn off
3229 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3232 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3239 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3240 OPTION_MASK_ABI_64 for TARGET_X32. */
3241 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3242 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3244 else if (TARGET_LP64
)
3246 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3247 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3248 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3249 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3252 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3253 SUBTARGET_OVERRIDE_OPTIONS
;
3256 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3257 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3260 /* -fPIC is the default for x86_64. */
3261 if (TARGET_MACHO
&& TARGET_64BIT
)
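  /* At this point at most one of OPTION_MASK_ABI_64 (-m64, LP64) and
     OPTION_MASK_ABI_X32 (-mx32, ILP32 on a 64-bit ISA) remains set,
     while OPTION_MASK_ISA_64BIT tracks whether 64-bit instructions are
     available at all.  */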
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      /* If this call is for setting the option attribute, allow the
	 generic32/generic64 that was previously set.  */
      else if (!main_args_p
	       && (!strcmp (ix86_tune_string, "generic32")
		   || !strcmp (ix86_tune_string, "generic64")))
	;
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for %stune=%s %s",
	       ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }

  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;

  if (global_options_set.x_ix86_pmode)
    {
      if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT ? "short" : "long",
	       TARGET_64BIT ? "64" : "32");
    }
  else
    ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && (TARGET_RDOS || TARGET_PECOFF))
	ix86_cmodel = CM_MEDIUM_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & PTA_PRFCHW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
	  ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
	if (processor_alias_table[i].flags & PTA_RDSEED
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
	if (processor_alias_table[i].flags & PTA_ADX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
	  ix86_isa_flags |= OPTION_MASK_ISA_ADX;
	if (processor_alias_table[i].flags & PTA_FXSR
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
	  ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
	if (processor_alias_table[i].flags & PTA_XSAVE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
	  ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
	if (processor_alias_table[i].flags & PTA_XSAVEOPT
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
	  ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
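  /* Note the compound masks above: ABM implies both LZCNT and POPCNT
     support (PTA_LZCNT | PTA_ABM, PTA_POPCNT | PTA_ABM), and SAHF is
     assumed available except on 64-bit parts that set PTA_NO_SAHF.  */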
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOV
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }

  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  set_ix86_tune_features (ix86_tune, ix86_dump_tunes);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use it when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }

  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;

  /* Enable SSE prefetch.  */
  if (TARGET_SSE || (TARGET_PRFCHW && !TARGET_3DNOW))
    x86_prefetch_sse = true;

  /* Enable prefetch{,w} instructions for -m3dnow.  */
  if (TARGET_3DNOW)
    ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW & ~ix86_isa_flags_explicit;

  /* Enable popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Enable lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
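  /* The recurring "MASK & ~ix86_isa_flags_explicit" idiom above applies a
     default only to bits the user did not set or clear on the command
     line, so an explicit -mno-* option always wins over these implied
     enables.  */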
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? (TARGET_SSE ? 4 : 3) : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }

  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
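  /* The boundary arguments are the log2 of a byte count: for example,
     -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
     = 128 bits, i.e. 16-byte stack alignment.  */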
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  ix86_tune_mask = 1u << ix86_tune;
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_tune_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ix86_tune_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ix86_tune_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 ix86_tune_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && (optimize >= 3 || flag_profile_use)
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;
  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;

  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    ix86_gen_leave = gen_leave;

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
      ix86_gen_monitor = gen_sse3_monitor_di;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
      ix86_gen_monitor = gen_sse3_monitor_si;
    }
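  /* These ix86_gen_* hooks let the rest of the backend emit Pmode-sized
     operations without re-testing DImode vs SImode at every use site.  */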
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }

  /* When not optimizing for size, enable vzeroupper optimization for
     TARGET_AVX with -fexpensive-optimizations and split 32-byte
     AVX unaligned load/store.  */
  if (!optimize_size)
    {
      if (flag_expensive_optimizations
	  && !(target_flags_explicit & MASK_VZEROUPPER))
	target_flags |= MASK_VZEROUPPER;
      if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	  && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
      if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	  && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
      /* Enable 128-bit AVX instruction generation
	 for the auto-vectorizer.  */
      if (TARGET_AVX128_OPTIMAL
	  && !(target_flags_explicit & MASK_PREFER_AVX128))
	target_flags |= MASK_PREFER_AVX128;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
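  /* So, for example, -mrecip=all,!sqrt enables every reciprocal
     approximation except the scalar square-root form: "all" sets every
     bit in recip_mask and the "!"-prefixed entry clears RECIP_MASK_SQRT
     again.  */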
  /* Default long double to 64-bit for Bionic.  */
  if (TARGET_HAS_BIONIC
      && !(target_flags_explicit & MASK_LONG_DOUBLE_64))
    target_flags |= MASK_LONG_DOUBLE_64;

  /* Save the initial options in case the user does function specific
     options.  */
  if (main_args_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node ();

  /* Handle stack protector.  */
  if (!global_options_set.x_ix86_stack_protector_guard)
    ix86_stack_protector_guard = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;

  /* Handle -mmemcpy-strategy= and -mmemset-strategy=.  */
  if (ix86_tune_memcpy_strategy)
    {
      char *str = xstrdup (ix86_tune_memcpy_strategy);
      ix86_parse_stringop_strategy_string (str, false);
      free (str);
    }

  if (ix86_tune_memset_strategy)
    {
      char *str = xstrdup (ix86_tune_memset_strategy);
      ix86_parse_stringop_strategy_string (str, true);
      free (str);
    }
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
  static struct register_pass_info insert_vzeroupper_info
    = { pass_insert_vzeroupper, "reload",
	1, PASS_POS_INSERT_AFTER
      };

  ix86_option_override_internal (true);

  /* This needs to be done at start up.  It's convenient to do it here.  */
  register_pass (&insert_vzeroupper_info);
}
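/* Registering insert_vzeroupper here hooks the pass into the pipeline
   exactly once, immediately after the first "reload" pass, so vzeroupper
   insertion sees the final hard-register allocation.  */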
/* Update register usage after having seen the compiler flags.  */

void
ix86_conditional_register_usage (void)
{
  int i, c_mask;
  unsigned int j;

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* For 32-bit targets, squash the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }

  /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
	    : TARGET_64BIT ? (1 << 2)
	    : (1 << 1));

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
/* Save the current options.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options.  */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    set_ix86_tune_features (ix86_tune, false);
}
/* Print the current options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;		/* option string */
    size_t len;			/* length of option string */
    enum ix86_opt_type type;	/* type */
    int opt;			/* option number, if OPT_xxx */
    int mask;			/* mask, if needed */
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),
    IX86_ATTR_ISA ("hle",	OPT_mhle),
    IX86_ATTR_ISA ("prfchw",	OPT_mprfchw),
    IX86_ATTR_ISA ("rdseed",	OPT_mrdseed),
    IX86_ATTR_ISA ("adx",	OPT_madx),
    IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
    IX86_ATTR_ISA ("xsave",	OPT_mxsave),
    IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
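/* For example, a declaration such as
     int foo (void) __attribute__ ((target ("sse4.1,no-avx")));
   reaches this parser as the string "sse4.1,no-avx": "sse4.1" matches an
   ix86_opt_isa entry and turns OPTION_MASK_ISA_SSE4_1 on, while the
   "no-" prefix inverts the sense of the "avx" entry.  */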
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (new_target == error_mark_node)
    ret = false;

  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* The callee's isa options should be a subset of the caller's, i.e. an
	 SSE4 function can inline an SSE2 function, but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
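/* The subset test above is a pure bitmask check: inlining is allowed only
   when (callee_isa & caller_isa) == callee_isa, e.g. a target("sse2")
   callee into a target("sse4.1") caller, never the reverse.  */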
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Invalidate ix86_previous_fndecl cache.  */
void
ix86_reset_previous_fndecl (void)
{
  ix86_previous_fndecl = NULL_TREE;
}

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section *x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
				ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
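/* The .ldata/.lbss family above mirrors the standard .data/.bss sections;
   the "l" prefix keeps objects above ix86_section_threshold out of the
   low 2 GB in the medium code model, so ordinary data stays reachable
   with 32-bit relocations.  */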
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
4939 /* This says how to output assembler code to declare an
4940 uninitialized external linkage data object.
4942 For medium model x86-64 we need to use .largecomm opcode for
4945 x86_elf_aligned_common (FILE *file
,
4946 const char *name
, unsigned HOST_WIDE_INT size
,
4949 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4950 && size
> (unsigned int)ix86_section_threshold
)
4951 fputs (".largecomm\t", file
);
4953 fputs (COMMON_ASM_OP
, file
);
4954 assemble_name (file
, name
);
4955 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
4956 size
, align
/ BITS_PER_UNIT
);
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
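/* Example: with -m32 -fpic, a tail call to an extern function is rejected
   by the first check above, because the call would go through the PLT and
   therefore needs %ebx to hold the GOT pointer across the call.  */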
5089 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5090 and "sseregparm" calling convention attributes;
5091 arguments as in struct attribute_spec.handler. */
5094 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5096 int flags ATTRIBUTE_UNUSED
,
5099 if (TREE_CODE (*node
) != FUNCTION_TYPE
5100 && TREE_CODE (*node
) != METHOD_TYPE
5101 && TREE_CODE (*node
) != FIELD_DECL
5102 && TREE_CODE (*node
) != TYPE_DECL
)
5104 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5106 *no_add_attrs
= true;
5110 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5111 if (is_attribute_p ("regparm", name
))
5115 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5117 error ("fastcall and regparm attributes are not compatible");
5120 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5122 error ("regparam and thiscall attributes are not compatible");
5125 cst
= TREE_VALUE (args
);
5126 if (TREE_CODE (cst
) != INTEGER_CST
)
5128 warning (OPT_Wattributes
,
5129 "%qE attribute requires an integer constant argument",
5131 *no_add_attrs
= true;
5133 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5135 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5137 *no_add_attrs
= true;
5145 /* Do not warn when emulating the MS ABI. */
5146 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5147 && TREE_CODE (*node
) != METHOD_TYPE
)
5148 || ix86_function_type_abi (*node
) != MS_ABI
)
5149 warning (OPT_Wattributes
, "%qE attribute ignored",
5151 *no_add_attrs
= true;
5155 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5156 if (is_attribute_p ("fastcall", name
))
5158 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5160 error ("fastcall and cdecl attributes are not compatible");
5162 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5164 error ("fastcall and stdcall attributes are not compatible");
5166 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5168 error ("fastcall and regparm attributes are not compatible");
5170 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5172 error ("fastcall and thiscall attributes are not compatible");
  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and fastcall attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
    }
  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and thiscall attributes are not compatible");
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }
  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
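/* Example (illustrative sketch of typical user code, not part of the
   handler above): these attributes appear on declarations such as

     int __attribute__ ((regparm (2))) f (int a, int b);
     int __attribute__ ((fastcall)) g (int a, int b);

   and the checks above reject invalid combinations, e.g. fastcall
   together with regparm on the same function type.  */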
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT (ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
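/* Example (illustrative; the flag values themselves live in i386.h): for

     void __attribute__ ((stdcall, sseregparm)) f (float);

   the attribute walk above yields
   IX86_CALLCVT_STDCALL | IX86_CALLCVT_SSEREGPARM, while a plain
   unadorned prototype on a non-RTD target falls through to
   IX86_CALLCVT_CDECL.  */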
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = AX_REG; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
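/* Example (illustrative): with -ffixed-ecx one of the candidate regparm
   registers is unavailable, so the scans above lower the regparm value
   computed for a local function; an explicit
   __attribute__ ((regparm (N))) still wins via the attribute path
   earlier in this function.  */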
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep the aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
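/* Example (illustrative): for

     void __attribute__ ((stdcall)) f (int, int);

   SIZE is 8 and f is not variadic, so the hook returns 8 and the
   callee pops its own arguments with "ret $8"; a cdecl function
   returns 0 and leaves the stack adjustment to the caller.  */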
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
ix86_legitimate_combined_insn (rtx insn)
{
  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */
  if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
    {
      int i;

      extract_insn (insn);
      preprocess_constraints ();

      for (i = 0; i < recog_data.n_operands; i++)
	{
	  rtx op = recog_data.operand[i];
	  enum machine_mode mode = GET_MODE (op);
	  struct operand_alternative *op_alt;
	  int offset = 0;
	  bool win;
	  int j;

	  /* A unary operator may be accepted by the predicate, but it
	     is irrelevant for matching constraints.  */
	  if (UNARY_P (op))
	    op = XEXP (op, 0);

	  if (GET_CODE (op) == SUBREG)
	    {
	      if (REG_P (SUBREG_REG (op))
		  && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
		offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					      GET_MODE (SUBREG_REG (op)),
					      SUBREG_BYTE (op),
					      GET_MODE (op));
	      op = SUBREG_REG (op);
	    }

	  if (!(REG_P (op) && HARD_REGISTER_P (op)))
	    continue;

	  op_alt = recog_op_alt[i];

	  /* Operand has no constraints, anything is OK.  */
	  win = !recog_data.n_alternatives;

	  for (j = 0; j < recog_data.n_alternatives; j++)
	    {
	      if (op_alt[j].anything_ok
		  || (op_alt[j].matches != -1
		      && operands_match_p
			  (recog_data.operand[i],
			   recog_data.operand[op_alt[j].matches]))
		  || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
		{
		  win = true;
		  break;
		}
	    }

	  if (!win)
	    return false;
	}
    }

  return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
				     : HOST_WIDE_INT_C (0x7fff8000))
		     : (HOST_WIDE_INT_1 << 29);
}
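/* Example (illustrative): AddressSanitizer maps an application address
   to its shadow byte as

     shadow = (addr >> 3) + ix86_asan_shadow_offset ();

   hence the constant 0x7fff8000 for LP64 Linux targets and 1 << 29 for
   32-bit ones.  */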
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

static bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call ABI used.  */

enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call ABI used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */

void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* 64-bit MS and SYSV ABI have different sets of call-used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context, since this is needed only during RTL expansion.  */

static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;

  memset (cum, 0, sizeof (*cum));

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive,
     minimizing false positives.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	}
      else
	{
	  /* Use ecx and edx registers if function has fastcall attribute,
	     else look for regparm information.  */
	  if (fntype)
	    {
	      unsigned int ccvt = ix86_get_callcvt (fntype);
	      if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
		{
		  cum->nregs = 1;
		  cum->fastcall = 1; /* Same first register as in fastcall.  */
		}
	      else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
		{
		  cum->nregs = 2;
		  cum->fastcall = 1;
		}
	      else
		cum->nregs = ix86_function_regparm (fntype, fndecl);
	    }

	  /* Set up the number of SSE registers used for passing SFmode
	     and DFmode arguments.  Warn for mismatching ABI.  */
	  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
	}
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum && !warnedavx && cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else if ((size == 8 || size == 16) && !TARGET_SSE)
		  {
		    static bool warnedsse;

		    if (cum && !warnedsse && cum->warn_sse)
		      {
			warnedsse = true;
			warning (0, "SSE vector argument without SSE "
				 "enabled changes the ABI");
		      }
		    return mode;
		  }
		else
		  return mode;
	      }
	}
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
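/* Example (illustrative): for

     struct s { int i; float f; };

   both fields share one eightbyte; merging the int's INTEGERSI class
   with the float's SSE class hits rule #4, so the whole struct is
   classified as an integer class and travels in one GPR.  */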
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}
      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i]
			  = merge_classes (X86_64_INTEGER_CLASS,
					   classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos]
			  = merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;
	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }
  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);
  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
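/* Example (illustrative): for

     struct s { long l; double d; };

   classify_argument yields { INTEGER, SSEDF }, so the loop above
   reports *int_nregs == 1 and *sse_nregs == 1; the struct is split
   between a general-purpose register and an SSE register.  */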
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}
  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode
	      = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we
	     don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i * 8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (pos * 8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];

  return ret;
}
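/* Example (illustrative RTL sketch): for the struct from the previous
   example passed as the first argument, the loop above builds roughly

     (parallel [(expr_list (reg:DI di) (const_int 0))
		(expr_list (reg:DF xmm0) (const_int 8))])

   placing each eightbyte in the register its class dictates.  */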
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return NULL;

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
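/* Example (illustrative): under the Windows x64 convention a 24-byte
   struct is not of size 1, 2, 4 or 8 bytes, so the switch above forces
   it by reference: the caller makes a temporary copy and passes its
   address in the argument register instead.  */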
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some language passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}

/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some language passes arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
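/* Example (illustrative): in 32-bit mode a 16-byte-aligned __m128
   argument reports a 128-bit boundary here, while a plain double stays
   at the 32-bit PARM_BOUNDARY even though its natural alignment is
   64 bits.  */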
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case XFmode:
	case XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
		      const_tree valtype)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if (valtype != NULL_TREE
	      && !VECTOR_INTEGER_TYPE_P (valtype)
	      && !INTEGRAL_TYPE_P (valtype)
	      && !VECTOR_FLOAT_TYPE_P (valtype))
	    break;
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode, valtype);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
/* Return true if a structure, union or array with MODE containing FIELD
   should be accessed using BLKmode.  */

static bool
ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
{
  /* Union with XFmode must be in BLKmode.  */
  return (mode == XFmode
	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
}
rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;
    }

  /* AVX values are returned in YMM0, except when it doesn't exist.  */

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return true;
}
static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
       || VECTOR_FLOAT_TYPE_P (type))
      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly 1, 2, 4 or 8.  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
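/* For reference, the record built above matches the va_list layout the
   SysV AMD64 psABI specifies; an illustrative C equivalent (not part of
   this file) is:

	typedef struct {
	  unsigned int gp_offset;     // byte offset into reg_save_area of
				      // the next unconsumed GP register
	  unsigned int fp_offset;     // likewise for the next SSE register
	  void *overflow_arg_area;    // next stack-passed argument
	  void *reg_save_area;	      // base of the register save area
	} __va_list_tag[1];

   gp_offset advances in 8-byte steps (0..48) and fp_offset in 16-byte
   steps biased by 48 (48..176), matching the constants used in
   ix86_va_start and ix86_gimplify_va_arg below.  */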
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
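/* An illustrative picture (not from the original source) of the varargs
   register save area laid out by the code above, relative to the frame
   pointer used as SAVE_AREA:

	save_area +   0:  rdi rsi rdx rcx r8 r9	   (6 x 8 bytes of GPRs)
	save_area +  48:  xmm0 ... xmm7		   (8 x 16 bytes of SSE regs)

   which is why ix86_varargs_gpr_size is X86_64_REGPARM_MAX (6) words and
   ix86_varargs_fpr_size is X86_64_SSE_REGPARM_MAX (8) * 16 bytes.  */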
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (Pmode, virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg, seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 The function prologue saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing a structure, verify that it is a consecutive
	 block in the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
		  if (mode == BLKmode)
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (fpr, t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
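/* A sketch (illustrative pseudo-GIMPLE, not emitted verbatim) of what
   the lowering above produces for a va_arg of an integer TYPE needing
   one GP register:

	if (ap->gp_offset >= 48) goto lab_false;   // registers exhausted?
	addr = ap->reg_save_area + ap->gp_offset;
	ap->gp_offset = ap->gp_offset + 8;
	goto lab_over;
      lab_false:
	addr = ap->overflow_arg_area;		   // take it off the stack
	ap->overflow_arg_area = ap->overflow_arg_area + 8;
      lab_over:
	result = *(TYPE *) addr;

   48 here is (X86_64_REGPARM_MAX - needed_intregs + 1) * 8 with
   needed_intregs == 1.  */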
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);
  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
	if (TARGET_SSE2)
	  return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
	if (TARGET_AVX2)
	  return 2;
      default:
	break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  return "%vpxor\t%0, %d0";
	case MODE_V2DF:
	  return "%vxorpd\t%0, %d0";
	case MODE_V4SF:
	  return "%vxorps\t%0, %d0";

	case MODE_OI:
	  return "vpxor\t%x0, %x0, %x0";
	case MODE_V4DF:
	  return "vxorpd\t%x0, %x0, %x0";
	case MODE_V8SF:
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  break;
	}

    case 2:
      if (TARGET_AVX)
	return "vpcmpeqd\t%0, %0, %0";
      else
	return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
/* Returns true if OP contains a symbol reference */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
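/* The thunk named here and emitted by ix86_code_end below is trivial;
   for %ebx it is (illustrative):

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx	# return address == caller's PC
		ret

   so "call __x86.get_pc_thunk.bx" leaves the address of the insn after
   the call in %ebx, from which the GOT pointer is then formed by adding
   _GLOBAL_OFFSET_TABLE_ (see output_set_got).  */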
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      /* We don't need a pic base, we're not producing pic.  */
      gcc_assert (!TARGET_MACHO);

      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
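/* For example (illustrative), on a 64-bit target gen_push applied to
   the %rax register builds the RTL

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ax))

   which matches the push insn pattern, and gen_pop builds the
   mirror-image post_inc form for pop.  */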
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile
	  || crtl->calls_eh_return
	  || crtl->uses_const_pool
	  || cfun->has_nonlocal_label))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* 64-bit MS ABI seems to require stack alignment to be always 16 except for
     function prologues and leaf.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR)
	   && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
	  the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
	 within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !crtl->is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, returns
	 the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
	  && (diff > 240 || (diff & 15) != 0)
	  && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
    }
}
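/* An illustrative summary (not from the original source) of the layout
   computed above, from high to low addresses:

	return address
	[pushed static chain]
	[saved frame pointer]		<- hard_frame_pointer_offset
	saved GP registers		<- reg_save_offset
	[16-aligned SSE register save]	<- sse_reg_save_offset
	varargs save area
	local variables			<- frame_pointer_offset
	outgoing arguments
					<- stack_pointer_offset

   The recurring expression (offset + align - 1) & -align rounds OFFSET
   up to a multiple of the power-of-two ALIGN; e.g. for offset 20 and
   align 16, (20 + 15) & -16 == 35 & ~15 == 32.  */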
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
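/* Illustrative examples (not from the original source) of the encoding
   cost this computes: a zero displacement normally needs no offset byte
   (except for %ebp/%r13, which have no base-only encoding and pay 1),
   a displacement in [-128, 127] needs 1 byte, anything else 4 bytes,
   and %esp/%r12 pay one extra byte for the mandatory SIB.  So (%rax)
   costs 0 while 0x100(%r12) costs 4 + 1 = 5.  */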
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (m->fs.drap_valid)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (m->fs.drap_valid)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (m->fs.fp_valid)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION,
			gen_rtx_SET (VOIDmode, mem, reg));
	}
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}
/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);

  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (VOIDmode, dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   shorter encoding.

   Return: the regno of chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in the epilogue, DRAP must not use a caller-saved
	 register in such case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in the epilogue, DRAP must not use a caller-saved
	 register in such case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
         passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line. */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && !TARGET_64BIT
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary. */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and returns vDRAP */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};
/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      unsigned int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      struct machine_function *m = cfun->machine;
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
      m->fs.sp_offset -= UNITS_PER_WORD;
    }
}
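/* Typical usage of the pair above (illustrative sketch):

     struct scratch_reg sr;

     get_scratch_register_on_entry (&sr);
     ... emit code that uses sr.reg ...
     release_scratch_register_on_entry (&sr);

   When no call-clobbered register happens to be free, the chosen
   register is pushed on entry and popped on release, which is why the
   release function must emit the REG_FRAME_RELATED_EXPR bookkeeping
   seen above.  */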
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
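  /* As a worked example (illustrative numbers only): with a 4096-byte
     PROBE_INTERVAL and UNITS_PER_WORD == 8, so dope == 32, a request of
     size == 10000 takes the compile-time path below.  The loop's first
     iteration drops SP by 2*4096 + 32 = 8224 bytes and probes, the
     second drops it by another 4096 and probes, the tail drops it by
     the remaining 10000 + 4096 - 12288 = 1808 bytes and probes, and the
     final adjustment adds 4096 + 32 back, for a net decrease of exactly
     10000 bytes with every interval of the new area probed.  */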
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  if (first_probe)
	    {
	      adjust = 2 * PROBE_INTERVAL + dope;
	      first_probe = false;
	    }
	  else
	    adjust = PROBE_INTERVAL;

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + PROBE_INTERVAL + dope;
      else
	adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
			      gen_rtx_PLUS (Pmode, sr.reg,
					    stack_pointer_rtx)));

      /* Step 3: the loop

	   while (SP != LAST_ADDR)
	     {
	       SP = SP + PROBE_INTERVAL
	       probe at SP
	     }

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));

      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx,
				      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;

      cfun->machine->fs.sp_offset += size;
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
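/* For reference, the printed loop looks roughly like this in 32-bit
   AT&T output, assuming a 4096-byte PROBE_INTERVAL and the limit
   address in %eax (an illustrative sketch, not verbatim output):

	.LPSRL0:
	cmpl	%eax, %esp
	je	.LPSRE0
	subl	$4096, %esp
	orl	$0, (%esp)
	jmp	.LPSRL0
	.LPSRE0:

   The "orl $0" store is the actual probe: it touches the page without
   changing its contents.  */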
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
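      /* For instance (illustrative numbers, 4096-byte interval):
	 first == 8192 and size == 12288 emit probes at offsets
	 -(8192 + 4096), -(8192 + 8192) and -(8192 + 12288) from the
	 stack pointer, walking the whole guard area one interval at
	 a time.  */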
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;

      /* Step 3: the loop

	   while (TEST_ADDR != LAST_ADDR)
	     {
	       TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	       probe at TEST_ADDR
	     }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Finalize the stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */

static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
				< (crtl->is_leaf
				   ? crtl->max_used_stack_slot_alignment
				   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && !crtl->need_drap
      && frame_pointer_needed
      && crtl->is_leaf
      && flag_omit_frame_pointer
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
	{
	  rtx insn;

	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn)
		&& requires_stack_frame_p (insn, prologue_used,
					   set_up_by_prologue))
	      {
		crtl->stack_realign_needed = stack_realign;
		crtl->stack_realign_finalized = true;
		return;
	      }
	}

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use the profiling
	 before prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff	movl.s %edi, %edi
	 55	push   %ebp
	 8b ec	movl.s %esp, %ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg.  */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }
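  /* With a call-clobbered DRAP register (the usual 32-bit case, %ecx),
     the sequence built above looks roughly like:

	leal	4(%esp), %ecx
	andl	$-16, %esp
	pushl	-4(%ecx)

     i.e. grab the argument pointer, align the stack, then replicate the
     return address below the realigned frame.  (Illustrative sketch for
     a 16-byte alignment; a callee-saved DRAP register adds a push of
     that register in front.)  */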
  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push registers now, before setting the frame pointer
	 on SEH target.  */
      if (!int_registers_saved
	  && TARGET_SEH
	  && !frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location. */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - frame.sse_reg_save_offset),
				   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
	 pointer is no longer valid.  As for the value of sp_offset,
	 see ix86_compute_frame_layout, which we need to match in order
	 to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }
  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size =
	frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
	 written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-sse_size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
	{
	  ix86_adjust_stack_and_probe (allocate);
	  allocate = 0;
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;

	  if (TARGET_STACK_PROBE)
	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
	  else
	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
	}
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
	eax_live = ix86_eax_live_at_start_p ();

      /* Note that SEH directives need to continue tracking the stack
	 pointer even after the frame pointer has been set up.  */
      if (eax_live)
	{
	  insn = emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  insn = emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);
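      /* (Descriptive note: ix86_gen_allocate_stack_worker expands to a
	 call to a stack-probing helper -- ___chkstk_ms on mingw-style
	 targets -- which touches each page of the new area but, unlike
	 the older ___chkstk, does not move the stack pointer itself;
	 hence the explicit subtraction emitted below.  This is an
	 assumption drawn from the allocate_stack_worker patterns in
	 i386.md, which remain authoritative.)  */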
      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
	{
	  if (sp_is_cfa_reg)
	    m->fs.cfa_offset += allocate;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
  pic_reg_used = false;
  /* We don't use pic-register for pe-coff target.  */
  if (pic_offset_table_rtx
      && !TARGET_PECOFF
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, tmp_reg;

	      gcc_assert (Pmode == DImode);
	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      tmp_reg = gen_rtx_REG (Pmode, R11_REG);
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
						   label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
					       pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	{
	  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
	}
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realign
	 isn't necessary; here we emit the prologue to set up DRAP
	 without the stack realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
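/* (LEAVE is architecturally equivalent to "mov %ebp, %esp; pop %ebp",
   which is why sp_offset becomes fp_offset - UNITS_PER_WORD above:
   after the implicit pop, the stack pointer sits one word past the
   slot where the frame pointer had been stored.)  */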
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (crtl->sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());
  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack align doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (Pmode, sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (Pmode, stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test
	     are set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  Also do it on SEH target for very large
	 frame as the emitted instructions aren't allowed by the ABI
	 in epilogues.  */
      if (!m->fs.sp_valid
	  || (TARGET_SEH
	      && (m->fs.sp_offset - frame.reg_save_offset
		  >= SEH_MAX_FRAME_SIZE)))
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR)
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }
  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }
  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do explicit add, and jump indirectly to the caller.  */
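      /* (The 64K limit comes from the instruction encoding: "ret $N"
	 only has room for a 16-bit immediate.)  */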
      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;

    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only; instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall, is_thiscall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("-fsplit-stack does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (is_thiscall)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return DX_REG;
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("-fsplit-stack does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("-fsplit-stack does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
			  UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
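  /* (Descriptive note: the UNSPEC_STACK_CHECK memory above prints as a
     TCB-relative access -- typically %fs:0x70 in 64-bit mode and
     %gs:0x30 in 32-bit mode on GNU/Linux -- where the split-stack
     runtime keeps the current stack boundary.  The exact offsets are an
     assumption here; libgcc/config/i386/morestack.S is authoritative.)  */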
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use ix86_gen_add3 in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
				    stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
		GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	  && !TARGET_PECOFF)
	{
	  HOST_WIDE_INT argval;

	  gcc_assert (Pmode == DImode);
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    split_stack_fn_large =
	      gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
11714 /* We may have to tell the dataflow pass that the split stack prologue
11715 is initializing a scratch register. */
11718 ix86_live_on_entry (bitmap regs
)
11720 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11722 gcc_assert (flag_split_stack
);
11723 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
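
/* A minimal sketch of the large-model __morestack argument packing used
   above (not compiled into the build; names below are hypothetical).
   The asserts above guarantee both values fit in 32 bits, and the shift
   is written as (x << 16) << 16 rather than x << 32, presumably to stay
   well defined if HOST_WIDE_INT is ever only 32 bits wide.  */
#if 0
static unsigned long long
pack_morestack_args (unsigned long long args_size,
		     unsigned long long frame_size)
{
  /* Argument size in the upper 32 bits, frame size in the lower 32.  */
  return ((args_size << 16) << 16) + frame_size;
}

/* __morestack_large_model can then recover the two halves:
     frame_size = r10 & 0xffffffffULL;
     args_size  = r10 >> 32;  */
#endif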
/* Determine if op is suitable SUBREG RTX for address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	{
	  addr = XEXP (addr, 0);
	  if (CONST_INT_P (addr))
	    return 0;
	}
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
	  if (addr == NULL_RTX)
	    return 0;

	  if (CONST_INT_P (addr))
	    return 0;
	}
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (GET_CODE (addr) == SUBREG
	  && GET_MODE (SUBREG_REG (addr)) == DImode)
	{
	  addr = SUBREG_REG (addr);
	  if (CONST_INT_P (addr))
	    return 0;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case CONST:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return 0;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = DEFAULT_TLS_SEG_REG;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!ix86_address_subreg_operand (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else if (CONST_INT_P (addr))
    {
      if (!x86_64_immediate_operand (addr, VOIDmode))
	return 0;

      /* Constant addresses are sign extended to 64bit, we have to
	 prevent addresses from 0x80000000 to 0xffffffff in x32 mode.  */
      if (TARGET_X32
	  && val_signbit_known_set_p (SImode, INTVAL (addr)))
	return 0;

      disp = addr;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
	;
      else if (GET_CODE (index) == SUBREG
	       && ix86_address_subreg_operand (SUBREG_REG (index)))
	;
      else
	return 0;
    }

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != SEG_DEFAULT
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return 0;

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
	  || base_reg == frame_pointer_rtx
	  || base_reg == arg_pointer_rtx
	  || (REG_P (base_reg)
	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
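
/* A standalone model of the encoding-driven fixups at the end of
   ix86_decompose_address (not compiled into the build; the struct and
   names are hypothetical, only the rules mirror the code above).  */
#if 0
struct toy_address
{
  int has_base, has_index, has_disp;
  int scale;			/* 1, 2, 4 or 8 */
};

static void
toy_canonicalize (struct toy_address *a)
{
  /* reg*2 with no base encodes more compactly as reg+reg.  */
  if (!a->has_base && a->has_index && a->scale == 2)
    {
      a->has_base = 1;
      a->scale = 1;
    }
  /* scale*index alone cannot be encoded; force a zero displacement.  */
  if (!a->has_base && !a->has_disp && a->has_index && a->scale != 1)
    a->has_disp = 1;
}
#endif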
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

static int
ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
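
/* The register-counting heuristic above, restated as a standalone
   sketch (not compiled into the build; hypothetical names).  A value
   that still needs a hard register makes the address more expensive,
   and the K6 forms listed in the comment add a flat penalty.  */
#if 0
static int
toy_address_cost (int base_needs_reg, int index_needs_reg, int k6_penalty)
{
  int cost = 1;
  if (base_needs_reg || index_needs_reg)
    cost++;
  if (base_needs_reg && index_needs_reg)
    cost++;
  if (k6_penalty)
    cost += 10;
  return cost;
}
#endif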
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   otherwise zero.  */

static bool
is_imported_p (rtx x)
{
  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
      || GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1)
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     The dllimported symbol always needs to be resolved.  */
	  if (SYMBOL_REF_TLS_MODEL (op0)
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
		  && SYMBOL_REF_DLLIMPORT_P (op0)))
	    return false;

	  if (TARGET_PECOFF)
	    {
	      if (is_imported_p (op0))
		return true;

	      if (SYMBOL_REF_FAR_ADDR_P (op0)
		  || !SYMBOL_REF_LOCAL_P (op0))
		break;

	      /* Function-symbols need to be resolved only for
		 large-model.
		 For the small-model we don't need to resolve anything
		 here.  */
	      if ((ix86_cmodel != CM_LARGE_PIC
		   && SYMBOL_REF_FUNCTION_P (op0))
		  || ix86_cmodel == CM_SMALL_PIC)
		return true;
	      /* Non-external symbols don't need to be resolved for
		 large, and medium-model.  */
	      if ((ix86_cmodel == CM_LARGE_PIC
		   || ix86_cmodel == CM_MEDIUM_PIC)
		  && !SYMBOL_REF_EXTERNAL_P (op0))
		return true;
	    }
	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
		   && SYMBOL_REF_LOCAL_P (op0)
		   && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* Allowing PLUS expressions here would be unsafe; this limits the
	 allowed distance into GOT tables.  We should not need these
	 anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
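
/* The +/-16 MiB window applied to symbol+offset displacements above,
   as a standalone predicate (not compiled into the build; the name is
   hypothetical).  */
#if 0
static int
toy_offset_in_pic_range (long long off)
{
  return off >= -16 * 1024 * 1024 && off < 16 * 1024 * 1024;
}
#endif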
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

bool
ix86_legitimize_reload_address (rtx x,
				enum machine_mode mode ATTRIBUTE_UNUSED,
				int opnum, int type,
				int ind_levels ATTRIBUTE_UNUSED)
{
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
		       (reg:DI 97))
	      (reg:DI 2 cx))

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating invalid memory address RTX.

     Following code reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
    {
      rtx base, index;
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
	{
	  push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
		       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
	{
	  push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
		       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      gcc_assert (something_reloaded);
      return true;
    }

  return false;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
	reg = SUBREG_REG (base);
      else
	/* Base is not a register.  */
	return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
	reg = SUBREG_REG (index);
      else
	/* Index is not a register.  */
	return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP-relative addressing
	     instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  case UNSPEC_STACK_CHECK:
	    gcc_assert (flag_split_stack);
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* displacement must be referenced via non_lazy_pointer */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
		 return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
    }

  /* Everything looks valid.  */
  return true;
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
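
/* A compressed model of the structural checks in
   ix86_legitimate_address_p (not compiled into the build; hypothetical
   types and names): a scale other than 1 needs an index, only 1/2/4/8
   are encodable, and base and index must agree in mode.  */
#if 0
static int
toy_address_shape_ok (int has_base, int has_index, int scale,
		      int base_mode, int index_mode)
{
  if (scale != 1 && !has_index)
    return 0;
  if (scale != 1 && scale != 2 && scale != 4 && scale != 8)
    return 0;
  if (has_base && has_index && base_mode != index_mode)
    return 0;
  return 1;
}
#endif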
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT && !TARGET_PECOFF
	   && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      tmpreg = gen_reg_rtx (Pmode);
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
	{
	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
					 tmpreg, 1, OPTAB_DIRECT);
	  new_rtx = reg;
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
	{
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels on VxWorks;
	      see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;

      /* For x64 PE-COFF there is no GOT table.  So we use the address
	 directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
	  emit_insn (gen_movsi (reg, new_rtx));
	  new_rtx = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new_rtx = reg;
	    }
	  else
	    new_rtx = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new_rtx);
		      new_rtx = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      rtx base = legitimize_pic_address (op0, reg);
	      enum machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);
		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
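
/* The two PIC reference styles handled above, modelled on plain
   pointers (not compiled into the build; all names hypothetical): a
   global symbol is reached through a GOT slot (one extra load), while a
   local symbol is a fixed offset from the GOT base (pure address
   arithmetic).  */
#if 0
extern void *toy_got[];		/* GOT; its base lives in the PIC register */

static void *
toy_global_ref (int got_slot)
{
  return toy_got[got_slot];		/* @GOT: load the address */
}

static void *
toy_local_ref (long gotoff)
{
  return (char *) toy_got + gotoff;	/* @GOTOFF: base + offset */
}
#endif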
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (Pmode, true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (Pmode, true);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns, eqv;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  return gen_rtx_PLUS (tp_mode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
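
/* Local-exec TLS in miniature (not compiled into the build;
   hypothetical names): the final address is the thread pointer plus a
   link-time constant, which is why the TLS_MODEL_LOCAL_EXEC case above
   can return a bare PLUS with no call and no GOT traffic.  */
#if 0
static char *
toy_local_exec (char *thread_pointer, long tpoff)
{
  return thread_pointer + tpoff;	/* %fs:0 + @tpoff */
}
#endif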
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
   unique refptr-DECL symbol corresponding to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
      ? "*__imp_" : "*__imp__";
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
   is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_symbol (rtx addr, bool inreg)
{
  if (!TARGET_PECOFF)
    return NULL_RTX;

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	return legitimize_dllimport_symbol (addr, inreg);
      if (GET_CODE (addr) == CONST
	  && GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	}
    }

  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
    return NULL_RTX;
  if (GET_CODE (addr) == SYMBOL_REF
      && !is_imported_p (addr)
      && SYMBOL_REF_EXTERNAL_P (addr)
      && SYMBOL_REF_DECL (addr))
    return legitimize_pe_coff_extern_decl (addr, inreg);

  if (GET_CODE (addr) == CONST
      && GET_CODE (XEXP (addr, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
    {
      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
    }
  return NULL_RTX;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
	return tmp;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   TARGET_PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	{
	  bool f = i386_asm_output_addr_const_extra (file, x);
	  gcc_assert (f);
	  break;
	}

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
    }
}
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = simplify_gen_subreg (GET_MODE (orig_x), x,
				       GET_MODE (x), 0);
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

ix86_find_base_term (rtx x)
{
      if (GET_CODE (x) != CONST)
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (CONST_INT_P (XEXP (term, 1))
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || (XINT (term, 1) != UNSPEC_GOTPCREL
              && XINT (term, 1) != UNSPEC_PCREL))

      return XVECEXP (term, 0, 0);

  return ix86_delegitimize_address (x);
}
put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
                    bool fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
      code = ix86_fp_compare_code_to_integer (code);

    code = reverse_condition (code);

      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);

      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
        suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
        gcc_unreachable ();
      gcc_unreachable ();
      gcc_assert (mode == CCmode || mode == CCCmode);
      gcc_unreachable ();
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      /* ??? As above.  */
      if (mode == CCmode)
      else if (mode == CCCmode)
        suffix = fp ? "nb" : "ae";
        gcc_unreachable ();
      suffix = fp ? "u" : "p";
      suffix = fp ? "nu" : "np";
      gcc_unreachable ();
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.  */

print_reg (rtx x, int code, FILE *file)
{
  unsigned int regno;
  bool duplicated = code == 'd' && TARGET_AVX;

  if (ASSEMBLER_DIALECT == ASM_ATT)

      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);

  regno = true_regnum (x);
  gcc_assert (regno != ARG_POINTER_REGNUM
              && regno != FRAME_POINTER_REGNUM
              && regno != FLAGS_REG
              && regno != FPSR_REG
              && regno != FPCR_REG);

  if (code == 'w' || MMX_REG_P (x))
  else if (code == 'b')
  else if (code == 'k')
  else if (code == 'q')
  else if (code == 'y')
  else if (code == 'h')
  else if (code == 'x')
  else if (code == 't')
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers: "r%d[bwd]"  */
  if (REX_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
          error ("extended registers have no high halves");
          error ("unsupported operand size for extended register");

    if (STACK_TOP_P (x))
      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      reg = hi_reg_name[regno];
      if (regno >= ARRAY_SIZE (qi_reg_name))
      reg = qi_reg_name[regno];
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
      reg = qi_high_reg_name[regno];
      gcc_assert (!duplicated);
      fputs (hi_reg_name[regno] + 1, file);
      gcc_unreachable ();

      if (ASSEMBLER_DIALECT == ASM_ATT)
        fprintf (file, ", %%%s", reg);
        fprintf (file, ", %s", reg);
}
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   ...  */

get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
      cfun->machine->some_ld_name = XSTR (x, 0);

static const char *
get_some_local_dynamic_name (void)
{
  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */
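
/* Illustrative usage sketch (not from this file): these codes combine
   with operand numbers inside insn templates.  Assuming operands[0] is
   a DImode register and operands[1] a matching memory operand, a
   template like "mov%z0\t{%1, %0|%0, %1}" would print "movq" followed
   by AT&T- or Intel-ordered operands, and "%b0" would print the QImode
   register name such as "%al".  */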
ix86_print_operand (FILE *file, rtx x, int code)
{
      switch (ASSEMBLER_DIALECT)

          /* Intel syntax.  For absolute addresses, registers should not
             be surrounded by braces.  */
              ix86_print_operand (file, x, 0);
          gcc_unreachable ();

          ix86_print_operand (file, x, 0);

          /* Wrap address in an UNSPEC to declare special handling.  */
            x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
          output_address (x);

          if (ASSEMBLER_DIALECT == ASM_ATT)
          if (ASSEMBLER_DIALECT == ASM_ATT)
          if (ASSEMBLER_DIALECT == ASM_ATT)
          if (ASSEMBLER_DIALECT == ASM_ATT)
          if (ASSEMBLER_DIALECT == ASM_ATT)
          if (ASSEMBLER_DIALECT == ASM_ATT)
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
      if (ASSEMBLER_DIALECT != ASM_ATT)
      switch (GET_MODE_SIZE (GET_MODE (x)))
          output_operand_lossage
            ("invalid operand size for operand code 'O'");

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
          /* Opcodes don't get size suffixes if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
          switch (GET_MODE_SIZE (GET_MODE (x)))
              output_operand_lossage
                ("invalid operand size for operand code 'z'");
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        warning (0, "non-integer operand used with operand code 'z'");

      /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL)
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
          switch (GET_MODE_SIZE (GET_MODE (x)))
#ifdef HAVE_AS_IX86_FILDS
#ifdef HAVE_AS_IX86_FILDQ
              fputs ("ll", file);
      else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
          /* 387 opcodes don't get size suffixes
             if the operands are registers.  */
          if (STACK_REG_P (x))
          switch (GET_MODE_SIZE (GET_MODE (x)))
              output_operand_lossage
                ("invalid operand type used with operand code 'Z'");
          output_operand_lossage
            ("invalid operand size for operand code 'Z'");

      if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
          ix86_print_operand (file, x, 0);
          fputs (", ", file);
      switch (GET_CODE (x))
          fputs ("neq", file);
          fputs ("eq", file);
          fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
          fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
          fputs ("le", file);
          fputs ("lt", file);
          fputs ("unord", file);
          fputs ("ord", file);
          fputs ("ueq", file);
          fputs ("nlt", file);
          fputs ("nle", file);
          fputs ("ule", file);
          fputs ("ult", file);
          fputs ("une", file);
          output_operand_lossage ("operand is not a condition code, "
                                  "invalid operand code 'Y'");
      /* Little bit of braindamage here.  The SSE compare instructions
         use completely different names for the comparisons than the
         fp conditional moves do.  */
      switch (GET_CODE (x))
          fputs ("eq_us", file);
          fputs ("eq", file);
          fputs ("nge", file);
          fputs ("lt", file);
          fputs ("ngt", file);
          fputs ("le", file);
          fputs ("unord", file);
          fputs ("neq_oq", file);
          fputs ("neq", file);
          fputs ("ge", file);
          fputs ("nlt", file);
          fputs ("gt", file);
          fputs ("nle", file);
          fputs ("ord", file);
          output_operand_lossage ("operand is not a condition code, "
                                  "invalid operand code 'D'");
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
      if (ASSEMBLER_DIALECT == ASM_ATT)

      if (!COMPARISON_P (x))
          output_operand_lossage ("operand is not a condition code, "
                                  "invalid operand code '%c'", code);
      put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                          code == 'c' || code == 'f',
                          code == 'F' || code == 'f',
                          file);

      if (!offsettable_memref_p (x))
          output_operand_lossage ("operand is not an offsettable memory "
                                  "reference, invalid operand code 'H'");
      /* It doesn't actually matter what mode we use here, as we're
         only going to use this for printing.  */
      x = adjust_address_nv (x, DImode, 8);
      /* Output 'qword ptr' for intel assembler dialect.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL)

      gcc_assert (CONST_INT_P (x));

      if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
        fputs ("xacquire ", file);
        fputs ("\n" ASM_BYTE "0xf2\n\t", file);
      else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
        fputs ("xrelease ", file);
        fputs ("\n" ASM_BYTE "0xf3\n\t", file);
      /* We do not want to print value of the operand.  */

      if (ASSEMBLER_DIALECT == ASM_ATT)

          const char *name = get_some_local_dynamic_name ();
            output_operand_lossage ("'%%&' used without any "
                                    "local dynamic TLS references");
          assemble_name (file, name);
          || optimize_function_for_size_p (cfun)
          || !TARGET_BRANCH_PREDICTION_HINTS)

        x = find_reg_note (current_output_insn, REG_BR_PROB, 0);

            int pred_val = INTVAL (XEXP (x, 0));

            if (pred_val < REG_BR_PROB_BASE * 45 / 100
                || pred_val > REG_BR_PROB_BASE * 55 / 100)
              {
                bool taken = pred_val > REG_BR_PROB_BASE / 2;
                bool cputaken
                  = final_forward_branch_p (current_output_insn) == 0;

                /* Emit hints only in the case default branch prediction
                   heuristics would fail.  */
                if (taken != cputaken)
                    /* We use 3e (DS) prefix for taken branches and
                       2e (CS) prefix for not taken branches.  */
                      fputs ("ds ; ", file);
                      fputs ("cs ; ", file);
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX

      if (ASSEMBLER_DIALECT == ASM_ATT)
      /* The kernel uses a different segment register for performance
         reasons; a system call would not have to trash the userspace
         segment register, which would be expensive.  */
      if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
        fputs ("fs", file);
        fputs ("gs", file);

      putc (TARGET_AVX2 ? 'i' : 'f', file);

      if (TARGET_64BIT && Pmode != word_mode)
        fputs ("addr32 ", file);

      output_operand_lossage ("invalid operand code '%c'", code);

    print_reg (x, code, file);
  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
          && GET_MODE (x) != BLKmode)
          switch (GET_MODE_SIZE (GET_MODE (x)))
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "TBYTE"; break;
              if (GET_MODE (x) == XFmode)
            case 32: size = "YMMWORD"; break;
              gcc_unreachable ();

          /* Check for explicit size override (codes 'b', 'w', 'k', ...)  */
          else if (code == 'w')
          else if (code == 'k')
          else if (code == 'q')
          else if (code == 'x')

          fputs (size, file);
          fputs (" PTR ", file);

      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
          && !CONST_INT_P (x))
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
        output_address (x);
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)

      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
        fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
                 (unsigned long long) (int) l);
        fprintf (file, "0x%08x", (unsigned int) l);

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);

      /* We have patterns that allow zero sets of memory, for instance.
         In 64-bit mode, we should probably support all 8-byte vectors,
         since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
          gcc_assert (x == CONST0_RTX (GET_MODE (x)));

      if (code != 'P' && code != 'p')
          if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
              if (ASSEMBLER_DIALECT == ASM_ATT)
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
              if (ASSEMBLER_DIALECT == ASM_ATT)
                fputs ("OFFSET FLAT:", file);

      if (CONST_INT_P (x))
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
        output_pic_addr_const (file, x, code);
        output_addr_const (file, x);
}
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
          || code == ';' || code == '~' || code == '^');
}
/* Print a memory operand whose address is ADDR.  */

ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);

  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);

    ok = ix86_decompose_address (addr, &parts);

  index = parts.index;
  scale = parts.scale;
      if (ASSEMBLER_DIALECT == ASM_ATT)
        fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      gcc_unreachable ();

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == PLUS
          && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
        symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
          || (GET_CODE (symbol) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (symbol) == 0))

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (CONST_INT_P (disp))
        {
          if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
            fputs ("ds:", file);
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
        output_pic_addr_const (file, disp, 0);
        output_addr_const (file, disp);
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
        {
#ifdef ENABLE_CHECKING
          gcc_assert (TARGET_64BIT);
          switch (GET_CODE (addr))
              gcc_assert (GET_MODE (addr) == SImode);
              gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
              gcc_assert (GET_MODE (addr) == DImode);
              gcc_unreachable ();
          gcc_assert (!code);

          && CONST_INT_P (disp)
          && INTVAL (disp) < -16*1024*1024)
          /* X32 runs in 64-bit mode, where displacement, DISP, in
             address DISP(%r64), is encoded as 32-bit immediate sign-
             extended from 32-bit to 64-bit.  For -0x40000300(%r64),
             address is %r64 + 0xffffffffbffffd00.  When %r64 <
             0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
             which is invalid for x32.  The correct address is %r64
             - 0x40000300 == 0xf7ffdd64.  To properly encode
             -0x40000300(%r64) for x32, we zero-extend negative
             displacement by forcing addr32 prefix which truncates
             0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
             zero-extend all negative displacements, including -1(%rsp).
             However, for small negative displacements, sign-extension
             won't cause overflow.  We only zero-extend negative
             displacements if they are < -16*1024*1024, which is also
             used to check legitimate address displacements for PIC.  */
      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
            output_pic_addr_const (file, disp, 0);
          else if (GET_CODE (disp) == LABEL_REF)
            output_asm_label (disp);
            output_addr_const (file, disp);

            print_reg (base, code, file);

              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, ",%d", scale);

          rtx offset = NULL_RTX;

          /* Pull out the offset of a symbol; print any symbol itself.  */
          if (GET_CODE (disp) == CONST
              && GET_CODE (XEXP (disp, 0)) == PLUS
              && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
            {
              offset = XEXP (XEXP (disp, 0), 1);
              disp = gen_rtx_CONST (VOIDmode,
                                    XEXP (XEXP (disp, 0), 0));

            output_pic_addr_const (file, disp, 0);
          else if (GET_CODE (disp) == LABEL_REF)
            output_asm_label (disp);
          else if (CONST_INT_P (disp))
            output_addr_const (file, disp);

            print_reg (base, code, file);
              if (INTVAL (offset) >= 0)
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));

              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, "*%d", scale);
}
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) != UNSPEC)

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      output_addr_const (file, op);
      fputs ("@tpoff", file);
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
        fputs ("@tpoff", file);
        fputs ("@ntpoff", file);
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
        fputs (ASSEMBLER_DIALECT == ASM_ATT ?
               "@gottpoff(%rip)" : "@gottpoff[rip]", file);
        fputs ("@gotntpoff", file);
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      machopic_output_function_base_name (file);
    case UNSPEC_STACK_CHECK:
      gcc_assert (flag_split_stack);
#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
      offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
      gcc_unreachable ();
      fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

split_double_mode (enum machine_mode mode, rtx operands[],
                   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;

      half_mode = DImode;
      half_mode = SImode;
      gcc_unreachable ();

  byte = GET_MODE_SIZE (half_mode);

      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle it.  */
          lo_half[num] = adjust_address (op, half_mode, 0);
          hi_half[num] = adjust_address (op, half_mode, byte);

          lo_half[num] = simplify_gen_subreg (half_mode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? mode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (half_mode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? mode : GET_MODE (op), byte);
}
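
/* Usage sketch (illustrative, assuming a 32-bit target where DImode
   splits into two SImode halves):

     rtx ops[1] = { some_dimode_operand };  // hypothetical operand
     rtx lo[1], hi[1];
     split_double_mode (DImode, ops, 1, lo, hi);
     // lo[0] now holds the low SImode word, hi[0] the high word.
*/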
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  int is_sse
    = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))

    gcc_assert (is_sse);

  switch (GET_CODE (operands[3]))
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
      gcc_unreachable ();
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
        strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
        strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");

      strcpy (buf, ssep + 1);
      if (GET_MODE (operands[0]) == SFmode)
        strcat (buf, "ss\t{%2, %0|%0, %2}");
        strcat (buf, "sd\t{%2, %0|%0, %2}");

  switch (GET_CODE (operands[3]))
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */

      if (MEM_P (operands[1]))

      if (MEM_P (operands[2]))

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
            p = "{rp\t%2, %0|p\t%0, %2}";
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
            p = "{p\t%1, %0|rp\t%0, %1}";
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
      else if (STACK_TOP_P (operands[1]))
            p = "{\t%1, %0|r\t%0, %1}";
            p = "r\t{%1, %0|%0, %1}";   /* st(r2) = st(0) op st(r2) */

            p = "{r\t%2, %0|\t%0, %2}";
            p = "\t{%2, %0|%0, %2}";    /* st(r1) = st(r1) op st(0) */

      gcc_unreachable ();
/* Check if a 256bit AVX register is referenced inside of EXP.  */

ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (exp) == SUBREG)
    exp = SUBREG_REG (exp);

      && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
/* Return needed mode for entity in optimize_mode_switching pass.  */

ix86_avx_u128_mode_needed (rtx insn)
{
      /* Needed mode is set to AVX_U128_CLEAN if there are
         no 256bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
           link;
           link = XEXP (link, 1))
        {
          if (GET_CODE (XEXP (link, 0)) == USE)
            {
              rtx arg = XEXP (XEXP (link, 0), 0);

              if (ix86_check_avx256_register (&arg, NULL))
                return AVX_U128_ANY;

      return AVX_U128_CLEAN;

  /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
     changes state only when a 256bit register is written to, but we need
     to prevent the compiler from moving optimal insertion point above
     eventual read from 256bit register.  */
  if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
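
/* Illustrative summary (an assumption drawn from the uses above, not a
   definition from this file) of the upper-128-bit states driven through
   the mode-switching pass:
     AVX_U128_CLEAN -- the upper halves of the ymm registers are known
                       zero (e.g. just after vzeroupper);
     AVX_U128_DIRTY -- some ymm upper half may hold live data;
     AVX_U128_ANY   -- no constraint.
   A call with no 256-bit arguments is modeled as wanting CLEAN, while
   any insn referencing a 256-bit register forces DIRTY.  */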
/* Return mode that i387 must be switched into
   prior to the execution of insn.  */

ix86_i387_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
      || (NONJUMP_INSN_P (insn)
          && (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

      if (mode == I387_CW_TRUNC)
      if (mode == I387_CW_FLOOR)
      if (mode == I387_CW_CEIL)
      if (mode == I387_CW_MASK_PM)
      gcc_unreachable ();

  return I387_CW_ANY;
}

/* Return mode that entity must be switched into
   prior to the execution of insn.  */

ix86_mode_needed (int entity, rtx insn)
{
      return ix86_avx_u128_mode_needed (insn);
      return ix86_i387_mode_needed (entity, insn);
      gcc_unreachable ();
}
/* Check if a 256bit AVX register is referenced in stores.  */

ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
{
  if (ix86_check_avx256_register (&dest, NULL))
    {
      bool *used = (bool *) data;

/* Calculate mode of upper 128bit AVX registers after the insn.  */

ix86_avx_u128_mode_after (int mode, rtx insn)
{
  rtx pat = PATTERN (insn);

  if (vzeroupper_operation (pat, VOIDmode)
      || vzeroall_operation (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit registers used in the function return register.  */
      bool avx_reg256_found = false;
      note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
      if (!avx_reg256_found)
        return AVX_U128_CLEAN;

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit registers, the mode was already changed
     to DIRTY from MODE_NEEDED.  */

/* Return the mode that an insn results in.  */

ix86_mode_after (int entity, int mode, rtx insn)
{
      return ix86_avx_u128_mode_after (mode, insn);
      gcc_unreachable ();
}
ix86_avx_u128_mode_entry (void)
{
  /* Entry mode is set to AVX_U128_DIRTY if there are
     256bit modes used in function arguments.  */
  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
       arg = TREE_CHAIN (arg))
    {
      rtx incoming = DECL_INCOMING_RTL (arg);

      if (incoming && ix86_check_avx256_register (&incoming, NULL))
        return AVX_U128_DIRTY;

  return AVX_U128_CLEAN;
}

/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

ix86_mode_entry (int entity)
{
      return ix86_avx_u128_mode_entry ();
      return I387_CW_ANY;
      gcc_unreachable ();
}

ix86_avx_u128_mode_exit (void)
{
  rtx reg = crtl->return_rtx;

  /* Exit mode is set to AVX_U128_DIRTY if there are
     256bit modes used in the function return register.  */
  if (reg && ix86_check_avx256_register (&reg, NULL))
    return AVX_U128_DIRTY;

  return AVX_U128_CLEAN;
}

/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

ix86_mode_exit (int entity)
{
      return ix86_avx_u128_mode_exit ();
      return I387_CW_ANY;
      gcc_unreachable ();
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_insn_for_size_p ())

      case I387_CW_TRUNC:
        /* round toward zero (truncate) */
        emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
        slot = SLOT_CW_TRUNC;

      case I387_CW_FLOOR:
        /* round down toward -oo */
        emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
        emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
        slot = SLOT_CW_FLOOR;

        /* round up toward +oo */
        emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
        emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
        slot = SLOT_CW_CEIL;

      case I387_CW_MASK_PM:
        /* mask precision exception for nearbyint() */
        emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
        slot = SLOT_CW_MASK_PM;

        gcc_unreachable ();

      case I387_CW_TRUNC:
        /* round toward zero (truncate) */
        emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
        slot = SLOT_CW_TRUNC;

      case I387_CW_FLOOR:
        /* round down toward -oo */
        emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
        slot = SLOT_CW_FLOOR;

        /* round up toward +oo */
        emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
        slot = SLOT_CW_CEIL;

      case I387_CW_MASK_PM:
        /* mask precision exception for nearbyint() */
        emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
        slot = SLOT_CW_MASK_PM;

        gcc_unreachable ();

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
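
/* For reference (x87 architectural facts, not specific to this file):
   bits 10-11 of the control word form the rounding-control field, so
   OR-ing in 0x0c00 selects round-toward-zero, clearing the field and
   OR-ing 0x0400 selects round-down, and 0x0800 selects round-up;
   bit 5 (0x0020) is the precision-exception mask used for
   nearbyint().  */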
/* Emit vzeroupper.  */

ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
{
  /* Cancel automatic vzeroupper insertion if there are
     live call-saved SSE registers at the insertion point.  */

  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])

      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
        if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])

  emit_insn (gen_avx_vzeroupper ());
}

/* Generate one or more insns to set ENTITY to MODE.  */

ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
{
      if (mode == AVX_U128_CLEAN)
        ix86_avx_emit_vzeroupper (regs_live);
      if (mode != I387_CW_ANY
          && mode != I387_CW_UNINITIALIZED)
        emit_i387_cw_initialization (mode);
      gcc_unreachable ();
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

    output_asm_insn ("fisttp%Z0\t%0", operands);

      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
        output_asm_insn ("fistp%Z0\t%0", operands);
        output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%2", operands);
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";

      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);

  return opno ? "fstp\t%y1" : "fstp\t%y0";
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

      cmp_op0 = operands[0];
      cmp_op1 = operands[1];

      cmp_op0 = operands[1];
      cmp_op1 = operands[2];

      if (GET_MODE (operands[0]) == SFmode)
          return "%vucomiss\t{%1, %0|%0, %1}";
          return "%vcomiss\t{%1, %0|%0, %1}";
          return "%vucomisd\t{%1, %0|%0, %1}";
          return "%vcomisd\t{%1, %0|%0, %1}";

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
        {
          output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
          return output_387_ffreep (operands, 1);

        return "ftst\n\tfnstsw\t%0";

  if (STACK_REG_P (cmp_op1)
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */

          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return output_387_ffreep (operands, 0);

            return "fucompp\n\tfnstsw\t%0";
            return "fcompp\n\tfnstsw\t%0";

      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
        "fcom%Z2\t%y2\n\tfnstsw\t%0",
        "fcomp%Z2\t%y2\n\tfnstsw\t%0",
        "fucom%Z2\t%y2\n\tfnstsw\t%0",
        "fucomp%Z2\t%y2\n\tfnstsw\t%0",

        "ficom%Z2\t%y2\n\tfnstsw\t%0",
        "ficomp%Z2\t%y2\n\tfnstsw\t%0",

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
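
      /* Worked example (illustrative): with fcomi available
         (eflags_p = 1), a floating-point cmp_op1 (the MODE_INT test
         contributes 0), an ordered compare (unordered_p = 0) and a
         dying stack top (stack_top_dies = 1), mask = 8 + 0 + 0 + 1 = 9,
         which selects "fcomip\t{%y1, %0|%0, %y1}" from the table
         above.  */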
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

    directive = ASM_QUAD;
  gcc_assert (!TARGET_64BIT);

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
  gcc_assert (!TARGET_64BIT);

  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
             directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);

    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
                 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

ix86_expand_clear (rtx dest)
{
  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
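
/* Usage sketch (illustrative; AX_REG used as an example hard register):
     ix86_expand_clear (gen_rtx_REG (SImode, AX_REG));
   on the usual speed-optimized path this emits "xorl %eax, %eax"
   together with a FLAGS_REG clobber, falling back to "movl $0, %eax"
   only when TARGET_USE_MOV0 holds and the insn is optimized for
   size.  */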
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  enum tls_model model;

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
          op1 = legitimize_tls_address (op1, model, true);
          op1 = force_operand (op1, op0);
            op1 = convert_to_mode (mode, op1, 1);
      else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)

  else if (GET_CODE (op1) == CONST
           && GET_CODE (XEXP (op1, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);

      model = SYMBOL_REF_TLS_MODEL (symbol);
        tmp = legitimize_tls_address (symbol, model, true);
        tmp = legitimize_pe_coff_symbol (symbol, true);

          tmp = force_operand (tmp, NULL);
          tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
                                     op0, 1, OPTAB_DIRECT);
            op1 = convert_to_mode (mode, tmp, 1);
  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
        {
          /* dynamic-no-pic */
          if (MACHOPIC_INDIRECT)
            {
              rtx temp = ((reload_in_progress
                           || ((op0 && REG_P (op0))
                          ? op0 : gen_reg_rtx (Pmode));
              op1 = machopic_indirect_data_reference (op1, temp);
                op1 = machopic_legitimize_pic_address (op1, mode,
                                                       temp == op1 ? 0 : temp);

          if (op0 != op1 && GET_CODE (op0) != MEM)
            {
              rtx insn = gen_rtx_SET (VOIDmode, op0, op1);

          if (GET_CODE (op0) == MEM)
            op1 = force_reg (Pmode, op1);

              if (GET_CODE (temp) != REG)
                temp = gen_reg_rtx (Pmode);
              temp = legitimize_pic_address (op1, temp);

          /* dynamic-no-pic */
            op1 = force_reg (mode, op1);
          else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
            {
              rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
              op1 = legitimize_pic_address (op1, reg);
                op1 = convert_to_mode (mode, op1, 1);

          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
         to get them CSEed.  */
      if (can_create_pseudo_p ()
          && (mode == DImode) && TARGET_64BIT
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
        op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
          && FLOAT_MODE_P (mode)
          && GET_CODE (op1) == CONST_DOUBLE)
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          op1 = validize_mem (force_const_mem (mode, op1));
          if (!register_operand (op0, mode))
            {
              rtx temp = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
              emit_move_insn (op0, temp);

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
          || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
        op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
          && !register_operand (op1, mode))
        op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*load_unaligned) (rtx, rtx);
  rtx (*store_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
      gcc_unreachable ();
      extract = gen_avx_vextractf128v32qi;
      load_unaligned = gen_avx_loaddqu256;
      store_unaligned = gen_avx_storedqu256;

      extract = gen_avx_vextractf128v8sf;
      load_unaligned = gen_avx_loadups256;
      store_unaligned = gen_avx_storeups256;

      extract = gen_avx_vextractf128v4df;
      load_unaligned = gen_avx_loadupd256;
      store_unaligned = gen_avx_storeupd256;

      if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
        {
          rtx r = gen_reg_rtx (mode);
          m = adjust_address (op1, mode, 0);
          emit_move_insn (r, m);
          m = adjust_address (op1, mode, 16);
          r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
          emit_move_insn (op0, r);

        emit_insn (load_unaligned (op0, op1));

  else if (MEM_P (op0))
    {
      if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
        {
          m = adjust_address (op0, mode, 0);
          emit_insn (extract (m, op1, const0_rtx));
          m = adjust_address (op0, mode, 16);
          emit_insn (extract (m, op1, const1_rtx));

        emit_insn (store_unaligned (op0, op1));

    gcc_unreachable ();
}
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
     if (x86_sse_partial_reg_dependency == true)

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
     if (x86_sse_partial_reg_dependency == true)

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
     if (x86_sse_split_regs == true)
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
      && GET_MODE_SIZE (mode) == 32)
    {
      switch (GET_MODE_CLASS (mode))
        case MODE_VECTOR_INT:
          op0 = gen_lowpart (V32QImode, op0);
          op1 = gen_lowpart (V32QImode, op1);
        case MODE_VECTOR_FLOAT:
          ix86_avx256_split_vector_move_misalign (op0, op1);
          gcc_unreachable ();

      /* ??? If we have typed data, then it would appear that using
         movdqu is the only way to get unaligned data loaded with ...  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          /* We will eventually emit movups based on insn attributes.  */
          emit_insn (gen_sse2_loaddqu (op0, op1));
      else if (TARGET_SSE2 && mode == V2DFmode)
              || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
              || optimize_insn_for_size_p ())
            {
              /* We will eventually emit movups based on insn attributes.  */
              emit_insn (gen_sse2_loadupd (op0, op1));

              /* When SSE registers are split into halves, we can avoid
                 writing to the top half twice.  */
              if (TARGET_SSE_SPLIT_REGS)
                  emit_clobber (op0);

              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);

              m = adjust_address (op1, DFmode, 0);
              emit_insn (gen_sse2_loadlpd (op0, zero, m));
              m = adjust_address (op1, DFmode, 8);
              emit_insn (gen_sse2_loadhpd (op0, op0, m));

              || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
              || optimize_insn_for_size_p ())
            {
              op0 = gen_lowpart (V4SFmode, op0);
              op1 = gen_lowpart (V4SFmode, op1);
              emit_insn (gen_sse_loadups (op0, op1));

          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (op0, CONST0_RTX (mode));
            emit_clobber (op0);

          if (mode != V4SFmode)
            op0 = gen_lowpart (V4SFmode, op0);

          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (op0, op0, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (op0, op0, m));
  else if (MEM_P (op0))
    {
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          /* We will eventually emit movups based on insn attributes.  */
          emit_insn (gen_sse2_storedqu (op0, op1));
      else if (TARGET_SSE2 && mode == V2DFmode)
              || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
              || optimize_insn_for_size_p ())
            /* We will eventually emit movups based on insn attributes.  */
            emit_insn (gen_sse2_storeupd (op0, op1));

              m = adjust_address (op0, DFmode, 0);
              emit_insn (gen_sse2_storelpd (m, op1));
              m = adjust_address (op0, DFmode, 8);
              emit_insn (gen_sse2_storehpd (m, op1));

          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);

              || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
              || optimize_insn_for_size_p ())
            {
              op0 = gen_lowpart (V4SFmode, op0);
              emit_insn (gen_sse_storeups (op0, op1));

              m = adjust_address (op0, V2SFmode, 0);
              emit_insn (gen_sse_storelps (m, op1));
              m = adjust_address (op0, V2SFmode, 8);
              emit_insn (gen_sse_storehps (m, op1));

    gcc_unreachable ();
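
/* Illustrative summary of the V2DF load paths above (assuming SSE2):
   when an unaligned-load-optimal tuning flag is set, the whole load is
   a single movupd; otherwise the register is first zeroed (or merely
   clobbered when TARGET_SSE_SPLIT_REGS) and the two DFmode halves are
   filled separately via sse2_loadlpd / sse2_loadhpd, keeping the
   dependency depth at one.  */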
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

ix86_expand_push (enum machine_mode mode, rtx x)
{
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
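
/* Effect sketch (illustrative): for a 16-byte mode this expands to the
   equivalent of
     sub $16, %esp      # explicit stack-pointer adjustment
     mov ..., (%esp)    # store X at the new top of stack
   rather than a real push instruction.  */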
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
  if (rtx_equal_p (dst, src2))

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
  if (immediate_operand (src1, mode))

  /* Lowest priority is that memory references should come second.  */

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
        {
          src2 = force_reg (mode, src2);

        src2 = force_reg (mode, src2);

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
      && GET_MODE_CLASS (mode) == MODE_INT
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
   the given OPERANDS.  */

void
ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
				     rtx operands[])
{
  rtx op1 = NULL_RTX, op2 = NULL_RTX;
  if (GET_CODE (operands[1]) == SUBREG)
    {
      op1 = operands[1];
      op2 = operands[2];
    }
  else if (GET_CODE (operands[2]) == SUBREG)
    {
      op1 = operands[2];
      op2 = operands[1];
    }
  /* Optimize (__m128i) d | (__m128i) e and similar code
     when d and e are float vectors into float vector logical
     insn.  In C/C++ without using intrinsics there is no other way
     to express vector logical operation on float vectors than
     to cast them temporarily to integer vectors.  */
  if (op1
      && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
      && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
      && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
      && SUBREG_BYTE (op1) == 0
      && (GET_CODE (op2) == CONST_VECTOR
	  || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
	      && SUBREG_BYTE (op2) == 0))
      && can_create_pseudo_p ())
    {
      rtx dst;
      switch (GET_MODE (SUBREG_REG (op1)))
	{
	case V4SFmode:
	case V8SFmode:
	case V2DFmode:
	case V4DFmode:
	  dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
	  if (GET_CODE (op2) == CONST_VECTOR)
	    {
	      op2 = gen_lowpart (GET_MODE (dst), op2);
	      op2 = force_reg (GET_MODE (dst), op2);
	    }
	  else
	    {
	      op1 = operands[1];
	      op2 = SUBREG_REG (operands[2]);
	      if (!nonimmediate_operand (op2, GET_MODE (dst)))
		op2 = force_reg (GET_MODE (dst), op2);
	    }
	  op1 = SUBREG_REG (op1);
	  if (!nonimmediate_operand (op1, GET_MODE (dst)))
	    op1 = force_reg (GET_MODE (dst), op1);
	  emit_insn (gen_rtx_SET (VOIDmode, dst,
				  gen_rtx_fmt_ee (code, GET_MODE (dst),
						  op1, op2)));
	  emit_move_insn (operands[0], gen_lowpart (mode, dst));
	  return;
	default:
	  break;
	}
    }
  if (!nonimmediate_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
  if (!nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  ix86_fixup_binary_operands_no_copy (code, mode, operands);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_fmt_ee (code, mode, operands[1],
					  operands[2])));
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has the result
	 of the 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
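
/* Illustrative sketch, not part of GCC: the dataflow the splitter above
   emits, written as plain C.  The function name example_divmod_split is
   hypothetical.  If neither dividend nor divisor has bits set above the
   low eight, a single 8-bit unsigned divide (udivmodhiqi3) produces the
   quotient in AL and the remainder in AH.  */

static unsigned int
example_divmod_split (unsigned int a, unsigned int b, unsigned int *rem)
{
  if (((a | b) & ~0xffu) == 0)
    {
      /* Fast path: one 8-bit unsigned divide yields both results.  */
      *rem = a % b;		/* remainder, extracted from AH */
      return a / b;		/* quotient, zero-extended from AL */
    }

  /* Slow path: the original full-width signed/unsigned divmod.  */
  *rem = a % b;
  return a / b;
}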
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to the next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
	&& !DF_REF_IS_ARTIFICIAL (*def_rec)
	&& (regno1 == DF_REF_REGNO (*def_rec)
	    || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}
/* Function checks if instruction INSN uses register number
   REGNO as a part of an address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx insn, int distance,
			       rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx insn, int distance, rtx start,
			bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  The instruction sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Silvermont if using a 2-source or 3-source LEA for
     non-destructive destination purposes, or due to wanting
     ability to use SCALE, the use of LEA is justified.  */
  if (ix86_tune == PROCESSOR_SLM)
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non-AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non-lea variants have the same priority.  Currently
	 we prefer lea for 64 bit code and non-lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With a longer definition distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in a memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
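
/* Illustrative sketch, not part of GCC: the decision rule above in
   isolation.  example_prefer_lea is hypothetical; prefer_lea_when_tied
   stands in for TARGET_64BIT || IX86_LEA_PRIORITY.  dist_define is the
   backward distance to the nearest non-AGU definition of an input,
   dist_use the forward distance to the next address-use of the result;
   both are -1 when nothing was found.  */

static int
example_prefer_lea (int dist_define, int dist_use, int split_cost,
		    int prefer_lea_when_tied)
{
  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* No close non-AGU definition: lea is safe, unless nothing ties
	 the result to an address and splitting would be free.  */
      if (dist_use < 0 && split_cost == 0)
	return prefer_lea_when_tied;
      return 1;
    }

  dist_define += split_cost + IX86_LEA_PRIORITY;
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;
  return dist_define >= dist_use;
}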
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  for (use = DF_INSN_USES (insn); *use; use++)
	    if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out (bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with a non-destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if a non-legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return true;

  regno0 = true_regnum (operands[0]);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if we split the lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if the non-destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;
    }

  /* Subtract the price of lea.  */
  split_cost -= 1;

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				parts.scale > 1);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Return true if regno1 def is nearest to the insn.  */

static bool
find_nearest_reg_def (rtx insn, int regno1, int regno2)
{
  rtx prev = insn;
  rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));

  if (insn == start)
    return false;
  while (prev && prev != start)
    {
      if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
	{
	  prev = PREV_INSN (prev);
	  continue;
	}
      if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
	return true;
      else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
	return false;

      prev = PREV_INSN (prev);
    }

  /* None of the regs is defined in the bb.  */
  return false;
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
{
  unsigned int regno0, regno1, regno2;
  struct ix86_address parts;
  rtx target, tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  target = gen_lowpart (mode, operands[0]);

  regno0 = true_regnum (target);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    {
      parts.base = gen_lowpart (mode, parts.base);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      parts.index = gen_lowpart (mode, parts.index);
      regno2 = true_regnum (parts.index);
    }

  if (parts.disp)
    parts.disp = gen_lowpart (mode, parts.disp);

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r1 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, target, parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, target,
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, target, parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, target, parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert (parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      rtx tmp1;

	      /* Find better operand for SET instruction, depending
		 on which definition is farther from the insn.  */
	      if (find_nearest_reg_def (insn, regno1, regno2))
		tmp = parts.index, tmp1 = parts.base;
	      else
		tmp = parts.base, tmp1 = parts.index;

	      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

	      if (parts.disp && parts.disp != const0_rtx)
		ix86_emit_binop (PLUS, mode, target, parts.disp);

	      ix86_emit_binop (PLUS, mode, target, tmp1);
	      return;
	    }

	  ix86_emit_binop (PLUS, mode, target, tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, target, parts.disp);
    }
}
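
/* Illustrative sketch, not part of GCC: the ALU sequence the splitter
   above emits for dest = base + index * scale + disp when dest does not
   overlap the sources and scale is a power of two:

       mov  dest, index
       shl  dest, log2 (scale)
       add  dest, base
       add  dest, disp

   example_split_lea is hypothetical; it shows the same dataflow in C.  */

static long
example_split_lea (long base, long index, int log2_scale, long disp)
{
  long dest = index;		/* mov: non-destructive destination */
  dest <<= log2_scale;		/* shl replaces the address scale */
  dest += base;			/* add index*scale to base */
  dest += disp;			/* add the displacement, if non-zero */
  return dest;
}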
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO (set_dest) == REGNO (shift_count))
	    return true;
	}
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2])
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
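
/* Illustrative sketch, not part of GCC: the scalar arithmetic behind the
   splitter above.  example_double_to_u32 is hypothetical.  Values of at
   least 2^31 are reduced by 2^31 before the signed cvttsd2si-style
   truncation (the compare/AND/MINUS above), and the dropped top bit is
   xor-ed back in (the shift/XOR above).  Assumes IEEE double and an
   input in [0, 2^32).  */

static unsigned int
example_double_to_u32 (double d)
{
  const double two31 = 2147483648.0;
  unsigned int xor_mask = 0;

  if (d >= two31)
    {
      d -= two31;		/* zero_or_two31, selected by the compare */
      xor_mask = 0x80000000u;	/* sign bit recreated by the shift */
    }
  return (unsigned int) (int) d ^ xor_mask;
}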
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES_TO_VEC)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
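
/* Illustrative sketch, not part of GCC: the exponent-bias trick used
   above, on scalars.  example_u64_to_double is hypothetical.  ORing
   0x43300000 (0x1.0p52) above the low half and 0x45300000 (0x1.0p84)
   above the high half builds two exact doubles; subtracting the biases
   and adding the halves reconstructs the value with one rounding.
   Assumes IEEE double.  */

static double
example_u64_to_double (unsigned long long v)
{
  union { unsigned long long u; double d; } lo, hi;

  lo.u = 0x4330000000000000ULL | (v & 0xffffffffu);   /* 0x1.0p52 + low  */
  hi.u = 0x4530000000000000ULL | (v >> 32);           /* 0x1.0p84 + high */
  return (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);       /* remove biases   */
}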
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
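
/* Illustrative sketch, not part of GCC: the expansion above on scalars.
   example_u32_to_double is hypothetical.  Adding -2147483648 flips the
   sign bit so the value fits in signed SImode; converting and adding
   2^31 back in floating point is exact because doubles hold all 32-bit
   integers.  Assumes IEEE double.  */

static double
example_u32_to_double (unsigned int x)
{
  int biased = (int) (x + 0x80000000u);	/* the PLUS (-2147483647 - 1) */
  return (double) biased + 0x1.0p31;	/* floatsidf2, then the fp PLUS */
}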
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
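
/* Illustrative sketch, not part of GCC: the expansion above on scalars.
   example_s64_to_double is hypothetical.  The high half converts as a
   signed 32-bit value and is scaled by 2^32 exactly; the low half goes
   through the unsigned SImode path; the final add rounds once.  Assumes
   IEEE double and an arithmetic right shift of negative values.  */

static double
example_s64_to_double (long long v)
{
  double hi = (double) (int) (v >> 32);	 /* floatsidf2 on the highpart */
  double lo = (double) (unsigned int) v; /* unsigned SImode lowpart */
  return hi * 0x1.0p32 + lo;		 /* exact MULT, one rounding in PLUS */
}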
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */
void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
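
/* Illustrative sketch, not part of GCC: the expansion above on scalars.
   example_u32_to_float is hypothetical.  Each 16-bit half converts to
   float exactly; scaling the high half by 2^16 is exact, so the whole
   conversion rounds only once, in the final add.  Assumes IEEE float.  */

static float
example_u32_to_float (unsigned int x)
{
  float hi = (float) (x >> 16);		/* exact: fits in 24-bit mantissa */
  float lo = (float) (x & 0xffff);	/* exact: fits in 24-bit mantissa */
  return hi * 65536.0f + lo;		/* exact MULT, one rounding in PLUS */
}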
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to a vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
				NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
				OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
				0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
				 gen_lowpart (intmode, tmp[0]),
				 GEN_INT (31), NULL_RTX, 0,
				 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
				   gen_lowpart (intmode, tmp[0]),
				   two31, NULL_RTX, 0,
				   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
			      0, OPTAB_DIRECT);
}
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT) 1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT) 1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT) 1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT) 1 << (shift - HOST_BITS_PER_WIDE_INT);

	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
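
/* Illustrative sketch, not part of GCC: how the masks built above are
   consumed.  With a sign-bit mask (INVERT false) and its complement
   (INVERT true), SSE abs and neg are single bitwise operations on the
   value's representation.  The example_* helpers are hypothetical and
   assume IEEE float.  */

static unsigned int
example_float_bits (float f)
{
  union { float f; unsigned int u; } v;
  v.f = f;
  return v.u;
}

static float
example_bits_float (unsigned int u)
{
  union { float f; unsigned int u; } v;
  v.u = u;
  return v.f;
}

static float
example_fabs (float f)		/* andps with the inverted mask */
{
  return example_bits_float (example_float_bits (f) & 0x7fffffffu);
}

static float
example_fneg (float f)		/* xorps with the sign-bit mask */
{
  return example_bits_float (example_float_bits (f) ^ 0x80000000u);
}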
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
	par = gen_rtvec (2, set, use);
      else
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  par = gen_rtvec (3, set, use, clob);
	}
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
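
/* Illustrative sketch, not part of GCC: the two-mask dataflow of the
   variable-operand splitter above, on scalar bits, reusing the
   hypothetical example_* helpers from the abs/neg sketch.  scratch
   keeps the sign of op1 (AND with MASK); dest keeps the magnitude of
   op0 (AND with NMASK); the final IOR merges them.  */

static float
example_copysign (float magnitude, float sign_source)
{
  unsigned int mask = 0x80000000u;	/* the sign-bit mask */
  unsigned int scratch = example_float_bits (sign_source) & mask;
  unsigned int dest = example_float_bits (magnitude) & ~mask;	/* nmask */
  return example_bits_float (dest | scratch);
}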
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      /* For other cases Carry flag is not required.  */
      return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
	 for a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions use the number of instructions as the cost
   metric.  In the future this should be tweaked to compute bytes for
   optimize_size and take into account performance of various instructions
   on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
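
/* The 0x45, 0x05, 0x40 and 0x04 immediates used above select the x87
   status-word condition bits as they land in AH after fnstsw: C0 is
   0x01, C2 is 0x04 and C3 is 0x40 of AH.  The following is a minimal
   standalone sketch of how those bits classify an fcom result; it is
   not part of GCC, and the helper name is made up for exposition.  */

static const char *
illustrate_fcom_bits (unsigned char ah)
{
  /* After fcom: greater => C3=C2=C0=0; less => C0=1; equal => C3=1;
     unordered (NaN) => C3=C2=C0=1, i.e. (ah & 0x45) == 0x45.  */
  if ((ah & 0x45) == 0x45)
    return "unordered";
  if (ah & 0x01)
    return "less";
  if (ah & 0x40)
    return "equal";
  return "greater";
}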
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
	goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
	  {
	    tmp = op0, op0 = op1, op1 = tmp;
	    code = swap_condition (code);
	  }

	split_double_mode (mode, &op0, 1, lo+0, hi+0);
	split_double_mode (mode, &op1, 1, lo+1, hi+1);

	submode = mode == DImode ? SImode : DImode;

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_insn_for_size_p ()
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_expand_branch (code, tmp, const0_rtx, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  Similarly for low word -1 and
	   less-or-equal-than or greater-than.  */

	if (CONST_INT_P (hi[1]))
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      if (lo[1] == const0_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    case LE: case LEU: case GT: case GTU:
	      if (lo[1] == constm1_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, hi[0], hi[1], label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, hi[0], hi[1], label2);

	ix86_expand_branch (code3, lo[0], lo[1], label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
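
/* An illustrative, standalone sketch of the double-word branch strategy
   above, written over explicit 32-bit halves; it is not part of GCC and
   the function names are made up for exposition (assumes 32-bit int).
   Equality uses the (hi0^hi1)|(lo0^lo1) trick; ordered compares test
   the high words first and fall back to an unsigned compare of the low
   words, exactly as the comment in ix86_expand_branch describes.  */

static int
dw_eq (unsigned int lo0, unsigned int hi0,
       unsigned int lo1, unsigned int hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}

static int
dw_lt_signed (unsigned int lo0, int hi0, unsigned int lo1, int hi1)
{
  if (hi0 < hi1)
    return 1;			/* if (hi(a) < hi(b)) goto true  */
  if (hi0 > hi1)
    return 0;			/* if (hi(a) > hi(b)) goto false */
  return lo0 < lo1;		/* unsigned compare of low words */
}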
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1,
				rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand comparison using arithmetic that is not
	 too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
	return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
	return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
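
/* The rewrites above rest on small unsigned-compare identities.  As a
   standalone check -- not part of GCC, assumes 32-bit two's-complement
   unsigned int -- the conversions performed are:
     a == 0        is equivalent to  (unsigned) a < 1
     a >  b (GTU)  is equivalent to  a >= b + 1, when b + 1 does not wrap
     a >= 0        is equivalent to  (unsigned) a < 0x80000000.  */

static int
carry_identities_hold (unsigned int a, unsigned int b)
{
  int ok = 1;
  ok &= ((a == 0) == (a < 1u));
  if (b != ~0u)				/* b + 1 must not wrap */
    ok &= ((a > b) == (a >= b + 1u));
  ok &= (((int) a >= 0) == (a < 0x80000000u));
  return ok;
}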
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
	 sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      rtx flags;
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      flags = XEXP (compare_op, 0);

	      if (GET_MODE (flags) == CCFPmode
		  || GET_MODE (flags) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code
		    = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      else
		{
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			        (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op,
			      reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, op0)
		  || reg_overlap_mentioned_p (out, op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
						 flags, compare_op));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  diff = ct - cf;
		}
	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
	    }

	  if (diff == 1)
	    {
	      /* cmpl op0,op1; sbbl dest,dest; [addl dest, ct]  */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /* cmpl op0,op1; sbbl dest,dest; orl $ct, dest  */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /* cmpl op0,op1; sbbl dest,dest; notl dest; [addl dest, cf]  */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /* cmpl op0,op1
		 sbbl dest,dest
		 [notl dest]
		 andl cf - ct, dest
		 [addl dest, ct]  */
	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp,
					    copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return true;
	}
      if (diff < 0)
	{
	  enum machine_mode cmp_mode = GET_MODE (op0);
	  HOST_WIDE_INT tmp;

	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;

	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
	    {
	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
	  && CONST_INT_P (op1))
	{
	  if (op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
	  && GET_MODE (op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */
	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /* notl op1	(if necessary)
		 sarl $31, op1
		 orl cf, op1  */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return true;
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode
	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	{
	  /* xorl dest,dest
	     cmpl op1,op2
	     setcc dest
	     lea cf(dest*(ct-cf)),dest

	     Size 14.

	     This also catches the degenerate setcc-only case.  */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
					copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}
      /* General case:			Jumpful:
	   xorl dest,dest		cmpl op1, op2
	   cmpl op1, op2		movl ct, dest
	   setcc dest			jcc 1f
	   decl dest			movl cf, dest
	   andl (cf-ct),dest		1:
	   addl ct,dest

	 Size 20.			Size 14.

	 This is reasonably steep, but branch mispredict costs are
	 high on modern cpus, so consider failing only if optimizing
	 for space.  */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST (optimize_insn_for_speed_p (),
			  false) >= 2)
	{
	  if (cf == 0)
	    {
	      enum machine_mode cmp_mode = GET_MODE (op0);

	      cf = ct;
	      ct = 0;

	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
		{
		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != UNKNOWN)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
					 constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out),
				       GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
	return false;

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else if (CONST_INT_P (operands[3]))
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else
	return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /* For comparison with above,

     movl cf,dest
     movl ct,tmp
     cmpl op1,op2
     cmovcc tmp,dest

     Size 15.  */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  return true;
}
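
/* The "jumpless" sequences above all reduce to arithmetic on an
   all-ones/all-zeros mask.  A standalone sketch of the core identity,
   not part of GCC (assumes two's complement and that ct - cf does not
   overflow): with mask = cond ? -1 : 0, which is what sbb produces,
   (mask & (ct - cf)) + cf evaluates to cond ? ct : cf.  */

static int
branchless_select (int cond, int ct, int cf)
{
  int mask = -(cond != 0);	/* all ones when cond, else zero */
  return (mask & (ct - cf)) + cf;
}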
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute
   commutative operators.  The POP0/POP1 operands are updated in place.
   The new comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
	break;
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
	break;
      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
	break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
	 comparison operands to transform into something that is
	 supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
	op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
			      gen_rtx_IF_THEN_ELSE (mode, cmp,
						    op_true,
						    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
	op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
	{
	case V4SFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvps;
	  break;
	case V2DFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvpd;
	  break;
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	  if (TARGET_SSE4_1)
	    {
	      gen = gen_sse4_1_pblendvb;
	      dest = gen_lowpart (V16QImode, dest);
	      op_false = gen_lowpart (V16QImode, op_false);
	      op_true = gen_lowpart (V16QImode, op_true);
	      cmp = gen_lowpart (V16QImode, cmp);
	    }
	  break;
	case V8SFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvps256;
	  break;
	case V4DFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvpd256;
	  break;
	case V32QImode:
	case V16HImode:
	case V8SImode:
	case V4DImode:
	  if (TARGET_AVX2)
	    {
	      gen = gen_avx2_pblendvb;
	      dest = gen_lowpart (V32QImode, dest);
	      op_false = gen_lowpart (V32QImode, op_false);
	      op_true = gen_lowpart (V32QImode, op_true);
	      cmp = gen_lowpart (V32QImode, cmp);
	    }
	  break;
	default:
	  break;
	}

      if (gen != NULL)
	emit_insn (gen (dest, op_false, op_true, cmp));
      else
	{
	  op_true = force_reg (mode, op_true);

	  t2 = gen_reg_rtx (mode);
	  if (optimize)
	    t3 = gen_reg_rtx (mode);
	  else
	    t3 = dest;

	  x = gen_rtx_AND (mode, op_true, cmp);
	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));

	  x = gen_rtx_NOT (mode, cmp);
	  x = gen_rtx_AND (mode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));

	  x = gen_rtx_IOR (mode, t3, t2);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
    }
}
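
/* The fallback sequence above computes dest = (cmp & op_true)
   | (~cmp & op_false) lane by lane.  A standalone scalar sketch of the
   same bitwise blend, not part of GCC: each vector lane behaves like
   this 32-bit word when cmp is all-ones or all-zeros per lane.  */

static unsigned int
bitwise_blend (unsigned int cmp, unsigned int op_true, unsigned int op_false)
{
  return (cmp & op_true) | (~cmp & op_false);
}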
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (op1);
      if (cmode != mode)
	return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
	return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
				     operands[2], operands[3]))
	return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
				 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
	{
	case LTGT:
	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = AND;
	  break;
	case UNEQ:
	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = IOR;
	  break;
	default:
	  gcc_unreachable ();
	}
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
				 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
	{
	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0,
					 GEN_INT (shift),
					 operands[0], 1, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
      else if (GET_MODE_INNER (data_mode) != DImode
	       && vector_all_ones_operand (negop, data_mode))
	{
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0,
					 GEN_INT (shift),
					 operands[0], 0, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
	  || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
	{
	case EQ:
	case GT:
	case GTU:
	  break;

	case NE:
	case LE:
	case LEU:
	  code = reverse_condition (code);
	  negate = true;
	  break;

	case GE:
	case GEU:
	  code = reverse_condition (code);
	  negate = true;
	  /* FALLTHRU */

	case LT:
	case LTU:
	  code = swap_condition (code);
	  x = cop0, cop0 = cop1, cop1 = x;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
	{
	  switch (code)
	    {
	    case EQ:
	      /* SSE4.1 supports EQ.  */
	      if (!TARGET_SSE4_1)
		return false;
	      break;

	    case GT:
	    case GTU:
	      /* SSE4.2 supports GT/GTU.  */
	      if (!TARGET_SSE4_2)
		return false;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      /* Unsigned parallel compare is not supported by the hardware.
	 Play some tricks to turn this into a signed comparison
	 against 0.  */
      if (code == GTU)
	{
	  cop0 = force_reg (mode, cop0);

	  switch (mode)
	    {
	    case V8SImode:
	    case V4DImode:
	    case V4SImode:
	    case V2DImode:
	      {
		rtx t1, t2, mask;
		rtx (*gen_sub3) (rtx, rtx, rtx);

		switch (mode)
		  {
		  case V8SImode: gen_sub3 = gen_subv8si3; break;
		  case V4DImode: gen_sub3 = gen_subv4di3; break;
		  case V4SImode: gen_sub3 = gen_subv4si3; break;
		  case V2DImode: gen_sub3 = gen_subv2di3; break;
		  default:
		    gcc_unreachable ();
		  }
		/* Subtract (-(INT MAX) - 1) from both operands to make
		   them signed.  */
		mask = ix86_build_signbit_mask (mode, true, false);
		t1 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t1, cop0, mask));

		t2 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t2, cop1, mask));

		cop0 = t1;
		cop1 = t2;
		code = GT;
	      }
	      break;

	    case V32QImode:
	    case V16HImode:
	    case V16QImode:
	    case V8HImode:
	      /* Perform a parallel unsigned saturating subtraction.  */
	      x = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, x,
				      gen_rtx_US_MINUS (mode, cop0, cop1)));

	      cop0 = x;
	      cop1 = CONST0_RTX (mode);
	      code = EQ;
	      negate = !negate;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
			       code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}
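
/* The GTU trick above biases both operands by the minimum signed value
   so that a signed compare yields the unsigned order.  A standalone
   sketch, not part of GCC; it assumes 32-bit two's-complement int,
   where converting an out-of-range unsigned value to int keeps the bit
   pattern.  Subtracting 0x80000000 modulo 2^32 flips the sign bit,
   i.e. it has the same effect as a ^ 0x80000000.  */

static int
unsigned_gt_via_signed (unsigned int a, unsigned int b)
{
  int sa = (int) (a - 0x80000000u);	/* same bits as a ^ 0x80000000 */
  int sb = (int) (b - 0x80000000u);
  return sa > sb;			/* equals a > b unsigned */
}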
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
	{
	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
	     a constant shuffle operand.  With a tiny bit of effort we can
	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
	     unfortunate but there's no avoiding it.
	     Similarly for V16HImode we don't have instructions for variable
	     shuffling, while for V32QImode we can use after preparing suitable
	     masks vpshufb; vpshufb; vpermq; vpor.  */

	  if (mode == V16HImode)
	    {
	      maskmode = mode = V32QImode;
	      w = 32;
	      e = 1;
	    }
	  else
	    {
	      maskmode = mode = V8SImode;
	      w = 8;
	      e = 4;
	    }
	  t1 = gen_reg_rtx (maskmode);

	  /* Replicate the low bits of the V4DImode mask into V8SImode:
	     mask = { A B C D }
	     t1 = { A A B B C C D D }.  */
	  for (i = 0; i < w / 2; ++i)
	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_reg (maskmode, vt);
	  mask = gen_lowpart (maskmode, mask);
	  if (maskmode == V8SImode)
	    emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
	  else
	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

	  /* Multiply the shuffle indicies by two.  */
	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
				    OPTAB_DIRECT);

	  /* Add one to the odd shuffle indicies:
	     t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
	  for (i = 0; i < w / 2; ++i)
	    {
	      vec[i * 2] = const0_rtx;
	      vec[i * 2 + 1] = const1_rtx;
	    }
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = validize_mem (force_const_mem (maskmode, vt));
	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
				    OPTAB_DIRECT);

	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
	  operands[3] = mask = t1;
	  target = gen_lowpart (mode, target);
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);
	}
      switch (mode)
	{
	case V8SImode:
	  /* The VPERMD and VPERMPS instructions already properly ignore
	     the high bits of the shuffle elements.  No need for us to
	     perform an AND ourselves.  */
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8si (target, op0, mask));
	  else
	    {
	      t1 = gen_reg_rtx (V8SImode);
	      t2 = gen_reg_rtx (V8SImode);
	      emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
	      emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
	      goto merge_two;
	    }
	  return;

	case V8SFmode:
	  mask = gen_lowpart (V8SFmode, mask);
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
	  else
	    {
	      t1 = gen_reg_rtx (V8SFmode);
	      t2 = gen_reg_rtx (V8SFmode);
	      emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
	      emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
	      goto merge_two;
	    }
	  return;

	case V4SImode:
	  /* By combining the two 128-bit input vectors into one 256-bit
	     input vector, we can use VPERMD and VPERMPS for the full
	     two-operand shuffle.  */
	  t1 = gen_reg_rtx (V8SImode);
	  t2 = gen_reg_rtx (V8SImode);
	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
	  return;

	case V4SFmode:
	  t1 = gen_reg_rtx (V8SFmode);
	  t2 = gen_reg_rtx (V8SImode);
	  mask = gen_lowpart (V4SImode, mask);
	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
	  return;
	case V32QImode:
	  t1 = gen_reg_rtx (V32QImode);
	  t2 = gen_reg_rtx (V32QImode);
	  t3 = gen_reg_rtx (V32QImode);
	  vt2 = GEN_INT (128);
	  for (i = 0; i < 32; i++)
	    vec[i] = vt2;
	  vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt = force_reg (V32QImode, vt);
	  for (i = 0; i < 32; i++)
	    vec[i] = i < 16 ? vt2 : const0_rtx;
	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt2 = force_reg (V32QImode, vt2);
	  /* From mask create two adjusted masks, which contain the same
	     bits as mask in the low 7 bits of each vector element.
	     The first mask will have the most significant bit clear
	     if it requests element from the same 128-bit lane
	     and MSB set if it requests element from the other 128-bit lane.
	     The second mask will have the opposite values of the MSB,
	     and additionally will have its 128-bit lanes swapped.
	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
	     stands for other 12 bytes.  */
	  /* The bit whether element is from the same lane or the other
	     lane is bit 4, so shift it up by 3 to the MSB position.  */
	  emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, mask),
				    GEN_INT (3)));
	  /* Clear MSB bits from the mask just in case it had them set.  */
	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
	  /* After this t1 will have MSB set for elements from other lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt2));
	  /* Clear bits other than MSB.  */
	  emit_insn (gen_andv32qi3 (t1, t1, vt));
	  /* Or in the lower bits from mask into t3.  */
	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
	  /* And invert MSB bits in t1, so MSB is set for elements from the
	     same lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
	  /* Swap 128-bit lanes in t3.  */
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  /* And or in the lower bits from mask into t1.  */
	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
	  if (one_operand_shuffle)
	    {
	      /* Each of these shuffles will put 0s in places where
		 element from the other 128-bit lane is needed, otherwise
		 will shuffle in the requested value.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
	      /* For t3 the 128-bit lanes are swapped again.  */
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					      gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      /* And oring both together leads to the result.  */
	      emit_insn (gen_iorv32qi3 (target, t1, t3));
	      return;
	    }
	  else
	    {
	      t4 = gen_reg_rtx (V32QImode);
	      /* Similarly to the above one_operand_shuffle code,
		 just for repeated twice for each operand.  merge_two:
		 code will merge the two results together.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
					      gen_lowpart (V4DImode, t4),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					      gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      emit_insn (gen_iorv32qi3 (t4, t2, t4));
	      emit_insn (gen_iorv32qi3 (t3, t1, t3));
	      t1 = t4;
	      t2 = t3;
	      goto merge_two;
	    }

	default:
	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
	  break;
	}
    }
  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
	 one_operand_shuffle special case, we avoid creating another
	 set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
				  GEN_INT (exact_log2 (e)),
				  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      if (TARGET_XOP)
	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
	 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    emit_insn (gen_xop_pperm (target, op0, op1, mask));
  else if (one_operand_shuffle)
    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
	 element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
	{
	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At which point the masking that expand_int_vcond
	     will work as desired.  */
	  rtx t3 = gen_reg_rtx (V4SImode);
	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
					const0_rtx, const0_rtx,
					const2_rtx, const2_rtx));
	  mask = t3;
	  maskmode = V4SImode;
	  e = w = 4;
	}

      for (i = 0; i < w; i++)
	vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
				  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
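
/* The control-vector arithmetic near the top of ix86_expand_vec_perm
   turns each 64-bit element index A into the dword pair { 2A, 2A+1 },
   which VPERMD then treats as the two halves of element A.  A
   standalone sketch of that widening over plain arrays; it is not part
   of GCC and exists only to make the index arithmetic concrete.  */

static void
widen_qword_indices (const unsigned int idx64[4], unsigned int idx32[8])
{
  int i;
  for (i = 0; i < 4; i++)
    {
      idx32[2 * i]     = 2 * idx64[i];	    /* low dword of element  */
      idx32[2 * i + 1] = 2 * idx64[i] + 1;  /* high dword of element */
    }
}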
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case V32QImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv16qiv16hi2;
	  else
	    unpack = gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case V16HImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv8hiv8si2;
	  else
	    unpack = gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case V8SImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv4siv4di2;
	  else
	    unpack = gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case V16QImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
	  else
	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case V8HImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
	  else
	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case V4SImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv2siv2di2;
	  else
	    unpack = gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) == 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, src));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (imode);
	  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
					 gen_lowpart (V1TImode, src),
					 GEN_INT (64)));
	}
      else
	tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
	{
	case V16QImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv16qi;
	  else
	    unpack = gen_vec_interleave_lowv16qi;
	  break;
	case V8HImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv8hi;
	  else
	    unpack = gen_vec_interleave_lowv8hi;
	  break;
	case V4SImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv4si;
	  else
	    unpack = gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (unsigned_p)
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT,
				   CONST0_RTX (imode),
				   src, pc_rtx, pc_rtx);

      emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
    }
}
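
/* Without SSE4.1 the unpack above interleaves the source with either a
   zero vector or a computed sign mask (the GT compare of 0 against the
   source yields all-ones lanes exactly where the source is negative).
   A standalone sketch of one 16-bit lane, not part of GCC; it assumes
   16-bit two's-complement short and illustrates how the interleave
   pairs each element with its extension half.  */

static unsigned int
interleave_extend_lane (unsigned short lane, int unsigned_p)
{
  unsigned short ext = unsigned_p ? 0
		       : ((short) lane < 0 ? 0xffff : 0);
  /* Low element first, then its zero/sign half, as interleave-low does.  */
  return ((unsigned int) ext << 16) | lane;
}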
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_subqi3_carry;
	  break;
	case HImode:
	  insn = gen_subhi3_carry;
	  break;
	case SImode:
	  insn = gen_subsi3_carry;
	  break;
	case DImode:
	  insn = gen_subdi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_addqi3_carry;
	  break;
	case HImode:
	  insn = gen_addhi3_carry;
	  break;
	case SImode:
	  insn = gen_addsi3_carry;
	  break;
	case DImode:
	  insn = gen_adddi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
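
/* What the adc/sbb selection above implements, in scalar form: once the
   comparison has been massaged into a carry-flag test (LTU/GEU), a
   conditional increment is just "op2 + 0 + carry", i.e. an adc with a
   zero addend; the decrement case is the sbb mirror image.  A
   standalone sketch, not part of GCC.  */

static unsigned int
cond_incr (unsigned int a, unsigned int b, unsigned int op2)
{
  unsigned int carry = a < b;	/* the carry flag after cmp a, b */
  return op2 + carry;		/* adc with a zero addend */
}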
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case TFmode:
		  real_to_target (l, &r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case XFmode:
		  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
		     long double may not be 80-bit.  */
		  real_to_target (l, &r, mode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], word_mode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts[1] || collisionparts[2]))
	{
	  if (collisionparts[1])
	    {
	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	    }
	  else
	    {
	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (ix86_gen_add3 (stack_pointer_rtx,
					  stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);

  return;
}
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
          && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
        emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
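
/* Editorial illustration (not part of the original sources): on a CPU
   whose cost table makes COUNT adds no more expensive than one constant
   shift, a speed-optimized "x <<= 2" is emitted as

       addl %eax, %eax
       addl %eax, %eax

   rather than "sall $2, %eax", and a shift by 1 always becomes a single
   add.  The register name is illustrative only; the helper operates on
   whatever operand it is handed.  */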
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > half_width)
            ix86_expand_ashl_const (high[0], count - half_width, mode);
        }
      else
        {
          gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);
        }
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen QImode-capable registers, 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
        }

      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5/6, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          enum machine_mode half_mode;
          rtx (*gen_lshr3)(rtx, rtx, rtx);
          rtx (*gen_and3)(rtx, rtx, rtx);
          rtx (*gen_xor3)(rtx, rtx, rtx);
          HOST_WIDE_INT bits;
          rtx x;

          if (mode == DImode)
            {
              half_mode = SImode;
              gen_lshr3 = gen_lshrsi3;
              gen_and3 = gen_andsi3;
              gen_xor3 = gen_xorsi3;
              bits = 5;
            }
          else
            {
              half_mode = DImode;
              gen_lshr3 = gen_lshrdi3;
              gen_and3 = gen_anddi3;
              gen_xor3 = gen_xordi3;
              bits = 6;
            }

          if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
            x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
          else
            x = gen_lowpart (half_mode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
          emit_insn (gen_and3 (high[0], high[0], const1_rtx));
          emit_move_insn (low[0], high[0]);
          emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
        }

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
                                      scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
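
/* Worked example (editorial): for an ia32 DImode "1 << n" with
   QImode-capable destination registers, the branch above corresponds
   roughly to

       xorl  %eax, %eax         ; low  = 0
       xorl  %edx, %edx         ; high = 0
       testb $32, %cl           ; is bit 5 of the count set?
       sete  %al                ; low  = (n < 32)
       setne %dl                ; high = (n >= 32)
       shll  %cl, %eax          ; 32-bit shifts mask the count to 5 bits,
       shll  %cl, %edx          ; so exactly one half ends up as 1 << (n&31)

   with no branches and no cmov.  Register assignment is illustrative;
   the splitter works on whatever registers it is handed.  */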
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));

          if (count > half_width)
            emit_insn (gen_ashr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashr3 (scratch, scratch,
                                GEN_INT (half_width - 1)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

          emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
        }
    }
}
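
/* Example (editorial): splitting an ia32 DImode arithmetic right shift
   by a constant count >= 32, say "x >> 40", takes the count >= half_width
   branch above and emits the equivalent of

       low  = high_src;        -- movl
       high = low;             -- movl
       high >>= 31;            -- sarl $31: sign fill
       low  >>= 40 - 32;       -- sarl $8

   i.e. the whole shift is done with half-word moves plus two 32-bit
   arithmetic shifts.  */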
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > half_width)
            emit_insn (gen_lshr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

          emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
        }
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
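
/* Editorial note: REG_BR_PROB notes are fixed-point fractions of
   REG_BR_PROB_BASE (10000, from rtl.h).  So

       predict_jump (REG_BR_PROB_BASE * 90 / 100);

   attaches 9000 to the just-emitted jump, i.e. "taken 90% of the
   time", which the profile machinery later consumes.  */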
/* Helper function for the string operations below.  Test whether the
   bits of VARIABLE selected by VALUE are zero (e.g. whether a count or
   pointer is aligned to VALUE bytes); if so, jump to the label this
   function returns.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}
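
/* Example (editorial): scale_counter (GEN_INT (64), 4) folds to
   GEN_INT (16) at expand time, while a runtime count with scale 4 emits
   the equivalent of

       shrl $2, %ecx            -- LSHIFTRT by exact_log2 (4)

   SCALE is assumed by this helper's callers to be a power of two (it is
   always a mode size here).  */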
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    abort ();
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output simple loop to move memory
   pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
   overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
   equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, enum machine_mode mode, int unroll,
                               int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  int piece_size_n = GET_MODE_SIZE (mode) * unroll;
  rtx piece_size = GEN_INT (piece_size_n);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
                               true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);

  /* This assert could be relaxed - in that case we'd need to compute the
     smallest power of two containing PIECE_SIZE_N and pass it to
     offset_address.  */
  gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
  destmem = offset_address (destmem, tmp, piece_size_n);
  destmem = adjust_address (destmem, mode, 0);

  if (srcmem)
    {
      srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
      srcmem = adjust_address (srcmem, mode, 0);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using a single temporary.
         Also using 4 temporaries is overkill in 32-bit mode.  */
      if (!TARGET_64BIT && 0)
        {
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode,
                                    GET_MODE_SIZE (mode));
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode,
                                    GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, srcmem);
            }
        }
      else
        {
          rtx tmpreg[4];
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
            {
              tmpreg[i] = gen_reg_rtx (mode);
              if (i)
                srcmem =
                  adjust_address (copy_rtx (srcmem), mode,
                                  GET_MODE_SIZE (mode));
              emit_move_insn (tmpreg[i], srcmem);
            }
          for (i = 0; i < unroll; i++)
            {
              if (i)
                destmem =
                  adjust_address (copy_rtx (destmem), mode,
                                  GET_MODE_SIZE (mode));
              emit_move_insn (destmem, tmpreg[i]);
            }
        }
    }
  else
    for (i = 0; i < unroll; i++)
      {
        if (i)
          destmem =
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
        emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
        predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
      else
        predict_jump (REG_BR_PROB_BASE
                      - (REG_BR_PROB_BASE + expected_size / 2)
                        / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
        emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
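
/* Shape of the emitted loop (editorial sketch): for a copy with MODE
   == SImode and UNROLL == 2 the code above is equivalent to

       size = count & ~7;                    -- piece_size_mask
       iter = 0;
     top:
       t0 = *(int *) (src + iter);           -- loads first,
       t1 = *(int *) (src + iter + 4);
       *(int *) (dest + iter)     = t0;      -- then stores
       *(int *) (dest + iter + 4) = t1;
       iter += 8;
       if (iter < size) goto top;            -- weighted by expected_size
       dest += iter;  src += iter;

   leaving the remaining count % 8 bytes to whatever epilogue the
   caller emits.  */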
/* Output "rep; mov" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
                           rtx destptr, rtx srcptr,
                           rtx count,
                           enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg
    = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                               GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
        clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
        clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                          destexp, srcexp));
}
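
/* Editorial example: for a known count that is a multiple of 4 the
   QImode request is upgraded to SImode above, so the expansion amounts
   to

       movl $count/4, %ecx
       rep movsl

   DESTEXP/SRCEXP describe the final pointer values
   (ptr + (countreg << 2)) so the rep insn's RTL has correct dataflow;
   the register names are only what the md patterns pin them to.  */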
/* Output "rep; stos" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
                            rtx count, enum machine_mode mode,
                            rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg
    = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
   DESTMEM.
   SRC is passed by pointer to be updated on return.
   Return value is updated DST.  */
static rtx
emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
             HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, src = *srcmem, adjust, tempreg;
  enum insn_code code;
  enum machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  piece_size = 1 << floor_log2 (size_to_move);
  move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
  code = optab_handler (mov_optab, move_mode);
  while (code == CODE_FOR_nothing && piece_size > 1)
    {
      piece_size >>= 1;
      move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
      code = optab_handler (mov_optab, move_mode);
    }

  /* Find the corresponding vector mode with the same size as MOVE_MODE.
     MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
  if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
    {
      int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
      move_mode = mode_for_vector (word_mode, nunits);
      code = optab_handler (mov_optab, move_mode);
      if (code == CODE_FOR_nothing)
        {
          move_mode = word_mode;
          piece_size = GET_MODE_SIZE (move_mode);
          code = optab_handler (mov_optab, move_mode);
        }
    }
  gcc_assert (code != CODE_FOR_nothing);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
  src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      /* We move from memory to memory, so we'll need to do it via
         a temporary register.  */
      tempreg = gen_reg_rtx (move_mode);
      emit_insn (GEN_FCN (code) (tempreg, src));
      emit_insn (GEN_FCN (code) (dst, tempreg));

      emit_move_insn (destptr,
                      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
      emit_move_insn (srcptr,
                      gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
                                          piece_size);
      src = adjust_automodify_address_nv (src, move_mode, srcptr,
                                          piece_size);
    }

  /* Update DST and SRC rtx.  */
  *srcmem = src;
  return dst;
}
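
/* Worked example (editorial): emit_memmov with SIZE_TO_MOVE == 8 on a
   64-bit target picks piece_size = 8 and DImode, emitting one

       movq (%rsi), %rax
       movq %rax, (%rdi)

   pair plus the two pointer adjustments.  A 16-byte request probes
   TImode first and, when a V2DImode move pattern exists, uses a single
   vector load/store instead.  Registers shown are illustrative only.  */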
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      HOST_WIDE_INT epilogue_size = countval % max_size;
      int i;

      /* For now MAX_SIZE should be a power of 2.  This assert could be
         relaxed, but it'll require a bit more complicated epilogue
         expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
      for (i = max_size; i >= 1; i >>= 1)
        {
          if (epilogue_size & i)
            destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
                                   GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
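
/* Example (editorial): a constant count of 1027 with max_size 16
   leaves epilogue_size = 1027 % 16 = 3, so the bit loop above emits
   one 2-byte and one 1-byte emit_memmov.  For a runtime count (and a
   small MAX_SIZE) the aligntest jump tree instead tests bits 4, 2 and
   1 of COUNT, copying each tail piece conditionally.  */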
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode,
                                                   destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode,
                                                   destptr, offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode,
                                                   destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            {
              dest = adjust_automodify_address_nv (destmem, SImode,
                                                   destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode,
                                                   destptr, offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          dest = adjust_automodify_address_nv (destmem, SImode,
                                               destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          dest = adjust_automodify_address_nv (destmem, HImode,
                                               destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          dest = adjust_automodify_address_nv (destmem, QImode,
                                               destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
          offset += 1;
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count,
                                       max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough bytes from SRC to DEST to align DEST, which is known to be
   aligned to ALIGN bytes, up to DESIRED_ALIGNMENT.
   Return value is updated DESTMEM.  */
static rtx
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
{
  int i;
  for (i = 1; i < desired_alignment; i <<= 1)
    {
      if (align <= i)
        {
          rtx label = ix86_expand_aligntest (destptr, i, false);
          destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
          ix86_adjust_counter (count, i);
          emit_label (label);
          LABEL_NUSES (label) = 1;
          set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
        }
    }
  return destmem;
}
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.
   The function updates DST and SRC, namely, it sets proper alignment.
   DST is returned via return value, SRC is updated via pointer SRCP.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int piece_size = 1;
  int copied_bytes = 0;
  int src_align_bytes
    = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;

  for (piece_size = 1;
       piece_size <= desired_align && copied_bytes < align_bytes;
       piece_size <<= 1)
    {
      if (align_bytes & piece_size)
        {
          dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
          copied_bytes += piece_size;
        }
    }

  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align;
      for (src_align = desired_align; src_align >= 2; src_align >>= 1)
        {
          if ((src_align_bytes & (src_align - 1))
              == (align_bytes & (src_align - 1)))
            break;
        }
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Store enough of VALUE to DEST to align DEST, which is known to be
   aligned to ALIGN bytes, up to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Store enough of VALUE to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
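
/* Example (editorial): with DESIRED_ALIGN == 8 and ALIGN_BYTES == 7
   the code above emits one QImode store at offset 0, one HImode store
   at offset 1 and one SImode store at offset 3 (the strset patterns
   advance DESTREG), after which DST is known 8-byte aligned and its
   recorded MEM_SIZE is reduced by 7.  */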
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check, bool *noalign)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             || (memset
                                 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

  *noalign = false;

#define ALG_USABLE_P(alg) (rep_prefix_usable                   \
                           || (alg != rep_prefix_1_byte        \
                               && alg != rep_prefix_4_byte     \
                               && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    return alg;
                  break;
                }
              else if (ALG_USABLE_P (candidate))
                {
                  *noalign = algs->size[i].noalign;
                  return candidate;
                }
            }
        }
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand and
     take blocks of at most that size, guessing that the average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
          || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
        }
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check, noalign);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
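
/* Example of the table walk (editorial, with hypothetical numbers, not
   a real tuning entry): given a memcpy size list of

       {{24, loop, false}, {128, rep_prefix_4_byte, false},
        {-1, rep_prefix_1_byte, false}}

   an expected_size of 100 selects rep_prefix_4_byte, 20 selects the
   simple loop, and an unknown size falls through to the table's
   unknown_size algorithm (or to the recursive half-size heuristic when
   inlining is forced and unknown_size would be a libcall).  */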
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100%
   guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size,
                  enum machine_mode move_mode)
{
  int desired_align = 0;

  gcc_assert (alg != no_stringop);

  if (alg == libcall)
    return 0;
  if (move_mode == VOIDmode)
    return 0;

  desired_align = GET_MODE_SIZE (move_mode);
  /* PentiumPro has special logic triggering for 8-byte aligned blocks,
     copying a whole cacheline at once.  */
  if (TARGET_PENTIUMPRO
      && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
    desired_align = 8;

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;

  return desired_align;
}
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      An optional dynamic check for size and a libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */

bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;
  bool noalign;
  enum machine_mode move_mode = VOIDmode;
  int unroll_factor = 1;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care
     here just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */
  alg = decide_alg (count, expected_size, false, &dynamic_check, &noalign);
  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);

  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  srcreg = copy_addr_to_reg (XEXP (src, 0));

  move_mode = word_mode;
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      need_zero_guard = true;
      move_mode = QImode;
      break;
    case loop:
      need_zero_guard = true;
      break;
    case unrolled_loop:
      need_zero_guard = true;
      unroll_factor = (TARGET_64BIT ? 4 : 2);
      break;
    case vector_loop:
      need_zero_guard = true;
      unroll_factor = 4;
      /* Find the widest supported mode.  */
      move_mode = word_mode;
      while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
             != CODE_FOR_nothing)
        move_mode = GET_MODE_WIDER_MODE (move_mode);

      /* Find the corresponding vector mode with the same size as MOVE_MODE.
         MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
      if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
        {
          int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
          move_mode = mode_for_vector (word_mode, nunits);
          if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
            move_mode = word_mode;
        }
      gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
      break;
    case rep_prefix_8_byte:
      move_mode = DImode;
      break;
    case rep_prefix_4_byte:
      move_mode = SImode;
      break;
    case rep_prefix_1_byte:
      move_mode = QImode;
      break;
    }
  size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
  epilogue_size_needed = size_needed;

  desired_align = decide_alignment (align, alg, expected_size, move_mode);
  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);

      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
        {
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT) dynamic_check)
            {
              emit_block_move_via_libcall (dst, src, count_exp, false);
              count_exp = const0_rtx;
              goto epilogue;
            }
        }
      else
        {
          rtx hot_label = gen_label_rtx ();
          jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, GET_MODE (count_exp), 1, hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          emit_block_move_via_libcall (dst, src, count_exp, false);
          emit_jump (jump_around_label);
          emit_label (hot_label);
        }
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          src = change_address (src, BLKmode, srcreg);
          dst = change_address (dst, BLKmode, destreg);
          dst = expand_movmem_prologue (dst, src, destreg, srcreg, count_exp,
                                        align, desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
    case loop:
    case unrolled_loop:
    case vector_loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, move_mode, unroll_factor,
                                     expected_size);
      break;
    case rep_prefix_8_byte:
    case rep_prefix_4_byte:
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 move_mode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                          (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                            epilogue_size_needed);

  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
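
/* End-to-end sketch (editorial, not generated output verbatim) of what
   the four steps yield for memcpy (dst, src, n) with a runtime N,
   alg == rep_prefix_4_byte and desired_align == 4:

       cmpl  $4, %ecx            ; step 1: small blocks jump to epilogue
       jb    .Lepilogue
       ...byte moves until %edi is 4-aligned...    ; step 2
       shrl  $2, %ecx
       rep movsl                                   ; step 3
     .Lepilogue:
       ...copy n & 3 tail bytes...                 ; step 4

   Register usage and exact instruction selection are illustrative; the
   actual RTL is produced by the helpers above.  */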
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
         + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        if (mode == SImode)
          emit_insn (gen_movsi_insv_1 (reg, reg));
        else
          emit_insn (gen_movdi_insv_1 (reg, reg));
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg =
            expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
                                int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;
  bool noalign;
  enum machine_mode move_mode = VOIDmode;
  int unroll_factor;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check, &noalign);
  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);

  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));

  move_mode = word_mode;
  unroll_factor = 1;
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      break;
    case unrolled_loop:
      need_zero_guard = true;
      unroll_factor = 4;
      break;
    case rep_prefix_8_byte:
      move_mode = DImode;
      break;
    case rep_prefix_4_byte:
      move_mode = SImode;
      break;
    case rep_prefix_1_byte:
      move_mode = QImode;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      move_mode = QImode;
      break;
    }
  size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
  epilogue_size_needed = size_needed;

  desired_align = decide_alignment (align, alg, expected_size, move_mode);
  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        {
          enum machine_mode mode = SImode;
          if (TARGET_64BIT && (count & ~0xffffffff))
            mode = DImode;
          count_exp = force_reg (mode, count_exp);
        }
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in the
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
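      /* For example, size_needed == 8 and desired_align - align == 3 give
         MAX (7, 3) == 7, which rounds up to the next power of two:
         1 << (floor_log2 (7) + 1) == 8.  The epilogue then handles
         COUNT & 7 trailing bytes.  */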
      /* To improve performance of small blocks, we jump around the VAL
         promoting mode.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use byte
         loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size <= epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                               LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
    case loop:
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, move_mode, unroll_factor,
                                     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                        (count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                                epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if it is aligned to 4-byte.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It only makes programs bigger and does not help
     to speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);
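
  /* Worked example of the test above: for scratch == 0x00414243 (high
     byte zero),
        scratch + (-0x01010101)  == 0xff404142
        & ~scratch (0xffbebdbc)  == 0xff000100
        & 0x80808080             == 0x80000000,
     which is nonzero, so the loop exits.  When no byte is zero, no
     borrow reaches a byte whose own top bit was clear, and the final
     AND is zero.  */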
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg, tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2, out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
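
/* Note on the scasb arithmetic above: repnz scasb starts with the count
   register holding scratch4 == -1 and decrements it once per byte
   scanned, including the terminating zero.  The remaining count after
   scanning n bytes is therefore -1 - n, so its one's complement is n
   and n - 1 is the string length.  */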
/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
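
/* In other words, in the large PIC model the PLT slot address is
   computed at runtime as pic_offset_table + symbol@PLTOFF, since no
   single 32-bit relocation can be assumed to reach the PLT directly.  */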
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  unsigned int const cregs_size
    = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
  rtx vec[3 + cregs_size];
  rtx use = NULL, call;
  unsigned int vec_len = 0;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic
          && (!TARGET_64BIT
              || (ix86_cmodel == CM_LARGE_PIC
                  && DEFAULT_ABI != MS_ABI))
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && !TARGET_PECOFF
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
           : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
                                       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < cregs_size; i++)
        {
          int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
          enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;

          vec[vec_len++]
            = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
        }
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
        xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
         to include REX.W.  */
      else if (TARGET_SEH)
        xasm = "rex.W jmp %A0";
      else
        xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
        {
          /* If we get to another real insn, we don't need the nop.  */
          if (INSN_P (i))
            break;

          /* If we get to the epilogue note, prevent a catch region from
             being adjacent to the standard epilogue sequence.  If non-
             call-exceptions, we'll have done this during epilogue emission.  */
          if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
              && !flag_non_call_exceptions
              && !can_throw_internal (insn))
            {
              seh_nop_p = true;
              break;
            }
        }

      /* If we didn't find a real insn following the call, prevent the
         unwinder from looking into the next function.  */
      if (i == NULL)
        seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
static void
ix86_instantiate_decls (void)
{
  struct stack_local_entry *s;

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->rtl != NULL_RTX)
      instantiate_decl_rtl (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  len = (parts.seg == SEG_DEFAULT) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
          || (parts.base && GET_MODE (parts.base) == SImode)
          || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && GET_CODE (base) == SUBREG)
    base = SUBREG_REG (base);
  if (index && GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         code.  */
      if (base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || REGNO (base) == SP_REG
          || REGNO (base) == BP_REG
          || REGNO (base) == R12_REG
          || REGNO (base) == R13_REG)
        len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
        {
          rtx symbol = disp;

          if (GET_CODE (disp) == CONST)
            symbol = XEXP (disp, 0);
          if (GET_CODE (symbol) == PLUS
              && CONST_INT_P (XEXP (symbol, 1)))
            symbol = XEXP (symbol, 0);

          if (GET_CODE (symbol) != LABEL_REF
              && (GET_CODE (symbol) != SYMBOL_REF
                  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
              && (GET_CODE (symbol) != UNSPEC
                  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
                      && XINT (symbol, 1) != UNSPEC_PCREL
                      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
            len++;
        }
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len++;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len++;
    }

  return len;
}
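
/* A few examples of the values computed above (bytes beyond the modrm
   byte): (%eax) -> 0; (%esp) -> 1 (SIB); (%ebp) -> 1 (disp8 of 0);
   12(%eax) -> 1 (disp8); 12(%eax,%ebx,4) -> 2 (SIB + disp8);
   foo(,%eax,4) -> 5 (SIB + disp32).  */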
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        gcc_assert (!len);
        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded
             as 32bit sign extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
          }
      }
  return len;
}
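
/* For example, "add $3, %eax" has an 8-bit immediate alternative, so
   with SHORTFORM set its immediate contributes 1 byte, while
   "add $1000, %eax" needs the full 32-bit immediate and contributes 4.  */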
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        constrain_operands_cached (reload_completed);
        if (which_alternative != -1)
          {
            const char *constraints = recog_data.constraints[i];
            int alt = which_alternative;

            while (*constraints == '=' || *constraints == '+')
              constraints++;
            while (alt-- > 0)
              while (*constraints++ != ',')
                ;
            /* Skip ignored operands.  */
            if (*constraints == 'X')
              continue;
          }
        return memory_address_length (XEXP (recog_data.operand[i], 0), false);
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
        /* REX.W bit uses 3 byte VEX prefix.  */
        if (GET_MODE (recog_data.operand[i]) == DImode
            && GENERAL_REG_P (recog_data.operand[i]))
          return 3 + 1;
      }
    else
      {
        /* REX.X or REX.B bits use 3 byte VEX prefix.  */
        if (MEM_P (recog_data.operand[i])
            && x86_extended_reg_mentioned_p (recog_data.operand[i]))
          return 3 + 1;
      }

  return 2 + 1;
}
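
/* Thus vector insns whose operands need none of REX.W/X/B encode in a
   2-byte VEX prefix plus the opcode, 3 bytes total, while a DImode
   general register operand or an extended register in a memory operand
   forces the 3-byte VEX form, 3 + 1 = 4 bytes.  */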
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_SLM:
    case PROCESSOR_K6:
    case PROCESSOR_BTVER2:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_HASWELL:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;

  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
/* Helper function for exact_store_load_dependency.
   Return true if addr is found in insn.  */
static bool
exact_dependency_1 (rtx addr, rtx insn)
{
  enum rtx_code code;
  const char *format_ptr;
  int i, j;

  code = GET_CODE (insn);
  switch (code)
    {
    case MEM:
      if (rtx_equal_p (addr, insn))
        return true;
      break;
    case REG:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CODE_LABEL:
    case PC:
    case CC0:
    case EXPR_LIST:
      return false;
    default:
      break;
    }

  format_ptr = GET_RTX_FORMAT (code);
  for (i = 0; i < GET_RTX_LENGTH (code); i++)
    {
      switch (*format_ptr++)
        {
        case 'e':
          if (exact_dependency_1 (addr, XEXP (insn, i)))
            return true;
          break;
        case 'E':
          for (j = 0; j < XVECLEN (insn, i); j++)
            if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
              return true;
          break;
        }
    }
  return false;
}
/* Return true if there exists exact dependency for store & load, i.e.
   the same memory address is used in them.  */
static bool
exact_store_load_dependency (rtx store, rtx load)
{
  rtx set1, set2;

  set1 = single_set (store);
  if (!set1)
    return false;
  if (!MEM_P (SET_DEST (set1)))
    return false;
  set2 = single_set (load);
  if (!set2)
    return false;
  if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
    return true;
  return false;
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
        {
          rtx addr = PATTERN (insn);

          if (GET_CODE (addr) == PARALLEL)
            addr = XVECEXP (addr, 0, 0);

          gcc_assert (GET_CODE (addr) == SET);

          addr = SET_SRC (addr);
          if (modified_in_p (addr, dep_insn))
            cost += 1;
        }
      else if (ix86_agi_dependent (dep_insn, insn))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependent (dep_insn, insn))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && MEM_P (SET_DEST (set2)))
        cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BTVER1:
    case PROCESSOR_BTVER2:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost = 3;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_SLM:
      if (!reload_completed)
        return cost;

      /* Increase cost of integer loads.  */
      memory = get_attr_memory (dep_insn);
      if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
        {
          enum attr_unit unit = get_attr_unit (dep_insn);
          if (unit == UNIT_INTEGER && cost == 1)
            {
              if (memory == MEMORY_LOAD)
                cost = 3;
              else
                {
                  /* Increase cost of ld/st for short int types only
                     because of store forwarding issue.  */
                  rtx set = single_set (dep_insn);
                  if (set && (GET_MODE (SET_DEST (set)) == QImode
                              || GET_MODE (SET_DEST (set)) == HImode))
                    {
                      /* Increase cost of store/load insn if exact
                         dependence exists and it is load insn.  */
                      enum attr_memory insn_memory = get_attr_memory (insn);
                      if (insn_memory == MEMORY_LOAD
                          && exact_store_load_dependency (dep_insn, insn))
                        cost = 3;
                    }
                }
            }
        }
      break;

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_HASWELL:
    case PROCESSOR_ATOM:
    case PROCESSOR_SLM:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
         as many instructions can be executed on a cycle, i.e.,
         issue_rate.  I wonder why tuning for many CPUs does not do this.  */
      if (reload_completed)
        return ix86_issue_rate ();
      /* Don't use lookahead for pre-reload schedule to save compile time.  */
      return 0;

    default:
      return 0;
    }
}
/* Try to reorder ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) IMUL instruction is on the top of list;
   (2) There exists the only producer of independent IMUL instruction in
       ready list.
   Return index of IMUL producer if it was found and -1 otherwise.  */
static int
do_reorder_for_imul (rtx *ready, int n_ready)
{
  rtx insn, set, insn1, insn2;
  sd_iterator_def sd_it;
  dep_t dep;
  int index = -1;
  int i;

  if (ix86_tune != PROCESSOR_ATOM)
    return index;

  /* Check that IMUL instruction is on the top of ready list.  */
  insn = ready[n_ready - 1];
  set = single_set (insn);
  if (!set)
    return index;
  if (!(GET_CODE (SET_SRC (set)) == MULT
      && GET_MODE (SET_SRC (set)) == SImode))
    return index;

  /* Search for producer of independent IMUL instruction.  */
  for (i = n_ready - 2; i >= 0; i--)
    {
      insn = ready[i];
      if (!NONDEBUG_INSN_P (insn))
        continue;
      /* Skip IMUL instruction.  */
      insn2 = PATTERN (insn);
      if (GET_CODE (insn2) == PARALLEL)
        insn2 = XVECEXP (insn2, 0, 0);
      if (GET_CODE (insn2) == SET
          && GET_CODE (SET_SRC (insn2)) == MULT
          && GET_MODE (SET_SRC (insn2)) == SImode)
        continue;

      FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
        {
          rtx con;
          con = DEP_CON (dep);
          if (!NONDEBUG_INSN_P (con))
            continue;
          insn1 = PATTERN (con);
          if (GET_CODE (insn1) == PARALLEL)
            insn1 = XVECEXP (insn1, 0, 0);

          if (GET_CODE (insn1) == SET
              && GET_CODE (SET_SRC (insn1)) == MULT
              && GET_MODE (SET_SRC (insn1)) == SImode)
            {
              sd_iterator_def sd_it1;
              dep_t dep1;
              /* Check if there is no other dependee for IMUL.  */
              index = i;
              FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
                {
                  rtx pro;
                  pro = DEP_PRO (dep1);
                  if (!NONDEBUG_INSN_P (pro))
                    continue;
                  if (pro != insn)
                    index = -1;
                }
              if (index >= 0)
                break;
            }
        }
      if (index >= 0)
        break;
    }
  return index;
}
/* Try to find the best candidate on the top of ready list if two insns
   have the same priority - candidate is best if its dependees were
   scheduled earlier.  Applied for Silvermont only.
   Return true if top 2 insns must be interchanged.  */
static bool
swap_top_of_ready_list (rtx *ready, int n_ready)
{
  rtx top = ready[n_ready - 1];
  rtx next = ready[n_ready - 2];
  rtx set;
  sd_iterator_def sd_it;
  dep_t dep;
  int clock1 = -1;
  int clock2 = -1;
  #define INSN_TICK(INSN) (HID (INSN)->tick)

  if (ix86_tune != PROCESSOR_SLM)
    return false;

  if (!NONDEBUG_INSN_P (top))
    return false;
  if (!NONJUMP_INSN_P (top))
    return false;
  if (!NONDEBUG_INSN_P (next))
    return false;
  if (!NONJUMP_INSN_P (next))
    return false;
  set = single_set (top);
  if (!set)
    return false;
  set = single_set (next);
  if (!set)
    return false;

  if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
    {
      if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
        return false;
      /* Determine winner more precisely.  */
      FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
        {
          rtx pro;
          pro = DEP_PRO (dep);
          if (!NONDEBUG_INSN_P (pro))
            continue;
          if (INSN_TICK (pro) > clock1)
            clock1 = INSN_TICK (pro);
        }
      FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
        {
          rtx pro;
          pro = DEP_PRO (dep);
          if (!NONDEBUG_INSN_P (pro))
            continue;
          if (INSN_TICK (pro) > clock2)
            clock2 = INSN_TICK (pro);
        }

      if (clock1 == clock2)
        {
          /* Determine winner - load must win.  */
          enum attr_memory memory1, memory2;
          memory1 = get_attr_memory (top);
          memory2 = get_attr_memory (next);
          if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
            return true;
        }
      return (bool) (clock2 < clock1);
    }
  return false;
  #undef INSN_TICK
}
/* Perform possible reordering of ready list for Atom/Silvermont only.
   Return issue rate.  */
static int
ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
                    int clock_var)
{
  int issue_rate = -1;
  int n_ready = *pn_ready;
  int i;
  rtx insn;
  int index = -1;

  /* Set up issue rate.  */
  issue_rate = ix86_issue_rate ();

  /* Do reordering for Atom/SLM only.  */
  if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM)
    return issue_rate;

  /* Nothing to do if ready list contains only 1 instruction.  */
  if (n_ready <= 1)
    return issue_rate;

  /* Do reordering for post-reload scheduler only.  */
  if (!reload_completed)
    return issue_rate;

  if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
    {
      if (sched_verbose > 1)
        fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
                 INSN_UID (ready[index]));

      /* Put IMUL producer (ready[index]) at the top of ready list.  */
      insn = ready[index];
      for (i = index; i < n_ready - 1; i++)
        ready[i] = ready[i + 1];
      ready[n_ready - 1] = insn;
      return issue_rate;
    }
  if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
    {
      if (sched_verbose > 1)
        fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
                 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
      /* Swap 2 top elements of ready list.  */
      insn = ready[n_ready - 1];
      ready[n_ready - 1] = ready[n_ready - 2];
      ready[n_ready - 2] = insn;
    }
  return issue_rate;
}
static bool
ix86_class_likely_spilled_p (reg_class_t);

/* Returns true if lhs of insn is HW function argument register and set up
   is_spilled to true if it is likely spilled HW register.  */
static bool
insn_is_function_arg (rtx insn, bool *is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore it.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it likely spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
        *is_spilled = true;
      return true;
    }
  return false;
}
/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.  */
static rtx
add_parameter_dependencies (rtx call, rtx head)
{
  rtx insn;
  rtx last = call;
  rtx first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
        return NULL;
      if (!NONDEBUG_INSN_P (last))
        continue;
      if (insn_is_function_arg (last, &is_spilled))
        break;
      return NULL;
    }

  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
        break;
      if (insn == head)
        break;
      if (!NONDEBUG_INSN_P (insn))
        {
          last = insn;
          continue;
        }
      if (insn_is_function_arg (insn, &is_spilled))
        {
          /* Add output dependence between two function arguments if chain
             of output arguments contains likely spilled HW registers.  */
          if (is_spilled)
            add_dependence (first_arg, insn, REG_DEP_OUTPUT);
          first_arg = last = insn;
        }
      else
        break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}
/* Add output or anti dependency from insn to first_arg to restrict its code
   motion.  */
static void
avoid_func_arg_motion (rtx first_arg, rtx insn)
{
  rtx set;
  rtx tmp;

  set = single_set (insn);
  if (!set)
    return;
  tmp = SET_DEST (set);
  if (REG_P (tmp))
    {
      /* Add output dependency to the first function argument.  */
      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      return;
    }
  /* Add anti dependency.  */
  add_dependence (first_arg, insn, REG_DEP_ANTI);
}
/* Avoid cross block motion of function argument through adding dependency
   from the first non-jump instruction in bb.  */
static void
add_dependee_for_func_arg (rtx arg, basic_block bb)
{
  rtx insn = BB_END (bb);

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
        {
          rtx set = single_set (insn);
          if (set)
            {
              avoid_func_arg_motion (arg, insn);
              return;
            }
        }
      if (insn == BB_HEAD (bb))
        return;
      insn = PREV_INSN (insn);
    }
}
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  */
static void
ix86_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn;
  rtx first_arg = NULL;
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
        first_arg = add_parameter_dependencies (insn, head);
        if (first_arg)
          {
            /* Add dependee for first argument to predecessors if only
               region contains more than one block.  */
            basic_block bb = BLOCK_FOR_INSN (insn);
            int rgn = CONTAINING_RGN (bb->index);
            int nr_blks = RGN_NR_BLOCKS (rgn);
            /* Skip trivial regions and region head blocks that can have
               predecessors outside of region.  */
            if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
              {
                edge e;
                edge_iterator ei;

                /* Assume that region is SCC, i.e. all immediate predecessors
                   of non-head block are in the same region.  */
                FOR_EACH_EDGE (e, ei, bb->preds)
                  {
                    /* Avoid creating of loop-carried dependencies through
                       using topological ordering in region.  */
                    if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
                      add_dependee_for_func_arg (first_arg, e->src);
                  }
              }
            insn = first_arg;
            if (insn == head)
              break;
          }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
/* Hook for pre-reload schedule - set priority of moves from likely spilled
   HW registers to maximum, to schedule them as soon as possible.  These are
   moves from function argument registers at the top of the function entry
   and moves from function return value registers after call.  */
static int
ix86_adjust_priority (rtx insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
          && HARD_REGISTER_P (tmp)
          && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
        return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx);
/* Filter out insns from ready_try that the core will not be able to issue
   on current cycle due to decoder.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
        continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this is too long an insn for a secondary decoder ...  */
          (!first_cycle_insn_p
           && insn_size > core2i7_secondary_decoder_max_insn_size)
          /* ... or it would not fit into the ifetch block ...  */
          || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
          /* ... or the decoder is full already ...  */
          || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
        /* ... mask the insn out.  */
        {
          ready_try[n_ready] = 1;

          if (data->ready_try_change)
            bitmap_set_bit (data->ready_try_change, n_ready);
        }
    }
}
25269 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
25270 bool first_cycle_insn_p
)
25272 ix86_first_cycle_multipass_data_t data
25273 = (ix86_first_cycle_multipass_data_t
) _data
;
25274 const_ix86_first_cycle_multipass_data_t prev_data
25275 = ix86_first_cycle_multipass_data
;
25277 /* Restore the state from the end of the previous round. */
25278 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
25279 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
25281 /* Filter instructions that cannot be issued on current cycle due to
25282 decoder restrictions. */
25283 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
25284 first_cycle_insn_p
);
/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
                                     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
              && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
                                               n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  bitmap_clear (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
                                                  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
                                         char *ready_try,
                                         int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
                        int verbose ATTRIBUTE_UNUSED,
                        int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_HASWELL:
      /* Do not perform multipass scheduling for pre-reload schedule
         to save compile time.  */
      if (reload_completed)
        {
          targetm.sched.dfa_post_advance_cycle
            = core2i7_dfa_post_advance_cycle;
          targetm.sched.first_cycle_multipass_init
            = core2i7_first_cycle_multipass_init;
          targetm.sched.first_cycle_multipass_begin
            = core2i7_first_cycle_multipass_begin;
          targetm.sched.first_cycle_multipass_issue
            = core2i7_first_cycle_multipass_issue;
          targetm.sched.first_cycle_multipass_backtrack
            = core2i7_first_cycle_multipass_backtrack;
          targetm.sched.first_cycle_multipass_end
            = core2i7_first_cycle_multipass_end;
          targetm.sched.first_cycle_multipass_fini
            = core2i7_first_cycle_multipass_fini;

          /* Set decoder parameters.  */
          core2i7_secondary_decoder_max_insn_size = 8;
          core2i7_ifetch_block_size = 16;
          core2i7_ifetch_block_max_insns = 6;
          break;
        }
      /* ... Fall through ...  */
    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
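
/* E.g. a DFmode constant gets 64-bit alignment even where its default
   alignment would be 32 bits in 32-bit mode, and long string literals
   are at least word-aligned.  */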
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align, bool opt)
{
  int max_align
    = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
          || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (!opt)
    return align;

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
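
/* E.g. a 40-byte static array on x86-64 falls under the 16-byte rule
   above and is given 128-bit alignment, which lets aligned SSE loads
   and stores be used on it.  */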
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
                      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
        align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler can not do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit can not rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we can not benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
          && (va_list_type_node == NULL_TREE
              || (TYPE_MAIN_VARIANT (type)
                  != TYPE_MAIN_VARIANT (va_list_type_node)))
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }
  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
                        unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using EAX for abi-compatibility.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
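/* Illustrative sketch, not from the original source: the static chain
   only exists for GNU C nested functions that reference the enclosing
   frame.  Per the comments above, the default 32-bit cdecl convention
   delivers the chain in %ecx; fastcall and thiscall fall back to %eax;
   regparm(3) functions have no free register, so the trampoline pushes
   the chain on the stack; 64-bit code always uses %r10.  */
#if 0
int
outer (int x)
{
  /* INNER needs a static chain to reach X in OUTER's frame.  */
  int inner (int y) { return x + y; }
  return inner (1);
}
#endif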
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	switch (REGNO (chain))
	  {
	  case AX_REG:
	    opcode = 0xb8; break;
	  case CX_REG:
	    opcode = 0xb9; break;
	  default:
	    gcc_unreachable ();
	  }
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
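/* Illustrative sketch, not from the original source: decoding the
   constants stored above (little-endian), the 64-bit trampoline with a
   full 64-bit FNADDR is this 24-byte sequence:

     49 bb <imm64>    movabs $fnaddr, %r11       (0xbb49, then DImode)
     49 ba <imm64>    movabs $chain_value, %r10  (0xba49, then DImode)
     49 ff e3 90      jmp *%r11; nop             (0x90e3ff49)

   The trailing nop only pads the final write out to a full SImode
   store, as the comment above notes.  */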
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;
  int quals;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
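/* Usage sketch, not from the original source: both getters memoize into
   their GTY-rooted tables, so each tree is built at most once and the
   garbage collector keeps it alive.  The type codes come from the
   generated i386-builtin-types.inc included above; the names below are
   assumed from that list (V4SF_FTYPE_PCFLOAT also appears in the bdesc
   tables later in this file).  */
#if 0
tree v4sf_type = ix86_get_builtin_type (IX86_BT_V4SF);
tree load_fn_type = ix86_get_builtin_func_type (V4SF_FTYPE_PCFLOAT);
#endif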
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  IX86_BUILTIN_FXSAVE,
  IX86_BUILTIN_FXRSTOR,
  IX86_BUILTIN_FXSAVE64,
  IX86_BUILTIN_FXRSTOR64,

  IX86_BUILTIN_XSAVE,
  IX86_BUILTIN_XRSTOR,
  IX86_BUILTIN_XSAVE64,
  IX86_BUILTIN_XRSTOR64,

  IX86_BUILTIN_XSAVEOPT,
  IX86_BUILTIN_XSAVEOPT64,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3 */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3 */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1 */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* RTM */
  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XEND,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* ADX instructions.  */
  IX86_BUILTIN_ADDCARRYX32,
  IX86_BUILTIN_ADDCARRYX64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* RDSEED instructions.  */
  IX86_BUILTIN_RDSEED16_STEP,
  IX86_BUILTIN_RDSEED32_STEP,
  IX86_BUILTIN_RDSEED64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  /* Builtins to get CPU type and supported features.  */
  IX86_BUILTIN_CPU_INIT,
  IX86_BUILTIN_CPU_IS,
  IX86_BUILTIN_CPU_SUPPORTS,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA, in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
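/* Usage sketch, not from the original source: registering the SSE addps
   builtin might look like the call below; the FTYPE code is assumed
   from the generated type list.  When -msse is not enabled and the
   front end has an ext-scope hook, only the builtin_isa record is
   filled in and the decl is created later by ix86_add_new_builtins.  */
#if 0
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
		   V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
#endif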
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
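/* Illustrative sketch, not from the original source: the deferred path
   is exercised when the ISA changes per-function, e.g. compiling a unit
   with plain -msse2 but marking one function with a target attribute;
   when the AVX flag is turned on for that function, the decls that
   def_builtin only recorded are created here.  */
#if 0
__attribute__((target ("avx")))
void use_avx_builtins (void);
#endif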
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
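/* Illustrative sketch, not from the original source: these descriptors
   back the comi/ucomi comparison intrinsics in <xmmintrin.h> and
   <emmintrin.h>; the rtx_code column (UNEQ, UNLT, ...) selects the
   condition tested on the flags the instruction sets.  For example,
   _mm_comieq_ss (a, b) expands to __builtin_ia32_comieq (a, b), which
   is matched to the first entry above.  */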
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
27353 /* Special builtins with variable number of arguments. */
27354 static const struct builtin_description bdesc_special_args
[] =
27356 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27357 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
27358 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27361 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27364 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27366 /* FXSR, XSAVE and XSAVEOPT */
27367 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27368 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27369 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27370 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27371 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27373 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27374 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27375 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27376 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27377 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27380 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27381 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27382 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
27384 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27385 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27386 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27387 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27389 /* SSE or 3DNow!A */
27390 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27391 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
27394 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27395 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27396 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27397 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
27398 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27399 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
27400 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
27401 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
27402 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
27403 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
27405 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
27406 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
27409 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
27412 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
27415 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27416 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
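
  /* AVX2 */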
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
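
  /* LWP */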
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
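
  /* FSGSBASE */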
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
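
  /* RTM */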
  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
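
/* Each record in these tables supplies, in order: the OPTION_MASK_ISA_*
   mask that must be enabled for the builtin to exist, the insn code used
   to expand it, the user-visible name (0 when the name is registered
   elsewhere), the IX86_BUILTIN_* code, the RTX comparison code passed to
   the expander (UNKNOWN when none applies), and the function type, cast
   to int from the ix86_builtin_func_type enumeration.  */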
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
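
  /* MMX */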
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
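
  /* MMX shifts.  The *_COUNT function types flag the last operand as a
     shift count, supplied either as a plain SImode value (the *i variants)
     or in a vector register.  */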
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
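
  /* 3DNow! */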
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
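
  /* 3DNow!A */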
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
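
  /* SSE */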
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
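
  /* SSE comparisons.  The fifth field gives the RTX comparison code the
     builtin expands with; the *_SWAP function types additionally make the
     expander swap the two operands, which is how the cmpgt/cmpge variants
     are derived from the LT/LE patterns.  */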
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
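
  /* SSE2 */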
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
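
  /* SSE2 shifts.  For the whole-register byte shifts, the *_INT_CONVERT
     function types appear to tell the expander to convert the operands to
     the mode of the insn pattern (V1TImode for the v1ti shifts) first.  */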
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
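
  /* SSE2 MMX */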
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
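
  /* SSE3 */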
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
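
  /* SSSE3 */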
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
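
  /* SSE4.1 */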
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
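
  /* SSE4.1 rounding and ptest, guarded by OPTION_MASK_ISA_ROUND.  */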
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
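
  /* SSE4.2.  */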
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
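
  /* SSE4A.  */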
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
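
  /* AES.  */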
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
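
  /* PCLMUL.  */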
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
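
  /* AVX.  */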
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
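
  /* AVX2.  */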
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
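
  /* LZCNT.  */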
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
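
  /* BMI.  */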
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
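
  /* TBM.  */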
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
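
  /* F16C.  */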
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
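
  /* BMI2.  */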
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
28313 /* FMA4 and XOP. */
28314 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
28315 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
28316 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
28317 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
28318 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
28319 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
28320 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
28321 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
28322 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
28323 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
28324 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
28325 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
28326 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
28327 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
28328 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
28329 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
28330 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
28331 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
28332 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
28333 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
28334 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
28335 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
28336 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
28337 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
28338 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
28339 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
28340 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
28341 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
28342 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
28343 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
28344 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
28345 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
28346 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
28347 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
28348 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
28349 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
28350 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
28351 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
28352 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
28353 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
28354 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
28355 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
28356 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
28357 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
28358 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
28359 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
28360 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
28361 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
28362 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
28363 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
28364 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
28365 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
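
/* Reading the MULTI_ARG_* names: the digit is the operand count, the mode
   suffix gives the element type, a trailing 2 denotes the 256-bit form,
   and the _CMP/_TF variants carry an extra comparison code in the table
   below (EQ, LT, ... for _CMP; PCOM_FALSE/PCOM_TRUE for _TF).  */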
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
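
/* Note that the "ne" and "neq" spellings above (e.g. __builtin_ia32_vpcomneb
   and __builtin_ia32_vpcomneqb) are deliberate aliases: both map to the same
   IX86_BUILTIN_* code and the same NE comparison.  */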
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
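
/* The "__builtin_" prefix on the _ITM_* names above is stripped when the
   functions are registered (see ix86_init_tm_builtins below), so e.g.
   __builtin__ITM_WM64 is also directly callable as _ITM_WM64, matching
   the libitm ABI entry points.  */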
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}

/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function () will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA, to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
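
  /* The *_step builtins return nonzero on success and store the random
     value through the pointer, so a typical (illustrative) use is:
	 unsigned int r;
	 while (!__builtin_ia32_rdrand32_step (&r))
	   ;  */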
  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);
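
  /* Roughly, for each element i whose mask element is selected,
     __builtin_ia32_gathersiv4sf (src, base, index, mask, scale) loads the
     float at base + index[i] * scale; unselected elements keep the
     corresponding value from SRC.  The "alt" variants above mix index and
     data vectors of different element widths.  */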
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdseed_di_step",
	       INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
  def_builtin (0, "__builtin_ia32_addcarryx_u32",
	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
  def_builtin (OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_addcarryx_u64",
	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
	       IX86_BUILTIN_ADDCARRYX64);
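
  /* Illustration: __builtin_ia32_addcarryx_u32 (c_in, a, b, &sum) computes
     sum = a + b + c_in and returns the carry-out byte, which can be chained
     through the limbs of a multi-word addition.  */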
  /* Add FMA4 multi-arg instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple return_stmt;
  tree convert_expr, result_var;
  gimple convert_stmt;
  gimple call_cond_stmt;
  gimple if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node, NULL);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node, NULL);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR, 0);

  pop_cfun ();

  return bb3;
}
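
/* A sketch of the GIMPLE built above for one non-default version guarded
   by two predicates (names invented for illustration):

       cond_1 = predicate_1 (arg_1);
       cond_2 = predicate_2 (arg_2);
       and_1 = MIN_EXPR <cond_2, cond_1>;
       if (and_1 > 0) goto bb2; else goto bb3;
       bb2: return (void *) &version_decl;
       bb3: ... the next version's condition is emitted here ...  */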
/* This parses the attribute arguments to target in DECL and determines
   the right builtin to use to match the platform specification.
   It returns the priority value for this version decl.  If PREDICATE_LIST
   is not NULL, it stores the list of cpu features that need to be checked
   before dispatching this function.  */

static unsigned int
get_builtin_code_for_version (tree decl, tree *predicate_list)
{
  tree attrs;
  struct cl_target_option cur_target;
  tree target_node;
  struct cl_target_option *new_target;
  const char *arg_str = NULL;
  const char *attrs_str = NULL;
  char *tok_str = NULL;
  char *token;

  /* Priority of i386 features, greater value is higher priority.  This is
     used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
  enum feature_priority
  {
    P_ZERO = 0,
    P_MMX,
    P_SSE,
    P_SSE2,
    P_SSE3,
    P_SSSE3,
    P_PROC_SSSE3,
    P_SSE4_a,
    P_PROC_SSE4_a,
    P_SSE4_1,
    P_SSE4_2,
    P_PROC_SSE4_2,
    P_POPCNT,
    P_AVX,
    P_AVX2,
    P_FMA,
    P_PROC_FMA
  };

  enum feature_priority priority = P_ZERO;

  /* These are the target attribute strings for which a dispatcher is
     available, from fold_builtin_cpu.  */

  static struct _feature_list
    {
      const char *const name;
      const enum feature_priority priority;
    }
  const feature_list[] =
    {
      {"mmx", P_MMX},
      {"sse", P_SSE},
      {"sse2", P_SSE2},
      {"sse3", P_SSE3},
      {"ssse3", P_SSSE3},
      {"sse4.1", P_SSE4_1},
      {"sse4.2", P_SSE4_2},
      {"popcnt", P_POPCNT},
      {"avx", P_AVX},
      {"avx2", P_AVX2}
    };

  static unsigned int NUM_FEATURES
    = sizeof (feature_list) / sizeof (struct _feature_list);

  unsigned int i;

  tree predicate_chain = NULL_TREE;
  tree predicate_decl, predicate_arg;

  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attrs != NULL);

  attrs = TREE_VALUE (TREE_VALUE (attrs));

  gcc_assert (TREE_CODE (attrs) == STRING_CST);
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  */
  if (strcmp (attrs_str, "default") == 0)
    return 0;

  /* Handle arch= if specified.  For priority, set it to be 1 more than
     the best instruction set the processor can handle.  For instance, if
     there is a version for atom and a version for ssse3 (the highest ISA
     priority for atom), the atom version must be checked for dispatch
     before the ssse3 version.  */
  if (strstr (attrs_str, "arch=") != NULL)
    {
      cl_target_option_save (&cur_target, &global_options);
      target_node = ix86_valid_target_attribute_tree (attrs);

      gcc_assert (target_node);
      new_target = TREE_TARGET_OPTION (target_node);
      gcc_assert (new_target);

      if (new_target->arch_specified && new_target->arch > 0)
	{
	  switch (new_target->arch)
	    {
	    case PROCESSOR_CORE2:
	      arg_str = "core2";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_COREI7:
	      arg_str = "corei7";
	      priority = P_PROC_SSE4_2;
	      break;
	    case PROCESSOR_ATOM:
	      arg_str = "atom";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_AMDFAM10:
	      arg_str = "amdfam10h";
	      priority = P_PROC_SSE4_a;
	      break;
	    case PROCESSOR_BDVER1:
	      arg_str = "bdver1";
	      priority = P_PROC_FMA;
	      break;
	    case PROCESSOR_BDVER2:
	      arg_str = "bdver2";
	      priority = P_PROC_FMA;
	      break;
	    }
	}

      cl_target_option_restore (&global_options, &cur_target);

      if (predicate_list && arg_str == NULL)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for the versioning attributes");
	  return 0;
	}

      if (predicate_list)
	{
	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
	  /* For a C string literal the length includes the trailing NULL.  */
	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
	  predicate_chain = tree_cons (predicate_decl, predicate_arg,
				       predicate_chain);
	}
    }

  /* Process feature name.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
  strcpy (tok_str, attrs_str);
  token = strtok (tok_str, ",");
  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];

  while (token != NULL)
    {
      /* Do not process "arch="  */
      if (strncmp (token, "arch=", 5) == 0)
	{
	  token = strtok (NULL, ",");
	  continue;
	}
      for (i = 0; i < NUM_FEATURES; ++i)
	{
	  if (strcmp (token, feature_list[i].name) == 0)
	    {
	      if (predicate_list)
		{
		  predicate_arg = build_string_literal (
				  strlen (feature_list[i].name) + 1,
				  feature_list[i].name);
		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
					       predicate_chain);
		}
	      /* Find the maximum priority feature.  */
	      if (feature_list[i].priority > priority)
		priority = feature_list[i].priority;

	      break;
	    }
	}
      if (predicate_list && i == NUM_FEATURES)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for %s", token);
	  return 0;
	}
      token = strtok (NULL, ",");
    }

  if (predicate_list && predicate_chain == NULL_TREE)
    {
      error_at (DECL_SOURCE_LOCATION (decl),
		"No dispatcher found for the versioning attributes : %s",
		attrs_str);
      return 0;
    }
  else if (predicate_list)
    {
      predicate_chain = nreverse (predicate_chain);
      *predicate_list = predicate_chain;
    }

  return priority;
}
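
/* For example (illustrative only): a version declared with
   __attribute__ ((target ("arch=corei7"))) gets priority P_PROC_SSE4_2
   and a predicate chain calling __builtin_cpu_is ("corei7"), whereas
   target ("sse4.2") gets P_SSE4_2 and __builtin_cpu_supports ("sse4.2").  */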
/* This compares the priority of target features in function DECL1
   and DECL2.  It returns positive value if DECL1 is higher priority,
   negative value if DECL2 is higher priority and 0 if they are the
   same.  */

static int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}
/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt = gimple_build_call_vec (
		     ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
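
/* The resolver body assembled here is therefore, in outline:

     __builtin_cpu_init ();
     if (<highest-priority predicate>) return <that version>;
     ...
     return <default version>;

   with one condition block per version, emitted in priority order.  */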
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
/* ARGLIST is the argument to target attribute.  This function tokenizes
   the comma separated arguments, sorts them and returns a string which
   is a unique identifier for the comma separated arguments.  It also
   replaces non-identifier characters "=,-" with "_".  */

static char *
sorted_attr_string (tree arglist)
{
  tree arg;
  size_t str_len_sum = 0;
  char **args = NULL;
  char *attr_str, *ret_str;
  char *attr = NULL;
  unsigned int argnum = 1;
  unsigned int i;

  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
    {
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      str_len_sum += len + 1;
      if (arg != arglist)
	argnum++;
      for (i = 0; i < strlen (str); i++)
	if (str[i] == ',')
	  argnum++;
    }

  attr_str = XNEWVEC (char, str_len_sum);
  str_len_sum = 0;
  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
    {
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      memcpy (attr_str + str_len_sum, str, len);
      attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
      str_len_sum += len + 1;
    }

  /* Replace "=,-" with "_".  */
  for (i = 0; i < strlen (attr_str); i++)
    if (attr_str[i] == '=' || attr_str[i]== '-')
      attr_str[i] = '_';

  if (argnum == 1)
    return attr_str;

  args = XNEWVEC (char *, argnum);

  i = 0;
  attr = strtok (attr_str, ",");
  while (attr != NULL)
    {
      args[i] = attr;
      i++;
      attr = strtok (NULL, ",");
    }

  qsort (args, argnum, sizeof (char *), attr_strcmp);

  ret_str = XNEWVEC (char, str_len_sum);
  str_len_sum = 0;
  for (i = 0; i < argnum; i++)
    {
      size_t len = strlen (args[i]);
      memcpy (ret_str + str_len_sum, args[i], len);
      ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
      str_len_sum += len + 1;
    }

  XDELETEVEC (args);
  XDELETEVEC (attr_str);
  return ret_str;
}
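
/* For instance, target ("avx,arch=corei7") becomes "arch_corei7_avx":
   '=' is rewritten to '_', the comma-separated tokens are sorted with
   attr_strcmp, and the result is joined with '_' (illustrative example).  */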

/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
                           DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
              "Function versions cannot be marked as gnu_inline,"
              " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("Virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}
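
/* For example (hypothetical user code, not part of this file):

     __attribute__ ((target ("avx"))) int foo (void);

   makes the assembler name "foo.avx", while the version marked
   target ("default") keeps the plain name "foo".  */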

/* This function returns true if FN1 and FN2 are versions of the same function,
   that is, the target strings of the function decls are different.  This assumes
   that FN1 and FN2 have the same signature.  */

static bool
ix86_function_versions (tree fn1, tree fn2)
{
  tree attr1, attr2;
  char *target1, *target2;
  bool result;

  if (TREE_CODE (fn1) != FUNCTION_DECL
      || TREE_CODE (fn2) != FUNCTION_DECL)
    return false;

  attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
  attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));

  /* At least one function decl should have the target attribute specified.  */
  if (attr1 == NULL_TREE && attr2 == NULL_TREE)
    return false;

  /* Diagnose missing target attribute if one of the decls is already
     multi-versioned.  */
  if (attr1 == NULL_TREE || attr2 == NULL_TREE)
    {
      if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
        {
          if (attr2 != NULL_TREE)
            {
              tree tem = fn1;
              fn1 = fn2;
              fn2 = tem;
              attr1 = attr2;
            }
          error_at (DECL_SOURCE_LOCATION (fn2),
                    "missing %<target%> attribute for multi-versioned %D",
                    fn2);
          inform (DECL_SOURCE_LOCATION (fn1),
                  "previous declaration of %D", fn1);
          /* Prevent diagnosing of the same error multiple times.  */
          DECL_ATTRIBUTES (fn2)
            = tree_cons (get_identifier ("target"),
                         copy_node (TREE_VALUE (attr1)),
                         DECL_ATTRIBUTES (fn2));
        }
      return false;
    }

  target1 = sorted_attr_string (TREE_VALUE (attr1));
  target2 = sorted_attr_string (TREE_VALUE (attr2));

  /* The sorted target strings must be different for fn1 and fn2
     to be versions.  */
  if (strcmp (target1, target2) == 0)
    result = false;
  else
    result = true;

  XDELETEVEC (target1);
  XDELETEVEC (target2);

  return result;
}
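
/* For illustration (hypothetical user code, not part of this file),
   these two decls are recognized as versions of the same function:

     __attribute__ ((target ("default"))) int foo (void);
     __attribute__ ((target ("avx")))     int foo (void);

   Two decls whose sorted target strings compare equal are not.  */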

static tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}

/* Return a new name by appending SUFFIX to the DECL name.  If make_unique
   is true, append the full path name of the source file.  */

static char *
make_name (tree decl, const char *suffix, bool make_unique)
{
  char *global_var_name;
  size_t name_len;
  const char *name;
  const char *unique_name = NULL;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  /* Get a unique name that can be used globally without any chances
     of collision at link time.  */
  if (make_unique)
    unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));

  name_len = strlen (name) + strlen (suffix) + 2;

  if (make_unique)
    name_len += strlen (unique_name) + 1;
  global_var_name = XNEWVEC (char, name_len);

  /* Use '.' to concatenate names as it is demangler friendly.  */
  if (make_unique)
    snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
              suffix);
  else
    snprintf (global_var_name, name_len, "%s.%s", name, suffix);

  return global_var_name;
}
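
/* E.g. (a sketch, assuming DECL's assembler name is "foo"):
   make_name (decl, "resolver", false) returns "foo.resolver"; with
   MAKE_UNIQUE true the result is roughly "foo.<file-unique-id>.resolver",
   using the name produced by get_file_function_name.  */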

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
                                   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  XDELETEVEC (func_name);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif

/* Returns true if decl is multi-versioned and DECL is the default function,
   that is, it is not tagged with target specific optimization.  */

static bool
is_function_default_version (const tree decl)
{
  if (TREE_CODE (decl) != FUNCTION_DECL
      || !DECL_FUNCTION_VERSIONED (decl))
    return false;
  tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attr);
  attr = TREE_VALUE (TREE_VALUE (attr));
  return (TREE_CODE (attr) == STRING_CST
          && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
}

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_get_node (fn);
  gcc_assert (node != NULL);

  node_v = get_cgraph_node_version (node);
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
            (default_version_info->this_node->symbol.decl))
        break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
        default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->symbol.decl);

      dispatcher_node = cgraph_get_create_node (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
        = insert_new_cgraph_node_version (dispatcher_node);
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->symbol.definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
        {
          it_v->dispatcher_resolver = dispatch_decl;
          it_v = it_v->next;
        }
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->symbol.decl),
                "multiversioning needs ifunc which is not supported "
                "on this target");
    }

  return dispatch_decl;
}
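
/* Conceptually (a sketch only; the decls are built programmatically
   above), the dispatching set up here corresponds to the user-level

     int foo (void) __attribute__ ((ifunc ("foo.resolver")));

   so the dynamic loader runs the resolver once and binds foo to the
   function version it returns.  */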

/* Makes a function attribute of the form NAME(ARG_NAME) and chains
   it to CHAIN.  */

static tree
make_attribute (const char *name, const char *arg_name, tree chain)
{
  tree attr_name;
  tree attr_arg_name;
  tree attr_args;
  tree attr;

  attr_name = get_identifier (name);
  attr_arg_name = build_string (strlen (arg_name), arg_name);
  attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
  attr = tree_cons (attr_name, attr_args, chain);
  return attr;
}

/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
                    const tree dispatch_decl,
                    basic_block *empty_bb)
{
  char *resolver_name;
  tree decl, type, decl_name, t;
  bool is_uniq = false;

  /* IFUNC's have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
  if (TREE_PUBLIC (default_decl) == 0)
    is_uniq = true;

  /* Append the filename to the resolver function if the versions are
     not externally visible.  This is because the resolver function has
     to be externally visible for the loader to find it.  So, appending
     the filename will prevent conflicts with a resolver function from
     another module which is based on the same version name.  */
  resolver_name = make_name (default_decl, "resolver", is_uniq);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  decl_name = get_identifier (resolver_name);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  /* IFUNC resolvers have to be externally visible.  */
  TREE_PUBLIC (decl) = 1;
  DECL_UNINLINABLE (decl) = 0;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (dispatch_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
         versioned function will put out a resolver.  Ensure it
         is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false);

  cgraph_add_new_function (decl, true);
  cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));

  pop_cfun ();

  gcc_assert (dispatch_decl != NULL);
  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (dispatch_decl)
    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));

  /* Create the alias for dispatch to resolver here.  */
  /*cgraph_create_function_alias (dispatch_decl, decl);*/
  cgraph_same_body_alias (NULL, dispatch_decl, decl);
  XDELETEVEC (resolver_name);
  return decl;
}
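
/* A sketch of the finished resolver once dispatch_function_versions
   has filled in the body created here (illustrative C, not the GIMPLE
   actually built):

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (<predicate for version 1>)
         return <version-1 decl>;
       ...
       return <default decl>;
     }
*/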

/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  vec<tree> fn_ver_vec = vNULL;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = get_cgraph_node_version (node);
  gcc_assert (node->dispatcher_function
              && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->symbol.decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->symbol.definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
                                      node->symbol.decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  fn_ver_vec.create (2);

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
         have been determined if this function needs a vtable index or
         not.  This happens for methods in derived classes that override
         virtual methods in base classes but are not explicitly marked as
         virtual.  */
      if (DECL_VINDEX (versn->symbol.decl))
        sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->symbol.decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  fn_ver_vec.release ();
  rebuild_cgraph_edges ();
  pop_cfun ();
  return resolver_decl;
}

/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
                              "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  unsigned int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                          get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
        DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                      get_identifier (field_name[3]),
                      build_array_type (unsigned_type_node,
                                        build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
  return type;
}
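
/* The record built above is meant to match the libgcc definition in
   libgcc/config/i386/cpuinfo.c, i.e.

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/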

/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
                         VAR_DECL,
                         get_identifier(name),
                         type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}

/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
   into an integer defined in libgcc/config/i386/cpuinfo.c  */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
                                DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* This is the order of bit-fields in __processor_features in cpuinfo.c  */
  enum processor_features
  {
    F_CMOV = 0,
    F_MMX,
    F_POPCNT,
    F_SSE,
    F_SSE2,
    F_SSE3,
    F_SSSE3,
    F_SSE4_1,
    F_SSE4_2,
    F_AVX,
    F_AVX2,
    F_MAX
  };

  /* These are the values for vendor types and cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should be subtracted by
     the corresponding start value.  */
  enum processor_model
  {
    M_INTEL = 1,
    M_AMD,
    M_CPU_TYPE_START,
    M_INTEL_ATOM,
    M_INTEL_CORE2,
    M_INTEL_COREI7,
    M_AMDFAM10H,
    M_AMDFAM15H,
    M_INTEL_SLM,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2,
    M_AMDFAM15H_BDVER3
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"amd", M_AMD},
      {"intel", M_INTEL},
      {"atom", M_INTEL_ATOM},
      {"slm", M_INTEL_SLM},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
      {"bdver3", M_AMDFAM15H_BDVER3},
    };

  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"cmov",    F_CMOV},
      {"mmx",     F_MMX},
      {"popcnt",  F_POPCNT},
      {"sse",     F_SSE},
      {"sse2",    F_SSE2},
      {"sse3",    F_SSE3},
      {"ssse3",   F_SSSE3},
      {"sse4.1",  F_SSE4_1},
      {"sse4.2",  F_SSE4_2},
      {"avx",     F_AVX},
      {"avx2",    F_AVX2}
    };

  tree __processor_model_type = build_processor_model_struct ();
  tree __cpu_model_var = make_var_decl (__processor_model_type,
                                        "__cpu_model");

  varpool_add_new_variable (__cpu_model_var);

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
         && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be an expr that can contain other EXPRS leading to a
         STRING_CST.  */
      if (!EXPR_P (param_string_cst))
        {
          error ("Parameter to builtin must be a string constant or literal");
          return integer_zero_node;
        }
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      tree ref;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
        = sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
        if (strcmp (arch_names_table[i].name,
                    TREE_STRING_POINTER (param_string_cst)) == 0)
          break;

      if (i == NUM_ARCH_NAMES)
        {
          error ("Parameter to builtin not valid: %s",
                 TREE_STRING_POINTER (param_string_cst));
          return integer_zero_node;
        }

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
          && field_val < M_CPU_SUBTYPE_START)
        {
          field = DECL_CHAIN (field);
          field_val -= M_CPU_TYPE_START;
        }

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
        {
          field = DECL_CHAIN ( DECL_CHAIN (field));
          field_val -= M_CPU_SUBTYPE_START;
        }

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);

      /* Check the value.  */
      final = build2 (EQ_EXPR, unsigned_type_node, ref,
                      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      tree ref;
      tree array_elt;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
        = sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
        if (strcmp (isa_names_table[i].name,
                    TREE_STRING_POINTER (param_string_cst)) == 0)
          break;

      if (i == NUM_ISA_NAMES)
        {
          error ("Parameter to builtin not valid: %s",
                 TREE_STRING_POINTER (param_string_cst));
          return integer_zero_node;
        }

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
        field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
                          integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
                      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  gcc_unreachable ();
}
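
/* For example (a sketch of the folded tree, shown as C):
   __builtin_cpu_supports ("popcnt") folds to

     (int) (__cpu_model.__cpu_features[0] & (1 << F_POPCNT))

   and __builtin_cpu_is ("intel") folds to

     (int) (__cpu_model.__cpu_vendor == M_INTEL)
*/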

static tree
ix86_fold_builtin (tree fndecl, int n_args,
                   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
                                   DECL_FUNCTION_CODE (fndecl);
      if (fn_code == IX86_BUILTIN_CPU_IS
          || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
        {
          gcc_assert (n_args == 1);
          return fold_builtin_cpu (fndecl, args);
        }
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}

/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char* name, int code,
                       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                               NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}

/* Make builtins to get CPU type and features supported.  The created
   builtins are:

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
                         INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
                         INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
                         INT_FTYPE_PCCHAR, true);
}
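
/* Typical use from user code (hypothetical example, not part of this
   file):

     int
     main (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("sse4.2"))
         return do_sse42 ();
       if (__builtin_cpu_is ("corei7"))
         return do_corei7 ();
       return do_generic ();
     }

   where do_sse42, do_corei7 and do_generic are placeholders.  */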

/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}

static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* Builtins to get CPU type and features.  */
  ix86_init_platform_type_builtins ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
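
/* E.g. a two-operand builtin such as __builtin_ia32_paddw128 reaches
   this expander with an icode like CODE_FOR_addv8hi3 (an illustrative
   pairing; the actual mapping is defined by the bdesc tables).  */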

/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
        {
          if (!insn_data[icode].operand[i + 1].predicate (op, mode))
            {
              enum insn_code new_icode = icode;
              switch (icode)
                {
                case CODE_FOR_xop_vpermil2v2df3:
                case CODE_FOR_xop_vpermil2v4sf3:
                case CODE_FOR_xop_vpermil2v4df3:
                case CODE_FOR_xop_vpermil2v8sf3:
                  error ("the last argument must be a 2-bit immediate");
                  return gen_reg_rtx (tmode);
                case CODE_FOR_xop_rotlv2di3:
                  new_icode = CODE_FOR_rotlv2di3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv4si3:
                  new_icode = CODE_FOR_rotlv4si3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv8hi3:
                  new_icode = CODE_FOR_rotlv8hi3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv16qi3:
                  new_icode = CODE_FOR_rotlv16qi3;
                xop_rotl:
                  if (CONST_INT_P (op))
                    {
                      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
                      op = GEN_INT (INTVAL (op) & mask);
                      gcc_checking_assert
                        (insn_data[icode].operand[i + 1].predicate (op, mode));
                    }
                  else
                    {
                      gcc_checking_assert
                        (nargs == 2
                         && insn_data[new_icode].operand[0].mode == tmode
                         && insn_data[new_icode].operand[1].mode == tmode
                         && insn_data[new_icode].operand[2].mode == mode
                         && insn_data[new_icode].operand[0].predicate
                            == insn_data[icode].operand[0].predicate
                         && insn_data[new_icode].operand[1].predicate
                            == insn_data[icode].operand[1].predicate);
                      icode = new_icode;
                      goto non_constant;
                    }
                  break;
                default:
                  gcc_unreachable ();
                }
            }
        }
      else
        {
        non_constant:
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
                                     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
31247 /* Subroutine of ix86_expand_builtin to take care of insns with
31248 variable number of operands. */
31251 ix86_expand_args_builtin (const struct builtin_description
*d
,
31252 tree exp
, rtx target
)
31254 rtx pat
, real_target
;
31255 unsigned int i
, nargs
;
31256 unsigned int nargs_constant
= 0;
31257 int num_memory
= 0;
31261 enum machine_mode mode
;
31263 bool last_arg_count
= false;
31264 enum insn_code icode
= d
->icode
;
31265 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31266 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31267 enum machine_mode rmode
= VOIDmode
;
31269 enum rtx_code comparison
= d
->comparison
;
31271 switch ((enum ix86_builtin_func_type
) d
->flag
)
31273 case V2DF_FTYPE_V2DF_ROUND
:
31274 case V4DF_FTYPE_V4DF_ROUND
:
31275 case V4SF_FTYPE_V4SF_ROUND
:
31276 case V8SF_FTYPE_V8SF_ROUND
:
31277 case V4SI_FTYPE_V4SF_ROUND
:
31278 case V8SI_FTYPE_V8SF_ROUND
:
31279 return ix86_expand_sse_round (d
, exp
, target
);
31280 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
31281 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
31282 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
31283 case INT_FTYPE_V8SF_V8SF_PTEST
:
31284 case INT_FTYPE_V4DI_V4DI_PTEST
:
31285 case INT_FTYPE_V4DF_V4DF_PTEST
:
31286 case INT_FTYPE_V4SF_V4SF_PTEST
:
31287 case INT_FTYPE_V2DI_V2DI_PTEST
:
31288 case INT_FTYPE_V2DF_V2DF_PTEST
:
31289 return ix86_expand_sse_ptest (d
, exp
, target
);
31290 case FLOAT128_FTYPE_FLOAT128
:
31291 case FLOAT_FTYPE_FLOAT
:
31292 case INT_FTYPE_INT
:
31293 case UINT64_FTYPE_INT
:
31294 case UINT16_FTYPE_UINT16
:
31295 case INT64_FTYPE_INT64
:
31296 case INT64_FTYPE_V4SF
:
31297 case INT64_FTYPE_V2DF
:
31298 case INT_FTYPE_V16QI
:
31299 case INT_FTYPE_V8QI
:
31300 case INT_FTYPE_V8SF
:
31301 case INT_FTYPE_V4DF
:
31302 case INT_FTYPE_V4SF
:
31303 case INT_FTYPE_V2DF
:
31304 case INT_FTYPE_V32QI
:
31305 case V16QI_FTYPE_V16QI
:
31306 case V8SI_FTYPE_V8SF
:
31307 case V8SI_FTYPE_V4SI
:
31308 case V8HI_FTYPE_V8HI
:
31309 case V8HI_FTYPE_V16QI
:
31310 case V8QI_FTYPE_V8QI
:
31311 case V8SF_FTYPE_V8SF
:
31312 case V8SF_FTYPE_V8SI
:
31313 case V8SF_FTYPE_V4SF
:
31314 case V8SF_FTYPE_V8HI
:
31315 case V4SI_FTYPE_V4SI
:
31316 case V4SI_FTYPE_V16QI
:
31317 case V4SI_FTYPE_V4SF
:
31318 case V4SI_FTYPE_V8SI
:
31319 case V4SI_FTYPE_V8HI
:
31320 case V4SI_FTYPE_V4DF
:
31321 case V4SI_FTYPE_V2DF
:
31322 case V4HI_FTYPE_V4HI
:
31323 case V4DF_FTYPE_V4DF
:
31324 case V4DF_FTYPE_V4SI
:
31325 case V4DF_FTYPE_V4SF
:
31326 case V4DF_FTYPE_V2DF
:
31327 case V4SF_FTYPE_V4SF
:
31328 case V4SF_FTYPE_V4SI
:
31329 case V4SF_FTYPE_V8SF
:
31330 case V4SF_FTYPE_V4DF
:
31331 case V4SF_FTYPE_V8HI
:
31332 case V4SF_FTYPE_V2DF
:
31333 case V2DI_FTYPE_V2DI
:
31334 case V2DI_FTYPE_V16QI
:
31335 case V2DI_FTYPE_V8HI
:
31336 case V2DI_FTYPE_V4SI
:
31337 case V2DF_FTYPE_V2DF
:
31338 case V2DF_FTYPE_V4SI
:
31339 case V2DF_FTYPE_V4DF
:
31340 case V2DF_FTYPE_V4SF
:
31341 case V2DF_FTYPE_V2SI
:
31342 case V2SI_FTYPE_V2SI
:
31343 case V2SI_FTYPE_V4SF
:
31344 case V2SI_FTYPE_V2SF
:
31345 case V2SI_FTYPE_V2DF
:
31346 case V2SF_FTYPE_V2SF
:
31347 case V2SF_FTYPE_V2SI
:
31348 case V32QI_FTYPE_V32QI
:
31349 case V32QI_FTYPE_V16QI
:
31350 case V16HI_FTYPE_V16HI
:
31351 case V16HI_FTYPE_V8HI
:
31352 case V8SI_FTYPE_V8SI
:
31353 case V16HI_FTYPE_V16QI
:
31354 case V8SI_FTYPE_V16QI
:
31355 case V4DI_FTYPE_V16QI
:
31356 case V8SI_FTYPE_V8HI
:
31357 case V4DI_FTYPE_V8HI
:
31358 case V4DI_FTYPE_V4SI
:
31359 case V4DI_FTYPE_V2DI
:
31362 case V4SF_FTYPE_V4SF_VEC_MERGE
:
31363 case V2DF_FTYPE_V2DF_VEC_MERGE
:
31364 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
31365 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
31366 case V16QI_FTYPE_V16QI_V16QI
:
31367 case V16QI_FTYPE_V8HI_V8HI
:
31368 case V8QI_FTYPE_V8QI_V8QI
:
31369 case V8QI_FTYPE_V4HI_V4HI
:
31370 case V8HI_FTYPE_V8HI_V8HI
:
31371 case V8HI_FTYPE_V16QI_V16QI
:
31372 case V8HI_FTYPE_V4SI_V4SI
:
31373 case V8SF_FTYPE_V8SF_V8SF
:
31374 case V8SF_FTYPE_V8SF_V8SI
:
31375 case V4SI_FTYPE_V4SI_V4SI
:
31376 case V4SI_FTYPE_V8HI_V8HI
:
31377 case V4SI_FTYPE_V4SF_V4SF
:
31378 case V4SI_FTYPE_V2DF_V2DF
:
31379 case V4HI_FTYPE_V4HI_V4HI
:
31380 case V4HI_FTYPE_V8QI_V8QI
:
31381 case V4HI_FTYPE_V2SI_V2SI
:
31382 case V4DF_FTYPE_V4DF_V4DF
:
31383 case V4DF_FTYPE_V4DF_V4DI
:
31384 case V4SF_FTYPE_V4SF_V4SF
:
31385 case V4SF_FTYPE_V4SF_V4SI
:
31386 case V4SF_FTYPE_V4SF_V2SI
:
31387 case V4SF_FTYPE_V4SF_V2DF
:
31388 case V4SF_FTYPE_V4SF_DI
:
31389 case V4SF_FTYPE_V4SF_SI
:
31390 case V2DI_FTYPE_V2DI_V2DI
:
31391 case V2DI_FTYPE_V16QI_V16QI
:
31392 case V2DI_FTYPE_V4SI_V4SI
:
31393 case V2UDI_FTYPE_V4USI_V4USI
:
31394 case V2DI_FTYPE_V2DI_V16QI
:
31395 case V2DI_FTYPE_V2DF_V2DF
:
31396 case V2SI_FTYPE_V2SI_V2SI
:
31397 case V2SI_FTYPE_V4HI_V4HI
:
31398 case V2SI_FTYPE_V2SF_V2SF
:
31399 case V2DF_FTYPE_V2DF_V2DF
:
31400 case V2DF_FTYPE_V2DF_V4SF
:
31401 case V2DF_FTYPE_V2DF_V2DI
:
31402 case V2DF_FTYPE_V2DF_DI
:
31403 case V2DF_FTYPE_V2DF_SI
:
31404 case V2SF_FTYPE_V2SF_V2SF
:
31405 case V1DI_FTYPE_V1DI_V1DI
:
31406 case V1DI_FTYPE_V8QI_V8QI
:
31407 case V1DI_FTYPE_V2SI_V2SI
:
31408 case V32QI_FTYPE_V16HI_V16HI
:
31409 case V16HI_FTYPE_V8SI_V8SI
:
31410 case V32QI_FTYPE_V32QI_V32QI
:
31411 case V16HI_FTYPE_V32QI_V32QI
:
31412 case V16HI_FTYPE_V16HI_V16HI
:
31413 case V8SI_FTYPE_V4DF_V4DF
:
31414 case V8SI_FTYPE_V8SI_V8SI
:
31415 case V8SI_FTYPE_V16HI_V16HI
:
31416 case V4DI_FTYPE_V4DI_V4DI
:
31417 case V4DI_FTYPE_V8SI_V8SI
:
31418 case V4UDI_FTYPE_V8USI_V8USI
:
31419 if (comparison
== UNKNOWN
)
31420 return ix86_expand_binop_builtin (icode
, exp
, target
);
31423 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
31424 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
31425 gcc_assert (comparison
!= UNKNOWN
);
31429 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
31430 case V16HI_FTYPE_V16HI_SI_COUNT
:
31431 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
31432 case V8SI_FTYPE_V8SI_SI_COUNT
:
31433 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
31434 case V4DI_FTYPE_V4DI_INT_COUNT
:
31435 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
31436 case V8HI_FTYPE_V8HI_SI_COUNT
:
31437 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
31438 case V4SI_FTYPE_V4SI_SI_COUNT
:
31439 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
31440 case V4HI_FTYPE_V4HI_SI_COUNT
:
31441 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
31442 case V2DI_FTYPE_V2DI_SI_COUNT
:
31443 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
31444 case V2SI_FTYPE_V2SI_SI_COUNT
:
31445 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
31446 case V1DI_FTYPE_V1DI_SI_COUNT
:
31448 last_arg_count
= true;
31450 case UINT64_FTYPE_UINT64_UINT64
:
31451 case UINT_FTYPE_UINT_UINT
:
31452 case UINT_FTYPE_UINT_USHORT
:
31453 case UINT_FTYPE_UINT_UCHAR
:
31454 case UINT16_FTYPE_UINT16_INT
:
31455 case UINT8_FTYPE_UINT8_INT
:
31458 case V2DI_FTYPE_V2DI_INT_CONVERT
:
31461 nargs_constant
= 1;
31463 case V4DI_FTYPE_V4DI_INT_CONVERT
:
31466 nargs_constant
= 1;
31468 case V8HI_FTYPE_V8HI_INT
:
31469 case V8HI_FTYPE_V8SF_INT
:
31470 case V8HI_FTYPE_V4SF_INT
:
31471 case V8SF_FTYPE_V8SF_INT
:
31472 case V4SI_FTYPE_V4SI_INT
:
31473 case V4SI_FTYPE_V8SI_INT
:
31474 case V4HI_FTYPE_V4HI_INT
:
31475 case V4DF_FTYPE_V4DF_INT
:
31476 case V4SF_FTYPE_V4SF_INT
:
31477 case V4SF_FTYPE_V8SF_INT
:
31478 case V2DI_FTYPE_V2DI_INT
:
31479 case V2DF_FTYPE_V2DF_INT
:
31480 case V2DF_FTYPE_V4DF_INT
:
31481 case V16HI_FTYPE_V16HI_INT
:
31482 case V8SI_FTYPE_V8SI_INT
:
31483 case V4DI_FTYPE_V4DI_INT
:
31484 case V2DI_FTYPE_V4DI_INT
:
31486 nargs_constant
= 1;
31488 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
31489 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
31490 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
31491 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
31492 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
31493 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
31496 case V32QI_FTYPE_V32QI_V32QI_INT
:
31497 case V16HI_FTYPE_V16HI_V16HI_INT
:
31498 case V16QI_FTYPE_V16QI_V16QI_INT
:
31499 case V4DI_FTYPE_V4DI_V4DI_INT
:
31500 case V8HI_FTYPE_V8HI_V8HI_INT
:
31501 case V8SI_FTYPE_V8SI_V8SI_INT
:
31502 case V8SI_FTYPE_V8SI_V4SI_INT
:
31503 case V8SF_FTYPE_V8SF_V8SF_INT
:
31504 case V8SF_FTYPE_V8SF_V4SF_INT
:
31505 case V4SI_FTYPE_V4SI_V4SI_INT
:
31506 case V4DF_FTYPE_V4DF_V4DF_INT
:
31507 case V4DF_FTYPE_V4DF_V2DF_INT
:
31508 case V4SF_FTYPE_V4SF_V4SF_INT
:
31509 case V2DI_FTYPE_V2DI_V2DI_INT
:
31510 case V4DI_FTYPE_V4DI_V2DI_INT
:
31511 case V2DF_FTYPE_V2DF_V2DF_INT
:
31513 nargs_constant
= 1;
31515 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
31518 nargs_constant
= 1;
31520 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
31523 nargs_constant
= 1;
31525 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
31528 nargs_constant
= 1;
31530 case V2DI_FTYPE_V2DI_UINT_UINT
:
31532 nargs_constant
= 2;
31534 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
31535 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
31536 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
31537 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
31539 nargs_constant
= 1;
31541 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
31543 nargs_constant
= 2;
31545 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
31546 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
31550 gcc_unreachable ();
  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
          || target == 0
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
        {
          /* SIMD shift insns take either an 8-bit immediate or
             register as count.  But builtin functions take int as
             count.  If count doesn't match, we put it in register.  */
          if (!match)
            {
              op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
              if (!insn_p->operand[i + 1].predicate (op, mode))
                op = copy_to_reg (op);
            }
        }
      else if ((nargs - i) <= nargs_constant)
        {
          if (!match)
            switch (icode)
              {
              case CODE_FOR_avx2_inserti128:
              case CODE_FOR_avx2_extracti128:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_roundsd:
              case CODE_FOR_sse4_1_roundss:

              case CODE_FOR_sse4_1_roundpd:
              case CODE_FOR_sse4_1_roundps:
              case CODE_FOR_avx_roundpd256:
              case CODE_FOR_avx_roundps256:

              case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
              case CODE_FOR_sse4_1_roundps_sfix:
              case CODE_FOR_avx_roundpd_vec_pack_sfix256:
              case CODE_FOR_avx_roundps_sfix256:

              case CODE_FOR_sse4_1_blendps:
              case CODE_FOR_avx_blendpd256:
              case CODE_FOR_avx_vpermilv4df:
                error ("the last argument must be a 4-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_blendpd:
              case CODE_FOR_avx_vpermilv2df:
              case CODE_FOR_xop_vpermil2v2df3:
              case CODE_FOR_xop_vpermil2v4sf3:
              case CODE_FOR_xop_vpermil2v4df3:
              case CODE_FOR_xop_vpermil2v8sf3:
                error ("the last argument must be a 2-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vextractf128v4df:
              case CODE_FOR_avx_vextractf128v8sf:
              case CODE_FOR_avx_vextractf128v8si:
              case CODE_FOR_avx_vinsertf128v4df:
              case CODE_FOR_avx_vinsertf128v8sf:
              case CODE_FOR_avx_vinsertf128v8si:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vmcmpv2df3:
              case CODE_FOR_avx_vmcmpv4sf3:
              case CODE_FOR_avx_cmpv2df3:
              case CODE_FOR_avx_cmpv4sf3:
              case CODE_FOR_avx_cmpv4df3:
              case CODE_FOR_avx_cmpv8sf3:
                error ("the last argument must be a 5-bit immediate");
                return const0_rtx;

              default:
                switch (nargs_constant)
                  {
                  case 2:
                    if ((nargs - i) == nargs_constant)
                      {
                        error ("the next to last argument must be an 8-bit immediate");
                        break;
                      }
                  case 1:
                    error ("the last argument must be an 8-bit immediate");
                    break;
                  default:
                    gcc_unreachable ();
                  }
                return const0_rtx;
              }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to
             be generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
            {
              if (optimize || !match || num_memory > 1)
                op = copy_to_mode_reg (mode, op);
            }
          else
            {
              op = copy_to_reg (op);
              op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
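/* Illustrative sketch (assumes the standard emmintrin.h wrapper around
   __builtin_ia32_pslldi128; not part of the original sources): the
   last_arg_count path above is what lets a non-constant SIMD shift
   count work:

       __m128i shift (__m128i v, int n)
       {
         return _mm_slli_epi32 (v, n);  // count is copied into a
                                        // register when N is not imm8
       }
*/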
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
                                  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;

    case INT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
        {
          op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
          target = gen_rtx_MEM (tmode, op);
        }
      else
        target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
          || target == 0
          || !register_operand (target, tmode)
          || GET_MODE (target) != tmode)
        target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
        {
          if (!match)
            {
              if (icode == CODE_FOR_lwp_lwpvalsi3
                  || icode == CODE_FOR_lwp_lwpinssi3
                  || icode == CODE_FOR_lwp_lwpvaldi3
                  || icode == CODE_FOR_lwp_lwpinsdi3)
                error ("the last argument must be a 32-bit immediate");
              else
                error ("the last argument must be an 8-bit immediate");
              return const0_rtx;
            }
        }
      else
        {
          if (i == memory)
            {
              /* This must be the memory operand.  */
              op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
              op = gen_rtx_MEM (mode, op);
              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
            }
          else
            {
              /* This must be register.  */
              if (VECTOR_MODE_P (mode))
                op = safe_vector_operand (op, mode);

              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
              op = copy_to_mode_reg (mode, op);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
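/* Illustrative sketch (assumes the standard emmintrin.h wrappers; not
   part of the original sources):

       #include <emmintrin.h>
       int get3 (__m128i v)
       {
         return _mm_extract_epi16 (v, 3);       // vec_ext builtin
       }
       __m128i put3 (__m128i v, short x)
       {
         return _mm_insert_epi16 (v, x, 3);     // vec_set builtin
       }

   Both element numbers are validated by get_element_number above.  */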
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat, insn;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* For CPU builtins that can be folded, fold first and expand the fold.  */
  switch (fcode)
    {
    case IX86_BUILTIN_CPU_INIT:
      {
        /* Make it call __cpu_indicator_init in libgcc. */
        tree call_expr, fndecl, type;
        type = build_function_type_list (integer_type_node, NULL_TREE);
        fndecl = build_fn_decl ("__cpu_indicator_init", type);
        call_expr = build_call_expr (fndecl, 0);
        return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
      }
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
      {
        tree arg0 = CALL_EXPR_ARG (exp, 0);
        tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
        gcc_assert (fold_expr != NULL_TREE);
        return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
      }
    }

  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
                                       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
        error ("%qE needs unknown isa option", fndecl);
      else
        {
          gcc_assert (opts != NULL);
          error ("%qE needs isa option %s", fndecl, opts);
          free (opts);
        }
      return const0_rtx;
    }

  switch (fcode)
    {
:
32127 case IX86_BUILTIN_MASKMOVDQU
:
32128 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
32129 ? CODE_FOR_mmx_maskmovq
32130 : CODE_FOR_sse2_maskmovdqu
);
32131 /* Note the arg order is different from the operand order. */
32132 arg1
= CALL_EXPR_ARG (exp
, 0);
32133 arg2
= CALL_EXPR_ARG (exp
, 1);
32134 arg0
= CALL_EXPR_ARG (exp
, 2);
32135 op0
= expand_normal (arg0
);
32136 op1
= expand_normal (arg1
);
32137 op2
= expand_normal (arg2
);
32138 mode0
= insn_data
[icode
].operand
[0].mode
;
32139 mode1
= insn_data
[icode
].operand
[1].mode
;
32140 mode2
= insn_data
[icode
].operand
[2].mode
;
32142 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32143 op0
= gen_rtx_MEM (mode1
, op0
);
32145 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32146 op0
= copy_to_mode_reg (mode0
, op0
);
32147 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
32148 op1
= copy_to_mode_reg (mode1
, op1
);
32149 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
32150 op2
= copy_to_mode_reg (mode2
, op2
);
32151 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

        tmp = validize_mem (force_const_mem (mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (mode);

        emit_move_insn (target, tmp);
        return target;
      }
    case IX86_BUILTIN_RDPMC:
    case IX86_BUILTIN_RDTSC:
    case IX86_BUILTIN_RDTSCP:

      op0 = gen_reg_rtx (DImode);
      op1 = gen_reg_rtx (DImode);

      if (fcode == IX86_BUILTIN_RDPMC)
        {
          arg0 = CALL_EXPR_ARG (exp, 0);
          op2 = expand_normal (arg0);
          if (!register_operand (op2, SImode))
            op2 = copy_to_mode_reg (SImode, op2);

          insn = (TARGET_64BIT
                  ? gen_rdpmc_rex64 (op0, op1, op2)
                  : gen_rdpmc (op0, op2));
          emit_insn (insn);
        }
      else if (fcode == IX86_BUILTIN_RDTSC)
        {
          insn = (TARGET_64BIT
                  ? gen_rdtsc_rex64 (op0, op1)
                  : gen_rdtsc (op0));
          emit_insn (insn);
        }
      else
        {
          op2 = gen_reg_rtx (SImode);

          insn = (TARGET_64BIT
                  ? gen_rdtscp_rex64 (op0, op1, op2)
                  : gen_rdtscp (op0, op2));
          emit_insn (insn);

          arg0 = CALL_EXPR_ARG (exp, 0);
          op4 = expand_normal (arg0);
          if (!address_operand (op4, VOIDmode))
            {
              op4 = convert_memory_address (Pmode, op4);
              op4 = copy_addr_to_reg (op4);
            }
          emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
        }

      if (target == 0)
        {
          /* mode is VOIDmode if __builtin_rd* has been called
             without lhs.  */
          if (mode == VOIDmode)
            return target;
          target = gen_reg_rtx (mode);
        }

      if (TARGET_64BIT)
        {
          op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
                                     op1, 1, OPTAB_DIRECT);
          op0 = expand_simple_binop (DImode, IOR, op0, op1,
                                     op0, 1, OPTAB_DIRECT);
        }

      emit_move_insn (target, op0);
      return target;
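/* Illustrative note (not from the original sources): rdtsc/rdpmc/rdtscp
   return the counter split across two registers, and the ASHIFT/IOR
   pair above recombines the halves exactly as one would by hand:

       unsigned long long t = ((unsigned long long) hi << 32) | lo;
*/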
    case IX86_BUILTIN_FXSAVE:
    case IX86_BUILTIN_FXRSTOR:
    case IX86_BUILTIN_FXSAVE64:
    case IX86_BUILTIN_FXRSTOR64:
      switch (fcode)
        {
        case IX86_BUILTIN_FXSAVE:
          icode = CODE_FOR_fxsave;
          break;
        case IX86_BUILTIN_FXRSTOR:
          icode = CODE_FOR_fxrstor;
          break;
        case IX86_BUILTIN_FXSAVE64:
          icode = CODE_FOR_fxsave64;
          break;
        case IX86_BUILTIN_FXRSTOR64:
          icode = CODE_FOR_fxrstor64;
          break;
        default:
          gcc_unreachable ();
        }

      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);

      if (!address_operand (op0, VOIDmode))
        {
          op0 = convert_memory_address (Pmode, op0);
          op0 = copy_addr_to_reg (op0);
        }
      op0 = gen_rtx_MEM (BLKmode, op0);

      pat = GEN_FCN (icode) (op0);
      if (pat)
        emit_insn (pat);
      return 0;
    case IX86_BUILTIN_XSAVE:
    case IX86_BUILTIN_XRSTOR:
    case IX86_BUILTIN_XSAVE64:
    case IX86_BUILTIN_XRSTOR64:
    case IX86_BUILTIN_XSAVEOPT:
    case IX86_BUILTIN_XSAVEOPT64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!address_operand (op0, VOIDmode))
        {
          op0 = convert_memory_address (Pmode, op0);
          op0 = copy_addr_to_reg (op0);
        }
      op0 = gen_rtx_MEM (BLKmode, op0);

      op1 = force_reg (DImode, op1);

      if (TARGET_64BIT)
        {
          op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
                                     NULL, 1, OPTAB_DIRECT);
          switch (fcode)
            {
            case IX86_BUILTIN_XSAVE:
              icode = CODE_FOR_xsave_rex64;
              break;
            case IX86_BUILTIN_XRSTOR:
              icode = CODE_FOR_xrstor_rex64;
              break;
            case IX86_BUILTIN_XSAVE64:
              icode = CODE_FOR_xsave64;
              break;
            case IX86_BUILTIN_XRSTOR64:
              icode = CODE_FOR_xrstor64;
              break;
            case IX86_BUILTIN_XSAVEOPT:
              icode = CODE_FOR_xsaveopt_rex64;
              break;
            case IX86_BUILTIN_XSAVEOPT64:
              icode = CODE_FOR_xsaveopt64;
              break;
            default:
              gcc_unreachable ();
            }

          op2 = gen_lowpart (SImode, op2);
          op1 = gen_lowpart (SImode, op1);
          pat = GEN_FCN (icode) (op0, op1, op2);
        }
      else
        {
          switch (fcode)
            {
            case IX86_BUILTIN_XSAVE:
              icode = CODE_FOR_xsave;
              break;
            case IX86_BUILTIN_XRSTOR:
              icode = CODE_FOR_xrstor;
              break;
            case IX86_BUILTIN_XSAVEOPT:
              icode = CODE_FOR_xsaveopt;
              break;
            default:
              gcc_unreachable ();
            }
          pat = GEN_FCN (icode) (op0, op1);
        }

      if (pat)
        emit_insn (pat);
      return 0;
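/* Illustrative note (not from the original sources): the xsave family
   takes its feature mask in EDX:EAX, so the 64-bit mask operand is
   split as above, equivalent to

       unsigned int eax = mask & 0xffffffffU;   // low half  (op1)
       unsigned int edx = mask >> 32;           // high half (op2)
*/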
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
          || !insn_data[icode].operand[0].predicate (target, Pmode))
        target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
               ? CODE_FOR_tbm_bextri_si
               : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
        {
          error ("last argument must be an immediate");
          return const0_rtx;
        }
      else
        {
          unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
          unsigned char lsb_index = INTVAL (op1) & 0xFF;
          op1 = GEN_INT (length);
          op2 = GEN_INT (lsb_index);
          pat = GEN_FCN (icode) (target, op0, op1, op2);
          if (pat)
            emit_insn (pat);
          return target;
        }
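/* Illustrative note (not from the original sources): the TBM bextri
   control word packs the field length in bits 15:8 and the start bit
   in bits 7:0, matching the decomposition above:

       unsigned char length    = (ctrl >> 8) & 0xFF;
       unsigned char lsb_index = ctrl & 0xFF;

   e.g. ctrl == 0x0804 extracts 8 bits starting at bit 4.  */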
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
        {
          op1 = convert_memory_address (Pmode, op1);
          op1 = copy_addr_to_reg (op1);
        }
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
        {
          op2 = gen_reg_rtx (SImode);
          emit_insn (gen_zero_extendhisi2 (op2, op0));
        }
      else if (mode0 == SImode)
        op2 = op0;
      else
        op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
        target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
                         const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
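/* Illustrative sketch of the *_step contract implemented above
   (hardware value and carry flag shown as pseudo-values; not from the
   original sources):

       int _rdrand32_step (unsigned int *p)
       {
         *p = <random value from rdrand>;
         return <CF>;           // 1 = valid data, 0 = retry
       }

   The CF test is the GEU on CCCmode feeding the conditional move.  */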
    case IX86_BUILTIN_RDSEED16_STEP:
      icode = CODE_FOR_rdseedhi_1;
      mode0 = HImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED32_STEP:
      icode = CODE_FOR_rdseedsi_1;
      mode0 = SImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED64_STEP:
      icode = CODE_FOR_rdseeddi_1;
      mode0 = DImode;

rdseed_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
        {
          op1 = convert_memory_address (Pmode, op1);
          op1 = copy_addr_to_reg (op1);
        }
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op2 = gen_reg_rtx (QImode);

      pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
                         const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op2, pat));

      if (target == 0)
        target = gen_reg_rtx (SImode);

      emit_insn (gen_zero_extendqisi2 (target, op2));
      return target;
    case IX86_BUILTIN_ADDCARRYX32:
      icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
      mode0 = SImode;
      goto addcarryx;

    case IX86_BUILTIN_ADDCARRYX64:
      icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
      mode0 = DImode;

addcarryx:
      arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
      arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
      arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
      arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */

      op0 = gen_reg_rtx (QImode);

      /* Generate CF from input operand.  */
      op1 = expand_normal (arg0);
      op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
      emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));

      /* Gen ADCX instruction to compute X+Y+CF.  */
      op2 = expand_normal (arg1);
      op3 = expand_normal (arg2);

      if (!REG_P (op2))
        op2 = copy_to_mode_reg (mode0, op2);
      if (!REG_P (op3))
        op3 = copy_to_mode_reg (mode0, op3);

      op0 = gen_reg_rtx (mode0);

      op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
      pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
      emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));

      /* Store the result.  */
      op4 = expand_normal (arg3);
      if (!address_operand (op4, VOIDmode))
        {
          op4 = convert_memory_address (Pmode, op4);
          op4 = copy_addr_to_reg (op4);
        }
      emit_move_insn (gen_rtx_MEM (mode0, op4), op0);

      /* Return current CF value.  */
      if (target == 0)
        target = gen_reg_rtx (QImode);

      PUT_MODE (pat, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, target, pat));
      return target;
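/* Illustrative sketch of the carry-chain contract implemented above
   (not from the original sources):

       unsigned char
       addcarryx_u32 (unsigned char c_in, unsigned int x,
                      unsigned int y, unsigned int *sum_out)
       {
         unsigned long long t = (unsigned long long) x + y + c_in;
         *sum_out = (unsigned int) t;
         return (unsigned char) (t >> 32);      // carry out
       }

   Adding C_IN to -1 in QImode materializes CF, and the final LTU on
   CCCmode reads the resulting carry back out.  */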
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    gather_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
          || GET_MODE (target) != insn_data[icode].operand[0].mode)
        subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
        subtarget = target;

      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
          || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
        {
          rtx half = gen_reg_rtx (V4SImode);
          if (!nonimmediate_operand (op2, V8SImode))
            op2 = copy_to_mode_reg (V8SImode, op2);
          emit_insn (gen_vec_extract_lo_v8si (half, op2));
          op2 = half;
        }
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
               || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
        {
          rtx (*gen) (rtx, rtx);
          rtx half = gen_reg_rtx (mode0);
          if (mode0 == V4SFmode)
            gen = gen_vec_extract_lo_v8sf;
          else
            gen = gen_vec_extract_lo_v8si;
          if (!nonimmediate_operand (op0, GET_MODE (op0)))
            op0 = copy_to_mode_reg (GET_MODE (op0), op0);
          emit_insn (gen (half, op0));
          op0 = half;
          if (!nonimmediate_operand (op3, GET_MODE (op3)))
            op3 = copy_to_mode_reg (GET_MODE (op3), op3);
          emit_insn (gen (half, op3));
          op3 = half;
        }
      /* Force memory operand only with base register here.  But we
         don't want to do it on memory operand for other builtin
         functions.  */
      op1 = force_reg (Pmode, convert_to_mode (Pmode, op1, 1));

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
        op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
        op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
        {
          error ("last argument must be scale 1, 2, 4, 8");
          return const0_rtx;
        }
      /* Optimize.  If mask is known to have all high bits set,
         replace op0 with pc_rtx to signal that the instruction
         overwrites the whole destination and doesn't use its
         previous contents.  */
      if (optimize)
        {
          if (TREE_CODE (arg3) == VECTOR_CST)
            {
              unsigned int negative = 0;
              for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
                {
                  tree cst = VECTOR_CST_ELT (arg3, i);
                  if (TREE_CODE (cst) == INTEGER_CST
                      && tree_int_cst_sign_bit (cst))
                    negative++;
                  else if (TREE_CODE (cst) == REAL_CST
                           && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
                    negative++;
                }
              if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
                op0 = pc_rtx;
            }
          else if (TREE_CODE (arg3) == SSA_NAME)
            {
              /* Recognize also when mask is like:
                 __v2df src = _mm_setzero_pd ();
                 __v2df mask = _mm_cmpeq_pd (src, src);
                 or
                 __v8sf src = _mm256_setzero_ps ();
                 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
                 as that is a cheaper way to load all ones into
                 a register than having to load a constant from
                 memory.  */
              gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
              if (is_gimple_call (def_stmt))
                {
                  tree fndecl = gimple_call_fndecl (def_stmt);
                  if (fndecl
                      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
                    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
                      {
                      case IX86_BUILTIN_CMPPD:
                      case IX86_BUILTIN_CMPPS:
                      case IX86_BUILTIN_CMPPD256:
                      case IX86_BUILTIN_CMPPS256:
                        if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
                          break;
                        /* FALLTHRU */
                      case IX86_BUILTIN_CMPEQPD:
                      case IX86_BUILTIN_CMPEQPS:
                        if (initializer_zerop (gimple_call_arg (def_stmt, 0))
                            && initializer_zerop (gimple_call_arg (def_stmt,
                                                                   1)))
                          op0 = pc_rtx;
                        break;
                      default:
                        break;
                      }
                }
            }
        }
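/* Illustrative example of the SSA_NAME pattern recognized above
   (assumes the standard AVX2 intrinsics; not from the original
   sources):

       __m256d zero = _mm256_setzero_pd ();
       __m256d mask = _mm256_cmp_pd (zero, zero, _CMP_EQ_OQ);
       r = _mm256_mask_i32gather_pd (src, base, idx, mask, 8);

   The compare of zero with itself yields all-ones lanes, so op0 can
   become pc_rtx and the gather may clobber its whole destination.  */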
      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
        return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
          || fcode == IX86_BUILTIN_GATHERDIV8SI)
        {
          enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
                                    ? V4SFmode : V4SImode;
          if (target == NULL_RTX)
            target = gen_reg_rtx (tmode);
          if (tmode == V4SFmode)
            emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
          else
            emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
        }
      else
        target = subtarget;

      return target;

    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        {
          error ("the xabort's argument must be an 8-bit immediate");
          return const0_rtx;
        }
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
        {
        case IX86_BUILTIN_FABSQ:
        case IX86_BUILTIN_COPYSIGNQ:
          if (!TARGET_SSE)
            /* Emit a normal call if SSE isn't available.  */
            return expand_call (exp, target, ignore);
        default:
          return ix86_expand_args_builtin (d, exp, target);
        }

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
                                            (enum ix86_builtin_func_type)
                                            d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_SQRTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPD256];
        }
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
        }
      break;

    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
        }
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
        }
      break;

    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
        }
      break;

    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
        }
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
        }
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
        }
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD256];
        }
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS256];
        }
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD256];
        }
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS256];
        }
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
        }
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
        }
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_RINTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPD256];
        }
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_RINTPS256];
        }
      break;

    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
        }
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
        }
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD];
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
        }
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS];
          if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
        }
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
                                type_in);

  return NULL_TREE;
}
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (el_mode == SFmode)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase. */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
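/* Illustrative examples of the SVML mangling above (not from the
   original sources): a V4SF sin becomes "vmlsSin4" and a V2DF pow
   becomes "vmldPow2" -- prefix "vmls"/"vmld", builtin name with
   "__builtin_" stripped, lane count appended, and the first letter of
   the function name upcased.  */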
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64-bit only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
          || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
          || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
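/* Illustrative examples of the ACML mangling above (not from the
   original sources): V2DF sin becomes "__vrd2_sin" and V4SF expf
   becomes "__vrs4_expf", following the "__vr<precision><lanes>_<name>"
   scheme.  */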
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
                               const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
                         bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
         && flag_finite_math_only && !flag_trapping_math
         && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
        /* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
        return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
        /* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
        return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
        return NULL_TREE;
      }
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (ipar[i] >= 2)
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (ipar[i] < 2)
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
         within the low 128-bit lane, but the high 128-bit lane must
         mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
         the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
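/* Worked example (not from the original sources): for V4SFmode, a
   parallel selecting elements (1, 0, 3, 2) yields
   mask = (1 << 0) | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1, the imm8
   of "vpermilps $0xb1", and the function returns 0xb1 + 1.  */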
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
        return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
        return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
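/* Worked example (not from the original sources): for V4DFmode, a
   parallel selecting elements (2, 3, 4, 5) -- the high half of
   operand 0 followed by the low half of operand 1 -- gives e = 1 and
   e = 2 for the two halves, so mask = 1 | (2 << 4) = 0x21, the imm8
   of "vperm2f128 $0x21".  */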
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_double_mode (mode, &operand, 1, operands, operands + 1);
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                    operands[1]));
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                    operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
/* Return a register priority for hard reg REGNO.  */
static int
ix86_register_priority (int hard_regno)
{
  /* ebp and r13 as the base always want a displacement, r12 as the
     base always wants an index.  So discourage their usage in an
     address.  */
  if (hard_regno == R12_REG || hard_regno == R13_REG)
    return 0;
  if (hard_regno == BP_REG)
    return 1;
  /* New x86-64 int registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
    return 2;
  /* New x86-64 SSE registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
    return 2;
  /* Usage of AX register results in smaller code.  Prefer it.  */
  if (hard_regno == 0)
    return 4;
  return 3;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
        return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x) > 0)
        {
          /* Limit class to non-sse.  */
          if (regclass == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (regclass == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (regclass == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
            return regclass;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
        return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
        return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && INTEGER_CLASS_P (rclass)
      && MAYBE_NON_Q_CLASS_P (rclass))
    {
      int regno;

      if (REG_P (x))
	regno = REGNO (x);
      else
	regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	                               (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
	reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
	reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
	reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case NON_Q_REGS:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;
  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load[index], ix86_cost->fp_store[index]);
      return in ? ix86_cost->fp_load[index] : ix86_cost->fp_store[index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load[index], ix86_cost->sse_store[index]);
      return in ? ix86_cost->sse_load[index] : ix86_cost->sse_store[index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load[index], ix86_cost->mmx_store[index]);
      return in ? ix86_cost->mmx_load[index] : ix86_cost->mmx_store[index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
	{
	  if (!in)
	    return ix86_cost->int_store[0];
	  if (TARGET_PARTIAL_REG_DEPENDENCY
	      && optimize_function_for_speed_p (cfun))
	    cost = ix86_cost->movzbl_load;
	  else
	    cost = ix86_cost->int_load[0];
	  if (in == 2)
	    return MAX (cost, ix86_cost->int_store[0]);
	  return cost;
	}
      else
	{
	  if (in == 2)
	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	  if (in)
	    return ix86_cost->movzbl_load;
	  else
	    return ix86_cost->int_store[0] + 4;
	}
      break;
    case 2:
      if (in == 2)
	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      if (in == 2)
	cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
      else if (in)
	cost = ix86_cost->int_load[2];
      else
	cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
	  > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* The flags register, and only the flags register, can hold CCmode
     values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  OImode move is available only when AVX is
	 enabled.  */
      return ((TARGET_AVX && mode == OImode)
	      || VALID_AVX256_REG_MODE (mode)
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (enum machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
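
/* Illustrative sketch (not part of GCC): the return value above is just
   COSTS_N_INSNS applied to a ceiling division, i.e. the number of
   UNITS-byte pieces needed to cover the mode.  A standalone model of that
   arithmetic, with hypothetical sizes:  */
#if 0
#include <stdio.h>

/* ceil (size / units): number of register-sized pieces to move.  */
static int
move_pieces (int size, int units)
{
  return (size + units - 1) / units;
}

int
main (void)
{
  /* E.g. a 16-byte vector moved in 8-byte words takes 2 pieces, but
     only 1 piece when 16-byte SSE moves are available.  */
  printf ("%d\n", move_pieces (16, 8));	  /* 2 */
  printf ("%d\n", move_pieces (16, 16));  /* 1 */
  return 0;
}
#endif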
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
		bool speed)
{
  enum rtx_code code = (enum rtx_code) code_i;
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
	{
	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
	  return true;
	}
      return false;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      break;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	{
	  *total = 0;
	  break;
	}
      switch (standard_80387_constant_p (x))
	{
	case 1: /* 0.0 */
	  *total = 1;
	  break;
	default: /* Other constants */
	  *total = 2;
	  break;
	case 0:
	case -1:
	  break;
	}
      if (SSE_FLOAT_MODE_P (mode))
	{
    case CONST_VECTOR:
	  switch (standard_sse_constant_p (x))
	    {
	    case 0:
	      break;
	    case 1:  /* 0: xor eliminates false dependency */
	      *total = 0;
	      return true;
	    default: /* -1: cmp contains false dependency */
	      *total = 1;
	      return true;
	    }
	}
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
	 it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
		+ (flag_pic != 0 && !TARGET_64BIT)
		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      break;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = cost->movsx;
      break;

    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	  && CONST_INT_P (XEXP (x, 1)))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  /* V*QImode is emulated with 1-11 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int count = 11;
	      if (TARGET_XOP && mode == V16QImode)
		{
		  /* For XOP we use vpshab, which requires a broadcast of the
		     value to the variable shift insn.  For constants this
		     means a V16Q const in mem; even when we can perform the
		     shift with one insn set the cost to prefer paddb.  */
		  if (CONSTANT_P (XEXP (x, 1)))
		    {
		      *total = (cost->fabs
				+ rtx_cost (XEXP (x, 0), code, 0, speed)
				+ (speed ? 2 : COSTS_N_BYTES (16)));
		      return true;
		    }
		  count = 3;
		}
	      else if (TARGET_SSSE3)
		count = 7;
	      *total = cost->fabs * count;
	    }
	  else
	    *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else if (GET_CODE (XEXP (x, 1)) == SUBREG
		   && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
	    {
	      /* Return the cost after shift-and truncation.  */
	      *total = cost->shift_var;
	      return true;
	    }
	  else
	    *total = cost->shift_var;
	}
      break;

    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4);

	/* ??? SSE scalar/vector cost should be used here.  */
	/* ??? Bald assumption that fma has the same cost as fmul.  */
	*total = cost->fmul;
	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE scalar cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fmul;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* V*QImode is emulated with 7-13 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int extra = 11;
	      if (TARGET_XOP && mode == V16QImode)
		extra = 5;
	      else if (TARGET_SSSE3)
		extra = 6;
	      *total = cost->fmul * 2 + cost->fabs * extra;
	    }
	  /* V*DImode is emulated with 5-8 insns.  */
	  else if (mode == V2DImode || mode == V4DImode)
	    {
	      if (TARGET_XOP && mode == V2DImode)
		*total = cost->fmul * 2 + cost->fabs * 3;
	      else
		*total = cost->fmul * 3 + cost->fabs * 5;
	    }
	  /* Without sse4.1, we don't have PMULLD; it's emulated with 7
	     insns, including two PMULUDQ.  */
	  else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
	    *total = cost->fmul * 2 + cost->fabs * 5;
	  else
	    *total = cost->fmul;
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, outer_code, opno, speed)
		    + rtx_cost (op1, outer_code, opno, speed));

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fdiv;
      else
	*total = cost->divide[MODE_INDEX (mode)];
      break;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = cost->lea;
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      break;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
		    + rtx_cost (const1_rtx, outer_code, opno, speed));
	  return true;
	}
      break;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      break;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fabs;
      break;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fsqrt;
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = cost->fabs;
      return true;

    default:
      return false;
    }
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */

static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;

  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	fnaddr = legitimize_pic_address (fnaddr,
					 gen_rtx_REG (Pmode, tmp_regno));

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);
  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (!TARGET_PECOFF && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file,
	       "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if (JUMP_P (start) || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if (JUMP_P (insn) || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if (JUMP_P (start) || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.   */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx insn, next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
	if (NOTE_P (next)
	    && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
		|| NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
	  insn = next;
	else
	  break;

      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
       return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
   unsigned int regno;
   if (!REG_P (*p))
     return 0;
   regno = REGNO (*p);
   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
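
/* Illustrative sketch (not part of GCC): the transformation above turns
   `addl $-4,%eax' into `subl $4,%eax'.  The -128/128 special case exists
   because the signed 8-bit immediate encoding covers -128 but not +128.
   A hypothetical standalone check of that rule:  */
#if 0
#include <stdio.h>

/* Return 1 if negating VAL gives a constant at least as cheap to encode
   (fits imm8 whenever VAL does), mirroring the condition above.  */
static int
negate_profitable (long val)
{
  return (val < 0 && val != -128) || val == 128;
}

int
main (void)
{
  printf ("%d %d %d\n",
	  negate_profitable (-4),	/* 1: use subl $4  */
	  negate_profitable (-128),	/* 0: -128 fits imm8, 128 does not  */
	  negate_profitable (128));	/* 1: use subl $-128  */
  return 0;
}
#endif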
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
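
/* Illustrative sketch (not part of GCC): for negative (i.e. large
   unsigned) inputs the expansion above halves the value, folds the
   shifted-out bit back into bit 0 so the final rounding stays correct,
   converts as signed, and doubles.  The same trick in plain C:  */
#if 0
#include <stdio.h>
#include <stdint.h>

static double
floatuns (uint64_t u)
{
  if ((int64_t) u >= 0)
    return (double) (int64_t) u;	/* Fits in the signed range.  */
  /* Halve, OR in the lost bit to keep rounding right, convert as
     signed, then double.  */
  uint64_t half = (u >> 1) | (u & 1);
  return (double) (int64_t) half * 2.0;
}

int
main (void)
{
  uint64_t u = 0xFFFFFFFFFFFFFFFFull;
  printf ("%.1f\n%.1f\n", floatuns (u), (double) u);  /* Identical.  */
  return 0;
}
#endif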
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
        rtx insn, dup;

        /* First attempt to recognize VAL as-is.  */
        dup = gen_rtx_VEC_DUPLICATE (mode, val);
        insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
        if (recog_memoized (insn) < 0)
          {
            rtx seq;
            /* If that fails, force VAL into a register.  */

            start_sequence ();
            XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
            seq = get_insns ();
            end_sequence ();
            if (seq)
              emit_insn_before (seq, insn);

            ok = recog_memoized (insn) >= 0;
            gcc_assert (ok);
          }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          rtx x;

          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
      goto widen;

    case V8QImode:
      if (!mmx_ok)
        return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
        {
          struct expand_vec_perm_d dperm;
          rtx tmp1, tmp2;

        permute:
          memset (&dperm, 0, sizeof (dperm));
          dperm.target = target;
          dperm.vmode = mode;
          dperm.nelt = GET_MODE_NUNITS (mode);
          dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
          dperm.one_operand_p = true;

          /* Extend to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));

          /* Insert the SImode value as low element of a V4SImode vector.  */
          tmp2 = gen_lowpart (V4SImode, dperm.op0);
          emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

          ok = (expand_vec_perm_1 (&dperm)
                || expand_vec_perm_broadcast_1 (&dperm));
          gcc_assert (ok);
          return ok;
        }
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
        goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
        enum machine_mode smode, wsmode, wvmode;
        rtx x;

        smode = GET_MODE_INNER (mode);
        wvmode = get_mode_wider_vector (mode);
        wsmode = GET_MODE_INNER (wvmode);

        val = convert_modes (wsmode, smode, val, true);
        x = expand_simple_binop (wsmode, ASHIFT, val,
                                 GEN_INT (GET_MODE_BITSIZE (smode)),
                                 NULL_RTX, 1, OPTAB_LIB_WIDEN);
        val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

        x = gen_lowpart (wvmode, target);
        ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
        gcc_assert (ok);
        return ok;
      }

    case V16HImode:
    case V32QImode:
      {
        enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
        rtx x = gen_reg_rtx (hvmode);

        ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
        gcc_assert (ok);

        x = gen_rtx_VEC_CONCAT (mode, x, x);
        emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
         element is zero and inter-unit moves are OK, we use movq
         instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
                        && !(TARGET_INTER_UNIT_MOVES_TO_VEC
                             && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
        return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
        new_target = gen_reg_rtx (mode);
      else
        new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
        {
          /* We need to shuffle the value to the correct position, so
             create a new pseudo to store the intermediate result.  */

          /* With SSE2, we can use the integer shuffle insns.  */
          if (mode != V4SFmode && TARGET_SSE2)
            {
              emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
                                            const1_rtx,
                                            GEN_INT (one_var == 1 ? 0 : 1),
                                            GEN_INT (one_var == 2 ? 0 : 1),
                                            GEN_INT (one_var == 3 ? 0 : 1)));
              if (target != new_target)
                emit_move_insn (target, new_target);
              return true;
            }

          /* Otherwise convert the intermediate result to V4SFmode and
             use the SSE1 shuffle instructions.  */
          if (mode != V4SFmode)
            {
              tmp = gen_reg_rtx (V4SFmode);
              emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
            }
          else
            tmp = new_target;

          emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
                                          const1_rtx,
                                          GEN_INT (one_var == 1 ? 0 : 1),
                                          GEN_INT (one_var == 2 ? 0+4 : 1+4),
                                          GEN_INT (one_var == 3 ? 0+4 : 1+4)));

          if (mode != V4SFmode)
            emit_move_insn (target, gen_lowpart (V4SImode, tmp));
          else if (tmp != target)
            emit_move_insn (target, tmp);
        }
      else if (target != new_target)
        emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
        return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
                                                var, one_var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
        return false;
      /* FALLTHRU */
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
        break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
                                rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
        {
        case V8SImode:
          cmode = V4SImode;
          break;
        case V8SFmode:
          cmode = V4SFmode;
          break;
        case V4DImode:
          cmode = V2DImode;
          break;
        case V4DFmode:
          cmode = V2DFmode;
          break;
        case V4SImode:
          cmode = V2SImode;
          break;
        case V4SFmode:
          cmode = V2SFmode;
          break;
        case V2DImode:
          cmode = DImode;
          break;
        case V2SImode:
          cmode = SImode;
          break;
        case V2DFmode:
          cmode = DFmode;
          break;
        case V2SFmode:
          cmode = SFmode;
          break;
        default:
          gcc_unreachable ();
        }

      if (!register_operand (ops[1], cmode))
        ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
        ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, ops[0],
                                                  ops[1])));
      break;

    case 4:
      switch (mode)
        {
        case V4DImode:
          cmode = V2DImode;
          break;
        case V4DFmode:
          cmode = V2DFmode;
          break;
        case V4SImode:
          cmode = V2SImode;
          break;
        case V4SFmode:
          cmode = V2SFmode;
          break;
        default:
          gcc_unreachable ();
        }
      goto half;

    case 8:
      switch (mode)
        {
        case V8SImode:
          cmode = V2SImode;
          hmode = V4SImode;
          break;
        case V8SFmode:
          cmode = V2SFmode;
          hmode = V4SFmode;
          break;
        default:
          gcc_unreachable ();
        }
      goto half;

    half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
        {
          first[j] = gen_reg_rtx (cmode);
          v = gen_rtvec (2, ops[i - 1], ops[i]);
          ix86_expand_vector_init (false, first[j],
                                   gen_rtx_PARALLEL (cmode, v));
        }

      n >>= 1;
      if (n > 2)
        {
          gcc_assert (hmode != VOIDmode);
          for (i = j = 0; i < n; i += 2, j++)
            {
              second[j] = gen_reg_rtx (hmode);
              ix86_expand_vector_init_concat (hmode, second[j],
                                              &first[i], 2);
            }
          n >>= 1;
          ix86_expand_vector_init_concat (mode, target, second, n);
        }
      else
        ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
                                    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
                               gen_rtx_VEC_DUPLICATE (V4SImode,
                                                      op0),
                               CONST0_RTX (V4SImode),
                               const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
                                force_reg (inner_mode,
                                           ops[i + i + 1]),
                                const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
        {
          op0 = gen_reg_rtx (second_imode);
          emit_insn (gen_interleave_second_low (op0, ops[i],
                                                ops[i + 1]));

          /* Cast the SECOND_IMODE vector to the THIRD_IMODE
             vector.  */
          ops[j] = gen_reg_rtx (third_imode);
          emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
        }
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
                                            ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
         mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

    half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
                                          n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
                                          &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
        break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
        break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
         move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
        break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  {
    int i, j, n_elts, n_words, n_elt_per_word;
    enum machine_mode inner_mode;
    rtx words[4], shift;

    inner_mode = GET_MODE_INNER (mode);
    n_elts = GET_MODE_NUNITS (mode);
    n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
    n_elt_per_word = n_elts / n_words;
    shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

    for (i = 0; i < n_words; ++i)
      {
        rtx word = NULL_RTX;

        for (j = 0; j < n_elt_per_word; ++j)
          {
            rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
            elt = convert_modes (word_mode, inner_mode, elt, true);

            if (j == 0)
              word = elt;
            else
              {
                word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                            word, 1, OPTAB_LIB_WIDEN);
                word = expand_simple_binop (word_mode, IOR, word, elt,
                                            word, 1, OPTAB_LIB_WIDEN);
              }
          }

        words[i] = word;
      }

    if (n_words == 1)
      emit_move_insn (target, gen_lowpart (mode, words[0]));
    else if (n_words == 2)
      {
        rtx tmp = gen_reg_rtx (mode);
        emit_clobber (tmp);
        emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
        emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
        emit_move_insn (target, tmp);
      }
    else if (n_words == 4)
      {
        rtx tmp = gen_reg_rtx (V4SImode);
        gcc_assert (word_mode == SImode);
        vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
        ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
        emit_move_insn (target, gen_lowpart (mode, tmp));
      }
    else
      gcc_unreachable ();
  }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
            || GET_CODE (x) == CONST_DOUBLE
            || GET_CODE (x) == CONST_FIXED))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
          && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, one_var),
                                                  one_var))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals,
                                           one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
        { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
        { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
        { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
        { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
        { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
        { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
        { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
        { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
        { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
        { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
        { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
        { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
        break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
        tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
        tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const1_rtx, const0_rtx,
                                          GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */

          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

    half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
                                          GEN_INT (elt), GEN_INT (elt),
                                          GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SFmode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DFmode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V32QImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V16QImode);
          if (elt < 16)
            emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 15);
          return;
        }
      break;

    case V16HImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V8HImode);
          if (elt < 8)
            emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 7);
          return;
        }
      break;

    case V8SImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SImode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DImode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
        tem = gen_sse_movhlps (dest, src, src);
      else
        tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
                                   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
                                gen_lowpart (V1TImode, src),
                                GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufps256 (dest, src, src,
                                 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
        tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
                                 gen_lowpart (V4DImode, src),
                                 gen_lowpart (V4DImode, src),
                                 const1_rtx);
      else
        tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
                                  gen_lowpart (V2TImode, src),
                                  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
        dst = dest;
      else
        dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
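/* Editor's note: scalar model of the halving loop above (illustration
   only; four lanes, addition standing in for FN).  Each pass combines
   the upper half of the vector with the lower half until lane 0 holds
   the full reduction.  */

static int
reduce_add_sketch (int v[4])
{
  int width, k;

  for (width = 4; width > 1; width >>= 1)
    for (k = 0; k < width / 2; k++)
      v[k] += v[k + width / 2];   /* emit_reduc_half, then fn (dst, half, vec) */
  return v[0];
}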
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
          && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
          if (REGNO (operands[0]) == FIRST_STACK_REG)
            return output_387_ffreep (operands, 0);
          return "fstp\t%y0";
        }
      if (STACK_TOP_P (operands[0]))
        return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        return "fstp%Z0\t%y0";
      else
        {
          /* There is no non-popping store to memory for XFmode.
             So if we need one, follow the store with a load.  */
          if (GET_MODE (operands[0]) == XFmode)
            return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
          else
            return "fst%Z0\t%y0";
        }
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
                          gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
                                          UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
                              gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
        rtx tmp0 = gen_reg_rtx (XFmode);

        emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

        emit_insn (gen_rtx_SET (VOIDmode, res,
                                gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
                                                UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
                              gen_rtx_EQ (VOIDmode, flags, const0_rtx),
                              gen_rtx_LABEL_REF (VOIDmode, jump_label),
                              pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
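/* Editor's note: the identity implemented above, in plain C (illustration
   only; floor/fabs/signbit from <math.h> stand in for frndint, fabs, and
   the fxam sign bit).  */

static double
i387_round_sketch (double a)
{
  double r = floor (fabs (a) + 0.5);   /* res = floor (fabs (a) + 0.5) */
  return signbit (a) ? -r : r;         /* if (signbit (a)) res = -res */
}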
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                          UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
                          gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, a, x1)));
}
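/* Editor's note: scalar sketch of the Newton-Raphson step emitted above
   (illustration only).  An exact division seeds X0 in place of the rcpss
   hardware estimate; one refinement computes x1 = 2*x0 - b*x0*x0 ~ 1/b.  */

static float
swdiv_sketch (float a, float b)
{
  float x0 = 1.0f / b;        /* x0 = rcp(b) estimate */
  float e0 = (x0 * b) * x0;   /* e0 = b * x0 * x0 */
  float e1 = x0 + x0;         /* e1 = x0 + x0 */
  float x1 = e1 - e0;         /* x1 = e1 - e0, refined reciprocal */
  return a * x1;              /* res = a * x1 */
}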
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
                         bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
                                          UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX (mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
                              gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
                              gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e2, e3)));
}
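/* Editor's note: scalar sketch of the formula in the comment above
   (illustration only; sqrtf from <math.h> seeds X0 in place of the
   rsqrtss estimate).  RECIP selects 1/sqrt(a) versus sqrt(a).  */

static float
swsqrt_sketch (float a, int recip)
{
  float x0 = 1.0f / sqrtf (a);            /* x0 = rsqrt(a) estimate */
  float e0 = x0 * a;                      /* e0 = x0 * a */
  float e1 = e0 * x0;                     /* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;                   /* e2 = e1 + mthree */
  float e3 = (recip ? x0 : e0) * -0.5f;   /* e3 = -.5 * x0 (or e0) */
  return e2 * e3;                         /* one NR step of the estimate */
}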
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
      else
        vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
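/* Editor's note: bit-level model of the AND/IOR pair above (illustration
   only).  M is the lone sign bit; the result keeps ABS_VALUE's magnitude
   and takes SIGN's sign bit.  */

static double
copysign_sketch (double abs_value, double sign)
{
  unsigned long long a, s, m = 1ULL << 63;

  __builtin_memcpy (&a, &abs_value, sizeof a);
  __builtin_memcpy (&s, &sign, sizeof s);
  a = (a & ~m) | (s & m);   /* sgn = mask & sign; result = abs_value | sgn */
  __builtin_memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}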
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
                   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
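/* Editor's note: why 2**52 works (illustration only; a real implementation
   must keep the compiler from folding the arithmetic away).  Every double
   with magnitude >= 2**52 is an integer, so adding and then subtracting
   TWO52 pushes the fraction bits out of the mantissa, rounding X to an
   integer in the current rounding mode (nearest-even by default).  The
   expanders below only take this path when |x| < 2**52.  */

static double
two52_trick_sketch (double x)   /* assumes 0.0 <= x < 2**52 */
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double t = x + two52;   /* fraction bits are rounded away here */
  return t - two52;       /* exact subtraction; result is integral */
}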
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long) tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
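/* Editor's note: the constant loaded above is the largest double below
   0.5 (illustration only).  Adding exactly 0.5 to an input just under
   half-way, such as 0.49999999999999994, would itself round up to 1.0
   before the truncation; the predecessor of 0.5 avoids that off-by-one
   while still rounding 0.5 away from zero.  */

static long
lround_sketch (double x)
{
  double pred_half = 0.49999999999999994;   /* nextafter (0.5, 0.0) */
  double adj = copysign (pred_half, x);     /* copysign from <math.h> */
  return (long) (x + adj);                  /* truncating conversion */
}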
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
        xi = (long) op1;
        xi -= (double) xi > op1 ? 1 : 0;
        op0 = xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
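/* Editor's note: the compensation above in plain C (illustration only).
   Conversion to integer truncates toward zero, so for negative
   non-integral inputs the truncated value is one too high and must be
   decremented; the ceil direction mirrors this with an increment.  */

static long
lfloor_sketch (double x)   /* assumes x fits in long */
{
  long xi = (long) x;      /* xi = (long) op1, truncates toward zero */
  if ((double) xi > x)     /* only true when x < 0 and non-integral */
    xi -= 1;               /* xi -= (double) xi > op1 ? 1 : 0 */
  return xi;
}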
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa2 = xa + TWO52 - TWO52;
     Compensate.
        dxa = xa2 - xa;
        if (dxa <= -0.5)
          xa2 += 1;
        else if (dxa > 0.5)
          xa2 -= 1;
        x2 = copysign (xa2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0 without relying on DImode truncation that is only
   available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
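
/* Illustrative note (not part of the original sources): pred_half is
   nextafter (0.5, 0.0), computed exactly as 0.5 - 2**(-p-1) for a mode
   of precision p, i.e. the largest value below 0.5.  Adding plain 0.5
   would misround the largest double below 0.5: that input plus 0.5
   rounds up to exactly 1.0, so the truncating conversion would yield 1
   instead of 0.  With pred_half that input stays below 1.0, while an
   exact 0.5 still reaches 1.0, matching round()'s
   halfway-away-from-zero rule.  */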
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
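
/* Note (sketch, not authoritative): the SSE4.1 round insn cannot be used
   directly in nearest-int mode here, because that mode rounds halfway
   cases to even while round () must round them away from zero.  Hence
   the identity round (a) = trunc (a + copysign (0.5, a)) with the insn's
   truncating mode, using the same nextafter (0.5, 0.0) constant as
   ix86_expand_round above to protect the just-below-0.5 boundary.  */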
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
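
/* Usage sketch (illustrative, not part of the original sources): these
   entries correspond to what user code spells as, e.g.:

     int __attribute__((stdcall)) f (int a, int b);     // callee pops args
     int __attribute__((regparm (3))) g (int a, int b); // args in regs
     struct __attribute__((ms_struct)) s { char c; int i; };

   min_len/max_len bound the attribute's argument count ("regparm" takes
   exactly one), and the three bool fields state whether a declaration, a
   type, or specifically a function type is required for the attribute to
   apply; the handler validates and records each use.  */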
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype,
                                 int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_cost->vec_stmt_cost;

      case vec_construct:
        elements = TYPE_VECTOR_SUBPARTS (vectype);
        return elements / 2 + 1;

      default:
        gcc_unreachable ();
    }
}
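
/* Example (illustrative): for vec_construct of a V8SI vector,
   TYPE_VECTOR_SUBPARTS is 8, so the cost returned is 8 / 2 + 1 = 5, a
   simple linear estimate of the insns needed to build a vector from
   scalars; every other case just reads the active tuning's cost table
   through ix86_cost.  */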
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
                                                        const0_rtx), x);
  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
                unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt,
                        bool testing_p)
{
  enum machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}
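
/* Illustrative sketch (not part of the original sources): a two-operand
   shuffle is modelled as a select from the concatenation of both inputs.
   E.g. interleaving the low halves of two V4SI vectors A and B is

     (set target
          (vec_select:V4SI (vec_concat:V8SI A B)
                           (parallel [0 4 1 5])))

   and recog_memoized on the cached insn tells us whether some pattern in
   sse.md (here punpckldq) accepts that RTL under the current ISA, without
   allocating a fresh insn for every query.  */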
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
         an immediate argument, rather than pblendvb with a vector
         argument.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          {
          use_pblendvb:
            for (i = 0; i < nelt; ++i)
              rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          finish_pblendvb:
            vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
            vperm = force_reg (vmode, vperm);

            if (GET_MODE_SIZE (vmode) == 16)
              emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
            else
              emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
            return true;
          }

      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
         with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
        if (d->perm[i] + 2 != d->perm[i + 2])
          break;
      if (i < 32)
        {
          /* See if bytes move the same in both lanes.  If yes,
             vpblendw with immediate can be used.  */
          for (i = 0; i < 16; i += 2)
            if (d->perm[i] + 16 != d->perm[i + 16])
              goto use_pblendvb;

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i * 2] >= 32) << i;
          vmode = V16HImode;
          goto do_subreg;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          break;
      if (i < 16)
        {
          /* See if words move the same in both lanes.  If not,
             vpblendvb must be used.  */
          for (i = 0; i < 8; i++)
            if (d->perm[i] + 8 != d->perm[i + 8])
              {
                /* Use vpblendvb.  */
                for (i = 0; i < 32; ++i)
                  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

                vmode = V32QImode;
                nelt = 32;
                target = gen_lowpart (vmode, target);
                op0 = gen_lowpart (vmode, op0);
                op1 = gen_lowpart (vmode, op1);
                goto finish_pblendvb;
              }

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i] >= 16) << i;
          break;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
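
/* Example (illustrative): for a V8SF blend with
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, every element keeps its lane and
   comes from op0 at even positions and op1 at odd ones, so the loop
   above builds mask = 0b10101010 = 0xaa and a single vblendps with that
   immediate implements the whole permutation.  */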
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
        if (d->perm[i] + j != d->perm[i + j])
          return false;

  return true;
}
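
/* Example (illustrative): the V16QI permutation
   { 4, 5, 6, 7, 0, 1, 2, 3, 8, 9, ..., 15 } moves whole 4-byte chunks,
   so it is also expressible as the V4SI permutation { 1, 0, 2, 3 }:
   chunk is 16 / 4 = 4, each d->perm[i] at a chunk boundary is a multiple
   of 4, and the following three indexes are consecutive.  Wider-mode
   shuffles are usually cheaper to match.  */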
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
        {
          if (TARGET_AVX2
              && valid_perm_using_mode_p (V2TImode, d))
            {
              if (d->testing_p)
                return true;

              /* Use vperm2i128 insn.  The pattern uses
                 V4DImode instead of V2TImode.  */
              target = gen_lowpart (V4DImode, d->target);
              op0 = gen_lowpart (V4DImode, d->op0);
              op1 = gen_lowpart (V4DImode, d->op1);
              rperm[0]
                = GEN_INT ((d->perm[0] / (nelt / 2))
                           | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
              emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
              return true;
            }
          return false;
        }
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
        {
          if (!TARGET_SSSE3)
            return false;
        }
      else if (GET_MODE_SIZE (d->vmode) == 32)
        {
          if (!TARGET_AVX2)
            return false;

          /* V4DImode should be already handled through
             expand_vselect by vpermq instruction.  */
          gcc_assert (d->vmode != V4DImode);

          vmode = V32QImode;
          if (d->vmode == V8SImode
              || d->vmode == V16HImode
              || d->vmode == V32QImode)
            {
              /* First see if vpermq can be used for
                 V8SImode/V16HImode/V32QImode.  */
              if (valid_perm_using_mode_p (V4DImode, d))
                {
                  for (i = 0; i < 4; i++)
                    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
                  if (d->testing_p)
                    return true;
                  return expand_vselect (gen_lowpart (V4DImode, d->target),
                                         gen_lowpart (V4DImode, d->op0),
                                         perm, 4, false);
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V8SImode, d))
                vmode = V8SImode;
            }
          /* Or if vpermps can be used.  */
          else if (d->vmode == V8SFmode)
            vmode = V8SFmode;

          if (vmode == V32QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 2))
                  return false;
            }
        }
      else
        return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else if (vmode == V8SFmode)
    /* The vpermps selector is an integer vector, so build it as a
       V8SImode constant below.  */
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT (d->perm[i] & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (!d->one_operand_p)
        mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
        mask = nelt - 1;
      else
        mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
        {
          unsigned j, e = d->perm[i] & mask;
          for (j = 0; j < eltsz; ++j)
            rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
        }
    }

  vperm = gen_rtx_CONST_VECTOR (vmode == V8SFmode ? V8SImode : vmode,
                                gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (GET_MODE (vperm), vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
        emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
        emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
        emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else
        emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
        {
          perm2[i] = d->perm[i] & mask;
          if (perm2[i] != i)
            identity_perm = false;
          if (perm2[i])
            broadcast_perm = false;
        }

      if (identity_perm)
        {
          if (!d->testing_p)
            emit_move_insn (d->target, d->op0);
          return true;
        }
      else if (broadcast_perm && TARGET_AVX2)
        {
          /* Use vpbroadcast{b,w,d}.  */
          rtx (*gen) (rtx, rtx) = NULL;
          switch (d->vmode)
            {
            case V32QImode:
              gen = gen_avx2_pbroadcastv32qi_1;
              break;
            case V16HImode:
              gen = gen_avx2_pbroadcastv16hi_1;
              break;
            case V8SImode:
              gen = gen_avx2_pbroadcastv8si_1;
              break;
            case V16QImode:
              gen = gen_avx2_pbroadcastv16qi;
              break;
            case V8HImode:
              gen = gen_avx2_pbroadcastv8hi;
              break;
            case V8SFmode:
              gen = gen_avx2_vec_dupv8sf_1;
              break;
            /* For other modes prefer other shuffles this function creates.  */
            default:
              break;
            }
          if (gen != NULL)
            {
              if (!d->testing_p)
                emit_insn (gen (d->target, d->op0));
              return true;
            }
        }

      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          perm2[i] = d->perm[i] & mask;
          perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
                                  d->testing_p))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              perm2[i + 0] = d->perm[i + 0] & mask;
              perm2[i + 1] = d->perm[i + 1] & mask;
              perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
              perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }

          if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
                                      d->testing_p))
            return true;
        }
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
                              d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
                                  d->testing_p))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
        min = e;
      if (e > max)
        max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
                                  gen_lowpart (TImode, d->op1),
                                  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;
  d->one_operand_p = true;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
        in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
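
/* Example (illustrative): for V16QI with perm = { 5, 6, ..., 20 }
   (min = 5, max = 20, max - min < 16), the palignr above shifts the
   op1:op0 byte pair right by 5 bytes, after which the residual
   permutation is { 0, 1, ..., 15 } on a single register - the
   degenerate in-order case - so no follow-up pshufb is needed.  */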
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
        return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
        return false;
      /* For 32-byte modes allow even d->one_operand_p.
         The lack of cross-lane shuffling in some instructions
         might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
         a 3 insn sequence, give up and let it be expanded as
         3 insn sequence.  While that is one insn longer,
         it doesn't need a memory operand and in the common
         case that both interleave low and high permutations
         with the same operands are adjacent needs 4 insns
         for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
        return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
         for interleave high.  If the elements are from mis-matched halves, we
         can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
        {
          /* punpckl* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h2 | h4)) == contents)
        {
          /* punpckh* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i * 2;
              remap[i + nelt + nelt2] = i * 2 + 1;
              dremap.perm[i * 2] = i + nelt2;
              dremap.perm[i * 2 + 1] = i + nelt + nelt2;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h1 | h4)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i;
              remap[i + nelt + nelt2] = i + nelt2;
              dremap.perm[i] = i;
              dremap.perm[i + nelt2] = i + nelt + nelt2;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 0;
              dremap.perm[1] = 3;
            }
        }
      else if ((contents & (h2 | h3)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i;
              remap[i + nelt] = i + nelt2;
              dremap.perm[i] = i + nelt2;
              dremap.perm[i + nelt2] = i + nelt;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 1;
              dremap.perm[1] = 2;
            }
        }
      else
        return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
        q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
        if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
          {
            nonzero_halves[nzcnt] = i;
            ++nzcnt;
          }

      if (nzcnt == 1)
        {
          gcc_assert (d->one_operand_p);
          nonzero_halves[1] = nonzero_halves[0];
          same_halves = true;
        }
      else if (d->one_operand_p)
        {
          gcc_assert (nonzero_halves[0] == 0);
          gcc_assert (nonzero_halves[1] == 1);
        }

      if (nzcnt <= 2)
        {
          if (d->perm[0] / nelt2 == nonzero_halves[1])
            {
              /* Attempt to increase the likelihood that dfinal
                 shuffle will be intra-lane.  */
              char tmph = nonzero_halves[0];
              nonzero_halves[0] = nonzero_halves[1];
              nonzero_halves[1] = tmph;
            }

          /* vperm2f128 or vperm2i128.  */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
              remap[i + nonzero_halves[0] * nelt2] = i;
              dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
              dremap.perm[i] = i + nonzero_halves[0] * nelt2;
            }

          if (d->vmode != V8SFmode
              && d->vmode != V4DFmode
              && d->vmode != V8SImode)
            {
              dremap.vmode = V8SImode;
              dremap.nelt = 8;
              for (i = 0; i < 4; ++i)
                {
                  dremap.perm[i] = i + nonzero_halves[0] * 4;
                  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
                }
            }
        }
      else if (d->one_operand_p)
        return false;
      else if (TARGET_AVX2
               && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
        {
          /* vpunpckl* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              remap[i + nelt2] = i * 2 + nelt2;
              remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
              dremap.perm[i * 2 + nelt2] = i + nelt2;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
            }
        }
      else if (TARGET_AVX2
               && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
        {
          /* vpunpckh* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i + nelt4] = i * 2;
              remap[i + nelt + nelt4] = i * 2 + 1;
              remap[i + nelt2 + nelt4] = i * 2 + nelt2;
              remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i + nelt4;
              dremap.perm[i * 2 + 1] = i + nelt + nelt4;
              dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
            }
        }
      else
        return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
         same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
        {
          gcc_assert (e < nelt2);
          dfinal.perm[i] = e + nelt2;
        }
      else
        dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
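
/* Example (illustrative): for a V8HI permutation that draws only from
   the low halves of both inputs (contents within h1 | h3), the dremap
   step is punpcklwd, which interleaves those halves into one register;
   remap[] then rewrites each original index to its new slot so dfinal
   can try to finish with a single one-operand shuffle such as pshufb.
   If that trial expansion fails, the whole strategy is abandoned.  */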
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
        && (d->vmode == V32QImode || d->vmode == V16HImode)
        && d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
          return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0)
          dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
        dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
        j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
        ;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
        dfinal.perm[i] |= nelt4;
      else
        gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
        {
          /* The second shuffle for e.g. V4DFmode has
             0123 and ABCD operands.
             Ignore AB23, as 23 is already in the second lane
             of the first operand.  */
          if ((perm & 0xc) == (1 << 2)) continue;
          /* And 01CD, as 01 is in the first lane of the first
             operand.  */
          if ((perm & 3) == 0) continue;
          /* And 4567, as then the vperm2[fi]128 doesn't change
             anything on the original 4567 second operand.  */
          if ((perm & 0xf) == ((3 << 2) | 2)) continue;
        }
      else
        {
          /* The second shuffle for e.g. V4DFmode has
             4567 and ABCD operands.
             Ignore AB67, as 67 is already in the second lane
             of the first operand.  */
          if ((perm & 0xc) == (3 << 2)) continue;
          /* And 45CD, as 45 is in the first lane of the first
             operand.  */
          if ((perm & 3) == 2) continue;
          /* And 0123, as then the vperm2[fi]128 doesn't change
             anything on the original 0123 first operand.  */
          if ((perm & 0xf) == (1 << 2)) continue;
        }

      for (i = 0; i < nelt; i++)
        {
          j = d->perm[i] / nelt2;
          if (j == ((perm >> (2 * (i >= nelt2))) & 3))
            dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
          else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
            dsecond.perm[i] = d->perm[i] & (nelt - 1);
          else
            break;
        }

      if (i == nelt)
        {
          start_sequence ();
          ok = expand_vec_perm_1 (&dsecond);
          end_sequence ();
        }
      else
        ok = false;

      if (ok)
        {
          if (d->testing_p)
            return true;

          /* Found a usable second shuffle.  dfirst will be
             vperm2f128 on d->op0 and d->op1.  */
          dsecond.testing_p = false;
          dfirst = *d;
          dfirst.target = gen_reg_rtx (d->vmode);
          for (i = 0; i < nelt; i++)
            dfirst.perm[i] = (i & (nelt2 - 1))
                             + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

          ok = expand_vec_perm_1 (&dfirst);
          gcc_assert (ok);

          /* And dsecond is some single insn shuffle, taking
             d->op0 and result of vperm2f128 (if perm < 16) or
             d->op1 and result of vperm2f128 (otherwise).  */
          dsecond.op1 = dfirst.target;
          if (perm >= 16)
            dsecond.op0 = dfirst.op1;

          ok = expand_vec_perm_1 (&dsecond);
          gcc_assert (ok);

          return true;
        }

      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
      if (d->one_operand_p)
        return false;
    }

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
        || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv32qi;
      else
        gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv16hi;
      else
        gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8si;
      else
        gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4di;
      else
        gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8sf;
      else
        gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4df;
      else
        gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
        return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
        msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  if (d->testing_p)
    return true;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
/* Implement arbitrary permutation of one V32QImode or V16HImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
          rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
                                  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode or V16HImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
          rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
        int mask = odd ? 0xdd : 0x88;

        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
        emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
                                      GEN_INT (mask)));

        /* Shuffle the lanes around to produce:
           { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
                                            GEN_INT (0x3)));

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
        emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

        /* Shuffle within the 128-bit lanes to produce:
           { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
        emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

        /* Shuffle the lanes around to produce:
           { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
                                            GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave. */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case V16QImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
          emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
          emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
          if (odd)
            t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
          else
            t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
          emit_insn (t3);
        }
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V4DFmode;
          d_copy.target = gen_lowpart (V4DFmode, d->target);
          d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
          d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an vpunpck[lh]qdq will produce the result required.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
        t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V8SFmode;
          d_copy.target = gen_lowpart (V8SFmode, d->target);
          d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
          d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
         { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
         { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now an vpunpck[lh]qdq will produce
         { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
                                           gen_lowpart (V4DImode, t1),
                                           gen_lowpart (V4DImode, t2));
      else
        t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
                                          gen_lowpart (V4DImode, t1),
                                          gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
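
/* Example (illustrative): the vectorizer asks for extract-even when
   deinterleaving, e.g. the two-operand V4SI selector { 0, 2, 4, 6 }
   (odd = 0) or extract-odd with { 1, 3, 5, 7 } (odd = 1); any other
   first element or stride is rejected here and left to the generic
   strategies.  */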
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      do
        {
          rtx dest;
          rtx (*gen) (rtx, rtx, rtx)
            = vmode == V16QImode ? gen_vec_interleave_lowv16qi
                                 : gen_vec_interleave_lowv8hi;

          if (elt >= nelt2)
            {
              gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
                                       : gen_vec_interleave_highv8hi;
              elt -= nelt2;
            }
          nelt2 /= 2;

          dest = gen_reg_rtx (vmode);
          emit_insn (gen (dest, op0, op0));
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, dest);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
                           d->testing_p);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
         vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode or V16HImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }

  used[0] = used[1] = used[2] = used[3] = false;

  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
        rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
        {
          h[i] = NULL_RTX;
          continue;
        }
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
                                    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
        continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
                                      const2_rtx, GEN_INT (3), const0_rtx,
                                      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
        {
          l[i] = NULL_RTX;
          continue;
        }
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
        {
          op = gen_reg_rtx (V32QImode);
          emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
          l[i] = op;
        }
      else if (h[i])
        l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_2vperm2f128_vshuf (d))
    return true;

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d->op0, d->op1))
        {
          d->one_operand_p = false;
          break;
        }
      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
        d->perm[i] &= nelt - 1;
      d->op0 = d->op1;
      break;

    case 1:
      d->op1 = d->op0;
      break;
    }

  return (which == 3);
}
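/* For example, with nelt == 4 and perm == { 4, 6, 5, 7 } every element
   selects the second operand (which == 2), so the permutation is folded
   to { 0, 2, 1, 3 } on a single input vector and the function returns
   false.  */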
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt;
  bool two_args;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  two_args = canonicalize_perm (&d);

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the selector says both arguments are needed, but the operands are the
     same, the above tried to expand with one_operand_p and flattened selector.
     If that didn't work, retry without one_operand_p; we succeeded with that
     during testing.  */
  if (two_args && d.one_operand_p)
    {
      d.one_operand_p = false;
      memcpy (d.perm, perm, sizeof (perm));
      return ix86_expand_vec_perm_const_1 (&d);
    }

  return false;
}
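/* Note for the retry above: canonicalize_perm flattens d.perm in place,
   which is why the unmodified selector is saved in PERM and restored
   with memcpy before the second expansion attempt.  */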
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
        return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
        return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
        return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
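/* In this testing mode nothing reaches the instruction stream:
   d.testing_p makes the expanders bail out before emitting, and
   whatever they do emit is collected by the start_sequence/end_sequence
   pair above and then discarded.  */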
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
static void
ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt, base;
  bool ok;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;
    }

  /* Note that for AVX this isn't one instruction.  */
  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
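/* For example, with nelt == 4 and high_p == false this builds the
   selector { 0, 4, 1, 5 }, the low-half interleave of op0 and op1
   (an unpcklps/punpckldq-style pattern); high_p == true yields
   { 2, 6, 3, 7 }.  */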
/* Expand a vector operation CODE for a V*QImode in terms of the
   same operation on V*HImode.  */

void
ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
  enum machine_mode qimode = GET_MODE (dest);
  enum machine_mode himode;
  rtx (*gen_il) (rtx, rtx, rtx);
  rtx (*gen_ih) (rtx, rtx, rtx);
  rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
  struct expand_vec_perm_d d;
  bool ok, full_interleave;
  bool uns_p = false;
  int i;

  switch (qimode)
    {
    case V16QImode:
      himode = V8HImode;
      gen_il = gen_vec_interleave_lowv16qi;
      gen_ih = gen_vec_interleave_highv16qi;
      break;
    case V32QImode:
      himode = V16HImode;
      gen_il = gen_avx2_interleave_lowv32qi;
      gen_ih = gen_avx2_interleave_highv32qi;
      break;
    default:
      gcc_unreachable ();
    }

  op2_l = op2_h = op2;
  switch (code)
    {
    case MULT:
      /* Unpack data such that we've got a source byte in each low byte of
         each word.  We don't care what goes into the high byte of each word.
         Rather than trying to get zero in there, most convenient is to let
         it be a copy of the low byte.  */
      op2_l = gen_reg_rtx (qimode);
      op2_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op2_l, op2, op2));
      emit_insn (gen_ih (op2_h, op2, op2));

      op1_l = gen_reg_rtx (qimode);
      op1_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op1_l, op1, op1));
      emit_insn (gen_ih (op1_h, op1, op1));
      full_interleave = qimode == V16QImode;
      break;

    case ASHIFT:
    case LSHIFTRT:
      uns_p = true;
      /* FALLTHRU */
    case ASHIFTRT:
      op1_l = gen_reg_rtx (himode);
      op1_h = gen_reg_rtx (himode);
      ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
      ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
      full_interleave = true;
      break;
    default:
      gcc_unreachable ();
    }

  /* Perform the operation.  */
  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
                               1, OPTAB_DIRECT);
  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
                               1, OPTAB_DIRECT);
  gcc_assert (res_l && res_h);

  /* Merge the data back into the right place.  */
  d.target = dest;
  d.op0 = gen_lowpart (qimode, res_l);
  d.op1 = gen_lowpart (qimode, res_h);
  d.vmode = qimode;
  d.nelt = GET_MODE_NUNITS (qimode);
  d.one_operand_p = false;
  d.testing_p = false;

  if (full_interleave)
    {
      /* For SSE2, we used a full interleave, so the desired
         results are in the even elements.  */
      for (i = 0; i < 32; ++i)
        d.perm[i] = i * 2;
    }
  else
    {
      /* For AVX, the interleave used above was not cross-lane.  So the
         extraction is evens but with the second and third quarter swapped.
         Happily, that is even one insn shorter than even extraction.  */
      for (i = 0; i < 32; ++i)
        d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
    }

  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_fmt_ee (code, qimode, op1, op2));
}
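/* As an illustration, a V16QImode MULT expands to roughly:
   punpcklbw/punpckhbw to duplicate each byte of both operands into a
   word, pmullw on the two word vectors (the low byte of each word
   product is the desired byte product, whatever the high bytes hold),
   and one constant permutation taking the even bytes
   (d.perm[i] == i * 2) to pack the results back together.  */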
/* Helper function of ix86_expand_mul_widen_evenodd.  Return true
   if op is CONST_VECTOR with all odd elements equal to their
   preceding element.  */

static bool
const_vector_equal_evenodd_p (rtx op)
{
  enum machine_mode mode = GET_MODE (op);
  int i, nunits = GET_MODE_NUNITS (mode);
  if (GET_CODE (op) != CONST_VECTOR
      || nunits != CONST_VECTOR_NUNITS (op))
    return false;
  for (i = 0; i < nunits; i += 2)
    if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
      return false;
  return true;
}
void
ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
                               bool uns_p, bool odd_p)
{
  enum machine_mode mode = GET_MODE (op1);
  enum machine_mode wmode = GET_MODE (dest);
  rtx x;
  rtx orig_op1 = op1, orig_op2 = op2;

  if (!nonimmediate_operand (op1, mode))
    op1 = force_reg (mode, op1);
  if (!nonimmediate_operand (op2, mode))
    op2 = force_reg (mode, op2);

  /* We only play even/odd games with vectors of SImode.  */
  gcc_assert (mode == V4SImode || mode == V8SImode);

  /* If we're looking for the odd results, shift those members down to
     the even slots.  For some cpus this is faster than a PSHUFD.  */
  if (odd_p)
    {
      /* For XOP use vpmacsdqh, but only for smult, as it is only
         signed.  */
      if (TARGET_XOP && mode == V4SImode && !uns_p)
        {
          x = force_reg (wmode, CONST0_RTX (wmode));
          emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
          return;
        }

      x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
      if (!const_vector_equal_evenodd_p (orig_op1))
        op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
                            x, NULL, 1, OPTAB_DIRECT);
      if (!const_vector_equal_evenodd_p (orig_op2))
        op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
                            x, NULL, 1, OPTAB_DIRECT);
      op1 = gen_lowpart (mode, op1);
      op2 = gen_lowpart (mode, op2);
    }

  if (mode == V8SImode)
    {
      if (uns_p)
        x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
      else
        x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
    }
  else if (uns_p)
    x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
  else if (TARGET_SSE4_1)
    x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
  else
    {
      rtx s1, s2, t0, t1, t2;

      /* The easiest way to implement this without PMULDQ is to go through
         the motions as if we are performing a full 64-bit multiply.  With
         the exception that we need to do less shuffling of the elements.  */

      /* Compute the sign-extension, aka highparts, of the two operands.  */
      s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
                                op1, pc_rtx, pc_rtx);
      s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
                                op2, pc_rtx, pc_rtx);

      /* Multiply LO(A) * HI(B), and vice-versa.  */
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
      emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));

      /* Multiply LO(A) * LO(B).  */
      t0 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));

      /* Combine and shift the highparts into place.  */
      t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
      t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
                         1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
      return;
    }
  emit_insn (x);
}
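/* The correctness of the PMULDQ-less branch above rests on a mod-2^64
   identity: with s1 = (a < 0 ? ~0 : 0) and s2 = (b < 0 ? ~0 : 0),
   a_signed * b_signed
     == a_uns * b_uns + ((s1 * b_uns + s2 * a_uns) << 32)  (mod 2^64),
   because reading the 32-bit value a as unsigned adds 2^32 exactly when
   a is negative, and an all-ones 32-bit factor behaves as -1 once the
   product is shifted left by 32.  */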
void
ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
                            bool uns_p, bool high_p)
{
  enum machine_mode wmode = GET_MODE (dest);
  enum machine_mode mode = GET_MODE (op1);
  rtx t1, t2, t3, t4, mask;

  switch (mode)
    {
    case V4SImode:
      t1 = gen_reg_rtx (mode);
      t2 = gen_reg_rtx (mode);
      if (TARGET_XOP && !uns_p)
        {
          /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
             shuffle the elements once so that all elements are in the right
             place for immediate use: { A C B D }.  */
          emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
                                        const1_rtx, GEN_INT (3)));
          emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
                                        const1_rtx, GEN_INT (3)));
        }
      else
        {
          /* Put the elements into place for the multiply.  */
          ix86_expand_vec_interleave (t1, op1, op1, high_p);
          ix86_expand_vec_interleave (t2, op2, op2, high_p);
          high_p = false;
        }
      ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
      break;

    case V8SImode:
      /* Shuffle the elements between the lanes.  After this we
         have { A B E F | C D G H } for each operand.  */
      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
                                      const0_rtx, const2_rtx,
                                      const1_rtx, GEN_INT (3)));
      emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
                                      const0_rtx, const2_rtx,
                                      const1_rtx, GEN_INT (3)));

      /* Shuffle the elements within the lanes.  After this we
         have { A A B B | C C D D } or { E E F F | G G H H }.  */
      t3 = gen_reg_rtx (V8SImode);
      t4 = gen_reg_rtx (V8SImode);
      mask = GEN_INT (high_p
                      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
                      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
      emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
      emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));

      ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
      break;

    case V8HImode:
    case V16HImode:
      t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
                         uns_p, OPTAB_DIRECT);
      t2 = expand_binop (mode,
                         uns_p ? umul_highpart_optab : smul_highpart_optab,
                         op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
      gcc_assert (t1 && t2);

      ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
      break;

    case V16QImode:
    case V32QImode:
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
      ix86_expand_sse_unpack (t2, op2, uns_p, high_p);

      emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
      break;

    default:
      gcc_unreachable ();
    }
}
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
  rtx res_1, res_2;

  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
                                 op1, op2, true, false);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
                                 op1, op2, true, true);

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
     us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
     put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
}
void
ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx t1, t2, t3, t4, t5, t6;

  if (TARGET_XOP && mode == V2DImode)
    {
      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
                                    GEN_INT (1),
                                    GEN_INT (0),
                                    GEN_INT (3),
                                    GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
    }
  else
    {
      enum machine_mode nmode;
      rtx (*umul) (rtx, rtx, rtx);

      if (mode == V2DImode)
        {
          umul = gen_vec_widen_umult_even_v4si;
          nmode = V4SImode;
        }
      else if (mode == V4DImode)
        {
          umul = gen_vec_widen_umult_even_v8si;
          nmode = V8SImode;
        }
      else
        gcc_unreachable ();

      /* Multiply low parts.  */
      t1 = gen_reg_rtx (mode);
      emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts.  */
      t6 = GEN_INT (32);
      t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
      t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);

      /* Multiply high parts by low parts.  */
      t4 = gen_reg_rtx (mode);
      t5 = gen_reg_rtx (mode);
      emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
      emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));

      /* Combine and shift the highparts back.  */
      t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
      t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_MULT (mode, op1, op2));
}
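/* The non-XOP path above is the schoolbook decomposition, per element
   and mod 2^64:
     (hi1 * 2^32 + lo1) * (hi2 * 2^32 + lo2)
       == lo1 * lo2 + ((hi1 * lo2 + hi2 * lo1) << 32)
   since the hi1 * hi2 term is shifted entirely out of the low 64 bits;
   t1, t4 and t5 hold exactly these three partial products.  */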
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
        enum machine_mode srcmode, dstmode;
        rtx (*pinsr)(rtx, rtx, rtx, rtx);

        srcmode = mode_for_size (size, MODE_INT, 0);

        switch (srcmode)
          {
          case QImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V16QImode;
            pinsr = gen_sse4_1_pinsrb;
            break;

          case HImode:
            if (!TARGET_SSE2)
              return false;
            dstmode = V8HImode;
            pinsr = gen_sse2_pinsrw;
            break;

          case SImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V4SImode;
            pinsr = gen_sse4_1_pinsrd;
            break;

          case DImode:
            gcc_assert (TARGET_64BIT);
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V2DImode;
            pinsr = gen_sse4_1_pinsrq;
            break;

          default:
            return false;
          }

        dst = gen_lowpart (dstmode, dst);
        src = gen_lowpart (srcmode, src);

        pos /= size;

        emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
        return true;
      }

    default:
      return false;
    }
}
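/* For instance, inserting a 16-bit value at bit position 32 of a
   V8HImode destination presumably becomes pinsrw with element index
   pos / size == 32 / 16 == 2, handed to the expander as the
   single-bit mask GEN_INT (1 << 2).  */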
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
        {
        default:
          break;

        case 0:
          *ptree = ms_va_list_type_node;
          *pname = "__builtin_ms_va_list";
          return 1;

        case 1:
          *ptree = sysv_va_list_type_node;
          *pname = "__builtin_sysv_va_list";
          return 1;
        }
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#undef BIG
#define BIG 100
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group {
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kinds of operations does not have any
   effect in a dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};

/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s {
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s {
  int num_insn;            /* Number of insns in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, either 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;

/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}

/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}

/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}

/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}

/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (!*in_rtx)
    return 0;

  switch ( GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}

/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}

/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
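/* Note that get_num_immediates returns the total immediate size in
   bytes (4 per 32-bit and 8 per 64-bit immediate), while has_immediate
   only cares whether the returned size is nonzero.  */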
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int)path == 0)
    return path_single;

  if ((int)path == 1)
    return path_double;

  return path_multi;
}

/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}

/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
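/* The mixed-width immediate checks above share one budget: each 64-bit
   immediate is charged as two 32-bit slots (num_imm_64 * 2 against
   MAX_IMM_32) and vice versa, so a window already holding one 64-bit
   immediate has room for at most two more 32-bit immediates.  */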
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
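/* The magic numbers above follow from the window geometry: one dispatch
   window is DISPATCH_WINDOW_SIZE == 16 bytes, two windows account for
   the 32-byte test, and MAX_DISPATCH_WINDOWS * 16 == 48 bytes is the
   overall limit also asserted in process_end_window.  */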
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed the allowable limits, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
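/* Note the ping-pong between the two statically allocated windows:
   window_num = ~window_num & 1 flips 0 <-> 1, and allocate_next_window
   merely reinitializes and relinks dispatch_window_list and
   dispatch_window_list1 rather than allocating fresh storage.  */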
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *)list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to STDERR the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
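/* Dispatch scheduling is therefore only active for the AMD Bulldozer
   family (-march=bdver1/bdver2/bdver3) and only when enabled with
   -mdispatch-scheduler (flag_dispatch_scheduler).  */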
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}

/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
{
  if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}

/* Implement targetm.vectorize.init_cost.  */

static void *
ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}

/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                    struct _stmt_vec_info *stmt_info, int misalign,
                    enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}

/* Implement targetm.vectorize.finish_cost.  */

static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
                  unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}

/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
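/* ix86_memmodel_check below validates the target-specific HLE bits in
   a memory-model constant.  In user code those bits are typically set
   by or-ing the HLE hints into the model argument, e.g. (assuming the
   __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE macros predefined for
   x86):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...critical section...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Pairing HLE_ACQUIRE with a model weaker than ACQUIRE (or HLE_RELEASE
   with one weaker than RELEASE) triggers the warnings below.  */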
42743 /* Validate target specific memory model bits in VAL. */
42745 static unsigned HOST_WIDE_INT
42746 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
42748 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
42751 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
42753 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
42755 warning (OPT_Winvalid_memory_model
,
42756 "Unknown architecture specific memory model");
42757 return MEMMODEL_SEQ_CST
;
42759 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
42760 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
42762 warning (OPT_Winvalid_memory_model
,
42763 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42764 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42766 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
42768 warning (OPT_Winvalid_memory_model
,
42769 "HLE_RELEASE not used with RELEASE or stronger memory model");
42770 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
42775 /* Initialize the GCC target structure. */
42776 #undef TARGET_RETURN_IN_MEMORY
42777 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42779 #undef TARGET_LEGITIMIZE_ADDRESS
42780 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42782 #undef TARGET_ATTRIBUTE_TABLE
42783 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42784 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
42785 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
42786 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42787 # undef TARGET_MERGE_DECL_ATTRIBUTES
42788 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42791 #undef TARGET_COMP_TYPE_ATTRIBUTES
42792 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42794 #undef TARGET_INIT_BUILTINS
42795 #define TARGET_INIT_BUILTINS ix86_init_builtins
42796 #undef TARGET_BUILTIN_DECL
42797 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42798 #undef TARGET_EXPAND_BUILTIN
42799 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42801 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42802 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42803 ix86_builtin_vectorized_function
42805 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42806 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42808 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42809 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42811 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42812 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42814 #undef TARGET_BUILTIN_RECIPROCAL
42815 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42817 #undef TARGET_ASM_FUNCTION_EPILOGUE
42818 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42820 #undef TARGET_ENCODE_SECTION_INFO
42821 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42822 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42824 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42827 #undef TARGET_ASM_OPEN_PAREN
42828 #define TARGET_ASM_OPEN_PAREN ""
42829 #undef TARGET_ASM_CLOSE_PAREN
42830 #define TARGET_ASM_CLOSE_PAREN ""
42832 #undef TARGET_ASM_BYTE_OP
42833 #define TARGET_ASM_BYTE_OP ASM_BYTE
42835 #undef TARGET_ASM_ALIGNED_HI_OP
42836 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42837 #undef TARGET_ASM_ALIGNED_SI_OP
42838 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42840 #undef TARGET_ASM_ALIGNED_DI_OP
42841 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42844 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42845 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42847 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
42848 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
42850 #undef TARGET_ASM_UNALIGNED_HI_OP
42851 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
42852 #undef TARGET_ASM_UNALIGNED_SI_OP
42853 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
42854 #undef TARGET_ASM_UNALIGNED_DI_OP
42855 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
42857 #undef TARGET_PRINT_OPERAND
42858 #define TARGET_PRINT_OPERAND ix86_print_operand
42859 #undef TARGET_PRINT_OPERAND_ADDRESS
42860 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
42861 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
42862 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
42863 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
42864 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
42866 #undef TARGET_SCHED_INIT_GLOBAL
42867 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
42868 #undef TARGET_SCHED_ADJUST_COST
42869 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
42870 #undef TARGET_SCHED_ISSUE_RATE
42871 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
42872 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
42873 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
42874 ia32_multipass_dfa_lookahead
42876 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
42877 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
42879 #undef TARGET_MEMMODEL_CHECK
42880 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
42883 #undef TARGET_HAVE_TLS
42884 #define TARGET_HAVE_TLS true
42886 #undef TARGET_CANNOT_FORCE_CONST_MEM
42887 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
42888 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
42889 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
42891 #undef TARGET_DELEGITIMIZE_ADDRESS
42892 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
42894 #undef TARGET_MS_BITFIELD_LAYOUT_P
42895 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
42898 #undef TARGET_BINDS_LOCAL_P
42899 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
42901 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42902 #undef TARGET_BINDS_LOCAL_P
42903 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
42906 #undef TARGET_ASM_OUTPUT_MI_THUNK
42907 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
42908 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
42909 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
42911 #undef TARGET_ASM_FILE_START
42912 #define TARGET_ASM_FILE_START x86_file_start
42914 #undef TARGET_OPTION_OVERRIDE
42915 #define TARGET_OPTION_OVERRIDE ix86_option_override
42917 #undef TARGET_REGISTER_MOVE_COST
42918 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
42919 #undef TARGET_MEMORY_MOVE_COST
42920 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
42921 #undef TARGET_RTX_COSTS
42922 #define TARGET_RTX_COSTS ix86_rtx_costs
42923 #undef TARGET_ADDRESS_COST
42924 #define TARGET_ADDRESS_COST ix86_address_cost
42926 #undef TARGET_FIXED_CONDITION_CODE_REGS
42927 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
42928 #undef TARGET_CC_MODES_COMPATIBLE
42929 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
42931 #undef TARGET_MACHINE_DEPENDENT_REORG
42932 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
42934 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
42935 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
42937 #undef TARGET_BUILD_BUILTIN_VA_LIST
42938 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
42940 #undef TARGET_FOLD_BUILTIN
42941 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
42943 #undef TARGET_COMPARE_VERSION_PRIORITY
42944 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
42946 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
42947 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
42948 ix86_generate_version_dispatcher_body
42950 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
42951 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
42952 ix86_get_function_versions_dispatcher
42954 #undef TARGET_ENUM_VA_LIST_P
42955 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
42957 #undef TARGET_FN_ABI_VA_LIST
42958 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
42960 #undef TARGET_CANONICAL_VA_LIST_TYPE
42961 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
42963 #undef TARGET_EXPAND_BUILTIN_VA_START
42964 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
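
/* The four TARGET_VECTORIZE_*_COST hooks just defined implement the
   vectorizer's target cost model as a small object lifecycle: the
   middle end creates opaque cost data, feeds it one statement at a
   time, reads back the totals, and destroys it.  Roughly (a sketch of
   the caller's side, not a verbatim quote of the vectorizer; the
   variable names here are placeholders):

     void *data = targetm.vectorize.init_cost (loop);
     targetm.vectorize.add_stmt_cost (data, count, kind, stmt_info,
                                      misalign, where);
     ...
     targetm.vectorize.finish_cost (data, &prologue_cost, &body_cost,
                                    &epilogue_cost);
     targetm.vectorize.destroy_cost_data (data);

   ix86_add_stmt_cost accumulates counts scaled by
   ix86_builtin_vectorization_cost, weighting statements that sit in
   an inner loop of the loop being vectorized more heavily.  */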

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

struct gcc_target targetm = TARGET_INITIALIZER;
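
/* Every TARGET_* macro redefined above overrides one field of the
   target hook vector.  TARGET_INITIALIZER, provided by target-def.h,
   expands to a complete aggregate initializer for struct gcc_target,
   so hooks that are not redefined here keep their documented
   defaults.  Target-independent code does not call the ix86_*
   routines directly; it goes through the table, e.g. (illustrative
   call, with placeholder arguments):

     if (targetm.legitimate_address_p (mode, addr, strict))
       ...

   which, via the definitions above, lands in
   ix86_legitimate_address_p.  */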

#include "gt-i386.h"
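
/* gt-i386.h, included above, is generated by gengtype from the
   GTY(()) markers in this file; including it at the end of the file,
   after all marked declarations, registers this file's roots with the
   garbage collector and PCH machinery.  */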